* x86-64: Maintain 16-byte stack alignment @ 2017-01-10 14:33 Herbert Xu 2017-01-10 14:39 ` Herbert Xu 2017-01-10 17:30 ` Ard Biesheuvel 0 siblings, 2 replies; 54+ messages in thread From: Herbert Xu @ 2017-01-10 14:33 UTC (permalink / raw) To: Linux Kernel Mailing List, Linux Crypto Mailing List, Linus Torvalds, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel I recently applied the patch https://patchwork.kernel.org/patch/9468391/ and ended up with a boot crash when it tried to run the x86 chacha20 code. It turned out that the patch changed a manually aligned stack buffer to one that is aligned by gcc. What was happening was that gcc can stack align to any value on x86-64 except 16. The reason is that gcc assumes that the stack is always 16-byte aligned, which is not actually the case in the kernel. The x86-64 CPU actually tries to keep the stack 16-byte aligned, e.g., it'll do so when an IRQ comes in. So the reason it doesn't work in the kernel mostly comes down to the fact that the struct pt_regs which lives near the top of the stack is 168 bytes which is not a multiple of 16. This patch tries to fix this by adding an 8-byte padding at the top of the call-chain involving pt_regs so that when we call a C function later we do so with an aligned stack. The same problem probably exists on i386 too since gcc also assumes 16-byte alignment there. It's harder to fix however as the CPU doesn't help us in the IRQ case. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 05ed3d3..29d3bcb 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -59,39 +59,42 @@ /* * C ABI says these regs are callee-preserved. They aren't saved on kernel entry * unless syscall needs a complete, fully filled "struct pt_regs". + * + * Note we add 8 extra bytes at the beginning to preserve stack alignment. 
*/ -#define R15 0*8 -#define R14 1*8 -#define R13 2*8 -#define R12 3*8 -#define RBP 4*8 -#define RBX 5*8 +#define R15 1*8 +#define R14 2*8 +#define R13 3*8 +#define R12 4*8 +#define RBP 5*8 +#define RBX 6*8 /* These regs are callee-clobbered. Always saved on kernel entry. */ -#define R11 6*8 -#define R10 7*8 -#define R9 8*8 -#define R8 9*8 -#define RAX 10*8 -#define RCX 11*8 -#define RDX 12*8 -#define RSI 13*8 -#define RDI 14*8 +#define R11 7*8 +#define R10 8*8 +#define R9 9*8 +#define R8 10*8 +#define RAX 11*8 +#define RCX 12*8 +#define RDX 13*8 +#define RSI 14*8 +#define RDI 15*8 /* * On syscall entry, this is syscall#. On CPU exception, this is error code. * On hw interrupt, it's IRQ number: */ -#define ORIG_RAX 15*8 +#define ORIG_RAX 16*8 /* Return frame for iretq */ -#define RIP 16*8 -#define CS 17*8 -#define EFLAGS 18*8 -#define RSP 19*8 -#define SS 20*8 +#define RIP 17*8 +#define CS 18*8 +#define EFLAGS 19*8 +#define RSP 20*8 +#define SS 21*8 +/* Note that this excludes the 8-byte padding. 
*/ #define SIZEOF_PTREGS 21*8 .macro ALLOC_PT_GPREGS_ON_STACK - addq $-(15*8), %rsp + addq $-(16*8), %rsp .endm .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1 @@ -114,7 +117,7 @@ movq %rdi, 14*8+\offset(%rsp) .endm .macro SAVE_C_REGS offset=0 - SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1 + SAVE_C_REGS_HELPER 8+\offset, 1, 1, 1, 1 .endm .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0 SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1 @@ -130,43 +133,43 @@ .endm .macro SAVE_EXTRA_REGS offset=0 - movq %r15, 0*8+\offset(%rsp) - movq %r14, 1*8+\offset(%rsp) - movq %r13, 2*8+\offset(%rsp) - movq %r12, 3*8+\offset(%rsp) - movq %rbp, 4*8+\offset(%rsp) - movq %rbx, 5*8+\offset(%rsp) + movq %r15, 1*8+\offset(%rsp) + movq %r14, 2*8+\offset(%rsp) + movq %r13, 3*8+\offset(%rsp) + movq %r12, 4*8+\offset(%rsp) + movq %rbp, 5*8+\offset(%rsp) + movq %rbx, 6*8+\offset(%rsp) .endm .macro RESTORE_EXTRA_REGS offset=0 - movq 0*8+\offset(%rsp), %r15 - movq 1*8+\offset(%rsp), %r14 - movq 2*8+\offset(%rsp), %r13 - movq 3*8+\offset(%rsp), %r12 - movq 4*8+\offset(%rsp), %rbp - movq 5*8+\offset(%rsp), %rbx + movq 1*8+\offset(%rsp), %r15 + movq 2*8+\offset(%rsp), %r14 + movq 3*8+\offset(%rsp), %r13 + movq 4*8+\offset(%rsp), %r12 + movq 5*8+\offset(%rsp), %rbp + movq 6*8+\offset(%rsp), %rbx .endm .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 .if \rstor_r11 - movq 6*8(%rsp), %r11 + movq 7*8(%rsp), %r11 .endif .if \rstor_r8910 - movq 7*8(%rsp), %r10 - movq 8*8(%rsp), %r9 - movq 9*8(%rsp), %r8 + movq 8*8(%rsp), %r10 + movq 9*8(%rsp), %r9 + movq 10*8(%rsp), %r8 .endif .if \rstor_rax - movq 10*8(%rsp), %rax + movq 11*8(%rsp), %rax .endif .if \rstor_rcx - movq 11*8(%rsp), %rcx + movq 12*8(%rsp), %rcx .endif .if \rstor_rdx - movq 12*8(%rsp), %rdx + movq 13*8(%rsp), %rdx .endif - movq 13*8(%rsp), %rsi - movq 14*8(%rsp), %rdi + movq 14*8(%rsp), %rsi + movq 15*8(%rsp), %rdi .endm .macro RESTORE_C_REGS RESTORE_C_REGS_HELPER 1,1,1,1,1 @@ -185,7 +188,7 @@ .endm 
.macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 - subq $-(15*8+\addskip), %rsp + subq $-(16*8+\addskip), %rsp .endm .macro icebp @@ -203,11 +206,7 @@ */ .macro ENCODE_FRAME_POINTER ptregs_offset=0 #ifdef CONFIG_FRAME_POINTER - .if \ptregs_offset - leaq \ptregs_offset(%rsp), %rbp - .else - mov %rsp, %rbp - .endif + leaq 8+\ptregs_offset(%rsp), %rbp orq $0x1, %rbp #endif .endm diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 5b21970..880bbb8 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -168,7 +168,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) pushq %r9 /* pt_regs->r9 */ pushq %r10 /* pt_regs->r10 */ pushq %r11 /* pt_regs->r11 */ - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ + sub $(7*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ /* * If we need to do entry work or if we guess we'll need to do @@ -234,14 +234,14 @@ entry_SYSCALL_64_fastpath: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) SAVE_EXTRA_REGS - movq %rsp, %rdi + leaq 8(%rsp), %rdi call syscall_return_slowpath /* returns with IRQs disabled */ jmp return_from_SYSCALL_64 entry_SYSCALL64_slow_path: /* IRQs are off. */ SAVE_EXTRA_REGS - movq %rsp, %rdi + leaq 8(%rsp), %rdi call do_syscall_64 /* returns with IRQs disabled */ return_from_SYSCALL_64: @@ -342,9 +342,9 @@ ENTRY(stub_ptregs_64) * Called from fast path -- disable IRQs again, pop return address * and jump to slow path */ + popq %rax DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - popq %rax jmp entry_SYSCALL64_slow_path 1: @@ -409,13 +409,14 @@ END(__switch_to_asm) */ ENTRY(ret_from_fork) movq %rax, %rdi + subq $8, %rsp call schedule_tail /* rdi: 'prev' task parameter */ testq %rbx, %rbx /* from kernel_thread? 
*/ jnz 1f /* kernel threads are uncommon */ 2: - movq %rsp, %rdi + leaq 8(%rsp), %rdi call syscall_return_slowpath /* returns with IRQs disabled */ TRACE_IRQS_ON /* user mode is traced as IRQS on */ SWAPGS @@ -494,10 +495,12 @@ END(irq_entries_start) * a little cheaper to use a separate counter in the PDA (short of * moving irq_enter into assembly, which would be too much work) */ - movq %rsp, %rdi + movq %rsp, %rax + leaq 8(%rsp), %rdi incl PER_CPU_VAR(irq_count) cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp - pushq %rdi + sub $8, %rsp + pushq %rax /* We entered an interrupt context - irqs are off: */ TRACE_IRQS_OFF @@ -527,7 +530,7 @@ ret_from_intr: /* Interrupt came from user space */ GLOBAL(retint_user) - mov %rsp,%rdi + leaq 8(%rsp), %rdi call prepare_exit_to_usermode TRACE_IRQS_IRETQ SWAPGS @@ -774,7 +777,7 @@ ENTRY(\sym) .endif .endif - movq %rsp, %rdi /* pt_regs pointer */ + leaq 8(%rsp), %rdi /* pt_regs pointer */ .if \has_error_code movq ORIG_RAX(%rsp), %rsi /* get error code */ @@ -810,11 +813,11 @@ ENTRY(\sym) call error_entry - movq %rsp, %rdi /* pt_regs pointer */ + leaq 8(%rsp), %rdi /* pt_regs pointer */ call sync_regs - movq %rax, %rsp /* switch stack */ + leaq -8(%rax), %rsp /* switch stack */ - movq %rsp, %rdi /* pt_regs pointer */ + movq %rax, %rdi /* pt_regs pointer */ .if \has_error_code movq ORIG_RAX(%rsp), %rsi /* get error code */ @@ -895,6 +898,7 @@ ENTRY(do_softirq_own_stack) mov %rsp, %rbp incl PER_CPU_VAR(irq_count) cmove PER_CPU_VAR(irq_stack_ptr), %rsp + sub $8, %rsp push %rbp /* frame pointer backlink */ call __do_softirq leaveq @@ -924,10 +928,11 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */ * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will * see the correct pointer to the pt_regs */ - movq %rdi, %rsp /* we don't return, adjust the stack frame */ + leaq -8(%rdi), %rsp /* we don't return, adjust the stack frame */ 11: incl PER_CPU_VAR(irq_count) movq %rsp, %rbp cmovzq 
PER_CPU_VAR(irq_stack_ptr), %rsp + subq $8, %rsp pushq %rbp /* frame pointer backlink */ call xen_evtchn_do_upcall popq %rsp @@ -1264,6 +1269,7 @@ ENTRY(nmi) */ movq %rsp, %rdi + subq $8, %rsp movq $-1, %rsi call do_nmi @@ -1475,7 +1481,7 @@ end_repeat_nmi: call paranoid_entry /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ - movq %rsp, %rdi + leaq 8(%rsp), %rdi movq $-1, %rsi call do_nmi @@ -1519,7 +1525,7 @@ ENTRY(rewind_stack_do_exit) xorl %ebp, %ebp movq PER_CPU_VAR(cpu_current_top_of_stack), %rax - leaq -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%rax), %rsp + leaq -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE-8(%rax), %rsp call do_exit 1: jmp 1b diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index e1721da..7d3f1e3 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -89,6 +89,8 @@ ENTRY(entry_SYSENTER_compat) pushq $0 /* pt_regs->r13 = 0 */ pushq $0 /* pt_regs->r14 = 0 */ pushq $0 /* pt_regs->r15 = 0 */ + + subq $8, %rsp cld /* @@ -120,7 +122,7 @@ ENTRY(entry_SYSENTER_compat) */ TRACE_IRQS_OFF - movq %rsp, %rdi + leaq 8(%rsp), %rdi call do_fast_syscall_32 /* XEN PV guests always use IRET path */ ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ @@ -215,13 +217,15 @@ ENTRY(entry_SYSCALL_compat) pushq $0 /* pt_regs->r14 = 0 */ pushq $0 /* pt_regs->r15 = 0 */ + subq $8, %rsp + /* * User mode is traced as though IRQs are on, and SYSENTER * turned them off. 
*/ TRACE_IRQS_OFF - movq %rsp, %rdi + leaq 8(%rsp), %rdi call do_fast_syscall_32 /* XEN PV guests always use IRET path */ ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ @@ -324,6 +328,8 @@ ENTRY(entry_INT80_compat) pushq %r13 /* pt_regs->r13 */ pushq %r14 /* pt_regs->r14 */ pushq %r15 /* pt_regs->r15 */ + + subq $8, %rsp cld /* @@ -332,7 +338,7 @@ ENTRY(entry_INT80_compat) */ TRACE_IRQS_OFF - movq %rsp, %rdi + leaq 8(%rsp), %rdi call do_int80_syscall_32 .Lsyscall_32_done: diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index be36bf4..3c80aac 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -33,6 +33,7 @@ movq 8(%rbp), %rdi .endif + sub $8, %rsp call \func jmp .L_restore _ASM_NOKPROBE(\name) @@ -58,6 +59,7 @@ || defined(CONFIG_DEBUG_LOCK_ALLOC) \ || defined(CONFIG_PREEMPT) .L_restore: + add $8, %rsp popq %r11 popq %r10 popq %r9 diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index b467b14..d03ab72 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -384,6 +384,8 @@ early_idt_handler_common: pushq %r14 /* pt_regs->r14 */ pushq %r15 /* pt_regs->r15 */ + sub $8, %rsp + cmpq $14,%rsi /* Page fault? 
*/ jnz 10f GET_CR2_INTO(%rdi) /* Can clobber any volatile register if pv */ @@ -392,7 +394,7 @@ early_idt_handler_common: jz 20f /* All good */ 10: - movq %rsp,%rdi /* RDI = pt_regs; RSI is already trapnr */ + leaq 8(%rsp), %rdi /* RDI = pt_regs; RSI is already trapnr */ call early_fixup_exception 20: diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index bf0c6d0..2af9f81 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -590,6 +590,7 @@ asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) struct bad_iret_stack { void *error_entry_ret; + void *padding; struct pt_regs regs; }; -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply related [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-10 14:33 x86-64: Maintain 16-byte stack alignment Herbert Xu @ 2017-01-10 14:39 ` Herbert Xu 2017-01-10 17:05 ` Linus Torvalds 2017-01-10 17:30 ` Ard Biesheuvel 1 sibling, 1 reply; 54+ messages in thread From: Herbert Xu @ 2017-01-10 14:39 UTC (permalink / raw) To: Linux Kernel Mailing List, Linux Crypto Mailing List, Linus Torvalds, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Tue, Jan 10, 2017 at 10:33:40PM +0800, Herbert Xu wrote: > I recently applied the patch > > https://patchwork.kernel.org/patch/9468391/ > > and ended up with a boot crash when it tried to run the x86 chacha20 > code. It turned out that the patch changed a manually aligned > stack buffer to one that is aligned by gcc. What was happening was > that gcc can stack align to any value on x86-64 except 16. The > reason is that gcc assumes that the stack is always 16-byte aligned, > which is not actually the case in the kernel. BTW this is with Debian gcc 4.7.2 which does not allow an 8-byte stack alignment as attempted by the Makefile: $ gcc -S -O2 -mno-sse -mpreferred-stack-boundary=3 a.c a.c:1:0: error: -mpreferred-stack-boundary=3 is not between 4 and 12 $ Obviously this is not an issue if your compiler actually allows the 8-byte alignment. Cheers, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-10 14:39 ` Herbert Xu @ 2017-01-10 17:05 ` Linus Torvalds 2017-01-10 17:09 ` Andy Lutomirski ` (2 more replies) 0 siblings, 3 replies; 54+ messages in thread From: Linus Torvalds @ 2017-01-10 17:05 UTC (permalink / raw) To: Herbert Xu Cc: Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Tue, Jan 10, 2017 at 6:39 AM, Herbert Xu <herbert@gondor.apana.org.au> wrote: > > BTW this is with Debian gcc 4.7.2 which does not allow an 8-byte > stack alignment as attempted by the Makefile: I'm pretty sure we have random asm code that may not maintain a 16-byte stack alignment when it calls other code (including, in some cases, calling C code). So I'm not at all convinced that this is a good idea. We shouldn't expect 16-byte alignment to be something trustworthy. Linus ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-10 17:05 ` Linus Torvalds @ 2017-01-10 17:09 ` Andy Lutomirski 2017-01-11 3:11 ` Herbert Xu 2017-01-12 7:05 ` Herbert Xu 2 siblings, 0 replies; 54+ messages in thread From: Andy Lutomirski @ 2017-01-10 17:09 UTC (permalink / raw) To: Linus Torvalds Cc: Herbert Xu, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Tue, Jan 10, 2017 at 9:05 AM, Linus Torvalds <torvalds@linux-foundation.org> wrote: > On Tue, Jan 10, 2017 at 6:39 AM, Herbert Xu <herbert@gondor.apana.org.au> wrote: >> >> BTW this is with Debian gcc 4.7.2 which does not allow an 8-byte >> stack alignment as attempted by the Makefile: > > I'm pretty sure we have random asm code that may not maintain a > 16-byte stack alignment when it calls other code (including, in some > cases, calling C code). I suspect so. If we change this, changing pt_regs might make sense but is kind of weird. It also needs to be tested with and without frame pointers. > > So I'm not at all convinced that this is a good idea. We shouldn't > expect 16-byte alignment to be something trustworthy. > > Linus -- Andy Lutomirski AMA Capital Management, LLC ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-10 17:05 ` Linus Torvalds 2017-01-10 17:09 ` Andy Lutomirski @ 2017-01-11 3:11 ` Herbert Xu 2017-01-11 3:30 ` Linus Torvalds 2017-01-12 7:05 ` Herbert Xu 2 siblings, 1 reply; 54+ messages in thread From: Herbert Xu @ 2017-01-11 3:11 UTC (permalink / raw) To: Linus Torvalds Cc: Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Tue, Jan 10, 2017 at 09:05:28AM -0800, Linus Torvalds wrote: > On Tue, Jan 10, 2017 at 6:39 AM, Herbert Xu <herbert@gondor.apana.org.au> wrote: > > > > BTW this is with Debian gcc 4.7.2 which does not allow an 8-byte > > stack alignment as attempted by the Makefile: > > I'm pretty sure we have random asm code that may not maintain a > 16-byte stack alignment when it calls other code (including, in some > cases, calling C code). > > So I'm not at all convinced that this is a good idea. We shouldn't > expect 16-byte alignment to be something trustworthy. Well the only other alternative I see is to ban compilers which enforce 16-byte stack alignment, such as gcc 4.7.2. Or is there another way? Cheers, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-11 3:11 ` Herbert Xu @ 2017-01-11 3:30 ` Linus Torvalds 2017-01-11 4:17 ` Linus Torvalds 0 siblings, 1 reply; 54+ messages in thread From: Linus Torvalds @ 2017-01-11 3:30 UTC (permalink / raw) To: Herbert Xu Cc: Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Tue, Jan 10, 2017 at 7:11 PM, Herbert Xu <herbert@gondor.apana.org.au> wrote: > > Well the only other alternative I see is to ban compilers which > enforce 16-byte stack alignment, such as gcc 4.7.2. No, you don't have to ban the compiler - it's just a "generate overly stupid code that just uses extra instructions to likely mis-align the stack more" issue. So it's "stupid code generation" vs "buggy". What we should ban is code that assumes that stack objects can be aligned to more than word boundary. __attribute__((align)) simply doesn't work on stack objects, because the stack isn't aligned. If you really want more stack alignment, you have to generate that alignment yourself by hand (and have a bigger buffer that you do that alignment inside). So this was just simply buggy: u32 state[16] __aligned(CHACHA20_STATE_ALIGN); because you just can't do that. It's that simple. There is a reason why the code does the dance with u32 *state, state_buf[16 + (CHACHA20_STATE_ALIGN / sizeof(u32)) - 1]; state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN); rather than ask the compiler to do something invalid. Linus ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-11 3:30 ` Linus Torvalds @ 2017-01-11 4:17 ` Linus Torvalds 2017-01-11 4:35 ` Herbert Xu 0 siblings, 1 reply; 54+ messages in thread From: Linus Torvalds @ 2017-01-11 4:17 UTC (permalink / raw) To: Herbert Xu Cc: Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Tue, Jan 10, 2017 at 7:30 PM, Linus Torvalds <torvalds@linux-foundation.org> wrote: > > If you really want more stack alignment, you have to generate that > alignment yourself by hand (and have a bigger buffer that you do that > alignment inside). Side note: gcc can (and does) actually generate forced alignment using "and" instructions on %rsp rather than assuming pre-existing alignment. And that would be valid. The problem with "alignof(16)" is not that gcc couldn't generate the alignment itself, it's just the broken "it's already aligned to 16 bytes" assumption because -mpreferred-stack-boundary=3 doesn't work. You *could* try to hack around it by forcing a 32-byte alignment instead. That (I think) will make gcc generate the "and" instruction mess. And it shouldn't actually use any more memory than doing it by hand (by having twice the alignment and hand-aligning the pointer). So we *could* try to just have a really hacky rule saying that you can align stack data to 8 or 32 bytes, but *not* to 16 bytes. That said, I do think that the "don't assume stack alignment, do it by hand" may be the safer thing. Because who knows what the random rules will be on other architectures. Linus ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-11 4:17 ` Linus Torvalds @ 2017-01-11 4:35 ` Herbert Xu 2017-01-11 6:01 ` Andy Lutomirski [not found] ` <CA+55aFw+Z_ieo6DzTVB6_-TvQ0jj60s=T0mvXfqkBVFdKFPw_Q@mail.gmail.com> 0 siblings, 2 replies; 54+ messages in thread From: Herbert Xu @ 2017-01-11 4:35 UTC (permalink / raw) To: Linus Torvalds Cc: Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Tue, Jan 10, 2017 at 08:17:17PM -0800, Linus Torvalds wrote: > > That said, I do think that the "don't assume stack alignment, do it by > hand" may be the safer thing. Because who knows what the random rules > will be on other architectures. Sure we can ban the use of attribute aligned on stacks. But what about indirect uses through structures? For example, if someone does struct foo { } __attribute__ ((__aligned__(16))); int bar(...) { struct foo f; return baz(&f); } then baz will end up with an unaligned argument. The worst part is that it is not at all obvious to the person writing the function bar. Cheers, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-11 4:35 ` Herbert Xu @ 2017-01-11 6:01 ` Andy Lutomirski 2017-01-12 6:21 ` Andy Lutomirski [not found] ` <CA+55aFw+Z_ieo6DzTVB6_-TvQ0jj60s=T0mvXfqkBVFdKFPw_Q@mail.gmail.com> 1 sibling, 1 reply; 54+ messages in thread From: Andy Lutomirski @ 2017-01-11 6:01 UTC (permalink / raw) To: Herbert Xu Cc: Linus Torvalds, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Tue, Jan 10, 2017 at 8:35 PM, Herbert Xu <herbert@gondor.apana.org.au> wrote: > On Tue, Jan 10, 2017 at 08:17:17PM -0800, Linus Torvalds wrote: >> >> That said, I do think that the "don't assume stack alignment, do it by >> hand" may be the safer thing. Because who knows what the random rules >> will be on other architectures. > > Sure we can ban the use of attribute aligned on stacks. But > what about indirect uses through structures? For example, if > someone does > > struct foo { > } __attribute__ ((__aligned__(16))); > > int bar(...) > { > struct foo f; > > return baz(&f); > } > > then baz will end up with an unaligned argument. The worst part > is that it is not at all obvious to the person writing the function > bar. Linus, I'm starting to lean toward agreeing with Herbert here, except that we should consider making it conditional on having a silly GCC version. After all, the silly GCC versions are wasting space and time with alignment instructions no matter what we do, so this would just mean tweaking the asm and adding some kind of check_stack_alignment() helper to throw out a WARN_ONCE() if we miss one. The problem with making it conditional is that making pt_regs effectively live at a variable offset from %rsp is just nasty. --Andy ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-11 6:01 ` Andy Lutomirski @ 2017-01-12 6:21 ` Andy Lutomirski 2017-01-12 7:40 ` Ingo Molnar 2017-01-12 14:02 ` Josh Poimboeuf 0 siblings, 2 replies; 54+ messages in thread From: Andy Lutomirski @ 2017-01-12 6:21 UTC (permalink / raw) To: Herbert Xu, Josh Poimboeuf Cc: Linus Torvalds, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Tue, Jan 10, 2017 at 10:01 PM, Andy Lutomirski <luto@amacapital.net> wrote: > On Tue, Jan 10, 2017 at 8:35 PM, Herbert Xu <herbert@gondor.apana.org.au> wrote: >> On Tue, Jan 10, 2017 at 08:17:17PM -0800, Linus Torvalds wrote: >>> >>> That said, I do think that the "don't assume stack alignment, do it by >>> hand" may be the safer thing. Because who knows what the random rules >>> will be on other architectures. >> >> Sure we can ban the use of attribute aligned on stacks. But >> what about indirect uses through structures? For example, if >> someone does >> >> struct foo { >> } __attribute__ ((__aligned__(16))); >> >> int bar(...) >> { >> struct foo f; >> >> return baz(&f); >> } >> >> then baz will end up with an unaligned argument. The worst part >> is that it is not at all obvious to the person writing the function >> bar. > > Linus, I'm starting to lean toward agreeing with Herbert here, except > that we should consider making it conditional on having a silly GCC > version. After all, the silly GCC versions are wasting space and time > with alignment instructions no matter what we do, so this would just > mean tweaking the asm and adding some kind of check_stack_alignment() > helper to throw out a WARN_ONCE() if we miss one. The problem with > making it conditional is that making pt_regs effectively live at a > variable offset from %rsp is just nasty. So actually doing this is gross because we have calls from asm to C all over the place. But... maybe we can automate all the testing. 
Josh, how hard would it be to teach objtool to (if requested by an option) check that stack frames with statically known size preserve 16-byte stack alignment? I find it rather annoying that gcc before 4.8 malfunctions when it sees __aligned__(16) on x86_64 kernels. Sigh. --Andy ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 6:21 ` Andy Lutomirski @ 2017-01-12 7:40 ` Ingo Molnar 2017-01-12 14:02 ` Josh Poimboeuf 1 sibling, 0 replies; 54+ messages in thread From: Ingo Molnar @ 2017-01-12 7:40 UTC (permalink / raw) To: Andy Lutomirski Cc: Herbert Xu, Josh Poimboeuf, Linus Torvalds, Linux Kernel Mailing List, Linux Crypto Mailing List, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel * Andy Lutomirski <luto@amacapital.net> wrote: > I find it rather annoying that gcc before 4.8 malfunctions when it > sees __aligned__(16) on x86_64 kernels. Sigh. Ran into this when writing silly FPU in-kernel testcases a couple of months ago... Thanks, Ingo ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 6:21 ` Andy Lutomirski 2017-01-12 7:40 ` Ingo Molnar @ 2017-01-12 14:02 ` Josh Poimboeuf 2017-01-12 19:51 ` Linus Torvalds 1 sibling, 1 reply; 54+ messages in thread From: Josh Poimboeuf @ 2017-01-12 14:02 UTC (permalink / raw) To: Andy Lutomirski Cc: Herbert Xu, Linus Torvalds, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Wed, Jan 11, 2017 at 10:21:07PM -0800, Andy Lutomirski wrote: > On Tue, Jan 10, 2017 at 10:01 PM, Andy Lutomirski <luto@amacapital.net> wrote: > > On Tue, Jan 10, 2017 at 8:35 PM, Herbert Xu <herbert@gondor.apana.org.au> wrote: > >> On Tue, Jan 10, 2017 at 08:17:17PM -0800, Linus Torvalds wrote: > >>> > >>> That said, I do think that the "don't assume stack alignment, do it by > >>> hand" may be the safer thing. Because who knows what the random rules > >>> will be on other architectures. > >> > >> Sure we can ban the use of attribute aligned on stacks. But > >> what about indirect uses through structures? For example, if > >> someone does > >> > >> struct foo { > >> } __attribute__ ((__aligned__(16))); > >> > >> int bar(...) > >> { > >> struct foo f; > >> > >> return baz(&f); > >> } > >> > >> then baz will end up with an unaligned argument. The worst part > >> is that it is not at all obvious to the person writing the function > >> bar. > > > > Linus, I'm starting to lean toward agreeing with Herbert here, except > > that we should consider making it conditional on having a silly GCC > > version. After all, the silly GCC versions are wasting space and time > > with alignment instructions no matter what we do, so this would just > > mean tweaking the asm and adding some kind of check_stack_alignment() > > helper to throw out a WARN_ONCE() if we miss one. The problem with > > making it conditional is that making pt_regs effectively live at a > > variable offset from %rsp is just nasty. 
> > So actually doing this is gross because we have calls from asm to C > all over the place. But... maybe we can automate all the testing. > Josh, how hard would it be to teach objtool to (if requested by an > option) check that stack frames with statically known size preserve > 16-byte stack alignment? > > I find it rather annoying that gcc before 4.8 malfunctions when it > sees __aligned__(16) on x86_64 kernels. Sigh. Just to clarify, I think you're asking if, for versions of gcc which don't support -mpreferred-stack-boundary=3, objtool can analyze all C functions to ensure their stacks are 16-byte aligned. It's certainly possible, but I don't see how that solves the problem. The stack will still be misaligned by entry code. Or am I missing something? -- Josh ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 14:02 ` Josh Poimboeuf @ 2017-01-12 19:51 ` Linus Torvalds 2017-01-12 20:08 ` Andy Lutomirski 0 siblings, 1 reply; 54+ messages in thread From: Linus Torvalds @ 2017-01-12 19:51 UTC (permalink / raw) To: Josh Poimboeuf Cc: Andy Lutomirski, Herbert Xu, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel [-- Attachment #1: Type: text/plain, Size: 3363 bytes --] On Thu, Jan 12, 2017 at 6:02 AM, Josh Poimboeuf <jpoimboe@redhat.com> wrote: > > Just to clarify, I think you're asking if, for versions of gcc which > don't support -mpreferred-stack-boundary=3, objtool can analyze all C > functions to ensure their stacks are 16-byte aligned. > > It's certainly possible, but I don't see how that solves the problem. > The stack will still be misaligned by entry code. Or am I missing > something? I think the argument is that we *could* try to align things, if we just had some tool that actually then verified that we aren't missing anything. I'm not entirely happy with checking the generated code, though, because as Ingo says, you have a 50:50 chance of just getting it right by mistake. So I'd much rather have some static tool that checks things at a code level (ie coccinelle or sparse). Almost totally untested "sparse" patch appended. The problem with sparse, obviously, is that few enough people run it, and it gives a lot of other warnings. But maybe Herbert can test whether this would actually have caught his situation, doing something like an allmodconfig build with "C=2" to force a sparse run on everything, and redirecting the warnings to stderr. But this patch does seem to give a warning for the patch that Herbert had, and that caused problems. And in fact it seems to find a few other possible problems (most, but not all, in crypto). 
This run was with the broken chacha20 patch applied, to verify that I get a warning for that case: arch/x86/crypto/chacha20_glue.c:70:13: warning: symbol 'state' has excessive alignment (16) arch/x86/crypto/aesni-intel_glue.c:724:12: warning: symbol 'iv' has excessive alignment (16) arch/x86/crypto/aesni-intel_glue.c:803:12: warning: symbol 'iv' has excessive alignment (16) crypto/shash.c:82:12: warning: symbol 'ubuf' has excessive alignment (16) crypto/shash.c:118:12: warning: symbol 'ubuf' has excessive alignment (16) drivers/char/hw_random/via-rng.c:89:14: warning: symbol 'buf' has excessive alignment (16) net/bridge/netfilter/ebtables.c:1809:31: warning: symbol 'tinfo' has excessive alignment (64) drivers/crypto/padlock-sha.c:85:14: warning: symbol 'buf' has excessive alignment (16) drivers/crypto/padlock-sha.c:147:14: warning: symbol 'buf' has excessive alignment (16) drivers/crypto/padlock-sha.c:304:12: warning: symbol 'buf' has excessive alignment (16) drivers/crypto/padlock-sha.c:388:12: warning: symbol 'buf' has excessive alignment (16) net/openvswitch/actions.c:797:33: warning: symbol 'ovs_rt' has excessive alignment (64) drivers/net/ethernet/neterion/vxge/vxge-config.c:1006:38: warning: symbol 'vpath' has excessive alignment (64) although I think at least some of these happen to be ok. There are a few places that clearly don't care about exact alignment, and use "__attribute__((aligned))" without any specific alignment value. It's just sparse that thinks that implies 16-byte alignment (it doesn't, really - it's unspecified, and is telling gcc to use "maximum useful alignment", so who knows _what_ gcc will assume). But some of them may well be real issues - if the alignment is about correctness rather than anything else. Anyway, the advantage of this kind of source-level check is that it should really catch things regardless of "luck" wrt alignment. 
Linus [-- Attachment #2: patch.diff --] [-- Type: text/plain, Size: 887 bytes --] flow.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/flow.c b/flow.c index 7db9548..c876869 100644 --- a/flow.c +++ b/flow.c @@ -601,6 +601,20 @@ static void simplify_one_symbol(struct entrypoint *ep, struct symbol *sym) unsigned long mod; int all, stores, complex; + /* + * Warn about excessive local variable alignment. + * + * This needs to be linked up with some flag to enable + * it, and specify the alignment. The 'max_int_alignment' + * just happens to be what we want for the kernel for x86-64. + */ + mod = sym->ctype.modifiers; + if (!(mod & (MOD_NONLOCAL | MOD_STATIC))) { + unsigned int alignment = sym->ctype.alignment; + if (alignment > max_int_alignment) + warning(sym->pos, "symbol '%s' has excessive alignment (%u)", show_ident(sym->ident), alignment); + } + /* Never used as a symbol? */ pseudo = sym->pseudo; if (!pseudo) ^ permalink raw reply related [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 19:51 ` Linus Torvalds @ 2017-01-12 20:08 ` Andy Lutomirski 2017-01-12 20:15 ` Josh Poimboeuf 2017-01-13 8:36 ` Herbert Xu 0 siblings, 2 replies; 54+ messages in thread From: Andy Lutomirski @ 2017-01-12 20:08 UTC (permalink / raw) To: Linus Torvalds Cc: Josh Poimboeuf, Herbert Xu, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Thu, Jan 12, 2017 at 11:51 AM, Linus Torvalds <torvalds@linux-foundation.org> wrote: > On Thu, Jan 12, 2017 at 6:02 AM, Josh Poimboeuf <jpoimboe@redhat.com> wrote: >> >> Just to clarify, I think you're asking if, for versions of gcc which >> don't support -mpreferred-stack-boundary=3, objtool can analyze all C >> functions to ensure their stacks are 16-byte aligned. >> >> It's certainly possible, but I don't see how that solves the problem. >> The stack will still be misaligned by entry code. Or am I missing >> something? > > I think the argument is that we *could* try to align things, if we > just had some tool that actually then verified that we aren't missing > anything. > > I'm not entirely happy with checking the generated code, though, > because as Ingo says, you have a 50:50 chance of just getting it right > by mistake. So I'd much rather have some static tool that checks > things at a code level (ie coccinelle or sparse). What I meant was checking the entry code to see if it aligns stack frames, and good luck getting sparse to do that. Hmm, getting 16-byte alignment for real may actually be entirely a lost cause. After all, I think we have some inline functions that do asm volatile ("call ..."), and I don't see any credible way of forcing alignment short of generating an entirely new stack frame and aligning that. Ick. This whole situation stinks, and I wish that the gcc developers had been less daft here in the first place or that we'd noticed and gotten it fixed much longer ago. 
Can we come up with a macro like STACK_ALIGN_16 that turns into __aligned__(32) on bad gcc versions and combine that with your sparse patch? ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 20:08 ` Andy Lutomirski @ 2017-01-12 20:15 ` Josh Poimboeuf 2017-01-12 20:55 ` Josh Poimboeuf 2017-01-13 1:46 ` Andy Lutomirski 2017-01-13 8:36 ` Herbert Xu 1 sibling, 2 replies; 54+ messages in thread From: Josh Poimboeuf @ 2017-01-12 20:15 UTC (permalink / raw) To: Andy Lutomirski Cc: Linus Torvalds, Herbert Xu, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Thu, Jan 12, 2017 at 12:08:07PM -0800, Andy Lutomirski wrote: > On Thu, Jan 12, 2017 at 11:51 AM, Linus Torvalds > <torvalds@linux-foundation.org> wrote: > > On Thu, Jan 12, 2017 at 6:02 AM, Josh Poimboeuf <jpoimboe@redhat.com> wrote: > >> > >> Just to clarify, I think you're asking if, for versions of gcc which > >> don't support -mpreferred-stack-boundary=3, objtool can analyze all C > >> functions to ensure their stacks are 16-byte aligned. > >> > >> It's certainly possible, but I don't see how that solves the problem. > >> The stack will still be misaligned by entry code. Or am I missing > >> something? > > > > I think the argument is that we *could* try to align things, if we > > just had some tool that actually then verified that we aren't missing > > anything. > > > > I'm not entirely happy with checking the generated code, though, > > because as Ingo says, you have a 50:50 chance of just getting it right > > by mistake. So I'd much rather have some static tool that checks > > things at a code level (ie coccinelle or sparse). > > What I meant was checking the entry code to see if it aligns stack > frames, and good luck getting sparse to do that. Hmm, getting 16-byte > alignment for real may actually be entirely a lost cause. After all, > I think we have some inline functions that do asm volatile ("call > ..."), and I don't see any credible way of forcing alignment short of > generating an entirely new stack frame and aligning that. 
Actually we already found all such cases and fixed them by forcing a new stack frame, thanks to objtool. For example, see 55a76b59b5fe. > Ick. This > whole situation stinks, and I wish that the gcc developers had been > less daft here in the first place or that we'd noticed and gotten it > fixed much longer ago. > > Can we come up with a macro like STACK_ALIGN_16 that turns into > __aligned__(32) on bad gcc versions and combine that with your sparse > patch? -- Josh ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 20:15 ` Josh Poimboeuf @ 2017-01-12 20:55 ` Josh Poimboeuf 2017-01-12 21:40 ` Linus Torvalds 2017-01-13 1:46 ` Andy Lutomirski 1 sibling, 1 reply; 54+ messages in thread From: Josh Poimboeuf @ 2017-01-12 20:55 UTC (permalink / raw) To: Andy Lutomirski Cc: Linus Torvalds, Herbert Xu, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Thu, Jan 12, 2017 at 02:15:11PM -0600, Josh Poimboeuf wrote: > On Thu, Jan 12, 2017 at 12:08:07PM -0800, Andy Lutomirski wrote: > > On Thu, Jan 12, 2017 at 11:51 AM, Linus Torvalds > > <torvalds@linux-foundation.org> wrote: > > > On Thu, Jan 12, 2017 at 6:02 AM, Josh Poimboeuf <jpoimboe@redhat.com> wrote: > > >> > > >> Just to clarify, I think you're asking if, for versions of gcc which > > >> don't support -mpreferred-stack-boundary=3, objtool can analyze all C > > >> functions to ensure their stacks are 16-byte aligned. > > >> > > >> It's certainly possible, but I don't see how that solves the problem. > > >> The stack will still be misaligned by entry code. Or am I missing > > >> something? > > > > > > I think the argument is that we *could* try to align things, if we > > > just had some tool that actually then verified that we aren't missing > > > anything. > > > > > > I'm not entirely happy with checking the generated code, though, > > > because as Ingo says, you have a 50:50 chance of just getting it right > > > by mistake. So I'd much rather have some static tool that checks > > > things at a code level (ie coccinelle or sparse). > > > > What I meant was checking the entry code to see if it aligns stack > > frames, and good luck getting sparse to do that. Hmm, getting 16-byte > > alignment for real may actually be entirely a lost cause. 
After all, > > I think we have some inline functions that do asm volatile ("call > > ..."), and I don't see any credible way of forcing alignment short of > > generating an entirely new stack frame and aligning that. > > Actually we already found all such cases and fixed them by forcing a new > stack frame, thanks to objtool. For example, see 55a76b59b5fe. > > > Ick. This > > whole situation stinks, and I wish that the gcc developers had been > > less daft here in the first place or that we'd noticed and gotten it > > fixed much longer ago. > > > > Can we come up with a macro like STACK_ALIGN_16 that turns into > > __aligned__(32) on bad gcc versions and combine that with your sparse > > patch? This could work. Only concerns I'd have are: - Are there (or will there be in the future) any asm functions which assume a 16-byte aligned stack? (Seems unlikely. Stack alignment is common in the crypto code but they do the alignment manually.) - Who's going to run sparse all the time to catch unauthorized users of __aligned__(16)? -- Josh ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 20:55 ` Josh Poimboeuf @ 2017-01-12 21:40 ` Linus Torvalds 2017-01-13 8:38 ` Herbert Xu 0 siblings, 1 reply; 54+ messages in thread From: Linus Torvalds @ 2017-01-12 21:40 UTC (permalink / raw) To: Josh Poimboeuf Cc: Andy Lutomirski, Herbert Xu, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Thu, Jan 12, 2017 at 12:55 PM, Josh Poimboeuf <jpoimboe@redhat.com> wrote: > > - Who's going to run sparse all the time to catch unauthorized users of > __aligned__(16)? Well, considering that we apparently only have a small handful of existing users without anybody having ever run any tool at all, I don't think this is necessarily a huge problem. One of the build servers could easily add the "make C=2" case to a build test, and just grep the error reports for the 'excessive alignment' string. The zero-day build bot already does much fancier things. So I don't think it would necessarily be all that hard to get a clean build, and just say "if you need aligned stack space, you have to do it yourself by hand". That said, if we now always enable frame pointers on x86 (and it has gotten more and more difficult to avoid it), then the 16-byte alignment would be fairly natural. The 8-byte alignment mainly makes sense when the basic call sequence just adds 8 bytes, and you have functions without frames (that still call other functions). Linus ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 21:40 ` Linus Torvalds @ 2017-01-13 8:38 ` Herbert Xu 0 siblings, 0 replies; 54+ messages in thread From: Herbert Xu @ 2017-01-13 8:38 UTC (permalink / raw) To: Linus Torvalds Cc: Josh Poimboeuf, Andy Lutomirski, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Thu, Jan 12, 2017 at 01:40:54PM -0800, Linus Torvalds wrote: > > The 8-byte alignment mainly makes sense when the basic call sequence > just adds 8 bytes, and you have functions without frames (that still > call other functions). The question is does it really make sense to save those 8 bytes of padding on x86-64 when arm64 apparently also requires 16-byte stack alignment. Cheers, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 20:15 ` Josh Poimboeuf 2017-01-12 20:55 ` Josh Poimboeuf @ 2017-01-13 1:46 ` Andy Lutomirski 2017-01-13 3:11 ` Josh Poimboeuf 2017-01-13 8:39 ` Herbert Xu 1 sibling, 2 replies; 54+ messages in thread From: Andy Lutomirski @ 2017-01-13 1:46 UTC (permalink / raw) To: Josh Poimboeuf Cc: Linus Torvalds, Herbert Xu, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Thu, Jan 12, 2017 at 12:15 PM, Josh Poimboeuf <jpoimboe@redhat.com> wrote: > On Thu, Jan 12, 2017 at 12:08:07PM -0800, Andy Lutomirski wrote: >> On Thu, Jan 12, 2017 at 11:51 AM, Linus Torvalds >> <torvalds@linux-foundation.org> wrote: >> > On Thu, Jan 12, 2017 at 6:02 AM, Josh Poimboeuf <jpoimboe@redhat.com> wrote: >> >> >> >> Just to clarify, I think you're asking if, for versions of gcc which >> >> don't support -mpreferred-stack-boundary=3, objtool can analyze all C >> >> functions to ensure their stacks are 16-byte aligned. >> >> >> >> It's certainly possible, but I don't see how that solves the problem. >> >> The stack will still be misaligned by entry code. Or am I missing >> >> something? >> > >> > I think the argument is that we *could* try to align things, if we >> > just had some tool that actually then verified that we aren't missing >> > anything. >> > >> > I'm not entirely happy with checking the generated code, though, >> > because as Ingo says, you have a 50:50 chance of just getting it right >> > by mistake. So I'd much rather have some static tool that checks >> > things at a code level (ie coccinelle or sparse). >> >> What I meant was checking the entry code to see if it aligns stack >> frames, and good luck getting sparse to do that. Hmm, getting 16-byte >> alignment for real may actually be entirely a lost cause. 
After all, >> I think we have some inline functions that do asm volatile ("call >> ..."), and I don't see any credible way of forcing alignment short of >> generating an entirely new stack frame and aligning that. > > Actually we already found all such cases and fixed them by forcing a new > stack frame, thanks to objtool. For example, see 55a76b59b5fe. What I mean is: what guarantees that the stack is properly aligned for the subroutine call? gcc promises to set up a stack frame, but does it promise that rsp will be properly aligned to call a C function? ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-13 1:46 ` Andy Lutomirski @ 2017-01-13 3:11 ` Josh Poimboeuf 2017-01-13 3:23 ` Andy Lutomirski 2017-01-13 8:39 ` Herbert Xu 1 sibling, 1 reply; 54+ messages in thread From: Josh Poimboeuf @ 2017-01-13 3:11 UTC (permalink / raw) To: Andy Lutomirski Cc: Linus Torvalds, Herbert Xu, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Thu, Jan 12, 2017 at 05:46:55PM -0800, Andy Lutomirski wrote: > On Thu, Jan 12, 2017 at 12:15 PM, Josh Poimboeuf <jpoimboe@redhat.com> wrote: > > On Thu, Jan 12, 2017 at 12:08:07PM -0800, Andy Lutomirski wrote: > >> On Thu, Jan 12, 2017 at 11:51 AM, Linus Torvalds > >> <torvalds@linux-foundation.org> wrote: > >> > On Thu, Jan 12, 2017 at 6:02 AM, Josh Poimboeuf <jpoimboe@redhat.com> wrote: > >> >> > >> >> Just to clarify, I think you're asking if, for versions of gcc which > >> >> don't support -mpreferred-stack-boundary=3, objtool can analyze all C > >> >> functions to ensure their stacks are 16-byte aligned. > >> >> > >> >> It's certainly possible, but I don't see how that solves the problem. > >> >> The stack will still be misaligned by entry code. Or am I missing > >> >> something? > >> > > >> > I think the argument is that we *could* try to align things, if we > >> > just had some tool that actually then verified that we aren't missing > >> > anything. > >> > > >> > I'm not entirely happy with checking the generated code, though, > >> > because as Ingo says, you have a 50:50 chance of just getting it right > >> > by mistake. So I'd much rather have some static tool that checks > >> > things at a code level (ie coccinelle or sparse). > >> > >> What I meant was checking the entry code to see if it aligns stack > >> frames, and good luck getting sparse to do that. Hmm, getting 16-byte > >> alignment for real may actually be entirely a lost cause. 
After all, > >> I think we have some inline functions that do asm volatile ("call > >> ..."), and I don't see any credible way of forcing alignment short of > >> generating an entirely new stack frame and aligning that. > > > > Actually we already found all such cases and fixed them by forcing a new > > stack frame, thanks to objtool. For example, see 55a76b59b5fe. > > What I mean is: what guarantees that the stack is properly aligned for > the subroutine call? gcc promises to set up a stack frame, but does > it promise that rsp will be properly aligned to call a C function? Yes, I did an experiment and you're right. I had naively assumed that all stack frames would be aligned. -- Josh ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-13 3:11 ` Josh Poimboeuf @ 2017-01-13 3:23 ` Andy Lutomirski 2017-01-13 4:27 ` Josh Poimboeuf 0 siblings, 1 reply; 54+ messages in thread From: Andy Lutomirski @ 2017-01-13 3:23 UTC (permalink / raw) To: Josh Poimboeuf Cc: Linus Torvalds, Herbert Xu, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Thu, Jan 12, 2017 at 7:11 PM, Josh Poimboeuf <jpoimboe@redhat.com> wrote: > On Thu, Jan 12, 2017 at 05:46:55PM -0800, Andy Lutomirski wrote: >> On Thu, Jan 12, 2017 at 12:15 PM, Josh Poimboeuf <jpoimboe@redhat.com> wrote: >> > On Thu, Jan 12, 2017 at 12:08:07PM -0800, Andy Lutomirski wrote: >> >> On Thu, Jan 12, 2017 at 11:51 AM, Linus Torvalds >> >> <torvalds@linux-foundation.org> wrote: >> >> > On Thu, Jan 12, 2017 at 6:02 AM, Josh Poimboeuf <jpoimboe@redhat.com> wrote: >> >> >> >> >> >> Just to clarify, I think you're asking if, for versions of gcc which >> >> >> don't support -mpreferred-stack-boundary=3, objtool can analyze all C >> >> >> functions to ensure their stacks are 16-byte aligned. >> >> >> >> >> >> It's certainly possible, but I don't see how that solves the problem. >> >> >> The stack will still be misaligned by entry code. Or am I missing >> >> >> something? >> >> > >> >> > I think the argument is that we *could* try to align things, if we >> >> > just had some tool that actually then verified that we aren't missing >> >> > anything. >> >> > >> >> > I'm not entirely happy with checking the generated code, though, >> >> > because as Ingo says, you have a 50:50 chance of just getting it right >> >> > by mistake. So I'd much rather have some static tool that checks >> >> > things at a code level (ie coccinelle or sparse). >> >> >> >> What I meant was checking the entry code to see if it aligns stack >> >> frames, and good luck getting sparse to do that. 
Hmm, getting 16-byte >> >> alignment for real may actually be entirely a lost cause. After all, >> >> I think we have some inline functions that do asm volatile ("call >> >> ..."), and I don't see any credible way of forcing alignment short of >> >> generating an entirely new stack frame and aligning that. >> > >> > Actually we already found all such cases and fixed them by forcing a new >> > stack frame, thanks to objtool. For example, see 55a76b59b5fe. >> >> What I mean is: what guarantees that the stack is properly aligned for >> the subroutine call? gcc promises to set up a stack frame, but does >> it promise that rsp will be properly aligned to call a C function? > > Yes, I did an experiment and you're right. I had naively assumed that > all stack frames would be aligned. Just to check: did you do your experiment with -mpreferred-stack-boundary=4? --Andy ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-13 3:23 ` Andy Lutomirski @ 2017-01-13 4:27 ` Josh Poimboeuf [not found] ` <CA+55aFzRrSwGxxfZk-RUEnsz=xhcSmOwE1CenfCPBWtsS9MwDw@mail.gmail.com> 0 siblings, 1 reply; 54+ messages in thread From: Josh Poimboeuf @ 2017-01-13 4:27 UTC (permalink / raw) To: Andy Lutomirski Cc: Linus Torvalds, Herbert Xu, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Thu, Jan 12, 2017 at 07:23:18PM -0800, Andy Lutomirski wrote: > On Thu, Jan 12, 2017 at 7:11 PM, Josh Poimboeuf <jpoimboe@redhat.com> wrote: > > On Thu, Jan 12, 2017 at 05:46:55PM -0800, Andy Lutomirski wrote: > >> On Thu, Jan 12, 2017 at 12:15 PM, Josh Poimboeuf <jpoimboe@redhat.com> wrote: > >> > On Thu, Jan 12, 2017 at 12:08:07PM -0800, Andy Lutomirski wrote: > >> >> On Thu, Jan 12, 2017 at 11:51 AM, Linus Torvalds > >> >> <torvalds@linux-foundation.org> wrote: > >> >> > On Thu, Jan 12, 2017 at 6:02 AM, Josh Poimboeuf <jpoimboe@redhat.com> wrote: > >> >> >> > >> >> >> Just to clarify, I think you're asking if, for versions of gcc which > >> >> >> don't support -mpreferred-stack-boundary=3, objtool can analyze all C > >> >> >> functions to ensure their stacks are 16-byte aligned. > >> >> >> > >> >> >> It's certainly possible, but I don't see how that solves the problem. > >> >> >> The stack will still be misaligned by entry code. Or am I missing > >> >> >> something? > >> >> > > >> >> > I think the argument is that we *could* try to align things, if we > >> >> > just had some tool that actually then verified that we aren't missing > >> >> > anything. > >> >> > > >> >> > I'm not entirely happy with checking the generated code, though, > >> >> > because as Ingo says, you have a 50:50 chance of just getting it right > >> >> > by mistake. So I'd much rather have some static tool that checks > >> >> > things at a code level (ie coccinelle or sparse). 
> >> >> > >> >> What I meant was checking the entry code to see if it aligns stack > >> >> frames, and good luck getting sparse to do that. Hmm, getting 16-byte > >> >> alignment for real may actually be entirely a lost cause. After all, > >> >> I think we have some inline functions that do asm volatile ("call > >> >> ..."), and I don't see any credible way of forcing alignment short of > >> >> generating an entirely new stack frame and aligning that. > >> > > >> > Actually we already found all such cases and fixed them by forcing a new > >> > stack frame, thanks to objtool. For example, see 55a76b59b5fe. > >> > >> What I mean is: what guarantees that the stack is properly aligned for > >> the subroutine call? gcc promises to set up a stack frame, but does > >> it promise that rsp will be properly aligned to call a C function? > > > > Yes, I did an experiment and you're right. I had naively assumed that > > all stack frames would be aligned. > > Just to check: did you do your experiment with -mpreferred-stack-boundary=4? Yes, but it's too late for me to be doing hard stuff and I think my first experiment was bogus. I didn't use all the other kernel-specific gcc options. I tried again with all the kernel gcc options, except with -mpreferred-stack-boundary=4 instead of 3, and actually came up with the opposite conclusion. I used the following code: void otherfunc(void); static inline void bar(long *f) { asm volatile("call otherfunc" : : "m" (f) : ); } void foo(void) { long buf[3] = {0, 0, 0}; bar(buf); } The stack frame was always 16-byte aligned regardless of whether the buf array size was even or odd. So my half-asleep brain is telling me that my original assumption was right. -- Josh ^ permalink raw reply [flat|nested] 54+ messages in thread
[parent not found: <CA+55aFzRrSwGxxfZk-RUEnsz=xhcSmOwE1CenfCPBWtsS9MwDw@mail.gmail.com>]
* Re: x86-64: Maintain 16-byte stack alignment [not found] ` <CA+55aFzRrSwGxxfZk-RUEnsz=xhcSmOwE1CenfCPBWtsS9MwDw@mail.gmail.com> @ 2017-01-13 5:07 ` Josh Poimboeuf 2017-01-13 8:43 ` Herbert Xu 2017-01-13 8:42 ` Herbert Xu 1 sibling, 1 reply; 54+ messages in thread From: Josh Poimboeuf @ 2017-01-13 5:07 UTC (permalink / raw) To: Linus Torvalds Cc: Linux Crypto Mailing List, Thomas Gleixner, Herbert Xu, Andy Lutomirski, Ingo Molnar, Andy Lutomirski, Linux Kernel Mailing List, Ard Biesheuvel On Thu, Jan 12, 2017 at 08:37:18PM -0800, Linus Torvalds wrote: > On Jan 12, 2017 8:28 PM, "Josh Poimboeuf" <jpoimboe@redhat.com> wrote: > > > The stack frame was always 16-byte aligned regardless of whether the > buf array size was even or odd. > > > Including with -fomit-frame-pointer? > > With frame pointers, stack frames really are naturally 16 bytes, and then > keeping the frame 16-byte aligned is just a matter of making any extra > frame allocations or push/pop sequences that you do also be a multiple of > 16 bytes. > > But *without* frame pointers, the"native" frame size is just 8 bytes, and a > function that doesn't need any other local storage and then calls another > function (think various trivial wrapper functions that just add an argument > and then munge the return value) would thus naturally cause the frame to > become misaligned. > > So then the compiler actually needs to start adding useless instructions > just to keep the stack 16-byte aligned. Disabling frame pointers didn't seem to help, but I finally got it to misalign with a different test case. I think it had been aligning the array, so instead I made it push a register. 
void otherfunc(void); static inline void bar(int f) { register void *__sp asm(_ASM_SP); asm volatile("call otherfunc" : "+r" (__sp) : "b"(f)); } void foo(void) { bar(5); } 00000000000020f0 <foo>: 20f0: 55 push %rbp 20f1: 48 89 e5 mov %rsp,%rbp 20f4: 53 push %rbx 20f5: bb 05 00 00 00 mov $0x5,%ebx 20fa: e8 00 00 00 00 callq 20ff <foo+0xf> 20fb: R_X86_64_PC32 otherfunc-0x4 20ff: 5b pop %rbx 2100: 5d pop %rbp 2101: c3 retq 2102: 0f 1f 40 00 nopl 0x0(%rax) 2106: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 210d: 00 00 00 -- Josh ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-13 5:07 ` Josh Poimboeuf @ 2017-01-13 8:43 ` Herbert Xu 0 siblings, 0 replies; 54+ messages in thread From: Herbert Xu @ 2017-01-13 8:43 UTC (permalink / raw) To: Josh Poimboeuf Cc: Linus Torvalds, Linux Crypto Mailing List, Thomas Gleixner, Andy Lutomirski, Ingo Molnar, Andy Lutomirski, Linux Kernel Mailing List, Ard Biesheuvel On Thu, Jan 12, 2017 at 11:07:09PM -0600, Josh Poimboeuf wrote: > > Disabling frame pointers didn't seem to help, but I finally got it to > misalign with a different test case. I think it had been aligning the > array, so instead I made it push a register. Right. If you start manipulating the stack directly then all bets are off. Cheers, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment [not found] ` <CA+55aFzRrSwGxxfZk-RUEnsz=xhcSmOwE1CenfCPBWtsS9MwDw@mail.gmail.com> 2017-01-13 5:07 ` Josh Poimboeuf @ 2017-01-13 8:42 ` Herbert Xu 1 sibling, 0 replies; 54+ messages in thread From: Herbert Xu @ 2017-01-13 8:42 UTC (permalink / raw) To: Linus Torvalds Cc: Josh Poimboeuf, Linux Crypto Mailing List, Thomas Gleixner, Andy Lutomirski, Ingo Molnar, Andy Lutomirski, Linux Kernel Mailing List, Ard Biesheuvel On Thu, Jan 12, 2017 at 08:37:18PM -0800, Linus Torvalds wrote: > > So then the compiler actually needs to start adding useless instructions > just to keep the stack 16-byte aligned. Which it does. Of course most of the time no extra instructions are required because there are stack variables, so it's just matter of adding 8 to the value you're subtracting from rsp. But it is probably why gcc assumes that the stack is 16-byte aligned which triggered my original crash. Here is an example from the function that was involved in the crash, without frame pointers: 00000000000001b0 <chacha20_simd>: 1b0: 41 54 push %r12 1b2: 55 push %rbp 1b3: 48 81 ec f8 00 00 00 sub $0xf8,%rsp Cheers, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-13 1:46 ` Andy Lutomirski 2017-01-13 3:11 ` Josh Poimboeuf @ 2017-01-13 8:39 ` Herbert Xu 1 sibling, 0 replies; 54+ messages in thread From: Herbert Xu @ 2017-01-13 8:39 UTC (permalink / raw) To: Andy Lutomirski Cc: Josh Poimboeuf, Linus Torvalds, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Thu, Jan 12, 2017 at 05:46:55PM -0800, Andy Lutomirski wrote: > > What I mean is: what guarantees that the stack is properly aligned for > the subroutine call? gcc promises to set up a stack frame, but does > it promise that rsp will be properly aligned to call a C function? Yes, as long as you don't go behind its back and start directly pushing things onto the stack with inline asm. Cheers, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 20:08 ` Andy Lutomirski 2017-01-12 20:15 ` Josh Poimboeuf @ 2017-01-13 8:36 ` Herbert Xu 2017-01-13 13:07 ` Josh Poimboeuf 1 sibling, 1 reply; 54+ messages in thread From: Herbert Xu @ 2017-01-13 8:36 UTC (permalink / raw) To: Andy Lutomirski Cc: Linus Torvalds, Josh Poimboeuf, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Thu, Jan 12, 2017 at 12:08:07PM -0800, Andy Lutomirski wrote: > > I think we have some inline functions that do asm volatile ("call > ..."), and I don't see any credible way of forcing alignment short of > generating an entirely new stack frame and aligning that. Ick. This A straight asm call from C should always work because gcc keeps the stack aligned in the prologue. The only problem with inline assembly is when you start pushing things onto the stack directly. Cheers, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-13 8:36 ` Herbert Xu @ 2017-01-13 13:07 ` Josh Poimboeuf 0 siblings, 0 replies; 54+ messages in thread From: Josh Poimboeuf @ 2017-01-13 13:07 UTC (permalink / raw) To: Herbert Xu Cc: Andy Lutomirski, Linus Torvalds, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Fri, Jan 13, 2017 at 04:36:48PM +0800, Herbert Xu wrote: > On Thu, Jan 12, 2017 at 12:08:07PM -0800, Andy Lutomirski wrote: > > > > I think we have some inline functions that do asm volatile ("call > > ..."), and I don't see any credible way of forcing alignment short of > > generating an entirely new stack frame and aligning that. Ick. This > > A straight asm call from C should always work because gcc keeps > the stack aligned in the prologue. > > The only problem with inline assembly is when you start pushing > things onto the stack directly. I tried another approach. I rebuilt the kernel with -mpreferred-stack-boundary=4 and used awk (poor man's objtool) to find all leaf functions with misaligned stacks. objdump -d ~/k/vmlinux | awk '/>:/ { f=$2; call=0; push=0 } /fentry/ { next } /callq/ { call=1 } /push/ { push=!push } /sub.*8,%rsp/ { push=!push } /^$/ && call == 0 && push == 0 { print f }' It found a lot of functions. 
Here's one of them: ffffffff814ab450 <mpihelp_add_n>: ffffffff814ab450: 55 push %rbp ffffffff814ab451: f7 d9 neg %ecx ffffffff814ab453: 31 c0 xor %eax,%eax ffffffff814ab455: 4c 63 c1 movslq %ecx,%r8 ffffffff814ab458: 48 89 e5 mov %rsp,%rbp ffffffff814ab45b: 53 push %rbx ffffffff814ab45c: 4a 8d 1c c5 00 00 00 lea 0x0(,%r8,8),%rbx ffffffff814ab463: 00 ffffffff814ab464: eb 03 jmp ffffffff814ab469 <mpihelp_add_n+0x19> ffffffff814ab466: 4c 63 c1 movslq %ecx,%r8 ffffffff814ab469: 49 c1 e0 03 shl $0x3,%r8 ffffffff814ab46d: 45 31 c9 xor %r9d,%r9d ffffffff814ab470: 49 29 d8 sub %rbx,%r8 ffffffff814ab473: 4a 03 04 02 add (%rdx,%r8,1),%rax ffffffff814ab477: 41 0f 92 c1 setb %r9b ffffffff814ab47b: 4a 03 04 06 add (%rsi,%r8,1),%rax ffffffff814ab47f: 41 0f 92 c2 setb %r10b ffffffff814ab483: 49 89 c3 mov %rax,%r11 ffffffff814ab486: 83 c1 01 add $0x1,%ecx ffffffff814ab489: 45 0f b6 d2 movzbl %r10b,%r10d ffffffff814ab48d: 4e 89 1c 07 mov %r11,(%rdi,%r8,1) ffffffff814ab491: 4b 8d 04 0a lea (%r10,%r9,1),%rax ffffffff814ab495: 75 cf jne ffffffff814ab466 <mpihelp_add_n+0x16> ffffffff814ab497: 5b pop %rbx ffffffff814ab498: 5d pop %rbp ffffffff814ab499: c3 retq ffffffff814ab49a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) That's a leaf function which, as far as I can tell, doesn't use any inline asm, but its prologue produces a misaligned stack. I added inline asm with a call instruction and no operands or clobbers, and got the same result. So Andy's theory seems to be correct. As long as we allow calls from inline asm, we can't rely on aligned stacks. -- Josh ^ permalink raw reply [flat|nested] 54+ messages in thread
[parent not found: <CA+55aFw+Z_ieo6DzTVB6_-TvQ0jj60s=T0mvXfqkBVFdKFPw_Q@mail.gmail.com>]
* Re: x86-64: Maintain 16-byte stack alignment [not found] ` <CA+55aFw+Z_ieo6DzTVB6_-TvQ0jj60s=T0mvXfqkBVFdKFPw_Q@mail.gmail.com> @ 2017-01-11 8:06 ` Ard Biesheuvel 2017-01-11 8:09 ` Herbert Xu 0 siblings, 1 reply; 54+ messages in thread From: Ard Biesheuvel @ 2017-01-11 8:06 UTC (permalink / raw) To: Linus Torvalds Cc: Herbert Xu, Andrew Lutomirski, Linux Crypto Mailing List, Thomas Gleixner, Ingo Molnar, Linux Kernel Mailing List On 11 January 2017 at 06:53, Linus Torvalds <torvalds@linux-foundation.org> wrote: > > > On Jan 10, 2017 8:36 PM, "Herbert Xu" <herbert@gondor.apana.org.au> wrote: > > > Sure we can ban the use of attribute aligned on stacks. But > what about indirect uses through structures? > > > It should be pretty trivial to add a sparse warning for that, though. > Couldn't we update the __aligned(x) macro to emit 32 if arch == x86 and x == 16? All other cases should work just fine afaict ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-11 8:06 ` Ard Biesheuvel @ 2017-01-11 8:09 ` Herbert Xu 2017-01-11 18:20 ` Andy Lutomirski 0 siblings, 1 reply; 54+ messages in thread From: Herbert Xu @ 2017-01-11 8:09 UTC (permalink / raw) To: Ard Biesheuvel Cc: Linus Torvalds, Andrew Lutomirski, Linux Crypto Mailing List, Thomas Gleixner, Ingo Molnar, Linux Kernel Mailing List On Wed, Jan 11, 2017 at 08:06:54AM +0000, Ard Biesheuvel wrote: > > Couldn't we update the __aligned(x) macro to emit 32 if arch == x86 > and x == 16? All other cases should work just fine afaict Not everyone uses that macro. You'd also need to add some checks to stop people from using the gcc __attribute__ directly. Cheers, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-11 8:09 ` Herbert Xu @ 2017-01-11 18:20 ` Andy Lutomirski 0 siblings, 0 replies; 54+ messages in thread From: Andy Lutomirski @ 2017-01-11 18:20 UTC (permalink / raw) To: Herbert Xu Cc: Ard Biesheuvel, Linus Torvalds, Andrew Lutomirski, Linux Crypto Mailing List, Thomas Gleixner, Ingo Molnar, Linux Kernel Mailing List On Wed, Jan 11, 2017 at 12:09 AM, Herbert Xu <herbert@gondor.apana.org.au> wrote: > On Wed, Jan 11, 2017 at 08:06:54AM +0000, Ard Biesheuvel wrote: >> >> Couldn't we update the __aligned(x) macro to emit 32 if arch == x86 >> and x == 16? All other cases should work just fine afaict > > Not everyone uses that macro. You'd also need to add some checks > to stop people from using the gcc __attribute__ directly. > You'd also have to audit things to make sure that __aligned__(16) isn't being used for non-stack purposes. After all, __aligned__(16) in static data is fine, and it's also fine as a promise to GCC that some object is 16-byte aligned. --Andy ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-10 17:05 ` Linus Torvalds 2017-01-10 17:09 ` Andy Lutomirski 2017-01-11 3:11 ` Herbert Xu @ 2017-01-12 7:05 ` Herbert Xu 2017-01-12 7:46 ` Ingo Molnar 2017-01-12 7:51 ` Andy Lutomirski 2 siblings, 2 replies; 54+ messages in thread From: Herbert Xu @ 2017-01-12 7:05 UTC (permalink / raw) To: Linus Torvalds Cc: Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Tue, Jan 10, 2017 at 09:05:28AM -0800, Linus Torvalds wrote: > > I'm pretty sure we have random asm code that may not maintain a > 16-byte stack alignment when it calls other code (including, in some > cases, calling C code). > > So I'm not at all convinced that this is a good idea. We shouldn't > expect 16-byte alignment to be something trustworthy. So what if we audited all the x86 assembly code to fix this? Would it then be acceptable to do a 16-byte aligned stack? On the face of it it doesn't seem to be a huge amount of code assuming they mostly live under arch/x86. Cheers, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 7:05 ` Herbert Xu @ 2017-01-12 7:46 ` Ingo Molnar 2017-01-12 14:49 ` Josh Poimboeuf 2017-01-12 7:51 ` Andy Lutomirski 1 sibling, 1 reply; 54+ messages in thread From: Ingo Molnar @ 2017-01-12 7:46 UTC (permalink / raw) To: Herbert Xu, Josh Poimboeuf Cc: Linus Torvalds, Linux Kernel Mailing List, Linux Crypto Mailing List, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel * Herbert Xu <herbert@gondor.apana.org.au> wrote: > On Tue, Jan 10, 2017 at 09:05:28AM -0800, Linus Torvalds wrote: > > > > I'm pretty sure we have random asm code that may not maintain a > > 16-byte stack alignment when it calls other code (including, in some > > cases, calling C code). > > > > So I'm not at all convinced that this is a good idea. We shouldn't > > expect 16-byte alignment to be something trustworthy. > > So what if we audited all the x86 assembly code to fix this? Would > it then be acceptable to do a 16-byte aligned stack? Audits for small but deadly details that aren't checked automatically by tooling would inevitably bitrot again - and in this particular case there's a 50% chance that a new, buggy change would test out to be 'fine' on a kernel developer's own box - and break on different configs, different hw or with unrelated (and innocent) kernel changes, sometime later - spreading the pain unnecessarily. So my feeling is that we really need improved tooling for this (and yes, the GCC toolchain should have handled this correctly). But fortunately we have related tooling in the kernel: could objtool handle this? My secret hope was always that objtool would grow into a kind of life insurance against toolchain bogosities (which is a must for things like livepatching or a DWARF unwinder - but I digress). Thanks, Ingo ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 7:46 ` Ingo Molnar @ 2017-01-12 14:49 ` Josh Poimboeuf 0 siblings, 0 replies; 54+ messages in thread From: Josh Poimboeuf @ 2017-01-12 14:49 UTC (permalink / raw) To: Ingo Molnar Cc: Herbert Xu, Linus Torvalds, Linux Kernel Mailing List, Linux Crypto Mailing List, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Thu, Jan 12, 2017 at 08:46:01AM +0100, Ingo Molnar wrote: > > * Herbert Xu <herbert@gondor.apana.org.au> wrote: > > > On Tue, Jan 10, 2017 at 09:05:28AM -0800, Linus Torvalds wrote: > > > > > > I'm pretty sure we have random asm code that may not maintain a > > > 16-byte stack alignment when it calls other code (including, in some > > > cases, calling C code). > > > > > > So I'm not at all convinced that this is a good idea. We shouldn't > > > expect 16-byte alignment to be something trustworthy. > > > > So what if we audited all the x86 assembly code to fix this? Would > > it then be acceptable to do a 16-byte aligned stack? > > Audits for small but deadly details that isn't checked automatically by tooling > would inevitably bitrot again - and in this particular case there's a 50% chance > that a new, buggy change would test out to be 'fine' on a kernel developer's own > box - and break on different configs, different hw or with unrelated (and > innocent) kernel changes, sometime later - spreading the pain unnecessarily. > > So my feeling is that we really need improved tooling for this (and yes, the GCC > toolchain should have handled this correctly). > > But fortunately we have related tooling in the kernel: could objtool handle this? > My secret hope was always that objtool would grow into a kind of life insurance > against toolchain bogosities (which is a must for things like livepatching or a > DWARF unwinder - but I digress). Are we talking about entry code, or other asm code? Because objtool audits *most* asm code, but entry code is way too specialized for objtool to understand. 
(I do have a pending objtool rewrite which would make it very easy to ensure 16-byte stack alignment. But again, objtool can only understand callable C or asm functions, not entry code.) Another approach would be to solve this problem with unwinder warnings, *if* there's enough test coverage. I recently made some changes to try to standardize the "end" of the stack, so that the stack pointer is always a certain value before calling into C code. I also added some warnings to the unwinder to ensure that it always reaches that point on the stack. So if the "end" of the stack were adjusted by a word by adding padding to pt_regs, the unwinder warnings could help preserve that. We could take that a step further by adding an unwinder check to ensure that *every* frame is 16-byte aligned if -mpreferred-stack-boundary=3 isn't used. Yet another step would be to add a debug feature which does stack sanity checking from a periodic NMI, to flush out these unwinder warnings. (Though I've found that current 0-day and fuzzing efforts, combined with lockdep and perf's frequent unwinder usage, are already doing a great job at flushing out unwinder warnings.) The only question is if there would be enough test coverage, particularly with those versions of gcc which don't have -mpreferred-stack-boundary=3. -- Josh ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 7:05 ` Herbert Xu 2017-01-12 7:46 ` Ingo Molnar @ 2017-01-12 7:51 ` Andy Lutomirski 2017-01-12 8:04 ` Herbert Xu 2017-01-12 15:03 ` Josh Poimboeuf 1 sibling, 2 replies; 54+ messages in thread From: Andy Lutomirski @ 2017-01-12 7:51 UTC (permalink / raw) To: Herbert Xu Cc: Linus Torvalds, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Wed, Jan 11, 2017 at 11:05 PM, Herbert Xu <herbert@gondor.apana.org.au> wrote: > On Tue, Jan 10, 2017 at 09:05:28AM -0800, Linus Torvalds wrote: >> >> I'm pretty sure we have random asm code that may not maintain a >> 16-byte stack alignment when it calls other code (including, in some >> cases, calling C code). >> >> So I'm not at all convinced that this is a good idea. We shouldn't >> expect 16-byte alignment to be something trustworthy. > > So what if we audited all the x86 assembly code to fix this? Would > it then be acceptable to do a 16-byte aligned stack? > > On the face of it it doesn't seem to be a huge amount of code > assuming they mostly live under arch/x86. The problem is that we have nasties like TRACE_IRQS_OFF. Performance doesn't really matter for these macros, so we could probably rig up a helper to forcibly align the stack there. Maybe FRAME_BEGIN_FORCE_ALIGN? I also think I'd rather not modify pt_regs. We should just fix the small number of code paths that create a pt_regs and then call into C code to align the stack. But if we can't do this with automatic verification, then I'm not sure I want to do it at all. The asm is already more precarious than I'd like, and having a code path that is misaligned is asking for obscure bugs down the road. --Andy ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 7:51 ` Andy Lutomirski @ 2017-01-12 8:04 ` Herbert Xu 2017-01-12 8:18 ` Ingo Molnar 2017-01-12 15:03 ` Josh Poimboeuf 1 sibling, 1 reply; 54+ messages in thread From: Herbert Xu @ 2017-01-12 8:04 UTC (permalink / raw) To: Andy Lutomirski Cc: Linus Torvalds, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Wed, Jan 11, 2017 at 11:51:10PM -0800, Andy Lutomirski wrote: > > The problem is that we have nasties like TRACE_IRQS_OFF. Performance I don't understand. What's the issue with TRACE_IRQS_OFF? It should be treated as any other function call. That is, enter it with an aligned stack, and the TRACE_IRQS_OFF code itself should ensure the stack stays aligned before it calls down into C. > But if we can't do this with automatic verification, then I'm not sure > I want to do it at all. The asm is already more precarious than I'd > like, and having a code path that is misaligned is asking for obscure > bugs down the road. I understand the need for automated checks at this point in time. But longer term this is just part of the calling ABI. After all, we don't add checks everywhere to ensure people preserve rbx. Cheers, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 8:04 ` Herbert Xu @ 2017-01-12 8:18 ` Ingo Molnar 0 siblings, 0 replies; 54+ messages in thread From: Ingo Molnar @ 2017-01-12 8:18 UTC (permalink / raw) To: Herbert Xu Cc: Andy Lutomirski, Linus Torvalds, Linux Kernel Mailing List, Linux Crypto Mailing List, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel * Herbert Xu <herbert@gondor.apana.org.au> wrote: > > But if we can't do this with automatic verification, then I'm not sure > > I want to do it at all. The asm is already more precarious than I'd > > like, and having a code path that is misaligned is asking for obscure > > bugs down the road. > > I understand the need for automated checks at this point in time. > But longer term this is just part of the calling ABI. After all, > we don't add checks everywhere to ensure people preserve rbx. The intelligent and responsible way to introduce such post facto ABI changes is via a smarter assembler: which would detect the obvious cases where assembly code generates a misaligned stack, at build time. Assembly code can obviously still mess up in a hard to detect fashion if it tries - but that's OK, as most assembly code doesn't try to go outside regular stack allocation patterns. Such a static check is relatively straightforward to do in assembly tooling - and perhaps objtool could do this too, as it already tracks the instructions that change the stack offset. ( And yes, this is what the GCC guys should have done, instead of sloppily introducing such silent breakages and making the whole application landscape less robust ... ) Thanks, Ingo ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 7:51 ` Andy Lutomirski 2017-01-12 8:04 ` Herbert Xu @ 2017-01-12 15:03 ` Josh Poimboeuf 2017-01-12 15:06 ` Herbert Xu 2017-01-12 15:10 ` Josh Poimboeuf 1 sibling, 2 replies; 54+ messages in thread From: Josh Poimboeuf @ 2017-01-12 15:03 UTC (permalink / raw) To: Andy Lutomirski Cc: Herbert Xu, Linus Torvalds, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Wed, Jan 11, 2017 at 11:51:10PM -0800, Andy Lutomirski wrote: > On Wed, Jan 11, 2017 at 11:05 PM, Herbert Xu > <herbert@gondor.apana.org.au> wrote: > > On Tue, Jan 10, 2017 at 09:05:28AM -0800, Linus Torvalds wrote: > >> > >> I'm pretty sure we have random asm code that may not maintain a > >> 16-byte stack alignment when it calls other code (including, in some > >> cases, calling C code). > >> > >> So I'm not at all convinced that this is a good idea. We shouldn't > >> expect 16-byte alignment to be something trustworthy. > > > > So what if we audited all the x86 assembly code to fix this? Would > > it then be acceptable to do a 16-byte aligned stack? > > > > On the face of it it doesn't seem to be a huge amount of code > > assuming they mostly live under arch/x86. > > The problem is that we have nasties like TRACE_IRQS_OFF. Performance > doesn't really matter for these macros, so we could probably rig up a > helper for forcibly align the stack there. Maybe > FRAME_BEGIN_FORCE_ALIGN? I also think I'd rather not to modify > pt_regs. We should just fix the small number of code paths that > create a pt_regs and then call into C code to align the stack. > > But if we can't do this with automatic verification, then I'm not sure > I want to do it at all. The asm is already more precarious than I'd > like, and having a code path that is misaligned is asking for obscure > bugs down the road. For the entry code, could we just replace all calls with CALL_ALIGNED? 
That might be less intrusive than trying to adjust all the pt_regs accesses. Then to ensure that nobody ever uses 'call' directly: '#define call please-use-CALL-ALIGNED-instead-of-call' I think that would be possible if CALL_ALIGNED were a ".macro". -- Josh ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 15:03 ` Josh Poimboeuf @ 2017-01-12 15:06 ` Herbert Xu 2017-01-12 15:18 ` Josh Poimboeuf 2017-01-12 15:10 ` Josh Poimboeuf 1 sibling, 1 reply; 54+ messages in thread From: Herbert Xu @ 2017-01-12 15:06 UTC (permalink / raw) To: Josh Poimboeuf Cc: Andy Lutomirski, Linus Torvalds, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Thu, Jan 12, 2017 at 09:03:18AM -0600, Josh Poimboeuf wrote: > > For the entry code, could we just replace all calls with CALL_ALIGNED? > That might be less intrusive than trying to adjust all the pt_regs > accesses. > > Then to ensure that nobody ever uses 'call' directly: > > '#define call please-use-CALL-ALIGNED-instead-of-call' > > I think that would be possible if CALL_ALIGNED were a ".macro". The trouble with that is that you have got things like TRACE_IRQS_OFF which are also used outside of the entry code. Cheers, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 15:06 ` Herbert Xu @ 2017-01-12 15:18 ` Josh Poimboeuf 0 siblings, 0 replies; 54+ messages in thread From: Josh Poimboeuf @ 2017-01-12 15:18 UTC (permalink / raw) To: Herbert Xu Cc: Andy Lutomirski, Linus Torvalds, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Thu, Jan 12, 2017 at 11:06:50PM +0800, Herbert Xu wrote: > On Thu, Jan 12, 2017 at 09:03:18AM -0600, Josh Poimboeuf wrote: > > > > For the entry code, could we just replace all calls with CALL_ALIGNED? > > That might be less intrusive than trying to adjust all the pt_regs > > accesses. > > > > Then to ensure that nobody ever uses 'call' directly: > > > > '#define call please-use-CALL-ALIGNED-instead-of-call' > > > > I think that would be possible if CALL_ALIGNED were a ".macro". > > The trouble with that is that you have got things like TRACE_IRQS_OFF > which are also used outside of the entry code. Where? As far as I can tell, TRACE_IRQS_OFF is used exclusively by entry code. -- Josh ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 15:03 ` Josh Poimboeuf 2017-01-12 15:06 ` Herbert Xu @ 2017-01-12 15:10 ` Josh Poimboeuf 1 sibling, 0 replies; 54+ messages in thread From: Josh Poimboeuf @ 2017-01-12 15:10 UTC (permalink / raw) To: Andy Lutomirski Cc: Herbert Xu, Linus Torvalds, Linux Kernel Mailing List, Linux Crypto Mailing List, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, Ard Biesheuvel On Thu, Jan 12, 2017 at 09:03:18AM -0600, Josh Poimboeuf wrote: > On Wed, Jan 11, 2017 at 11:51:10PM -0800, Andy Lutomirski wrote: > > On Wed, Jan 11, 2017 at 11:05 PM, Herbert Xu > > <herbert@gondor.apana.org.au> wrote: > > > On Tue, Jan 10, 2017 at 09:05:28AM -0800, Linus Torvalds wrote: > > >> > > >> I'm pretty sure we have random asm code that may not maintain a > > >> 16-byte stack alignment when it calls other code (including, in some > > >> cases, calling C code). > > >> > > >> So I'm not at all convinced that this is a good idea. We shouldn't > > >> expect 16-byte alignment to be something trustworthy. > > > > > > So what if we audited all the x86 assembly code to fix this? Would > > > it then be acceptable to do a 16-byte aligned stack? > > > > > > On the face of it it doesn't seem to be a huge amount of code > > > assuming they mostly live under arch/x86. > > > > The problem is that we have nasties like TRACE_IRQS_OFF. Performance > > doesn't really matter for these macros, so we could probably rig up a > > helper for forcibly align the stack there. Maybe > > FRAME_BEGIN_FORCE_ALIGN? I also think I'd rather not to modify > > pt_regs. We should just fix the small number of code paths that > > create a pt_regs and then call into C code to align the stack. > > > > But if we can't do this with automatic verification, then I'm not sure > > I want to do it at all. The asm is already more precarious than I'd > > like, and having a code path that is misaligned is asking for obscure > > bugs down the road. 
> > For the entry code, could we just replace all calls with CALL_ALIGNED? > That might be less intrusive than trying to adjust all the pt_regs > accesses. > > Then to ensure that nobody ever uses 'call' directly: > > '#define call please-use-CALL-ALIGNED-instead-of-call' > > I think that would be possible if CALL_ALIGNED were a ".macro". To clarify, CALL_ALIGNED could be (completely untested): .macro CALL_ALIGNED \func push %rbp movq %rsp, %rbp and $0xfffffffffffffff0,%rsp call \func movq %rbp, %rsp pop %rbp .endm -- Josh ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-10 14:33 x86-64: Maintain 16-byte stack alignment Herbert Xu 2017-01-10 14:39 ` Herbert Xu @ 2017-01-10 17:30 ` Ard Biesheuvel 2017-01-10 19:00 ` Andy Lutomirski ` (2 more replies) 1 sibling, 3 replies; 54+ messages in thread From: Ard Biesheuvel @ 2017-01-10 17:30 UTC (permalink / raw) To: Herbert Xu Cc: Linux Kernel Mailing List, Linux Crypto Mailing List, Linus Torvalds, Ingo Molnar, Thomas Gleixner, Andy Lutomirski On 10 January 2017 at 14:33, Herbert Xu <herbert@gondor.apana.org.au> wrote: > I recently applied the patch > > https://patchwork.kernel.org/patch/9468391/ > > and ended up with a boot crash when it tried to run the x86 chacha20 > code. It turned out that the patch changed a manually aligned > stack buffer to one that is aligned by gcc. What was happening was > that gcc can stack align to any value on x86-64 except 16. The > reason is that gcc assumes that the stack is always 16-byte aligned, > which is not actually the case in the kernel. > Apologies for introducing this breakage. It seemed like an obvious and simple cleanup, so I didn't even bother to mention it in the commit log, but if the kernel does not guarantee 16 byte alignment, I guess we should revert to the old method. If SSE instructions are the only ones that require this alignment, then I suppose not having a ABI conforming stack pointer should not be an issue in general. > The x86-64 CPU actually tries to keep the stack 16-byte aligned, > e.g., it'll do so when an IRQ comes in. So the reason it doesn't > work in the kernel mostly comes down to the fact that the struct > pt_regs which lives near the top of the stack is 168 bytes which > is not a multiple of 16. > > This patch tries to fix this by adding an 8-byte padding at the > top of the call-chain involving pt_regs so that when we call a C > function later we do so with an aligned stack. 
> > The same problem probably exists on i386 too since gcc also assumes > 16-byte alignment there. It's harder to fix however as the CPU > doesn't help us in the IRQ case. > > Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> > > diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h > index 05ed3d3..29d3bcb 100644 > --- a/arch/x86/entry/calling.h > +++ b/arch/x86/entry/calling.h > @@ -59,39 +59,42 @@ > /* > * C ABI says these regs are callee-preserved. They aren't saved on kernel entry > * unless syscall needs a complete, fully filled "struct pt_regs". > + * > + * Note we add 8 extra bytes at the beginning to preserve stack alignment. > */ > -#define R15 0*8 > -#define R14 1*8 > -#define R13 2*8 > -#define R12 3*8 > -#define RBP 4*8 > -#define RBX 5*8 > +#define R15 1*8 > +#define R14 2*8 > +#define R13 3*8 > +#define R12 4*8 > +#define RBP 5*8 > +#define RBX 6*8 > /* These regs are callee-clobbered. Always saved on kernel entry. */ > -#define R11 6*8 > -#define R10 7*8 > -#define R9 8*8 > -#define R8 9*8 > -#define RAX 10*8 > -#define RCX 11*8 > -#define RDX 12*8 > -#define RSI 13*8 > -#define RDI 14*8 > +#define R11 7*8 > +#define R10 8*8 > +#define R9 9*8 > +#define R8 10*8 > +#define RAX 11*8 > +#define RCX 12*8 > +#define RDX 13*8 > +#define RSI 14*8 > +#define RDI 15*8 > /* > * On syscall entry, this is syscall#. On CPU exception, this is error code. > * On hw interrupt, it's IRQ number: > */ > -#define ORIG_RAX 15*8 > +#define ORIG_RAX 16*8 > /* Return frame for iretq */ > -#define RIP 16*8 > -#define CS 17*8 > -#define EFLAGS 18*8 > -#define RSP 19*8 > -#define SS 20*8 > +#define RIP 17*8 > +#define CS 18*8 > +#define EFLAGS 19*8 > +#define RSP 20*8 > +#define SS 21*8 > > +/* Note that this excludes the 8-byte padding. 
*/ > #define SIZEOF_PTREGS 21*8 > > .macro ALLOC_PT_GPREGS_ON_STACK > - addq $-(15*8), %rsp > + addq $-(16*8), %rsp > .endm > > .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1 > @@ -114,7 +117,7 @@ > movq %rdi, 14*8+\offset(%rsp) > .endm > .macro SAVE_C_REGS offset=0 > - SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1 > + SAVE_C_REGS_HELPER 8+\offset, 1, 1, 1, 1 > .endm > .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0 > SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1 > @@ -130,43 +133,43 @@ > .endm > > .macro SAVE_EXTRA_REGS offset=0 > - movq %r15, 0*8+\offset(%rsp) > - movq %r14, 1*8+\offset(%rsp) > - movq %r13, 2*8+\offset(%rsp) > - movq %r12, 3*8+\offset(%rsp) > - movq %rbp, 4*8+\offset(%rsp) > - movq %rbx, 5*8+\offset(%rsp) > + movq %r15, 1*8+\offset(%rsp) > + movq %r14, 2*8+\offset(%rsp) > + movq %r13, 3*8+\offset(%rsp) > + movq %r12, 4*8+\offset(%rsp) > + movq %rbp, 5*8+\offset(%rsp) > + movq %rbx, 6*8+\offset(%rsp) > .endm > > .macro RESTORE_EXTRA_REGS offset=0 > - movq 0*8+\offset(%rsp), %r15 > - movq 1*8+\offset(%rsp), %r14 > - movq 2*8+\offset(%rsp), %r13 > - movq 3*8+\offset(%rsp), %r12 > - movq 4*8+\offset(%rsp), %rbp > - movq 5*8+\offset(%rsp), %rbx > + movq 1*8+\offset(%rsp), %r15 > + movq 2*8+\offset(%rsp), %r14 > + movq 3*8+\offset(%rsp), %r13 > + movq 4*8+\offset(%rsp), %r12 > + movq 5*8+\offset(%rsp), %rbp > + movq 6*8+\offset(%rsp), %rbx > .endm > > .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 > .if \rstor_r11 > - movq 6*8(%rsp), %r11 > + movq 7*8(%rsp), %r11 > .endif > .if \rstor_r8910 > - movq 7*8(%rsp), %r10 > - movq 8*8(%rsp), %r9 > - movq 9*8(%rsp), %r8 > + movq 8*8(%rsp), %r10 > + movq 9*8(%rsp), %r9 > + movq 10*8(%rsp), %r8 > .endif > .if \rstor_rax > - movq 10*8(%rsp), %rax > + movq 11*8(%rsp), %rax > .endif > .if \rstor_rcx > - movq 11*8(%rsp), %rcx > + movq 12*8(%rsp), %rcx > .endif > .if \rstor_rdx > - movq 12*8(%rsp), %rdx > + movq 13*8(%rsp), %rdx > .endif > - movq 13*8(%rsp), %rsi > - movq 
14*8(%rsp), %rdi > + movq 14*8(%rsp), %rsi > + movq 15*8(%rsp), %rdi > .endm > .macro RESTORE_C_REGS > RESTORE_C_REGS_HELPER 1,1,1,1,1 > @@ -185,7 +188,7 @@ > .endm > > .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 > - subq $-(15*8+\addskip), %rsp > + subq $-(16*8+\addskip), %rsp > .endm > > .macro icebp > @@ -203,11 +206,7 @@ > */ > .macro ENCODE_FRAME_POINTER ptregs_offset=0 > #ifdef CONFIG_FRAME_POINTER > - .if \ptregs_offset > - leaq \ptregs_offset(%rsp), %rbp > - .else > - mov %rsp, %rbp > - .endif > + leaq 8+\ptregs_offset(%rsp), %rbp > orq $0x1, %rbp > #endif > .endm > diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S > index 5b21970..880bbb8 100644 > --- a/arch/x86/entry/entry_64.S > +++ b/arch/x86/entry/entry_64.S > @@ -168,7 +168,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) > pushq %r9 /* pt_regs->r9 */ > pushq %r10 /* pt_regs->r10 */ > pushq %r11 /* pt_regs->r11 */ > - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ > + sub $(7*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ > > /* > * If we need to do entry work or if we guess we'll need to do > @@ -234,14 +234,14 @@ entry_SYSCALL_64_fastpath: > TRACE_IRQS_ON > ENABLE_INTERRUPTS(CLBR_NONE) > SAVE_EXTRA_REGS > - movq %rsp, %rdi > + leaq 8(%rsp), %rdi > call syscall_return_slowpath /* returns with IRQs disabled */ > jmp return_from_SYSCALL_64 > > entry_SYSCALL64_slow_path: > /* IRQs are off. 
*/ > SAVE_EXTRA_REGS > - movq %rsp, %rdi > + leaq 8(%rsp), %rdi > call do_syscall_64 /* returns with IRQs disabled */ > > return_from_SYSCALL_64: > @@ -342,9 +342,9 @@ ENTRY(stub_ptregs_64) > * Called from fast path -- disable IRQs again, pop return address > * and jump to slow path > */ > + popq %rax > DISABLE_INTERRUPTS(CLBR_NONE) > TRACE_IRQS_OFF > - popq %rax > jmp entry_SYSCALL64_slow_path > > 1: > @@ -409,13 +409,14 @@ END(__switch_to_asm) > */ > ENTRY(ret_from_fork) > movq %rax, %rdi > + subq $8, %rsp > call schedule_tail /* rdi: 'prev' task parameter */ > > testq %rbx, %rbx /* from kernel_thread? */ > jnz 1f /* kernel threads are uncommon */ > > 2: > - movq %rsp, %rdi > + leaq 8(%rsp), %rdi > call syscall_return_slowpath /* returns with IRQs disabled */ > TRACE_IRQS_ON /* user mode is traced as IRQS on */ > SWAPGS > @@ -494,10 +495,12 @@ END(irq_entries_start) > * a little cheaper to use a separate counter in the PDA (short of > * moving irq_enter into assembly, which would be too much work) > */ > - movq %rsp, %rdi > + movq %rsp, %rax > + leaq 8(%rsp), %rdi > incl PER_CPU_VAR(irq_count) > cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp > - pushq %rdi > + sub $8, %rsp > + pushq %rax > /* We entered an interrupt context - irqs are off: */ > TRACE_IRQS_OFF > > @@ -527,7 +530,7 @@ ret_from_intr: > > /* Interrupt came from user space */ > GLOBAL(retint_user) > - mov %rsp,%rdi > + leaq 8(%rsp), %rdi > call prepare_exit_to_usermode > TRACE_IRQS_IRETQ > SWAPGS > @@ -774,7 +777,7 @@ ENTRY(\sym) > .endif > .endif > > - movq %rsp, %rdi /* pt_regs pointer */ > + leaq 8(%rsp), %rdi /* pt_regs pointer */ > > .if \has_error_code > movq ORIG_RAX(%rsp), %rsi /* get error code */ > @@ -810,11 +813,11 @@ ENTRY(\sym) > call error_entry > > > - movq %rsp, %rdi /* pt_regs pointer */ > + leaq 8(%rsp), %rdi /* pt_regs pointer */ > call sync_regs > - movq %rax, %rsp /* switch stack */ > + leaq -8(%rax), %rsp /* switch stack */ > > - movq %rsp, %rdi /* pt_regs pointer */ > + movq %rax, 
%rdi /* pt_regs pointer */ > > .if \has_error_code > movq ORIG_RAX(%rsp), %rsi /* get error code */ > @@ -895,6 +898,7 @@ ENTRY(do_softirq_own_stack) > mov %rsp, %rbp > incl PER_CPU_VAR(irq_count) > cmove PER_CPU_VAR(irq_stack_ptr), %rsp > + sub $8, %rsp > push %rbp /* frame pointer backlink */ > call __do_softirq > leaveq > @@ -924,10 +928,11 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */ > * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will > * see the correct pointer to the pt_regs > */ > - movq %rdi, %rsp /* we don't return, adjust the stack frame */ > + leaq -8(%rdi), %rsp /* we don't return, adjust the stack frame */ > 11: incl PER_CPU_VAR(irq_count) > movq %rsp, %rbp > cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp > + subq $8, %rsp > pushq %rbp /* frame pointer backlink */ > call xen_evtchn_do_upcall > popq %rsp > @@ -1264,6 +1269,7 @@ ENTRY(nmi) > */ > > movq %rsp, %rdi > + subq $8, %rsp > movq $-1, %rsi > call do_nmi > > @@ -1475,7 +1481,7 @@ end_repeat_nmi: > call paranoid_entry > > /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ > - movq %rsp, %rdi > + leaq 8(%rsp), %rdi > movq $-1, %rsi > call do_nmi > > @@ -1519,7 +1525,7 @@ ENTRY(rewind_stack_do_exit) > xorl %ebp, %ebp > > movq PER_CPU_VAR(cpu_current_top_of_stack), %rax > - leaq -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%rax), %rsp > + leaq -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE-8(%rax), %rsp > > call do_exit > 1: jmp 1b > diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S > index e1721da..7d3f1e3 100644 > --- a/arch/x86/entry/entry_64_compat.S > +++ b/arch/x86/entry/entry_64_compat.S > @@ -89,6 +89,8 @@ ENTRY(entry_SYSENTER_compat) > pushq $0 /* pt_regs->r13 = 0 */ > pushq $0 /* pt_regs->r14 = 0 */ > pushq $0 /* pt_regs->r15 = 0 */ > + > + subq $8, %rsp > cld > > /* > @@ -120,7 +122,7 @@ ENTRY(entry_SYSENTER_compat) > */ > TRACE_IRQS_OFF > > - movq %rsp, %rdi > + leaq 8(%rsp), %rdi > call do_fast_syscall_32 > /* XEN 
PV guests always use IRET path */ > ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ > @@ -215,13 +217,15 @@ ENTRY(entry_SYSCALL_compat) > pushq $0 /* pt_regs->r14 = 0 */ > pushq $0 /* pt_regs->r15 = 0 */ > > + subq $8, %rsp > + > /* > * User mode is traced as though IRQs are on, and SYSENTER > * turned them off. > */ > TRACE_IRQS_OFF > > - movq %rsp, %rdi > + leaq 8(%rsp), %rdi > call do_fast_syscall_32 > /* XEN PV guests always use IRET path */ > ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ > @@ -324,6 +328,8 @@ ENTRY(entry_INT80_compat) > pushq %r13 /* pt_regs->r13 */ > pushq %r14 /* pt_regs->r14 */ > pushq %r15 /* pt_regs->r15 */ > + > + subq $8, %rsp > cld > > /* > @@ -332,7 +338,7 @@ ENTRY(entry_INT80_compat) > */ > TRACE_IRQS_OFF > > - movq %rsp, %rdi > + leaq 8(%rsp), %rdi > call do_int80_syscall_32 > .Lsyscall_32_done: > > diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S > index be36bf4..3c80aac 100644 > --- a/arch/x86/entry/thunk_64.S > +++ b/arch/x86/entry/thunk_64.S > @@ -33,6 +33,7 @@ > movq 8(%rbp), %rdi > .endif > > + sub $8, %rsp > call \func > jmp .L_restore > _ASM_NOKPROBE(\name) > @@ -58,6 +59,7 @@ > || defined(CONFIG_DEBUG_LOCK_ALLOC) \ > || defined(CONFIG_PREEMPT) > .L_restore: > + add $8, %rsp > popq %r11 > popq %r10 > popq %r9 > diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S > index b467b14..d03ab72 100644 > --- a/arch/x86/kernel/head_64.S > +++ b/arch/x86/kernel/head_64.S > @@ -384,6 +384,8 @@ early_idt_handler_common: > pushq %r14 /* pt_regs->r14 */ > pushq %r15 /* pt_regs->r15 */ > > + sub $8, %rsp > + > cmpq $14,%rsi /* Page fault? 
*/ > jnz 10f > GET_CR2_INTO(%rdi) /* Can clobber any volatile register if pv */ > @@ -392,7 +394,7 @@ early_idt_handler_common: > jz 20f /* All good */ > > 10: > - movq %rsp,%rdi /* RDI = pt_regs; RSI is already trapnr */ > + leaq 8(%rsp), %rdi /* RDI = pt_regs; RSI is already trapnr */ > call early_fixup_exception > > 20: > diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c > index bf0c6d0..2af9f81 100644 > --- a/arch/x86/kernel/traps.c > +++ b/arch/x86/kernel/traps.c > @@ -590,6 +590,7 @@ asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) > > struct bad_iret_stack { > void *error_entry_ret; > + void *padding; > struct pt_regs regs; > }; > > -- > Email: Herbert Xu <herbert@gondor.apana.org.au> > Home Page: http://gondor.apana.org.au/~herbert/ > PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-10 17:30 ` Ard Biesheuvel @ 2017-01-10 19:00 ` Andy Lutomirski 2017-01-10 19:16 ` Ard Biesheuvel 2017-01-11 3:16 ` Herbert Xu 2017-01-11 3:15 ` Herbert Xu 2017-01-12 6:12 ` Herbert Xu 2 siblings, 2 replies; 54+ messages in thread From: Andy Lutomirski @ 2017-01-10 19:00 UTC (permalink / raw) To: Ard Biesheuvel Cc: Herbert Xu, Linux Kernel Mailing List, Linux Crypto Mailing List, Linus Torvalds, Ingo Molnar, Thomas Gleixner, Andy Lutomirski On Tue, Jan 10, 2017 at 9:30 AM, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote: > On 10 January 2017 at 14:33, Herbert Xu <herbert@gondor.apana.org.au> wrote: >> I recently applied the patch >> >> https://patchwork.kernel.org/patch/9468391/ >> >> and ended up with a boot crash when it tried to run the x86 chacha20 >> code. It turned out that the patch changed a manually aligned >> stack buffer to one that is aligned by gcc. What was happening was >> that gcc can stack align to any value on x86-64 except 16. The >> reason is that gcc assumes that the stack is always 16-byte aligned, >> which is not actually the case in the kernel. >> > > Apologies for introducing this breakage. It seemed like an obvious and > simple cleanup, so I didn't even bother to mention it in the commit > log, but if the kernel does not guarantee 16 byte alignment, I guess > we should revert to the old method. If SSE instructions are the only > ones that require this alignment, then I suppose not having a ABI > conforming stack pointer should not be an issue in general. Here's what I think is really going on. This is partially from memory, so I could be off base. The kernel is up against https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383, which means that, on some GCC versions (like the bad one and maybe even current ones), things compiled without -mno-sse can't have the stack alignment set properly. IMO we should fix this in the affected code, not the entry code. 
In fact, I think that fixing it in the entry code won't even fully fix it because modern GCC will compile the rest of the kernel with 8-byte alignment and the stack will get randomly unaligned (GCC 4.8 and newer). Can we just add __attribute__((force_align_arg_pointer)) to the affected functions? Maybe have: #define __USES_SSE __attribute__((force_align_arg_pointer)) on affected gcc versions? ***HOWEVER*** I think this is missing the tree for the supposed forest. The actual affected code appears to be: static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { u32 *state, state_buf[16 + (CHACHA20_STATE_ALIGN / sizeof(u32)) - 1]; ... state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN); gcc presumably infers (incorrectly) that state_buf is 16-byte aligned and optimizes out the roundup. How about just declaring an actual __aligned(16) buffer, marking the function __attribute__((force_align_arg_pointer)), and being done with it? After all, we need that forcible alignment on *all* gcc versions. --Andy ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-10 19:00 ` Andy Lutomirski @ 2017-01-10 19:16 ` Ard Biesheuvel 2017-01-10 19:22 ` Andy Lutomirski 2017-01-11 3:16 ` Herbert Xu 1 sibling, 1 reply; 54+ messages in thread From: Ard Biesheuvel @ 2017-01-10 19:16 UTC (permalink / raw) To: Andy Lutomirski Cc: Herbert Xu, Linux Kernel Mailing List, Linux Crypto Mailing List, Linus Torvalds, Ingo Molnar, Thomas Gleixner, Andy Lutomirski On 10 January 2017 at 19:00, Andy Lutomirski <luto@amacapital.net> wrote: > On Tue, Jan 10, 2017 at 9:30 AM, Ard Biesheuvel > <ard.biesheuvel@linaro.org> wrote: >> On 10 January 2017 at 14:33, Herbert Xu <herbert@gondor.apana.org.au> wrote: >>> I recently applied the patch >>> >>> https://patchwork.kernel.org/patch/9468391/ >>> >>> and ended up with a boot crash when it tried to run the x86 chacha20 >>> code. It turned out that the patch changed a manually aligned >>> stack buffer to one that is aligned by gcc. What was happening was >>> that gcc can stack align to any value on x86-64 except 16. The >>> reason is that gcc assumes that the stack is always 16-byte aligned, >>> which is not actually the case in the kernel. >>> >> >> Apologies for introducing this breakage. It seemed like an obvious and >> simple cleanup, so I didn't even bother to mention it in the commit >> log, but if the kernel does not guarantee 16 byte alignment, I guess >> we should revert to the old method. If SSE instructions are the only >> ones that require this alignment, then I suppose not having a ABI >> conforming stack pointer should not be an issue in general. > > Here's what I think is really going on. This is partially from > memory, so I could be off base. The kernel is up against > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383, which means that, > on some GCC versions (like the bad one and maybe even current ones), > things compiled without -mno-sse can't have the stack alignment set > properly. 
IMO we should fix this in the affected code, not the entry > code. In fact, I think that fixing it in the entry code won't even > fully fix it because modern GCC will compile the rest of the kernel > with 8-byte alignment and the stack will get randomly unaligned (GCC > 4.8 and newer). > > Can we just add __attribute__((force_align_arg_pointer)) to the > affected functions? Maybe have: > > #define __USES_SSE __attribute__((force_align_arg_pointer)) > > on affected gcc versions? > > ***HOWEVER*** > > I think this is missing the tree for the supposed forest. The actual > affected code appears to be: > > static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, > struct scatterlist *src, unsigned int nbytes) > { > u32 *state, state_buf[16 + (CHACHA20_STATE_ALIGN / sizeof(u32)) - 1]; > > ... > > state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN); > > gcc presumably infers (incorrectly) that state_buf is 16-byte aligned > and optimizes out the roundup. How about just declaring an actual > __aligned(16) buffer, marking the function > __attribute__((force_align_arg_pointer)), and being done with it? > After all, we need that forcible alignment on *all* gcc versions. > Actually, the breakage is introduced by the patch Herbert refers to https://patchwork.kernel.org/patch/9468391/ where the state is replaced by a simple u32 state[16] __aligned(CHACHA20_STATE_ALIGN); which seemed harmless enough to me. So the code above works fine. ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-10 19:16 ` Ard Biesheuvel @ 2017-01-10 19:22 ` Andy Lutomirski 2017-01-10 20:00 ` Ard Biesheuvel 2017-01-11 3:26 ` Herbert Xu 0 siblings, 2 replies; 54+ messages in thread From: Andy Lutomirski @ 2017-01-10 19:22 UTC (permalink / raw) To: Ard Biesheuvel Cc: Herbert Xu, Linux Kernel Mailing List, Linux Crypto Mailing List, Linus Torvalds, Ingo Molnar, Thomas Gleixner, Andy Lutomirski On Tue, Jan 10, 2017 at 11:16 AM, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote: > On 10 January 2017 at 19:00, Andy Lutomirski <luto@amacapital.net> wrote: >> On Tue, Jan 10, 2017 at 9:30 AM, Ard Biesheuvel >> <ard.biesheuvel@linaro.org> wrote: >>> On 10 January 2017 at 14:33, Herbert Xu <herbert@gondor.apana.org.au> wrote: >>>> I recently applied the patch >>>> >>>> https://patchwork.kernel.org/patch/9468391/ >>>> >>>> and ended up with a boot crash when it tried to run the x86 chacha20 >>>> code. It turned out that the patch changed a manually aligned >>>> stack buffer to one that is aligned by gcc. What was happening was >>>> that gcc can stack align to any value on x86-64 except 16. The >>>> reason is that gcc assumes that the stack is always 16-byte aligned, >>>> which is not actually the case in the kernel. >>>> >>> >>> Apologies for introducing this breakage. It seemed like an obvious and >>> simple cleanup, so I didn't even bother to mention it in the commit >>> log, but if the kernel does not guarantee 16 byte alignment, I guess >>> we should revert to the old method. If SSE instructions are the only >>> ones that require this alignment, then I suppose not having a ABI >>> conforming stack pointer should not be an issue in general. >> >> Here's what I think is really going on. This is partially from >> memory, so I could be off base. 
The kernel is up against >> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383, which means that, >> on some GCC versions (like the bad one and maybe even current ones), >> things compiled without -mno-sse can't have the stack alignment set >> properly. IMO we should fix this in the affected code, not the entry >> code. In fact, I think that fixing it in the entry code won't even >> fully fix it because modern GCC will compile the rest of the kernel >> with 8-byte alignment and the stack will get randomly unaligned (GCC >> 4.8 and newer). >> >> Can we just add __attribute__((force_align_arg_pointer)) to the >> affected functions? Maybe have: >> >> #define __USES_SSE __attribute__((force_align_arg_pointer)) >> >> on affected gcc versions? >> >> ***HOWEVER*** >> >> I think this is missing the tree for the supposed forest. The actual >> affected code appears to be: >> >> static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, >> struct scatterlist *src, unsigned int nbytes) >> { >> u32 *state, state_buf[16 + (CHACHA20_STATE_ALIGN / sizeof(u32)) - 1]; >> >> ... >> >> state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN); >> >> gcc presumably infers (incorrectly) that state_buf is 16-byte aligned >> and optimizes out the roundup. How about just declaring an actual >> __aligned(16) buffer, marking the function >> __attribute__((force_align_arg_pointer)), and being done with it? >> After all, we need that forcible alignment on *all* gcc versions. >> > > Actually, the breakage is introduced by the patch Herbert refers to > > https://patchwork.kernel.org/patch/9468391/ > > where the state is replaced by a simple > > u32 state[16] __aligned(CHACHA20_STATE_ALIGN); > > which seemed harmless enough to me. So the code above works fine. So how about just the one-line patch of adding the force_align_arg_pointer? Would that solve the problem? ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-10 19:22 ` Andy Lutomirski @ 2017-01-10 20:00 ` Ard Biesheuvel 2017-01-10 23:25 ` Andy Lutomirski 2017-01-11 3:26 ` Herbert Xu 1 sibling, 1 reply; 54+ messages in thread From: Ard Biesheuvel @ 2017-01-10 20:00 UTC (permalink / raw) To: Andy Lutomirski Cc: Herbert Xu, Linux Kernel Mailing List, Linux Crypto Mailing List, Linus Torvalds, Ingo Molnar, Thomas Gleixner, Andy Lutomirski On 10 January 2017 at 19:22, Andy Lutomirski <luto@amacapital.net> wrote: > On Tue, Jan 10, 2017 at 11:16 AM, Ard Biesheuvel > <ard.biesheuvel@linaro.org> wrote: >> On 10 January 2017 at 19:00, Andy Lutomirski <luto@amacapital.net> wrote: >>> On Tue, Jan 10, 2017 at 9:30 AM, Ard Biesheuvel >>> <ard.biesheuvel@linaro.org> wrote: >>>> On 10 January 2017 at 14:33, Herbert Xu <herbert@gondor.apana.org.au> wrote: >>>>> I recently applied the patch >>>>> >>>>> https://patchwork.kernel.org/patch/9468391/ >>>>> >>>>> and ended up with a boot crash when it tried to run the x86 chacha20 >>>>> code. It turned out that the patch changed a manually aligned >>>>> stack buffer to one that is aligned by gcc. What was happening was >>>>> that gcc can stack align to any value on x86-64 except 16. The >>>>> reason is that gcc assumes that the stack is always 16-byte aligned, >>>>> which is not actually the case in the kernel. >>>>> >>>> >>>> Apologies for introducing this breakage. It seemed like an obvious and >>>> simple cleanup, so I didn't even bother to mention it in the commit >>>> log, but if the kernel does not guarantee 16 byte alignment, I guess >>>> we should revert to the old method. If SSE instructions are the only >>>> ones that require this alignment, then I suppose not having a ABI >>>> conforming stack pointer should not be an issue in general. >>> >>> Here's what I think is really going on. This is partially from >>> memory, so I could be off base. 
The kernel is up against >>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383, which means that, >>> on some GCC versions (like the bad one and maybe even current ones), >>> things compiled without -mno-sse can't have the stack alignment set >>> properly. IMO we should fix this in the affected code, not the entry >>> code. In fact, I think that fixing it in the entry code won't even >>> fully fix it because modern GCC will compile the rest of the kernel >>> with 8-byte alignment and the stack will get randomly unaligned (GCC >>> 4.8 and newer). >>> >>> Can we just add __attribute__((force_align_arg_pointer)) to the >>> affected functions? Maybe have: >>> >>> #define __USES_SSE __attribute__((force_align_arg_pointer)) >>> >>> on affected gcc versions? >>> >>> ***HOWEVER*** >>> >>> I think this is missing the tree for the supposed forest. The actual >>> affected code appears to be: >>> >>> static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, >>> struct scatterlist *src, unsigned int nbytes) >>> { >>> u32 *state, state_buf[16 + (CHACHA20_STATE_ALIGN / sizeof(u32)) - 1]; >>> >>> ... >>> >>> state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN); >>> >>> gcc presumably infers (incorrectly) that state_buf is 16-byte aligned >>> and optimizes out the roundup. How about just declaring an actual >>> __aligned(16) buffer, marking the function >>> __attribute__((force_align_arg_pointer)), and being done with it? >>> After all, we need that forcible alignment on *all* gcc versions. >>> >> >> Actually, the breakage is introduced by the patch Herbert refers to >> >> https://patchwork.kernel.org/patch/9468391/ >> >> where the state is replaced by a simple >> >> u32 state[16] __aligned(CHACHA20_STATE_ALIGN); >> >> which seemed harmless enough to me. So the code above works fine. > > So how about just the one-line patch of adding the > force_align_arg_pointer? Would that solve the problem? 
If it does what it says on the tin, it should fix the issue, but after adding the attribute, I get the exact same object output, so there's something dodgy going on here. ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-10 20:00 ` Ard Biesheuvel @ 2017-01-10 23:25 ` Andy Lutomirski 2017-01-11 3:26 ` Herbert Xu 0 siblings, 1 reply; 54+ messages in thread From: Andy Lutomirski @ 2017-01-10 23:25 UTC (permalink / raw) To: Ard Biesheuvel Cc: Herbert Xu, Linux Kernel Mailing List, Linux Crypto Mailing List, Linus Torvalds, Ingo Molnar, Thomas Gleixner, Andy Lutomirski On Tue, Jan 10, 2017 at 12:00 PM, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote: > On 10 January 2017 at 19:22, Andy Lutomirski <luto@amacapital.net> wrote: >> On Tue, Jan 10, 2017 at 11:16 AM, Ard Biesheuvel >> <ard.biesheuvel@linaro.org> wrote: >>> On 10 January 2017 at 19:00, Andy Lutomirski <luto@amacapital.net> wrote: >>>> On Tue, Jan 10, 2017 at 9:30 AM, Ard Biesheuvel >>>> <ard.biesheuvel@linaro.org> wrote: >>>>> On 10 January 2017 at 14:33, Herbert Xu <herbert@gondor.apana.org.au> wrote: >>>>>> I recently applied the patch >>>>>> >>>>>> https://patchwork.kernel.org/patch/9468391/ >>>>>> >>>>>> and ended up with a boot crash when it tried to run the x86 chacha20 >>>>>> code. It turned out that the patch changed a manually aligned >>>>>> stack buffer to one that is aligned by gcc. What was happening was >>>>>> that gcc can stack align to any value on x86-64 except 16. The >>>>>> reason is that gcc assumes that the stack is always 16-byte aligned, >>>>>> which is not actually the case in the kernel. >>>>>> >>>>> >>>>> Apologies for introducing this breakage. It seemed like an obvious and >>>>> simple cleanup, so I didn't even bother to mention it in the commit >>>>> log, but if the kernel does not guarantee 16 byte alignment, I guess >>>>> we should revert to the old method. If SSE instructions are the only >>>>> ones that require this alignment, then I suppose not having a ABI >>>>> conforming stack pointer should not be an issue in general. >>>> >>>> Here's what I think is really going on. This is partially from >>>> memory, so I could be off base. 
The kernel is up against >>>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383, which means that, >>>> on some GCC versions (like the bad one and maybe even current ones), >>>> things compiled without -mno-sse can't have the stack alignment set >>>> properly. IMO we should fix this in the affected code, not the entry >>>> code. In fact, I think that fixing it in the entry code won't even >>>> fully fix it because modern GCC will compile the rest of the kernel >>>> with 8-byte alignment and the stack will get randomly unaligned (GCC >>>> 4.8 and newer). >>>> >>>> Can we just add __attribute__((force_align_arg_pointer)) to the >>>> affected functions? Maybe have: >>>> >>>> #define __USES_SSE __attribute__((force_align_arg_pointer)) >>>> >>>> on affected gcc versions? >>>> >>>> ***HOWEVER*** >>>> >>>> I think this is missing the tree for the supposed forest. The actual >>>> affected code appears to be: >>>> >>>> static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, >>>> struct scatterlist *src, unsigned int nbytes) >>>> { >>>> u32 *state, state_buf[16 + (CHACHA20_STATE_ALIGN / sizeof(u32)) - 1]; >>>> >>>> ... >>>> >>>> state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN); >>>> >>>> gcc presumably infers (incorrectly) that state_buf is 16-byte aligned >>>> and optimizes out the roundup. How about just declaring an actual >>>> __aligned(16) buffer, marking the function >>>> __attribute__((force_align_arg_pointer)), and being done with it? >>>> After all, we need that forcible alignment on *all* gcc versions. >>>> >>> >>> Actually, the breakage is introduced by the patch Herbert refers to >>> >>> https://patchwork.kernel.org/patch/9468391/ >>> >>> where the state is replaced by a simple >>> >>> u32 state[16] __aligned(CHACHA20_STATE_ALIGN); >>> >>> which seemed harmless enough to me. So the code above works fine. >> >> So how about just the one-line patch of adding the >> force_align_arg_pointer? Would that solve the problem? 
> > If it does what it says on the tin, it should fix the issue, but after > adding the attribute, I get the exact same object output, so there's > something dodgy going on here. Ugh, that's annoying. Maybe it needs noinline too? --Andy ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-10 23:25 ` Andy Lutomirski @ 2017-01-11 3:26 ` Herbert Xu 0 siblings, 0 replies; 54+ messages in thread From: Herbert Xu @ 2017-01-11 3:26 UTC (permalink / raw) To: Andy Lutomirski Cc: Ard Biesheuvel, Linux Kernel Mailing List, Linux Crypto Mailing List, Linus Torvalds, Ingo Molnar, Thomas Gleixner, Andy Lutomirski On Tue, Jan 10, 2017 at 03:25:47PM -0800, Andy Lutomirski wrote: > > > If it does what it says on the tin, it should fix the issue, but after > > adding the attribute, I get the exact same object output, so there's > > something dodgy going on here. > > Ugh, that's annoying. Maybe it needs noinline too? Perhaps something to do with https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66697 Cheers, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-10 19:22 ` Andy Lutomirski 2017-01-10 20:00 ` Ard Biesheuvel @ 2017-01-11 3:26 ` Herbert Xu 1 sibling, 0 replies; 54+ messages in thread From: Herbert Xu @ 2017-01-11 3:26 UTC (permalink / raw) To: Andy Lutomirski Cc: Ard Biesheuvel, Linux Kernel Mailing List, Linux Crypto Mailing List, Linus Torvalds, Ingo Molnar, Thomas Gleixner, Andy Lutomirski On Tue, Jan 10, 2017 at 11:22:15AM -0800, Andy Lutomirski wrote: > > > Actually, the breakage is introduced by the patch Herbert refers to > > > > https://patchwork.kernel.org/patch/9468391/ > > > > where the state is replaced by a simple > > > > u32 state[16] __aligned(CHACHA20_STATE_ALIGN); > > > > which seemed harmless enough to me. So the code above works fine. > > So how about just the one-line patch of adding the > force_align_arg_pointer? Would that solve the problem? It probably does. However, this is too error-prone. Surely you can't expect random kernel developers to know to add this force_align_arg_pointer every time they try to align a stack variable to 16 bytes? Cheers, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-10 19:00 ` Andy Lutomirski 2017-01-10 19:16 ` Ard Biesheuvel @ 2017-01-11 3:16 ` Herbert Xu 1 sibling, 0 replies; 54+ messages in thread From: Herbert Xu @ 2017-01-11 3:16 UTC (permalink / raw) To: Andy Lutomirski Cc: Ard Biesheuvel, Linux Kernel Mailing List, Linux Crypto Mailing List, Linus Torvalds, Ingo Molnar, Thomas Gleixner, Andy Lutomirski On Tue, Jan 10, 2017 at 11:00:31AM -0800, Andy Lutomirski wrote: > > Here's what I think is really going on. This is partially from > memory, so I could be off base. The kernel is up against > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383, which means that, > on some GCC versions (like the bad one and maybe even current ones), > things compiled without -mno-sse can't have the stack alignment set > properly. IMO we should fix this in the affected code, not the entry No that's not it. My compiler (gcc 4.7.2) doesn't support it period: $ gcc -S -O2 -mno-sse -mpreferred-stack-boundary=3 a.c a.c:1:0: error: -mpreferred-stack-boundary=3 is not between 4 and 12 $ So you either have to ban all compilers older than whatever version that started supporting 8-byte stack alignment, or fix the kernel. Cheers, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-10 17:30 ` Ard Biesheuvel 2017-01-10 19:00 ` Andy Lutomirski @ 2017-01-11 3:15 ` Herbert Xu 2017-01-12 6:12 ` Herbert Xu 2 siblings, 0 replies; 54+ messages in thread From: Herbert Xu @ 2017-01-11 3:15 UTC (permalink / raw) To: Ard Biesheuvel Cc: Linux Kernel Mailing List, Linux Crypto Mailing List, Linus Torvalds, Ingo Molnar, Thomas Gleixner, Andy Lutomirski On Tue, Jan 10, 2017 at 05:30:48PM +0000, Ard Biesheuvel wrote: > > Apologies for introducing this breakage. It seemed like an obvious and > simple cleanup, so I didn't even bother to mention it in the commit > log, but if the kernel does not guarantee 16 byte alignment, I guess > we should revert to the old method. If SSE instructions are the only > ones that require this alignment, then I suppose not having a ABI > conforming stack pointer should not be an issue in general. I think we need to address this regardless of your patch. You won't be the last person to use __attribute__ to get 16-byte alignment on the stack. Cheers, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-10 17:30 ` Ard Biesheuvel 2017-01-10 19:00 ` Andy Lutomirski 2017-01-11 3:15 ` Herbert Xu @ 2017-01-12 6:12 ` Herbert Xu 2017-01-12 8:01 ` Ard Biesheuvel 2 siblings, 1 reply; 54+ messages in thread From: Herbert Xu @ 2017-01-12 6:12 UTC (permalink / raw) To: Ard Biesheuvel Cc: Linux Kernel Mailing List, Linux Crypto Mailing List, Linus Torvalds, Ingo Molnar, Thomas Gleixner, Andy Lutomirski On Tue, Jan 10, 2017 at 05:30:48PM +0000, Ard Biesheuvel wrote: > > Apologies for introducing this breakage. It seemed like an obvious and > simple cleanup, so I didn't even bother to mention it in the commit > log, but if the kernel does not guarantee 16 byte alignment, I guess > we should revert to the old method. If SSE instructions are the only > ones that require this alignment, then I suppose not having a ABI > conforming stack pointer should not be an issue in general. BTW Ard, what is the stack alignment on ARM64? Cheers, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 6:12 ` Herbert Xu @ 2017-01-12 8:01 ` Ard Biesheuvel 2017-01-12 8:06 ` Herbert Xu 0 siblings, 1 reply; 54+ messages in thread From: Ard Biesheuvel @ 2017-01-12 8:01 UTC (permalink / raw) To: Herbert Xu Cc: Linux Kernel Mailing List, Linux Crypto Mailing List, Linus Torvalds, Ingo Molnar, Thomas Gleixner, Andy Lutomirski On 12 January 2017 at 06:12, Herbert Xu <herbert@gondor.apana.org.au> wrote: > On Tue, Jan 10, 2017 at 05:30:48PM +0000, Ard Biesheuvel wrote: >> >> Apologies for introducing this breakage. It seemed like an obvious and >> simple cleanup, so I didn't even bother to mention it in the commit >> log, but if the kernel does not guarantee 16 byte alignment, I guess >> we should revert to the old method. If SSE instructions are the only >> ones that require this alignment, then I suppose not having a ABI >> conforming stack pointer should not be an issue in general. > > BTW Ard, what is the stack alignment on ARM64? > [From memory] the arm64 ELF psABI mandates a 16 byte stack alignment at function entry, and 8 byte alignment at all other times. This means compiled code will typically preserve 16 byte alignment, and __aligned(16) on a stack variable will likely not result in an explicit alignment of the stack pointer *. But the arm64 ISA does not have any load/store instructions that would trigger a misalignment fault on an address that is 8 byte aligned but not 16 byte aligned, so the situation is very different from x86 (assuming I am correct in asserting that there are no failure modes resulting from a misaligned stack other than this one and a potential performance hit) * I didn't check whether the exception handling realigns the stack pointer on nested exceptions (arm64 has separate IRQ stacks) ^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: x86-64: Maintain 16-byte stack alignment 2017-01-12 8:01 ` Ard Biesheuvel @ 2017-01-12 8:06 ` Herbert Xu 0 siblings, 0 replies; 54+ messages in thread From: Herbert Xu @ 2017-01-12 8:06 UTC (permalink / raw) To: Ard Biesheuvel Cc: Linux Kernel Mailing List, Linux Crypto Mailing List, Linus Torvalds, Ingo Molnar, Thomas Gleixner, Andy Lutomirski On Thu, Jan 12, 2017 at 08:01:51AM +0000, Ard Biesheuvel wrote: > > [From memory] the arm64 ELF psABI mandates a 16 byte stack alignment > at function entry, and 8 byte alignment at all other times. This means > compiled code will typically preserve 16 byte alignment, and > __aligned(16) on a stack variable will likely not result in an > explicit alignment of the stack pointer *. But the arm64 ISA does not > have any load/store instructions that would trigger a misalignment > fault on an address that is 8 byte aligned but not 16 byte aligned, so > the situation is very different from x86 (assuming I am correct in > asserting that there are no failure modes resulting from a misaligned > stack other than this one and a potential performance hit) OK, sounds like we're already using 16-byte aligned stacks on ARM64. So unless x86-64 stacks are much smaller, I don't see the need to use 8-byte aligned stacks at least from a stack space point-of-view. Thanks, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 54+ messages in thread
end of thread, other threads:[~2017-01-13 13:08 UTC | newest] Thread overview: 54+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2017-01-10 14:33 x86-64: Maintain 16-byte stack alignment Herbert Xu 2017-01-10 14:39 ` Herbert Xu 2017-01-10 17:05 ` Linus Torvalds 2017-01-10 17:09 ` Andy Lutomirski 2017-01-11 3:11 ` Herbert Xu 2017-01-11 3:30 ` Linus Torvalds 2017-01-11 4:17 ` Linus Torvalds 2017-01-11 4:35 ` Herbert Xu 2017-01-11 6:01 ` Andy Lutomirski 2017-01-12 6:21 ` Andy Lutomirski 2017-01-12 7:40 ` Ingo Molnar 2017-01-12 14:02 ` Josh Poimboeuf 2017-01-12 19:51 ` Linus Torvalds 2017-01-12 20:08 ` Andy Lutomirski 2017-01-12 20:15 ` Josh Poimboeuf 2017-01-12 20:55 ` Josh Poimboeuf 2017-01-12 21:40 ` Linus Torvalds 2017-01-13 8:38 ` Herbert Xu 2017-01-13 1:46 ` Andy Lutomirski 2017-01-13 3:11 ` Josh Poimboeuf 2017-01-13 3:23 ` Andy Lutomirski 2017-01-13 4:27 ` Josh Poimboeuf [not found] ` <CA+55aFzRrSwGxxfZk-RUEnsz=xhcSmOwE1CenfCPBWtsS9MwDw@mail.gmail.com> 2017-01-13 5:07 ` Josh Poimboeuf 2017-01-13 8:43 ` Herbert Xu 2017-01-13 8:42 ` Herbert Xu 2017-01-13 8:39 ` Herbert Xu 2017-01-13 8:36 ` Herbert Xu 2017-01-13 13:07 ` Josh Poimboeuf [not found] ` <CA+55aFw+Z_ieo6DzTVB6_-TvQ0jj60s=T0mvXfqkBVFdKFPw_Q@mail.gmail.com> 2017-01-11 8:06 ` Ard Biesheuvel 2017-01-11 8:09 ` Herbert Xu 2017-01-11 18:20 ` Andy Lutomirski 2017-01-12 7:05 ` Herbert Xu 2017-01-12 7:46 ` Ingo Molnar 2017-01-12 14:49 ` Josh Poimboeuf 2017-01-12 7:51 ` Andy Lutomirski 2017-01-12 8:04 ` Herbert Xu 2017-01-12 8:18 ` Ingo Molnar 2017-01-12 15:03 ` Josh Poimboeuf 2017-01-12 15:06 ` Herbert Xu 2017-01-12 15:18 ` Josh Poimboeuf 2017-01-12 15:10 ` Josh Poimboeuf 2017-01-10 17:30 ` Ard Biesheuvel 2017-01-10 19:00 ` Andy Lutomirski 2017-01-10 19:16 ` Ard Biesheuvel 2017-01-10 19:22 ` Andy Lutomirski 2017-01-10 20:00 ` Ard Biesheuvel 2017-01-10 23:25 ` Andy Lutomirski 2017-01-11 3:26 ` Herbert Xu 2017-01-11 3:26 ` Herbert Xu 2017-01-11 3:16 ` Herbert Xu 
2017-01-11 3:15 ` Herbert Xu 2017-01-12 6:12 ` Herbert Xu 2017-01-12 8:01 ` Ard Biesheuvel 2017-01-12 8:06 ` Herbert Xu
This is a public inbox; see mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for NNTP newsgroup(s).