Date: Wed, 19 Apr 2017 11:38:37 -0500
From: Josh Poimboeuf <jpoimboe@redhat.com>
To: Steven Rostedt
Cc: LKML, Ingo Molnar, Thomas Gleixner, Andy Lutomirski, "H. Peter Anvin"
Subject: Re: WARNING: kernel stack frame pointer has bad value
Message-ID: <20170419163837.dxqsdfhk5fkod6rv@treble>
References: <20170418233714.14fbf55d@grimm.local.home>
 <20170419134457.l7gdyuf4xbe4nns5@treble>
 <20170419101203.670115bc@gandalf.local.home>
In-Reply-To: <20170419101203.670115bc@gandalf.local.home>

On Wed, Apr 19, 2017 at 10:12:03AM -0400, Steven Rostedt wrote:
> On Wed, 19 Apr 2017 08:44:57 -0500
> Josh Poimboeuf wrote:
> 
> > On Tue, Apr 18, 2017 at 11:37:14PM -0400, Steven Rostedt wrote:
> > > Josh,
> > > 
> > > I'm starting to get a bunch of these warnings, and I'm thinking they
> > > are false positives. The stack frame error is recorded at a call from
> > > entry_SYSCALL_64_fastpath, where I would expect the bp to not be valid.
> > > 
> > > To trigger this, I only need to go into /sys/kernel/debug/tracing and
> > > echo function > current_tracer, then cat trace. Maybe the function
> > > tracer stack frames are messing it up somehow, but it always fails at
> > > the entry call.
> > > 
> > > Here's the dump:
> > > 
> > > WARNING: kernel stack frame pointer at ffff8800bda0ff30 in sshd:1090 has bad value 000055b32abf1fa8
> > ...
> > > ffff8800bda0ff20: ffff8800bda0ff30 (0xffff8800bda0ff30)
> > > ffff8800bda0ff28: ffffffff810dc945 (SyS_rt_sigprocmask+0x5/0x1a0)
> > > ffff8800bda0ff30: 000055b32abf1fa8 (0x55b32abf1fa8)
> > > ffff8800bda0ff38: ffffffff81cf502a (entry_SYSCALL_64_fastpath+0x18/0xad)
> > > ffff8800bda0ff40: 000055b32abf1fa8 (0x55b32abf1fa8)
> > > ffff8800bda0ff48: ffffffff810dc945 (SyS_rt_sigprocmask+0x5/0x1a0)
> > > ffff8800bda0ff50: ffffffff81cf502a (entry_SYSCALL_64_fastpath+0x18/0xad)
> > 
> > Thanks for reporting, I hadn't seen this one yet.
> > 
> > The problem is that the unwinder expects the last frame pointer to be at
> > a certain address (0xffff8800bda0ff48 in this case), so it can know that
> > it reached the end.  It's confused by the save_mcount_regs macro, which
> > builds some fake frames -- which is good -- but then the last frame is
> > at a different offset than what the unwinder expects.
> > 
> > Would it be possible for ftrace to rewrite the stack so that it looks
> > like this instead?
> > 
> > > ffff8800bda0ff38: ffff8800bda0ff48 (0xffff8800bda0ff48)
> > > ffff8800bda0ff40: ffffffff810dc945 (SyS_rt_sigprocmask+0x5/0x1a0)
> > > ffff8800bda0ff48: 000055b32abf1fa8 (0x55b32abf1fa8)
> > > ffff8800bda0ff50: ffffffff81cf502a (entry_SYSCALL_64_fastpath+0x18/0xad)
> > 
> > In other words, it would overwrite the "SyS_rt_sigprocmask+0x5/0x1a0"
> > value on the stack at ffff8800bda0ff48 with the original bp, instead of
> > appending to the existing stack.  If you would be ok with such an
> > approach, I could take a stab at it.
> 
> This is because we have to handle each different config differently;
> that is the case with FENTRY and FRAME_POINTERS, and I like to keep
> this as efficient as possible.  To do the above, we need to modify the
> return address and then restore it, and handle that for each config
> type.
> 
> > The alternative would be to change the unwinder, but I would rather
> > avoid having to detect another special case if possible.
> 
> I'm not sure which is worse: modifying all the special cases of ftrace,
> or adding a new one to the unwinder.
> 
> You can take a crack at it if you like, but the performance impact
> needs to be negligible for FENTRY with no frame pointers.

How about something like the following (completely untested):

diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index 7b0d3da..54f0f45 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -27,19 +27,19 @@ EXPORT_SYMBOL(mcount)
 /* All cases save the original rbp (8 bytes) */
 #ifdef CONFIG_FRAME_POINTER
 # ifdef CC_USING_FENTRY
-/* Save parent and function stack frames (rip and rbp) */
-# define MCOUNT_FRAME_SIZE	(8+16*2)
+/* Save extra stack frame (rip and rbp) */
+# define MCOUNT_FRAME_SIZE	16
 # else
-/* Save just function stack frame (rip and rbp) */
-# define MCOUNT_FRAME_SIZE	(8+16)
+/* Save just rbp */
+# define MCOUNT_FRAME_SIZE	8
 # endif
 #else
 /* No need to save a stack frame */
-# define MCOUNT_FRAME_SIZE	8
+# define MCOUNT_FRAME_SIZE	0
 #endif /* CONFIG_FRAME_POINTER */
 
 /* Size of stack used to save mcount regs in save_mcount_regs */
-#define MCOUNT_REG_SIZE		(SS+8 + MCOUNT_FRAME_SIZE)
+#define MCOUNT_REG_SIZE		(FRAME_SIZE + MCOUNT_FRAME_SIZE)
 
 /*
  * gcc -pg option adds a call to 'mcount' in most functions.
@@ -66,10 +66,7 @@ EXPORT_SYMBOL(mcount)
  * %rsi - holds the parent function (traced function's return address)
  * %rdx - holds the original %rbp
  */
-.macro save_mcount_regs added=0
-
-	/* Always save the original rbp */
-	pushq %rbp
+.macro save_mcount_regs save_flags=0
 
 #ifdef CONFIG_FRAME_POINTER
 	/*
@@ -80,15 +77,14 @@ EXPORT_SYMBOL(mcount)
 	 * is called afterward.
 	 */
 #ifdef CC_USING_FENTRY
-	/* Save the parent pointer (skip orig rbp and our return address) */
-	pushq \added+8*2(%rsp)
-	pushq %rbp
-	movq %rsp, %rbp
-	/* Save the return address (now skip orig rbp, rbp and parent) */
-	pushq \added+8*3(%rsp)
-#else
-	/* Can't assume that rip is before this (unless added was zero) */
-	pushq \added+8(%rsp)
+	/* Copy rip to make room for original rbp */
+	pushq (%rsp)
+
+	/* Put original rbp next to parent rip */
+	movq %rbp, 8(%rsp)
+
+	/* Make rbp point to original rbp */
+	leaq 8(%rsp), %rbp
 #endif
 	pushq %rbp
 	movq %rsp, %rbp
@@ -96,8 +92,19 @@ EXPORT_SYMBOL(mcount)
 
 	/*
 	 * We add enough stack to save all regs.
+	 *
+	 * If saving flags, stop in the middle of the stack adjustment to save
+	 * them in the right spot. Use 'leaq' instead of 'subq' so the flags
+	 * aren't affected.
 	 */
-	subq $(MCOUNT_REG_SIZE - MCOUNT_FRAME_SIZE), %rsp
+	.if \save_flags
+	leaq EFLAGS-FRAME_SIZE+8(%rsp), %rsp
+	pushfq
+	subq $EFLAGS, %rsp
+	.else
+	subq FRAME_SIZE, %rsp
+	.endif
+
 	movq %rax, RAX(%rsp)
 	movq %rcx, RCX(%rsp)
 	movq %rdx, RDX(%rsp)
@@ -105,23 +112,28 @@ EXPORT_SYMBOL(mcount)
 	movq %rdi, RDI(%rsp)
 	movq %r8, R8(%rsp)
 	movq %r9, R9(%rsp)
+
 	/*
 	 * Save the original RBP. Even though the mcount ABI does not
 	 * require this, it helps out callers.
 	 */
-	movq MCOUNT_REG_SIZE-8(%rsp), %rdx
+#ifdef CONFIG_FRAME_POINTER
+	movq (%rbp), %rdx
 	movq %rdx, RBP(%rsp)
+#else
+	movq %rbp, RBP(%rsp)
+#endif
 
 	/* Copy the parent address into %rsi (second parameter) */
 #ifdef CC_USING_FENTRY
-	movq MCOUNT_REG_SIZE+8+\added(%rsp), %rsi
+	movq MCOUNT_REG_SIZE+8(%rsp), %rsi
 #else
-	/* %rdx contains original %rbp */
+	movq RBP(%rsp), %rdx
 	movq 8(%rdx), %rsi
 #endif
 
 	/* Move RIP to its proper location */
-	movq MCOUNT_REG_SIZE+\added(%rsp), %rdi
+	movq MCOUNT_REG_SIZE(%rsp), %rdi
 	movq %rdi, RIP(%rsp)
 
 	/*
@@ -132,7 +144,7 @@ EXPORT_SYMBOL(mcount)
 	subq $MCOUNT_INSN_SIZE, %rdi
 	.endm
 
-.macro restore_mcount_regs
+.macro restore_mcount_regs restore_flags=0
 	movq R9(%rsp), %r9
 	movq R8(%rsp), %r8
 	movq RDI(%rsp), %rdi
@@ -144,7 +156,13 @@ EXPORT_SYMBOL(mcount)
 	/* ftrace_regs_caller can modify %rbp */
 	movq RBP(%rsp), %rbp
 
+	.if \restore_flags
+	leaq EFLAGS(%rsp), %rsp
+	popfq
+	addq FRAME_SIZE-EFLAGS+8, %rsp
+	.else
 	addq $MCOUNT_REG_SIZE, %rsp
+	.endif
 
 	.endm
 
@@ -191,11 +209,7 @@ WEAK(ftrace_stub)
 END(ftrace_caller)
 
 ENTRY(ftrace_regs_caller)
-	/* Save the current flags before any operations that can change them */
-	pushfq
-
-	/* added 8 bytes to save flags */
-	save_mcount_regs 8
+	save_mcount_regs save_flags=1
 
 	/* save_mcount_regs fills in first two parameters */
 GLOBAL(ftrace_regs_caller_op_ptr)
@@ -210,16 +224,13 @@ GLOBAL(ftrace_regs_caller_op_ptr)
 	movq %r11, R11(%rsp)
 	movq %r10, R10(%rsp)
 	movq %rbx, RBX(%rsp)
-	/* Copy saved flags */
-	movq MCOUNT_REG_SIZE(%rsp), %rcx
-	movq %rcx, EFLAGS(%rsp)
 	/* Kernel segments */
 	movq $__KERNEL_DS, %rcx
 	movq %rcx, SS(%rsp)
 	movq $__KERNEL_CS, %rcx
 	movq %rcx, CS(%rsp)
-	/* Stack - skipping return address and flags */
-	leaq MCOUNT_REG_SIZE+8*2(%rsp), %rcx
+	/* Stack - skipping return address */
+	leaq MCOUNT_REG_SIZE+8(%rsp), %rcx
 	movq %rcx, RSP(%rsp)
 
 	/* regs go into 4th parameter */
@@ -228,10 +239,6 @@ GLOBAL(ftrace_regs_caller_op_ptr)
 GLOBAL(ftrace_regs_call)
 	call ftrace_stub
 
-	/* Copy flags back to SS, to restore them */
-	movq EFLAGS(%rsp), %rax
-	movq %rax, MCOUNT_REG_SIZE(%rsp)
-
 	/* Handlers can change the RIP */
 	movq RIP(%rsp), %rax
 	movq %rax, MCOUNT_REG_SIZE+8(%rsp)
@@ -244,10 +251,7 @@ GLOBAL(ftrace_regs_call)
 	movq R10(%rsp), %r10
 	movq RBX(%rsp), %rbx
 
-	restore_mcount_regs
-
-	/* Restore flags */
-	popfq
+	restore_mcount_regs restore_flags=1
 
 	/*
 	 * As this jmp to ftrace_epilogue can be a short jump
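As an aside, the end-of-stack check that the thread revolves around can be
pictured with a minimal sketch. This is illustrative C only, not the actual
arch/x86/kernel/unwind_frame.c implementation; the names unwind_state_sketch,
is_last_frame, FRAME_HEADER_SIZE and pt_regs_base are placeholders invented
for this example, under the assumption (per the discussion above) that the
frame-pointer unwinder compares the current frame pointer against one fixed
offset below the pt_regs block at the top of the task stack.

#include <stdbool.h>

/* Simplified stand-in for the unwinder's walk state. */
struct unwind_state_sketch {
	unsigned long *bp;	/* frame pointer currently being followed */
};

/* A frame header is the saved rbp plus the return rip: 16 bytes on x86-64. */
#define FRAME_HEADER_SIZE	(2 * sizeof(long))

/*
 * The unwinder treats a frame as the last one only if it sits immediately
 * below the pt_regs that the syscall entry code pushed, i.e. at a fixed
 * offset from the top of the stack (pt_regs_base stands in for the task's
 * pt_regs pointer).
 */
static bool is_last_frame(struct unwind_state_sketch *state, void *pt_regs_base)
{
	return (unsigned long)state->bp ==
	       (unsigned long)pt_regs_base - FRAME_HEADER_SIZE;
}

In the dump above, the expected location is 0xffff8800bda0ff48, but the fake
frames built by save_mcount_regs leave the last frame pointer at a different
offset, so a check of this kind fails and the warning is printed. The patch
reshuffles the fake frames so that the last one lands where the unwinder
already looks, which is why no new special case is needed on the unwinder
side.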