stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "Holger Hoffstätte" <holger@applied-asynchrony.com>,
	"Qi Zheng" <zhengqi.arch@bytedance.com>,
	"Kees Cook" <keescook@chromium.org>,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Justin Forbes" <jmforbes@linuxtx.org>,
	"Greg Kroah-Hartman" <gregkh@linuxfoundation.org>,
	"Linux Kernel Mailing List" <linux-kernel@vger.kernel.org>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	"Guenter Roeck" <linux@roeck-us.net>,
	"Shuah Khan" <shuah@kernel.org>,
	patches@kernelci.org, lkft-triage@lists.linaro.org,
	"Pavel Machek" <pavel@denx.de>,
	"Jon Hunter" <jonathanh@nvidia.com>,
	"Florian Fainelli" <f.fainelli@gmail.com>,
	stable <stable@vger.kernel.org>,
	"Josh Poimboeuf" <jpoimboe@redhat.com>
Subject: Re: [PATCH 5.15 000/923] 5.15.3-rc3 review
Date: Thu, 18 Nov 2021 13:11:09 +0100	[thread overview]
Message-ID: <YZZC3Shc0XA/gHK9@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <YZYfYOcqNqOyZ8Yo@hirez.programming.kicks-ass.net>

On Thu, Nov 18, 2021 at 10:39:44AM +0100, Peter Zijlstra wrote:
> On Thu, Nov 18, 2021 at 09:18:52AM +0100, Peter Zijlstra wrote:
> > On Thu, Nov 18, 2021 at 09:06:27AM +0100, Peter Zijlstra wrote:
> > > On Wed, Nov 17, 2021 at 03:50:17PM -0800, Linus Torvalds wrote:
> > > 
> > > > I really don't think the WCHAN code should use unwinders at all. It's
> > > > too damn fragile, and it's too easily triggered from user space.
> > > 
> > > On x86, esp. with ORC, it pretty much has to. The thing is, the ORC
> > > unwinder has been very stable so far. I'm guessing there's some really
> > > stupid thing going on, like for example trying to unwind a freed stack.
> > > 
> > > I *just* managed to reproduce, so let me go have a poke.
> > 
> > Confirmed, with the below it no longer reproduces. Now, let me go undo
> > that and fix the unwinder to not explode while trying to unwind nothing.
> 
> OK, so the bug is firmly with 5d1ceb3969b6 ("x86: Fix __get_wchan() for
> !STACKTRACE") which lost the try_get_task_stack() that stack_trace_*()
> does.
> 
> We can ofc trivially re-instate that, but I'm now running with the
> below which I suppose is a better fix, hmm?
> 
> (obv I still need to look at the other two unwinders)

I now have the below; the only thing missing is that there's a
user_mode() call on stack-based regs. Now on x86_64 we can
__get_kernel_nofault() regs->cs and call it a day, but on i386 we have
to also fetch regs->flags.

Is this really the way to go?

The thing is, we're already very careful about making sure the addresses
are within the stack range before touching them; it's just that when we
free the task stack we end up with trivially dodgy state.

---
 arch/x86/kernel/unwind_frame.c | 31 ++++++++++++++++++------
 arch/x86/kernel/unwind_guess.c | 18 ++++++++++++--
 arch/x86/kernel/unwind_orc.c   | 54 +++++++++++++++++++++++++++++++++---------
 3 files changed, 83 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 8e1c50c86e5d..0c5cbc8ee300 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -215,10 +215,11 @@ static bool update_stack_state(struct unwind_state *state,
 	 * that info->next_sp could point to an empty stack and the next bp
 	 * could be on a subsequent stack.
 	 */
-	while (!on_stack(info, frame, len))
+	while (!on_stack(info, frame, len)) {
 		if (get_stack_info(info->next_sp, state->task, info,
 				   &state->stack_mask))
 			return false;
+	}
 
 	/* Make sure it only unwinds up and doesn't overlap the prev frame: */
 	if (state->orig_sp && state->stack_info.type == prev_type &&
@@ -235,11 +236,16 @@ static bool update_stack_state(struct unwind_state *state,
 	}
 
 	/* Save the return address: */
-	if (state->regs && user_mode(state->regs))
+	// regs deref
+	if (state->regs && user_mode(state->regs)) {
 		state->ip = 0;
-	else {
+	} else {
 		addr_p = unwind_get_return_address_ptr(state);
-		addr = READ_ONCE_TASK_STACK(state->task, *addr_p);
+
+		pagefault_disable();
+		__get_kernel_nofault(&addr, addr_p, unsigned long, Efault);
+		pagefault_enable();
+
 		state->ip = unwind_recover_ret_addr(state, addr, addr_p);
 	}
 
@@ -248,6 +254,10 @@ static bool update_stack_state(struct unwind_state *state,
 		state->orig_sp = frame;
 
 	return true;
+
+Efault:
+	pagefault_enable();
+	return false;
 }
 
 bool unwind_next_frame(struct unwind_state *state)
@@ -259,6 +269,7 @@ bool unwind_next_frame(struct unwind_state *state)
 		return false;
 
 	/* Have we reached the end? */
+	// regs deref
 	if (state->regs && user_mode(state->regs))
 		goto the_end;
 
@@ -295,9 +306,13 @@ bool unwind_next_frame(struct unwind_state *state)
 		next_bp = state->next_bp;
 		state->next_bp = NULL;
 	} else if (state->regs) {
-		next_bp = (unsigned long *)state->regs->bp;
+		pagefault_disable();
+		__get_kernel_nofault(&next_bp, &state->regs->bp, unsigned long, Efault);
+		pagefault_enable();
 	} else {
-		next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task, *state->bp);
+		pagefault_disable();
+		__get_kernel_nofault(&next_bp, state->bp, unsigned long, Efault);
+		pagefault_enable();
 	}
 
 	/* Move to the next frame if it's safe: */
@@ -306,6 +321,8 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	return true;
 
+Efault:
+	pagefault_enable();
 bad_address:
 	state->error = true;
 
@@ -402,7 +419,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 	 */
 	while (!unwind_done(state) &&
 	       (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
-			(state->next_bp == NULL && state->bp < first_frame)))
+		(state->next_bp == NULL && state->bp < first_frame)))
 		unwind_next_frame(state);
 }
 EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c
index 884d68a6e714..22153d91e868 100644
--- a/arch/x86/kernel/unwind_guess.c
+++ b/arch/x86/kernel/unwind_guess.c
@@ -13,9 +13,15 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
 	if (unwind_done(state))
 		return 0;
 
-	addr = READ_ONCE_NOCHECK(*state->sp);
+	pagefault_disable();
+	__get_kernel_nofault(&addr, state->sp, unsigned long, Efault);
+	pagefault_enable();
 
 	return unwind_recover_ret_addr(state, addr, state->sp);
+
+Efault:
+	pagefault_enable();
+	return 0;
 }
 EXPORT_SYMBOL_GPL(unwind_get_return_address);
 
@@ -33,7 +39,11 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	do {
 		for (state->sp++; state->sp < info->end; state->sp++) {
-			unsigned long addr = READ_ONCE_NOCHECK(*state->sp);
+			unsigned long addr;
+
+			pagefault_disable();
+			__get_kernel_nofault(&addr, state->sp, unsigned long, Efault);
+			pagefault_enable();
 
 			if (__kernel_text_address(addr))
 				return true;
@@ -45,6 +55,10 @@ bool unwind_next_frame(struct unwind_state *state)
 				 &state->stack_mask));
 
 	return false;
+
+Efault:
+	pagefault_enable();
+	return false;
 }
 EXPORT_SYMBOL_GPL(unwind_next_frame);
 
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index e6f7592790af..b0b5ac530450 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -352,8 +352,14 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
 	if (!stack_access_ok(state, addr, sizeof(long)))
 		return false;
 
-	*val = READ_ONCE_NOCHECK(*(unsigned long *)addr);
+	pagefault_disable();
+	__get_kernel_nofault(val, addr, unsigned long, Efault);
+	pagefault_enable();
 	return true;
+
+Efault:
+	pagefault_enable();
+	return false;
 }
 
 static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
@@ -367,9 +373,16 @@ static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
 	if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
 		return false;
 
-	*ip = READ_ONCE_NOCHECK(regs->ip);
-	*sp = READ_ONCE_NOCHECK(regs->sp);
+	pagefault_disable();
+	__get_kernel_nofault(ip, &regs->ip, unsigned long, Efault);
+	__get_kernel_nofault(sp, &regs->sp, unsigned long, Efault);
+	pagefault_enable();
+
 	return true;
+
+Efault:
+	pagefault_enable();
+	return false;
 }
 
 static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
@@ -380,9 +393,16 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
 	if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
 		return false;
 
-	*ip = READ_ONCE_NOCHECK(regs->ip);
-	*sp = READ_ONCE_NOCHECK(regs->sp);
+	pagefault_disable();
+	__get_kernel_nofault(ip, &regs->ip, unsigned long, Efault);
+	__get_kernel_nofault(sp, &regs->sp, unsigned long, Efault);
+	pagefault_enable();
+
 	return true;
+
+Efault:
+	pagefault_enable();
+	return false;
 }
 
 /*
@@ -396,22 +416,27 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
 static bool get_reg(struct unwind_state *state, unsigned int reg_off,
 		    unsigned long *val)
 {
-	unsigned int reg = reg_off/8;
-
 	if (!state->regs)
 		return false;
 
+	pagefault_disable();
 	if (state->full_regs) {
-		*val = READ_ONCE_NOCHECK(((unsigned long *)state->regs)[reg]);
+		__get_kernel_nofault(val, (void *)state->regs + reg_off, unsigned long, Efault);
+		pagefault_enable();
 		return true;
 	}
 
 	if (state->prev_regs) {
-		*val = READ_ONCE_NOCHECK(((unsigned long *)state->prev_regs)[reg]);
+		__get_kernel_nofault(val, (void *)state->prev_regs + reg_off, unsigned long, Efault);
+		pagefault_enable();
 		return true;
 	}
 
 	return false;
+
+Efault:
+	pagefault_enable();
+	return false;
 }
 
 bool unwind_next_frame(struct unwind_state *state)
@@ -428,6 +453,7 @@ bool unwind_next_frame(struct unwind_state *state)
 	preempt_disable();
 
 	/* End-of-stack check for user tasks: */
+	// regs deref
 	if (state->regs && user_mode(state->regs))
 		goto the_end;
 
@@ -673,8 +699,12 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 		struct inactive_task_frame *frame = (void *)task->thread.sp;
 
 		state->sp = task->thread.sp + sizeof(*frame);
-		state->bp = READ_ONCE_NOCHECK(frame->bp);
-		state->ip = READ_ONCE_NOCHECK(frame->ret_addr);
+
+		pagefault_disable();
+		__get_kernel_nofault(&state->bp, &frame->bp, unsigned long, Efault);
+		__get_kernel_nofault(&state->ip, &frame->ret_addr, unsigned long, Efault);
+		pagefault_enable();
+
 		state->signal = (void *)state->ip == ret_from_fork;
 	}
 
@@ -713,6 +743,8 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 
 	return;
 
+Efault:
+	pagefault_enable();
 err:
 	state->error = true;
 the_end:

  parent reply	other threads:[~2021-11-18 12:12 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-17 10:19 [PATCH 5.15 000/923] 5.15.3-rc3 review Greg Kroah-Hartman
2021-11-17 13:37 ` Fox Chen
2021-11-17 14:13 ` Guenter Roeck
2021-11-17 14:43   ` Greg Kroah-Hartman
2021-11-17 14:54     ` Guenter Roeck
2021-11-17 17:34 ` Jon Hunter
2021-11-17 18:51 ` Florian Fainelli
2021-11-17 20:25 ` Holger Kiehl
2021-11-18  8:14   ` Greg Kroah-Hartman
2021-11-18 14:08     ` Holger Kiehl
2021-11-18 17:08       ` Greg Kroah-Hartman
2021-11-17 20:35 ` Guenter Roeck
2021-11-17 21:32 ` Justin Forbes
2021-11-17 23:32   ` Holger Hoffstätte
2021-11-17 23:50     ` Linus Torvalds
2021-11-18  0:16       ` Kees Cook
2021-11-18  6:26         ` Guenter Roeck
2021-11-18  8:14           ` Greg Kroah-Hartman
2021-11-18  8:12         ` Greg Kroah-Hartman
2021-11-18 17:17           ` Kees Cook
2021-11-18  8:06       ` Peter Zijlstra
2021-11-18  8:18         ` Peter Zijlstra
2021-11-18  9:39           ` Peter Zijlstra
2021-11-18 10:12             ` Peter Zijlstra
2021-11-18 12:11             ` Peter Zijlstra [this message]
2021-11-19  2:04               ` Josh Poimboeuf
2021-11-19  9:29                 ` [PATCH] x86: Pin task-stack in __get_wchan() Peter Zijlstra
2021-11-19 10:02                   ` Qi Zheng
2021-11-19 10:22                     ` Peter Zijlstra
2021-11-19 10:26                       ` Qi Zheng
2021-11-19 18:16                   ` Linus Torvalds
2021-11-19 18:35                   ` Josh Poimboeuf
2021-11-22  9:32                     ` Peter Zijlstra
2021-11-22 16:14                       ` Josh Poimboeuf
2021-11-18  5:45 ` [PATCH 5.15 000/923] 5.15.3-rc3 review Naresh Kamboju

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YZZC3Shc0XA/gHK9@hirez.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=akpm@linux-foundation.org \
    --cc=f.fainelli@gmail.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=holger@applied-asynchrony.com \
    --cc=jmforbes@linuxtx.org \
    --cc=jonathanh@nvidia.com \
    --cc=jpoimboe@redhat.com \
    --cc=keescook@chromium.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux@roeck-us.net \
    --cc=lkft-triage@lists.linaro.org \
    --cc=patches@kernelci.org \
    --cc=pavel@denx.de \
    --cc=shuah@kernel.org \
    --cc=stable@vger.kernel.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=zhengqi.arch@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).