linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Masami Hiramatsu <mhiramat@kernel.org>
To: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Daniel Xu <dxu@dxuuu.xyz>, Steven Rostedt <rostedt@goodmis.org>,
	Ingo Molnar <mingo@kernel.org>, X86 ML <x86@kernel.org>,
	linux-kernel@vger.kernel.org, bpf@vger.kernel.org,
	kuba@kernel.org, mingo@redhat.com, ast@kernel.org,
	tglx@linutronix.de, kernel-team@fb.com, yhs@fb.com
Subject: Re: [PATCH -tip 0/5] kprobes: Fix stacktrace in kretprobes
Date: Thu, 11 Mar 2021 00:55:09 +0900	[thread overview]
Message-ID: <20210311005509.0a1a65df0d2d6c7da73a9288@kernel.org> (raw)
In-Reply-To: <20210310150845.7kctaox34yrfyjxt@treble>

Hi Josh and Daniel,

On Wed, 10 Mar 2021 09:08:45 -0600
Josh Poimboeuf <jpoimboe@redhat.com> wrote:

> On Wed, Mar 10, 2021 at 06:57:34PM +0900, Masami Hiramatsu wrote:
> > > If I understand correctly, for #1 you need an unwind hint which treats
> > > the instruction *after* the "pushq %rsp" as the beginning of the
> > > function.
> > 
> > Thanks for the patch. In that case, should I still change the stack allocation?
> > Or can I continue to use a series of "push/pop" ?
> 
> You can continue to use push/pop.  Objtool is only getting confused by
> the unbalanced stack of the function (more pushes than pops).  The
> unwind hint should fix that.

With you patch, I made a fix for ORC unwinder. I confirmed it works with
2 multiple kretprobes on the call path like this ;

cd /sys/kernel/debug/tracing/
echo r vfs_read >> kprobe_events
echo r full_proxy_read >> kprobe_events
echo traceoff:1 > events/kprobes/r_vfs_read_0/trigger
echo stacktrace:1 > events/kprobes/r_full_proxy_read_0/trigger
echo 1 > events/kprobes/enable
echo 1 > options/sym-offset
cat /sys/kernel/debug/kprobes/list
echo 0 > events/kprobes/enable
cat trace

# tracer: nop
#
# entries-in-buffer/entries-written: 3/3   #P:8
#
#                                _-----=> irqs-off
#                               / _----=> need-resched
#                              | / _---=> hardirq/softirq
#                              || / _--=> preempt-depth
#                              ||| /     delay
#           TASK-PID     CPU#  ||||   TIMESTAMP  FUNCTION
#              | |         |   ||||      |         |
           <...>-136     [004] ...1   648.481281: r_full_proxy_read_0: (vfs_read+0xab/0x1a0 <- full_proxy_read)
           <...>-136     [004] ...1   648.481310: <stack trace>
 => kretprobe_trace_func+0x209/0x2f0
 => kretprobe_dispatcher+0x4a/0x70
 => __kretprobe_trampoline_handler+0xcd/0x170
 => trampoline_handler+0x3d/0x50
 => kretprobe_trampoline+0x25/0x50
 => vfs_read+0xab/0x1a0
 => ksys_read+0x5f/0xe0
 => do_syscall_64+0x33/0x40
 => entry_SYSCALL_64_after_hwframe+0x44/0xae
 => 0
 => 0
 => 0
 => 0
 => 0
 => 0
 => 0

I didn't tested it with bpftrace, but this also handles 
regs->ip == kretprobe_trampoline case. So it should work.

commit aa452d999b524b1851f69cc947be3e1a2f3ca1ec
Author: Masami Hiramatsu <mhiramat@kernel.org>
Date:   Sat Mar 6 08:34:51 2021 +0900

    x86/unwind/orc: Fixup kretprobe trampoline entry
    
    Since the kretprobe replaces the function return address with
    the kretprobe_trampoline on the stack, the ORC unwinder can not
    continue the stack unwinding at that point.
    
    To fix this issue, correct state->ip as like as function-graph
    tracer in the unwind_next_frame().
    
    Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>

diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 70fc159ebe69..ab5e45b848d5 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -4,6 +4,7 @@
 
 #include <linux/sched.h>
 #include <linux/ftrace.h>
+#include <linux/llist.h>
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
 
@@ -20,6 +21,9 @@ struct unwind_state {
 	bool signal, full_regs;
 	unsigned long sp, bp, ip;
 	struct pt_regs *regs, *prev_regs;
+#if defined(CONFIG_KRETPROBES)
+	struct llist_node *kr_iter;
+#endif
 #elif defined(CONFIG_UNWINDER_FRAME_POINTER)
 	bool got_irq;
 	unsigned long *bp, *orig_sp, ip;
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 2a1d47f47eee..94869516cfc0 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -2,6 +2,7 @@
 #include <linux/objtool.h>
 #include <linux/module.h>
 #include <linux/sort.h>
+#include <linux/kprobes.h>
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
 #include <asm/unwind.h>
@@ -414,6 +415,30 @@ static bool get_reg(struct unwind_state *state, unsigned int reg_off,
 	return false;
 }
 
+#ifdef CONFIG_KRETPROBES
+static unsigned long orc_kretprobe_correct_ip(struct unwind_state *state)
+{
+	return kretprobe_find_ret_addr(
+			(unsigned long)kretprobe_trampoline_addr(),
+			state->task, &state->kr_iter);
+}
+
+static bool is_kretprobe_trampoline_address(unsigned long ip)
+{
+	return ip == (unsigned long)kretprobe_trampoline_addr();
+}
+#else
+static unsigned long orc_kretprobe_correct_ip(struct unwind_state *state)
+{
+	return state->ip;
+}
+
+static bool is_kretprobe_trampoline_address(unsigned long ip)
+{
+	return false;
+}
+#endif
+
 bool unwind_next_frame(struct unwind_state *state)
 {
 	unsigned long ip_p, sp, tmp, orig_ip = state->ip, prev_sp = state->sp;
@@ -536,6 +561,18 @@ bool unwind_next_frame(struct unwind_state *state)
 
 		state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
 						  state->ip, (void *)ip_p);
+		/*
+		 * There are special cases when the stack unwinder is called
+		 * from the kretprobe handler or the interrupt handler which
+		 * occurs in the kretprobe trampoline code. In those cases,
+		 * %sp is shown on the stack instead of the return address.
+		 * Or, when the unwinder find the return address is replaced
+		 * by kretprobe_trampoline.
+		 * In those cases, correct address can be found in kretprobe.
+		 */
+		if (state->ip == sp ||
+		    is_kretprobe_trampoline_address(state->ip))
+			state->ip = orc_kretprobe_correct_ip(state);
 
 		state->sp = sp;
 		state->regs = NULL;
@@ -649,6 +686,12 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 		state->full_regs = true;
 		state->signal = true;
 
+		/*
+		 * When the unwinder called with regs from kretprobe handler,
+		 * the regs->ip starts from kretprobe_trampoline address.
+		 */
+		if (is_kretprobe_trampoline_address(state->ip))
+			state->ip = orc_kretprobe_correct_ip(state);
 	} else if (task == current) {
 		asm volatile("lea (%%rip), %0\n\t"
 			     "mov %%rsp, %1\n\t"
-- 
Masami Hiramatsu <mhiramat@kernel.org>

  reply	other threads:[~2021-03-10 15:55 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-05 15:38 [PATCH -tip 0/5] kprobes: Fix stacktrace in kretprobes Masami Hiramatsu
2021-03-05 15:39 ` [PATCH -tip 1/5] ia64: kprobes: Fix to pass correct trampoline address to the handler Masami Hiramatsu
2021-03-05 15:39 ` [PATCH -tip 2/5] kprobes: treewide: Replace arch_deref_entry_point() with dereference_function_descriptor() Masami Hiramatsu
2021-03-05 15:39 ` [PATCH -tip 3/5] kprobes: treewide: Remove trampoline_address from kretprobe_trampoline_handler() Masami Hiramatsu
2021-03-10 14:21   ` Miroslav Benes
2021-03-10 15:42     ` Masami Hiramatsu
2021-03-11 13:37       ` Masami Hiramatsu
2021-03-05 15:39 ` [PATCH -tip 4/5] kprobes: stacktrace: Recover the address changed by kretprobe Masami Hiramatsu
2021-03-05 15:39 ` [PATCH -tip 5/5] tracing: Remove kretprobe unknown indicator from stacktrace Masami Hiramatsu
2021-03-05 15:44   ` Steven Rostedt
2021-03-05 19:16 ` [PATCH -tip 0/5] kprobes: Fix stacktrace in kretprobes Daniel Xu
2021-03-06  1:13   ` Masami Hiramatsu
2021-03-07 21:23     ` Daniel Xu
2021-03-08  2:52       ` Masami Hiramatsu
2021-03-08 13:05         ` Masami Hiramatsu
2021-03-09  1:19         ` Josh Poimboeuf
2021-03-10  9:57           ` Masami Hiramatsu
2021-03-10 15:08             ` Josh Poimboeuf
2021-03-10 15:55               ` Masami Hiramatsu [this message]
2021-03-10 18:31                 ` Josh Poimboeuf
2021-03-11  0:20                   ` Masami Hiramatsu
2021-03-11  1:06                     ` Josh Poimboeuf
2021-03-11  1:54                       ` Masami Hiramatsu
2021-03-11 16:51                         ` Josh Poimboeuf
2021-03-12  3:22                           ` Masami Hiramatsu
2021-03-10 22:46                 ` Daniel Xu
2021-03-09 21:34         ` Daniel Xu
2021-03-10 10:50           ` Masami Hiramatsu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210311005509.0a1a65df0d2d6c7da73a9288@kernel.org \
    --to=mhiramat@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=dxu@dxuuu.xyz \
    --cc=jpoimboe@redhat.com \
    --cc=kernel-team@fb.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=mingo@redhat.com \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).