bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Masami Hiramatsu <mhiramat@kernel.org>
To: Steven Rostedt <rostedt@goodmis.org>
Cc: "Daniel Xu" <dxu@dxuuu.xyz>,
	linux-kernel@vger.kernel.org,
	"bpf@vger.kernel.org" <bpf@vger.kernel.org>,
	kuba@kernel.org
Subject: Re: Broken kretprobe stack traces
Date: Thu, 4 Mar 2021 22:19:47 +0900	[thread overview]
Message-ID: <20210304221947.5a177ce2e1e94314e57c38a4@kernel.org> (raw)
In-Reply-To: <20210303092604.59aea82c@gandalf.local.home>

On Wed, 3 Mar 2021 09:26:04 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Wed, 3 Mar 2021 13:48:28 +0900
> Masami Hiramatsu <mhiramat@kernel.org> wrote:
> 
> > 
> > > 
> > > I think (can't prove) this used to work:  
> 
> Would be good to find out if it did.
> 
> > 
> > I'm not sure the bpftrace had correctly handled it or not.
> > 
> > > 
> > >     # bpftrace -e 'kretprobe:__tcp_retransmit_skb { @[kstack()] = count() }'
> > >     Attaching 1 probe...
> > >     ^C
> > > 
> > >     @[
> > >         kretprobe_trampoline+0
> > >     ]: 1  
> > 
> > Would you know how the bpftrace stacktracer rewinds the stack entries?
> > FYI, ftrace does it in trace_seq_print_sym()@kernel/trace/trace_output.c
> > 
> 
> The difference between trace events and normal function tracing stack
> traces is that it keeps its original return address. But kretprobes (and
> function graph tracing, and some bpf trampolines too) modify the return
> pointer, and that could possibly cause havoc with the stack trace.

BTW, I think if the stack tracer passes the index (the n-th occurrence) of
kretprobe_trampoline or a cursor, kretprobe can find the correct return
address for the given task.

I've made a patch that does this, but only for the CONFIG_ARCH_STACKWALK=y case.

Here is an example on x86. 

 # echo r vfs_read > kprobe_events 
 # echo stacktrace > events/kprobes/r_vfs_read_0/trigger 
 # echo 1 > events/kprobes/r_vfs_read_0/enable 
 # echo 1 > options/sym-offset 
 # less trace 
...

              sh-132     [007] ...1    22.524917: <stack trace>
 => kretprobe_dispatcher+0x7d/0xc0
 => __kretprobe_trampoline_handler+0xdb/0x1b0
 => trampoline_handler+0x48/0x60
 => kretprobe_trampoline+0x2a/0x50
 => ksys_read+0x70/0xf0
 => __x64_sys_read+0x1a/0x20
 => do_syscall_64+0x38/0x50
 => entry_SYSCALL_64_after_hwframe+0x44/0xae
 => 0
 => 0

------

From 77a785a3a0791171b570830d0b2f099f8a4ba337 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 4 Mar 2021 14:19:24 +0900
Subject: [PATCH] kprobes: stacktrace: Recover the address changed by kretprobe

Recover the return address on the stack which was changed by the
kretprobe.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
---
 include/linux/kprobes.h |  3 ++
 kernel/kprobes.c        | 81 +++++++++++++++++++++++++++--------------
 kernel/stacktrace.c     | 26 +++++++++++++
 3 files changed, 82 insertions(+), 28 deletions(-)

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 1883a4a9f16a..a022e507d829 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -205,6 +205,9 @@ extern void arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				   struct pt_regs *regs);
 extern int arch_trampoline_kprobe(struct kprobe *p);
 
+unsigned long kretprobe_real_stack_tsk(struct task_struct *tsk,
+				       unsigned long addr,
+				       struct llist_node **cur);
 /* If the trampoline handler called from a kprobe, use this version */
 unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
 				void *trampoline_address,
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 745f08fdd7a6..b3d9dbd6086f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1863,46 +1863,56 @@ unsigned long __weak arch_deref_entry_point(void *entry)
 
 #ifdef CONFIG_KRETPROBES
 
-unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
-					     void *trampoline_address,
-					     void *frame_pointer)
+/* This assumes tsk is either current or a task that is not running. */
+unsigned long kretprobe_real_stack_tsk(struct task_struct *tsk,
+				       unsigned long addr,
+				       struct llist_node **cur)
 {
-	kprobe_opcode_t *correct_ret_addr = NULL;
 	struct kretprobe_instance *ri = NULL;
-	struct llist_node *first, *node;
-	struct kretprobe *rp;
+	struct llist_node *node = *cur;
 
-	/* Find all nodes for this frame. */
-	first = node = current->kretprobe_instances.first;
-	while (node) {
-		ri = container_of(node, struct kretprobe_instance, llist);
+	if (addr != (unsigned long)&kretprobe_trampoline)
+		return addr;
 
-		BUG_ON(ri->fp != frame_pointer);
+	if (!node)
+		node = tsk->kretprobe_instances.first;
+	else
+		node = node->next;
 
-		if (ri->ret_addr != trampoline_address) {
-			correct_ret_addr = ri->ret_addr;
-			/*
-			 * This is the real return address. Any other
-			 * instances associated with this task are for
-			 * other calls deeper on the call stack
-			 */
-			goto found;
+	while (node) {
+		ri = container_of(node, struct kretprobe_instance, llist);
+		if (ri->ret_addr != (void *)&kretprobe_trampoline) {
+			*cur = node;
+			return (unsigned long)ri->ret_addr;
 		}
-
 		node = node->next;
 	}
-	pr_err("Oops! Kretprobe fails to find correct return address.\n");
-	BUG_ON(1);
+	return 0;
+}
 
-found:
-	/* Unlink all nodes for this frame. */
-	current->kretprobe_instances.first = node->next;
-	node->next = NULL;
+unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
+					     void *trampoline_address,
+					     void *frame_pointer)
+{
+	kprobe_opcode_t *correct_ret_addr = NULL;
+	struct kretprobe_instance *ri = NULL;
+	struct llist_node *first, *node = NULL;
+	struct kretprobe *rp;
 
-	/* Run them..  */
+	/* Find correct address and all nodes for this frame. */
+	correct_ret_addr = (void*)kretprobe_real_stack_tsk(current,
+				(unsigned long)&kretprobe_trampoline, &node);
+	if (!correct_ret_addr) {
+		pr_err("Oops! Kretprobe fails to find correct return address.\n");
+		BUG_ON(1);
+	}
+
+	/* Run them. */
+	first = current->kretprobe_instances.first;
 	while (first) {
 		ri = container_of(first, struct kretprobe_instance, llist);
-		first = first->next;
+
+		BUG_ON(ri->fp != frame_pointer);
 
 		rp = get_kretprobe(ri);
 		if (rp && rp->handler) {
@@ -1913,6 +1923,21 @@ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
 			rp->handler(ri, regs);
 			__this_cpu_write(current_kprobe, prev);
 		}
+		if (first == node)
+			break;
+
+		first = first->next;
+	}
+
+	/* Unlink all nodes for this frame. */
+	first = current->kretprobe_instances.first;
+	current->kretprobe_instances.first = node->next;
+	node->next = NULL;
+
+	/* Recycle them.  */
+	while (first) {
+		ri = container_of(first, struct kretprobe_instance, llist);
+		first = first->next;
 
 		recycle_rp_inst(ri);
 	}
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index 9f8117c7cfdd..416f357e64b8 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -13,6 +13,7 @@
 #include <linux/export.h>
 #include <linux/kallsyms.h>
 #include <linux/stacktrace.h>
+#include <linux/kprobes.h>
 
 /**
  * stack_trace_print - Print the entries in the stack trace
@@ -76,6 +77,10 @@ struct stacktrace_cookie {
 	unsigned int	size;
 	unsigned int	skip;
 	unsigned int	len;
+#ifdef CONFIG_KRETPROBES
+	struct llist_node *cur;
+	struct task_struct *tsk;
+#endif
 };
 
 static bool stack_trace_consume_entry(void *cookie, unsigned long addr)
@@ -89,6 +94,7 @@ static bool stack_trace_consume_entry(void *cookie, unsigned long addr)
 		c->skip--;
 		return true;
 	}
+	addr = kretprobe_real_stack_tsk(c->tsk, addr, &c->cur);
 	c->store[c->len++] = addr;
 	return c->len < c->size;
 }
@@ -116,6 +122,10 @@ unsigned int stack_trace_save(unsigned long *store, unsigned int size,
 		.store	= store,
 		.size	= size,
 		.skip	= skipnr + 1,
+#ifdef CONFIG_KRETPROBES
+		.cur	= NULL,
+		.tsk	= current,
+#endif
 	};
 
 	arch_stack_walk(consume_entry, &c, current, NULL);
@@ -141,6 +151,10 @@ unsigned int stack_trace_save_tsk(struct task_struct *tsk, unsigned long *store,
 		.size	= size,
 		/* skip this function if they are tracing us */
 		.skip	= skipnr + (current == tsk),
+#ifdef CONFIG_KRETPROBES
+		.cur	= NULL,
+		.tsk	= tsk,
+#endif
 	};
 
 	if (!try_get_task_stack(tsk))
@@ -168,6 +182,10 @@ unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store,
 		.store	= store,
 		.size	= size,
 		.skip	= skipnr,
+#ifdef CONFIG_KRETPROBES
+		.cur	= NULL,
+		.tsk	= current,
+#endif
 	};
 
 	arch_stack_walk(consume_entry, &c, current, regs);
@@ -194,6 +212,10 @@ int stack_trace_save_tsk_reliable(struct task_struct *tsk, unsigned long *store,
 	struct stacktrace_cookie c = {
 		.store	= store,
 		.size	= size,
+#ifdef CONFIG_KRETPROBES
+		.cur	= NULL,
+		.tsk	= tsk,
+#endif
 	};
 	int ret;
 
@@ -224,6 +246,10 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size)
 	struct stacktrace_cookie c = {
 		.store	= store,
 		.size	= size,
+#ifdef CONFIG_KRETPROBES
+		.cur	= NULL,
+		.tsk	= current,
+#endif
 	};
 	mm_segment_t fs;
 
-- 
2.25.1




Thank you,

-- 
Masami Hiramatsu <mhiramat@kernel.org>

  parent reply	other threads:[~2021-03-04 13:21 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-03  1:15 Broken kretprobe stack traces Daniel Xu
2021-03-03  4:48 ` Masami Hiramatsu
2021-03-03 14:26   ` Steven Rostedt
2021-03-03 19:58     ` Daniel Xu
2021-03-03 20:13       ` Daniel Xu
2021-03-03 20:37         ` Steven Rostedt
2021-03-04  2:18           ` Daniel Xu
2021-03-04 19:04             ` Daniel Xu
2021-03-04 13:19     ` Masami Hiramatsu [this message]
2021-03-04 15:22       ` [PATCH] kprobes: stacktrace: Recover the address changed by kretprobe kernel test robot
2021-03-04 17:37       ` kernel test robot
2021-03-04 20:25       ` kernel test robot
2021-03-03 19:57   ` Broken kretprobe stack traces Daniel Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210304221947.5a177ce2e1e94314e57c38a4@kernel.org \
    --to=mhiramat@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=dxu@dxuuu.xyz \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).