From: Masami Hiramatsu <mhiramat@kernel.org>
To: Steven Rostedt <rostedt@goodmis.org>
Cc: "Daniel Xu" <dxu@dxuuu.xyz>,
linux-kernel@vger.kernel.org,
"bpf@vger.kernel.org" <bpf@vger.kernel.org>,
kuba@kernel.org
Subject: Re: Broken kretprobe stack traces
Date: Thu, 4 Mar 2021 22:19:47 +0900 [thread overview]
Message-ID: <20210304221947.5a177ce2e1e94314e57c38a4@kernel.org> (raw)
In-Reply-To: <20210303092604.59aea82c@gandalf.local.home>
On Wed, 3 Mar 2021 09:26:04 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:
> On Wed, 3 Mar 2021 13:48:28 +0900
> Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> >
> > >
> > > I think (can't prove) this used to work:
>
> Would be good to find out if it did.
>
> >
> > I'm not sure the bpftrace had correctly handled it or not.
> >
> > >
> > > # bpftrace -e 'kretprobe:__tcp_retransmit_skb { @[kstack()] = count() }'
> > > Attaching 1 probe...
> > > ^C
> > >
> > > @[
> > > kretprobe_trampoline+0
> > > ]: 1
> >
> > Would you know how the bpftrace stacktracer rewinds the stack entries?
> > FYI, ftrace does it in trace_seq_print_sym()@kernel/trace/trace_output.c
> >
>
> The difference between trace events and normal function tracing stack
> traces is that it keeps its original return address. But kretprobes (and
> function graph tracing, and some bpf trampolines too) modify the return
> pointer, and that could possibly cause havoc with the stack trace.
BTW, I think that if the stack tracer passes either the index of the
kretprobe_trampoline entry (i.e. the n-th one it has seen) or a cursor,
kretprobes can find the correct return address for the given task.
I've made a patch that does this, but only for CONFIG_ARCH_STACKWALK=y.
Here is an example on x86.
# echo r vfs_read > kprobe_events
# echo stacktrace > events/kprobes/r_vfs_read_0/trigger
# echo 1 > events/kprobes/r_vfs_read_0/enable
# echo 1 > options/sym-offset
# less trace
...
sh-132 [007] ...1 22.524917: <stack trace>
=> kretprobe_dispatcher+0x7d/0xc0
=> __kretprobe_trampoline_handler+0xdb/0x1b0
=> trampoline_handler+0x48/0x60
=> kretprobe_trampoline+0x2a/0x50
=> ksys_read+0x70/0xf0
=> __x64_sys_read+0x1a/0x20
=> do_syscall_64+0x38/0x50
=> entry_SYSCALL_64_after_hwframe+0x44/0xae
=> 0
=> 0
------
From 77a785a3a0791171b570830d0b2f099f8a4ba337 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 4 Mar 2021 14:19:24 +0900
Subject: [PATCH] kprobes: stacktrace: Recover the address changed by kretprobe
Recover the return address on the stack which was changed by the
kretprobe.
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
---
include/linux/kprobes.h | 3 ++
kernel/kprobes.c | 81 +++++++++++++++++++++++++++--------------
kernel/stacktrace.c | 26 +++++++++++++
3 files changed, 82 insertions(+), 28 deletions(-)
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 1883a4a9f16a..a022e507d829 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -205,6 +205,9 @@ extern void arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs);
extern int arch_trampoline_kprobe(struct kprobe *p);
+unsigned long kretprobe_real_stack_tsk(struct task_struct *tsk,
+ unsigned long addr,
+ struct llist_node **cur);
/* If the trampoline handler called from a kprobe, use this version */
unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
void *trampoline_address,
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 745f08fdd7a6..b3d9dbd6086f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1863,46 +1863,56 @@ unsigned long __weak arch_deref_entry_point(void *entry)
#ifdef CONFIG_KRETPROBES
-unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
- void *trampoline_address,
- void *frame_pointer)
+/* This assumes tsk is either current or a task that is not running. */
+unsigned long kretprobe_real_stack_tsk(struct task_struct *tsk,
+ unsigned long addr,
+ struct llist_node **cur)
{
- kprobe_opcode_t *correct_ret_addr = NULL;
struct kretprobe_instance *ri = NULL;
- struct llist_node *first, *node;
- struct kretprobe *rp;
+ struct llist_node *node = *cur;
- /* Find all nodes for this frame. */
- first = node = current->kretprobe_instances.first;
- while (node) {
- ri = container_of(node, struct kretprobe_instance, llist);
+ if (addr != (unsigned long)&kretprobe_trampoline)
+ return addr;
- BUG_ON(ri->fp != frame_pointer);
+ if (!node)
+ node = tsk->kretprobe_instances.first;
+ else
+ node = node->next;
- if (ri->ret_addr != trampoline_address) {
- correct_ret_addr = ri->ret_addr;
- /*
- * This is the real return address. Any other
- * instances associated with this task are for
- * other calls deeper on the call stack
- */
- goto found;
+ while (node) {
+ ri = container_of(node, struct kretprobe_instance, llist);
+ if (ri->ret_addr != (void *)&kretprobe_trampoline) {
+ *cur = node;
+ return (unsigned long)ri->ret_addr;
}
-
node = node->next;
}
- pr_err("Oops! Kretprobe fails to find correct return address.\n");
- BUG_ON(1);
+ return 0;
+}
-found:
- /* Unlink all nodes for this frame. */
- current->kretprobe_instances.first = node->next;
- node->next = NULL;
+unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
+ void *trampoline_address,
+ void *frame_pointer)
+{
+ kprobe_opcode_t *correct_ret_addr = NULL;
+ struct kretprobe_instance *ri = NULL;
+ struct llist_node *first, *node = NULL;
+ struct kretprobe *rp;
- /* Run them.. */
+ /* Find correct address and all nodes for this frame. */
+ correct_ret_addr = (void*)kretprobe_real_stack_tsk(current,
+ (unsigned long)&kretprobe_trampoline, &node);
+ if (!correct_ret_addr) {
+ pr_err("Oops! Kretprobe fails to find correct return address.\n");
+ BUG_ON(1);
+ }
+
+ /* Run them. */
+ first = current->kretprobe_instances.first;
while (first) {
ri = container_of(first, struct kretprobe_instance, llist);
- first = first->next;
+
+ BUG_ON(ri->fp != frame_pointer);
rp = get_kretprobe(ri);
if (rp && rp->handler) {
@@ -1913,6 +1923,21 @@ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
rp->handler(ri, regs);
__this_cpu_write(current_kprobe, prev);
}
+ if (first == node)
+ break;
+
+ first = first->next;
+ }
+
+ /* Unlink all nodes for this frame. */
+ first = current->kretprobe_instances.first;
+ current->kretprobe_instances.first = node->next;
+ node->next = NULL;
+
+ /* Recycle them. */
+ while (first) {
+ ri = container_of(first, struct kretprobe_instance, llist);
+ first = first->next;
recycle_rp_inst(ri);
}
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index 9f8117c7cfdd..416f357e64b8 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -13,6 +13,7 @@
#include <linux/export.h>
#include <linux/kallsyms.h>
#include <linux/stacktrace.h>
+#include <linux/kprobes.h>
/**
* stack_trace_print - Print the entries in the stack trace
@@ -76,6 +77,10 @@ struct stacktrace_cookie {
unsigned int size;
unsigned int skip;
unsigned int len;
+#ifdef CONFIG_KRETPROBES
+ struct llist_node *cur;
+ struct task_struct *tsk;
+#endif
};
static bool stack_trace_consume_entry(void *cookie, unsigned long addr)
@@ -89,6 +94,7 @@ static bool stack_trace_consume_entry(void *cookie, unsigned long addr)
c->skip--;
return true;
}
+ addr = kretprobe_real_stack_tsk(c->tsk, addr, &c->cur);
c->store[c->len++] = addr;
return c->len < c->size;
}
@@ -116,6 +122,10 @@ unsigned int stack_trace_save(unsigned long *store, unsigned int size,
.store = store,
.size = size,
.skip = skipnr + 1,
+#ifdef CONFIG_KRETPROBES
+ .cur = NULL,
+ .tsk = current,
+#endif
};
arch_stack_walk(consume_entry, &c, current, NULL);
@@ -141,6 +151,10 @@ unsigned int stack_trace_save_tsk(struct task_struct *tsk, unsigned long *store,
.size = size,
/* skip this function if they are tracing us */
.skip = skipnr + (current == tsk),
+#ifdef CONFIG_KRETPROBES
+ .cur = NULL,
+ .tsk = tsk,
+#endif
};
if (!try_get_task_stack(tsk))
@@ -168,6 +182,10 @@ unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store,
.store = store,
.size = size,
.skip = skipnr,
+#ifdef CONFIG_KRETPROBES
+ .cur = NULL,
+ .tsk = current,
+#endif
};
arch_stack_walk(consume_entry, &c, current, regs);
@@ -194,6 +212,10 @@ int stack_trace_save_tsk_reliable(struct task_struct *tsk, unsigned long *store,
struct stacktrace_cookie c = {
.store = store,
.size = size,
+#ifdef CONFIG_KRETPROBES
+ .cur = NULL,
+ .tsk = tsk,
+#endif
};
int ret;
@@ -224,6 +246,10 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size)
struct stacktrace_cookie c = {
.store = store,
.size = size,
+#ifdef CONFIG_KRETPROBES
+ .cur = NULL,
+ .tsk = current,
+#endif
};
mm_segment_t fs;
--
2.25.1
Thank you,
--
Masami Hiramatsu <mhiramat@kernel.org>
next prev parent reply other threads:[~2021-03-04 13:21 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-03-03 1:15 Broken kretprobe stack traces Daniel Xu
2021-03-03 4:48 ` Masami Hiramatsu
2021-03-03 14:26 ` Steven Rostedt
2021-03-03 19:58 ` Daniel Xu
2021-03-03 20:13 ` Daniel Xu
2021-03-03 20:37 ` Steven Rostedt
2021-03-04 2:18 ` Daniel Xu
2021-03-04 19:04 ` Daniel Xu
2021-03-04 13:19 ` Masami Hiramatsu [this message]
2021-03-04 15:22 ` [PATCH] kprobes: stacktrace: Recover the address changed by kretprobe kernel test robot
2021-03-04 17:37 ` kernel test robot
2021-03-04 20:25 ` kernel test robot
2021-03-03 19:57 ` Broken kretprobe stack traces Daniel Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210304221947.5a177ce2e1e94314e57c38a4@kernel.org \
--to=mhiramat@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=dxu@dxuuu.xyz \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=rostedt@goodmis.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).