* [RFC PATCH 1/3] x86/kprobes: Support nested kprobes
2020-05-07 9:59 [RFC PATCH 0/3] kprobes: Support nested kprobes Masami Hiramatsu
@ 2020-05-07 9:59 ` Masami Hiramatsu
2020-05-07 15:45 ` Masami Hiramatsu
2020-05-07 10:00 ` [RFC PATCH 2/3] arm64: kprobes: " Masami Hiramatsu
2020-05-07 10:00 ` [RFC PATCH 3/3] arm: " Masami Hiramatsu
2 siblings, 1 reply; 5+ messages in thread
From: Masami Hiramatsu @ 2020-05-07 9:59 UTC (permalink / raw)
To: LKML
Cc: Steven Rostedt, Ingo Molnar, Arnaldo Carvalho de Melo,
Eelco Chaudron, Yonghong Song, bpf, David S . Miller,
Network Development, Alexei Starovoitov, Daniel Borkmann,
Martin KaFai Lau, Song Liu, Andrii Nakryiko, Catalin Marinas,
Will Deacon, Wang Nan, Russell King, linux-arm-kernel
Make kprobes accept one level of nesting instead of
missing the nested probe.
Any kprobes hits in kprobes pre/post handler context
can be nested at once. If the other kprobes hits in
the nested pre/post handler context, that will be missed.
This is useful if a user wants to trace/debug code
which can be invoked from another kprobe handler.
We can test this feature on the kernel with
CONFIG_KPROBE_EVENTS_ON_NOTRACE=y as below.
# cd /sys/kernel/debug/tracing
# echo p ring_buffer_lock_reserve > kprobe_events
# echo p vfs_read >> kprobe_events
# echo 0 > /proc/sys/debug/kprobes-optimization # to check int3 handler
# echo stacktrace > events/kprobes/p_ring_buffer_lock_reserve_0/trigger
# echo 1 > events/kprobes/enable
# cat trace
...
cat-161 [000] d..1 46602.280111: p_vfs_read_0: (vfs_read+0x0/0x150)
cat-161 [000] d..2 46602.280222: p_ring_buffer_lock_reserve_0: (ring_buffer_lock_reserve+0x0/0x400)
cat-161 [000] d..2 46602.280247: <stack trace>
=> kprobe_trace_func
=> kprobe_dispatcher
=> aggr_pre_handler
=> kprobe_int3_handler
=> do_int3
=> int3
=> ring_buffer_lock_reserve
=> trace_event_buffer_lock_reserve
=> kprobe_trace_func
=> kprobe_dispatcher
=> aggr_pre_handler
=> kprobe_int3_handler
=> do_int3
=> int3
=> vfs_read
=> kernel_read
=> load_elf_phdrs
=> load_elf_binary
=> search_binary_handler.part.0
=> __do_execve_file.isra.0
=> __x64_sys_execve
=> do_syscall_64
=> entry_SYSCALL_64_after_hwframe
This shows int3 is nested.
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
---
arch/x86/include/asm/kprobes.h | 5 ++
arch/x86/kernel/kprobes/common.h | 39 +++++++++++++++
arch/x86/kernel/kprobes/core.c | 96 ++++++++++++++++----------------------
arch/x86/kernel/kprobes/ftrace.c | 6 ++
arch/x86/kernel/kprobes/opt.c | 13 +++--
5 files changed, 92 insertions(+), 67 deletions(-)
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 073eb7ad2f56..60ca23aeece8 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -83,6 +83,8 @@ static inline int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
return optinsn->size;
}
+#define KPROBE_NEST_MAX 2
+
struct prev_kprobe {
struct kprobe *kp;
unsigned long status;
@@ -95,7 +97,8 @@ struct kprobe_ctlblk {
unsigned long kprobe_status;
unsigned long kprobe_old_flags;
unsigned long kprobe_saved_flags;
- struct prev_kprobe prev_kprobe;
+ struct prev_kprobe prev[KPROBE_NEST_MAX];
+ int nested;
};
extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index 7d3a2e2daf01..490e871df36e 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -91,11 +91,46 @@ extern int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn);
extern void synthesize_reljump(void *dest, void *from, void *to);
extern void synthesize_relcall(void *dest, void *from, void *to);
+static nokprobe_inline void
+save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ int i = kcb->nested++;
+
+ kcb->prev[i].kp = kprobe_running();
+ kcb->prev[i].status = kcb->kprobe_status;
+ kcb->prev[i].old_flags = kcb->kprobe_old_flags;
+ kcb->prev[i].saved_flags = kcb->kprobe_saved_flags;
+}
+
+static nokprobe_inline void
+restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ int i = --kcb->nested;
+
+ __this_cpu_write(current_kprobe, kcb->prev[i].kp);
+ kcb->kprobe_status = kcb->prev[i].status;
+ kcb->kprobe_old_flags = kcb->prev[i].old_flags;
+ kcb->kprobe_saved_flags = kcb->prev[i].saved_flags;
+}
+
+static nokprobe_inline void pop_current_kprobe(struct kprobe_ctlblk *kcb)
+{
+ if (kcb->nested)
+ restore_previous_kprobe(kcb);
+ else
+ reset_current_kprobe();
+}
+
+static nokprobe_inline bool kprobe_can_nest(struct kprobe_ctlblk *kcb)
+{
+ return !kprobe_running() || (kcb->nested < KPROBE_NEST_MAX - 1);
+}
+
#ifdef CONFIG_OPTPROBES
-extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter);
+extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs);
extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr);
#else /* !CONFIG_OPTPROBES */
-static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
+static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs)
{
return 0;
}
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 4d7022a740ab..998524b73d4d 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -520,24 +520,6 @@ void arch_remove_kprobe(struct kprobe *p)
}
}
-static nokprobe_inline void
-save_previous_kprobe(struct kprobe_ctlblk *kcb)
-{
- kcb->prev_kprobe.kp = kprobe_running();
- kcb->prev_kprobe.status = kcb->kprobe_status;
- kcb->prev_kprobe.old_flags = kcb->kprobe_old_flags;
- kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags;
-}
-
-static nokprobe_inline void
-restore_previous_kprobe(struct kprobe_ctlblk *kcb)
-{
- __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
- kcb->kprobe_status = kcb->prev_kprobe.status;
- kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags;
- kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags;
-}
-
static nokprobe_inline void
set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
@@ -581,26 +563,34 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
}
NOKPROBE_SYMBOL(arch_prepare_kretprobe);
-static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb, int reenter)
+static nokprobe_inline
+int setup_booster_execution(struct kprobe *p, struct pt_regs *regs)
{
- if (setup_detour_execution(p, regs, reenter))
- return;
-
#if !defined(CONFIG_PREEMPTION)
if (p->ainsn.boostable && !p->post_handler) {
/* Boost up -- we can execute copied instructions directly */
+ regs->ip = (unsigned long)p->ainsn.insn;
+ return 1;
+ }
+#endif
+ return 0;
+}
+
+static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb, int reenter)
+{
+ if (setup_detour_execution(p, regs) ||
+ setup_booster_execution(p, regs)) {
if (!reenter)
- reset_current_kprobe();
+ pop_current_kprobe(kcb);
/*
* Reentering boosted probe doesn't reset current_kprobe,
* nor set current_kprobe, because it doesn't use single
* stepping.
*/
- regs->ip = (unsigned long)p->ainsn.insn;
return;
}
-#endif
+
if (reenter) {
save_previous_kprobe(kcb);
set_current_kprobe(p, regs, kcb);
@@ -630,6 +620,11 @@ static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
switch (kcb->kprobe_status) {
case KPROBE_HIT_SSDONE:
case KPROBE_HIT_ACTIVE:
+ if (kcb->nested < KPROBE_NEST_MAX - 1) {
+ save_previous_kprobe(kcb);
+ return 0;
+ }
+ fallthrough;
case KPROBE_HIT_SS:
kprobes_inc_nmissed_count(p);
setup_singlestep(p, regs, kcb, 1);
@@ -678,26 +673,24 @@ int kprobe_int3_handler(struct pt_regs *regs)
p = get_kprobe(addr);
if (p) {
- if (kprobe_running()) {
- if (reenter_kprobe(p, regs, kcb))
- return 1;
- } else {
- set_current_kprobe(p, regs, kcb);
- kcb->kprobe_status = KPROBE_HIT_ACTIVE;
-
- /*
- * If we have no pre-handler or it returned 0, we
- * continue with normal processing. If we have a
- * pre-handler and it returned non-zero, that means
- * user handler setup registers to exit to another
- * instruction, we must skip the single stepping.
- */
- if (!p->pre_handler || !p->pre_handler(p, regs))
- setup_singlestep(p, regs, kcb, 0);
- else
- reset_current_kprobe();
+ if (kprobe_running() && reenter_kprobe(p, regs, kcb))
return 1;
- }
+
+ set_current_kprobe(p, regs, kcb);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+ /*
+ * If we have no pre-handler or it returned 0, we
+ * continue with normal processing. If we have a
+ * pre-handler and it returned non-zero, that means
+ * user handler setup registers to exit to another
+ * instruction, we must skip the single stepping.
+ */
+ if (!p->pre_handler || !p->pre_handler(p, regs))
+ setup_singlestep(p, regs, kcb, 0);
+ else
+ pop_current_kprobe(kcb);
+ return 1;
} else if (*addr != INT3_INSN_OPCODE) {
/*
* The breakpoint instruction was removed right
@@ -994,13 +987,7 @@ int kprobe_debug_handler(struct pt_regs *regs)
cur->post_handler(cur, regs, 0);
}
- /* Restore back the original saved kprobes variables and continue. */
- if (kcb->kprobe_status == KPROBE_REENTER) {
- restore_previous_kprobe(kcb);
- goto out;
- }
- reset_current_kprobe();
-out:
+ pop_current_kprobe(kcb);
/*
* if somebody else is singlestepping across a probe point, flags
* will have TF set, in which case, continue the remaining processing
@@ -1043,10 +1030,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
*/
regs->flags |= kcb->kprobe_old_flags;
- if (kcb->kprobe_status == KPROBE_REENTER)
- restore_previous_kprobe(kcb);
- else
- reset_current_kprobe();
+ pop_current_kprobe(kcb);
} else if (kcb->kprobe_status == KPROBE_HIT_ACTIVE ||
kcb->kprobe_status == KPROBE_HIT_SSDONE) {
/*
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 681a4b36e9bb..b695c2e118f8 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -25,13 +25,15 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
return;
kcb = get_kprobe_ctlblk();
- if (kprobe_running()) {
+ if (!kprobe_can_nest(kcb)) {
kprobes_inc_nmissed_count(p);
} else {
unsigned long orig_ip = regs->ip;
/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
regs->ip = ip + sizeof(kprobe_opcode_t);
+ if (kprobe_running())
+ save_previous_kprobe(kcb);
__this_cpu_write(current_kprobe, p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
if (!p->pre_handler || !p->pre_handler(p, regs)) {
@@ -50,7 +52,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
* If pre_handler returns !0, it changes regs->ip. We have to
* skip emulating post_handler.
*/
- __this_cpu_write(current_kprobe, NULL);
+ pop_current_kprobe(kcb);
}
}
NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index ea13f6888284..ec7cfd4906e3 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -168,15 +168,16 @@ STACK_FRAME_NON_STANDARD(optprobe_template_func);
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
+ struct kprobe_ctlblk *kcb;
/* This is possible if op is under delayed unoptimizing */
if (kprobe_disabled(&op->kp))
return;
preempt_disable();
- if (kprobe_running()) {
+ kcb = get_kprobe_ctlblk();
+ if (!kprobe_can_nest(kcb)) {
kprobes_inc_nmissed_count(&op->kp);
} else {
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
/* Save skipped registers */
regs->cs = __KERNEL_CS;
#ifdef CONFIG_X86_32
@@ -186,10 +187,12 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
regs->ip = (unsigned long)op->kp.addr + INT3_INSN_SIZE;
regs->orig_ax = ~0UL;
+ if (kprobe_running())
+ save_previous_kprobe(kcb);
__this_cpu_write(current_kprobe, &op->kp);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
opt_pre_handler(&op->kp, regs);
- __this_cpu_write(current_kprobe, NULL);
+ pop_current_kprobe(kcb);
}
preempt_enable();
}
@@ -500,7 +503,7 @@ extern void arch_unoptimize_kprobes(struct list_head *oplist,
}
}
-int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
+int setup_detour_execution(struct kprobe *p, struct pt_regs *regs)
{
struct optimized_kprobe *op;
@@ -509,8 +512,6 @@ int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
op = container_of(p, struct optimized_kprobe, kp);
/* Detour through copied instructions */
regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
- if (!reenter)
- reset_current_kprobe();
return 1;
}
return 0;
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [RFC PATCH 2/3] arm64: kprobes: Support nested kprobes
2020-05-07 9:59 [RFC PATCH 0/3] kprobes: Support nested kprobes Masami Hiramatsu
2020-05-07 9:59 ` [RFC PATCH 1/3] x86/kprobes: " Masami Hiramatsu
@ 2020-05-07 10:00 ` Masami Hiramatsu
2020-05-07 10:00 ` [RFC PATCH 3/3] arm: " Masami Hiramatsu
2 siblings, 0 replies; 5+ messages in thread
From: Masami Hiramatsu @ 2020-05-07 10:00 UTC (permalink / raw)
To: LKML
Cc: Steven Rostedt, Ingo Molnar, Arnaldo Carvalho de Melo,
Eelco Chaudron, Yonghong Song, bpf, David S . Miller,
Network Development, Alexei Starovoitov, Daniel Borkmann,
Martin KaFai Lau, Song Liu, Andrii Nakryiko, Catalin Marinas,
Will Deacon, Wang Nan, Russell King, linux-arm-kernel
Make kprobes accept one level of nesting instead of missing
the nested probe on arm64.
Any kprobes hits in kprobes pre/post handler context can be
nested at once. If the other kprobes hits in the nested pre/post
handler context, that will be missed.
We can test this feature on the kernel with
CONFIG_KPROBE_EVENTS_ON_NOTRACE=y as below.
# cd /sys/kernel/debug/tracing
# echo p ring_buffer_lock_reserve > kprobe_events
# echo p vfs_read >> kprobe_events
# echo stacktrace > events/kprobes/p_ring_buffer_lock_reserve_0/trigger
# echo 1 > events/kprobes/enable
# cat trace
...
sh-211 [005] d..2 71.708242: p_ring_buffer_lock_reserve_0: (ring_buffer_lock_reserve+0x0/0x4c8)
sh-211 [005] d..2 71.709982: <stack trace>
=> kprobe_dispatcher
=> kprobe_breakpoint_handler
=> call_break_hook
=> brk_handler
=> do_debug_exception
=> el1_sync_handler
=> el1_sync
=> ring_buffer_lock_reserve
=> kprobe_trace_func
=> kprobe_dispatcher
=> kprobe_breakpoint_handler
=> call_break_hook
=> brk_handler
=> do_debug_exception
=> el1_sync_handler
=> el1_sync
=> vfs_read
=> __arm64_sys_read
=> el0_svc_common.constprop.3
=> do_el0_svc
=> el0_sync_handler
=> el0_sync
This shows brk_handler is nested.
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
---
arch/arm64/include/asm/kprobes.h | 5 ++
arch/arm64/kernel/probes/kprobes.c | 75 ++++++++++++++++++++----------------
2 files changed, 46 insertions(+), 34 deletions(-)
diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h
index 97e511d645a2..b2ebd3bad794 100644
--- a/arch/arm64/include/asm/kprobes.h
+++ b/arch/arm64/include/asm/kprobes.h
@@ -34,12 +34,15 @@ struct kprobe_step_ctx {
unsigned long match_addr;
};
+#define KPROBE_NEST_MAX 2
+
/* per-cpu kprobe control block */
struct kprobe_ctlblk {
unsigned int kprobe_status;
unsigned long saved_irqflag;
- struct prev_kprobe prev_kprobe;
+ struct prev_kprobe prev[KPROBE_NEST_MAX];
struct kprobe_step_ctx ss_ctx;
+ int nested;
};
void arch_remove_kprobe(struct kprobe *);
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index d1c95dcf1d78..27d52726277b 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -153,14 +153,18 @@ void __kprobes arch_remove_kprobe(struct kprobe *p)
static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
{
- kcb->prev_kprobe.kp = kprobe_running();
- kcb->prev_kprobe.status = kcb->kprobe_status;
+ int i = kcb->nested++;
+
+ kcb->prev[i].kp = kprobe_running();
+ kcb->prev[i].status = kcb->kprobe_status;
}
static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{
- __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
- kcb->kprobe_status = kcb->prev_kprobe.status;
+ int i = --kcb->nested;
+
+ __this_cpu_write(current_kprobe, kcb->prev[i].kp);
+ kcb->kprobe_status = kcb->prev[i].status;
}
static void __kprobes set_current_kprobe(struct kprobe *p)
@@ -168,6 +172,14 @@ static void __kprobes set_current_kprobe(struct kprobe *p)
__this_cpu_write(current_kprobe, p);
}
+static nokprobe_inline void pop_current_kprobe(struct kprobe_ctlblk *kcb)
+{
+ if (kcb->nested)
+ restore_previous_kprobe(kcb);
+ else
+ reset_current_kprobe();
+}
+
/*
* Interrupts need to be disabled before single-step mode is set, and not
* reenabled until after single-step mode ends.
@@ -243,6 +255,10 @@ static int __kprobes reenter_kprobe(struct kprobe *p,
switch (kcb->kprobe_status) {
case KPROBE_HIT_SSDONE:
case KPROBE_HIT_ACTIVE:
+ if (kcb->nested < KPROBE_NEST_MAX - 1) {
+ save_previous_kprobe(kcb);
+ return 0;
+ }
kprobes_inc_nmissed_count(p);
setup_singlestep(p, regs, kcb, 1);
break;
@@ -286,7 +302,7 @@ post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs)
cur->post_handler(cur, regs, 0);
}
- reset_current_kprobe();
+ pop_current_kprobe(kcb);
}
int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
@@ -309,11 +325,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
BUG();
kernel_disable_single_step();
-
- if (kcb->kprobe_status == KPROBE_REENTER)
- restore_previous_kprobe(kcb);
- else
- reset_current_kprobe();
+ pop_current_kprobe(kcb);
break;
case KPROBE_HIT_ACTIVE:
@@ -357,30 +369,27 @@ static void __kprobes kprobe_handler(struct pt_regs *regs)
p = get_kprobe((kprobe_opcode_t *) addr);
if (p) {
- if (cur_kprobe) {
- if (reenter_kprobe(p, regs, kcb))
- return;
- } else {
- /* Probe hit */
- set_current_kprobe(p);
- kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ if (cur_kprobe && reenter_kprobe(p, regs, kcb))
+ return;
+ /* Probe hit */
+ set_current_kprobe(p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
- /*
- * If we have no pre-handler or it returned 0, we
- * continue with normal processing. If we have a
- * pre-handler and it returned non-zero, it will
- * modify the execution path and no need to single
- * stepping. Let's just reset current kprobe and exit.
- *
- * pre_handler can hit a breakpoint and can step thru
- * before return, keep PSTATE D-flag enabled until
- * pre_handler return back.
- */
- if (!p->pre_handler || !p->pre_handler(p, regs)) {
- setup_singlestep(p, regs, kcb, 0);
- } else
- reset_current_kprobe();
- }
+ /*
+ * If we have no pre-handler or it returned 0, we
+ * continue with normal processing. If we have a
+ * pre-handler and it returned non-zero, it will
+ * modify the execution path and no need to single
+ * stepping. Let's just reset current kprobe and exit.
+ *
+ * pre_handler can hit a breakpoint and can step thru
+ * before return, keep PSTATE D-flag enabled until
+ * pre_handler return back.
+ */
+ if (!p->pre_handler || !p->pre_handler(p, regs)) {
+ setup_singlestep(p, regs, kcb, 0);
+ } else
+ pop_current_kprobe(kcb);
}
/*
* The breakpoint instruction was removed right
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [RFC PATCH 3/3] arm: kprobes: Support nested kprobes
2020-05-07 9:59 [RFC PATCH 0/3] kprobes: Support nested kprobes Masami Hiramatsu
2020-05-07 9:59 ` [RFC PATCH 1/3] x86/kprobes: " Masami Hiramatsu
2020-05-07 10:00 ` [RFC PATCH 2/3] arm64: kprobes: " Masami Hiramatsu
@ 2020-05-07 10:00 ` Masami Hiramatsu
2 siblings, 0 replies; 5+ messages in thread
From: Masami Hiramatsu @ 2020-05-07 10:00 UTC (permalink / raw)
To: LKML
Cc: Steven Rostedt, Ingo Molnar, Arnaldo Carvalho de Melo,
Eelco Chaudron, Yonghong Song, bpf, David S . Miller,
Network Development, Alexei Starovoitov, Daniel Borkmann,
Martin KaFai Lau, Song Liu, Andrii Nakryiko, Catalin Marinas,
Will Deacon, Wang Nan, Russell King, linux-arm-kernel
Make kprobes accept one level of nesting instead of missing
the nested probe on arm.
Any kprobes hits in kprobes pre/post handler context can be
nested at once. If the other kprobes hits in the nested pre/post
handler context, that will be missed.
We can test this feature on the kernel with
CONFIG_KPROBE_EVENTS_ON_NOTRACE=y as below.
# cd /sys/kernel/debug/tracing
# echo p ring_buffer_lock_reserve > kprobe_events
# echo p vfs_read >> kprobe_events
# echo 0 > /proc/sys/debug/kprobes-optimization # to check trap handler
# echo stacktrace > events/kprobes/p_ring_buffer_lock_reserve_0/trigger
# echo 1 > events/kprobes/enable
# cat trace
...
sh-94 [000] d... 124.945302: p_vfs_read_0: (vfs_read+0x0/0x120)
sh-94 [000] d... 125.041822: p_ring_buffer_lock_reserve_0: (ring_buffer_lock_reserve+0x0/0x57c)
sh-94 [000] d... 125.042102: <stack trace>
=> kprobe_dispatcher
=> aggr_pre_handler
=> kprobe_handler
=> kprobe_trap_handler
=> do_undefinstr
=> __und_svc_finish
=> ring_buffer_lock_reserve
=> kprobe_trace_func
=> kprobe_dispatcher
=> aggr_pre_handler
=> kprobe_handler
=> kprobe_trap_handler
=> do_undefinstr
=> __und_svc_finish
=> vfs_read
=> sys_read
=> ret_fast_syscall
The trap handler is nested correctly.
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
---
arch/arm/include/asm/kprobes.h | 5 ++
arch/arm/probes/kprobes/core.c | 79 ++++++++++++++++++-------------------
arch/arm/probes/kprobes/core.h | 30 ++++++++++++++
arch/arm/probes/kprobes/opt-arm.c | 6 ++-
4 files changed, 76 insertions(+), 44 deletions(-)
diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h
index 213607a1f45c..553f719bbfd5 100644
--- a/arch/arm/include/asm/kprobes.h
+++ b/arch/arm/include/asm/kprobes.h
@@ -32,10 +32,13 @@ struct prev_kprobe {
unsigned int status;
};
+#define KPROBE_NEST_MAX 2
+
/* per-cpu kprobe control block */
struct kprobe_ctlblk {
unsigned int kprobe_status;
- struct prev_kprobe prev_kprobe;
+ struct prev_kprobe prev[KPROBE_NEST_MAX];
+ int nested;
};
void arch_remove_kprobe(struct kprobe *);
diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index 90b5bc723c83..2fe7373c3f77 100644
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -187,18 +187,6 @@ void __kprobes arch_remove_kprobe(struct kprobe *p)
}
}
-static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
-{
- kcb->prev_kprobe.kp = kprobe_running();
- kcb->prev_kprobe.status = kcb->kprobe_status;
-}
-
-static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
-{
- __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
- kcb->kprobe_status = kcb->prev_kprobe.status;
-}
-
static void __kprobes set_current_kprobe(struct kprobe *p)
{
__this_cpu_write(current_kprobe, p);
@@ -224,6 +212,40 @@ singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
p->ainsn.insn_singlestep(p->opcode, &p->ainsn, regs);
}
+static nokprobe_inline int reenter_kprobe(struct kprobe *p,
+ struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ /* Kprobe is pending, so we're recursing. */
+ switch (kcb->kprobe_status) {
+ case KPROBE_HIT_ACTIVE:
+ case KPROBE_HIT_SSDONE:
+ if (kcb->nested < KPROBE_NEST_MAX - 1) {
+ save_previous_kprobe(kcb);
+ return 0;
+ }
+ fallthrough;
+ case KPROBE_HIT_SS:
+ /* A pre- or post-handler probe got us here. */
+ kprobes_inc_nmissed_count(p);
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p);
+ kcb->kprobe_status = KPROBE_REENTER;
+ singlestep(p, regs, kcb);
+ restore_previous_kprobe(kcb);
+ break;
+ case KPROBE_REENTER:
+ /* A nested probe was hit in FIQ, it is a BUG */
+ pr_warn("Unrecoverable kprobe detected.\n");
+ dump_kprobe(p);
+ fallthrough;
+ default:
+ /* impossible cases */
+ BUG();
+ }
+ return 1;
+}
+
/*
* Called with IRQs disabled. IRQs must remain disabled from that point
* all the way until processing this kprobe is complete. The current
@@ -262,30 +284,9 @@ void __kprobes kprobe_handler(struct pt_regs *regs)
* In this case, we can skip recursing check too.
*/
singlestep_skip(p, regs);
- } else if (cur) {
- /* Kprobe is pending, so we're recursing. */
- switch (kcb->kprobe_status) {
- case KPROBE_HIT_ACTIVE:
- case KPROBE_HIT_SSDONE:
- case KPROBE_HIT_SS:
- /* A pre- or post-handler probe got us here. */
- kprobes_inc_nmissed_count(p);
- save_previous_kprobe(kcb);
- set_current_kprobe(p);
- kcb->kprobe_status = KPROBE_REENTER;
- singlestep(p, regs, kcb);
- restore_previous_kprobe(kcb);
- break;
- case KPROBE_REENTER:
- /* A nested probe was hit in FIQ, it is a BUG */
- pr_warn("Unrecoverable kprobe detected.\n");
- dump_kprobe(p);
- /* fall through */
- default:
- /* impossible cases */
- BUG();
- }
} else {
+ if (cur && reenter_kprobe(p, regs, kcb))
+ return;
/* Probe hit and conditional execution check ok. */
set_current_kprobe(p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
@@ -305,7 +306,7 @@ void __kprobes kprobe_handler(struct pt_regs *regs)
p->post_handler(p, regs, 0);
}
}
- reset_current_kprobe();
+ pop_current_kprobe(kcb);
}
} else {
/*
@@ -342,11 +343,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
* normal page fault.
*/
regs->ARM_pc = (long)cur->addr;
- if (kcb->kprobe_status == KPROBE_REENTER) {
- restore_previous_kprobe(kcb);
- } else {
- reset_current_kprobe();
- }
+ pop_current_kprobe(kcb);
break;
case KPROBE_HIT_ACTIVE:
diff --git a/arch/arm/probes/kprobes/core.h b/arch/arm/probes/kprobes/core.h
index c3db468650ce..aaff1e0f2153 100644
--- a/arch/arm/probes/kprobes/core.h
+++ b/arch/arm/probes/kprobes/core.h
@@ -34,6 +34,36 @@ typedef enum probes_insn (kprobe_decode_insn_t)(probes_opcode_t,
const union decode_action *,
const struct decode_checker *[]);
+
+static nokprobe_inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ int i = kcb->nested++;
+
+ kcb->prev[i].kp = kprobe_running();
+ kcb->prev[i].status = kcb->kprobe_status;
+}
+
+static nokprobe_inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ int i = --kcb->nested;
+
+ __this_cpu_write(current_kprobe, kcb->prev[i].kp);
+ kcb->kprobe_status = kcb->prev[i].status;
+}
+
+static nokprobe_inline void pop_current_kprobe(struct kprobe_ctlblk *kcb)
+{
+ if (kcb->nested)
+ restore_previous_kprobe(kcb);
+ else
+ reset_current_kprobe();
+}
+
+static nokprobe_inline bool kprobe_can_nest(struct kprobe_ctlblk *kcb)
+{
+ return !kprobe_running() || (kcb->nested < KPROBE_NEST_MAX - 1);
+}
+
#ifdef CONFIG_THUMB2_KERNEL
extern const union decode_action kprobes_t32_actions[];
diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c
index 7a449df0b359..cb4cb13bff88 100644
--- a/arch/arm/probes/kprobes/opt-arm.c
+++ b/arch/arm/probes/kprobes/opt-arm.c
@@ -161,13 +161,15 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
local_irq_save(flags);
kcb = get_kprobe_ctlblk();
- if (kprobe_running()) {
+ if (!kprobe_can_nest(kcb)) {
kprobes_inc_nmissed_count(&op->kp);
} else {
+ if (kprobe_running())
+ save_previous_kprobe(kcb);
__this_cpu_write(current_kprobe, &op->kp);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
opt_pre_handler(&op->kp, regs);
- __this_cpu_write(current_kprobe, NULL);
+ pop_current_kprobe(kcb);
}
/*
^ permalink raw reply related [flat|nested] 5+ messages in thread