linux-riscv.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
From: Chen Guokai <chenguokai17@mails.ucas.ac.cn>
To: paul.walmsley@sifive.com, palmer@dabbelt.com,
	aou@eecs.berkeley.edu, rostedt@goodmis.org, mingo@redhat.com,
	sfr@canb.auug.org.au
Cc: linux-riscv@lists.infradead.org, linux-kernel@vger.kernel.org,
	liaochang1@huawei.com,
	Chen Guokai <chenguokai17@mails.ucas.ac.cn>
Subject: [PATCH 7/8] riscv/kprobe: Prepare detour buffer for optimized kprobe
Date: Sun, 30 Oct 2022 17:01:40 +0800	[thread overview]
Message-ID: <20221030090141.2550837-8-chenguokai17@mails.ucas.ac.cn> (raw)
In-Reply-To: <20221030090141.2550837-1-chenguokai17@mails.ucas.ac.cn>

From: Liao Chang <liaochang1@huawei.com>

This patch introduces code to prepare instruction slot for optimized
kprobe. The instruction slot for regular kprobe just records two
instructions, first one is the original instruction replaced by EBREAK,
the second one is EBREAK for single-step. While instruction slot for
optimized kprobe is larger, beside execute instruction out-of-line, it
also contains a standalone stackframe for calling kprobe handler.

All optimized instruction slots consis of 5 major parts, which copied
from the assembly code template in opt_trampoline.S.

	SAVE REGS
	CALL optimized_callback
	RESTORE REGS
	EXECUTE INSNS OUT-OF-LINE
	RETURN BACK

Although most instructions in each slot are same, these slots still have
a bit difference in their payload, it is caused by three parts:

  - 'CALL optimized_callback', the relative offset for 'call'
    instruction is different for each kprobe.
  - 'EXECUTE INSN OUT-OF-LINE', no doubt.
  - 'RETURN BACK', the chosen free register is reused here as the
     destination register of jumping back.

So we also need customize the slot payload for each optimized kprobe.

Co-developed-by: Chen Guokai <chenguokai17@mails.ucas.ac.cn>
Signed-off-by: Chen Guokai <chenguokai17@mails.ucas.ac.cn>
Signed-off-by: Liao Chang <liaochang1@huawei.com>
---
 arch/riscv/include/asm/kprobes.h          |  16 +++
 arch/riscv/kernel/probes/opt.c            |  75 +++++++++++++
 arch/riscv/kernel/probes/opt_trampoline.S | 125 ++++++++++++++++++++++
 3 files changed, 216 insertions(+)

diff --git a/arch/riscv/include/asm/kprobes.h b/arch/riscv/include/asm/kprobes.h
index 22b73a2fd1fd..a9ef864f7225 100644
--- a/arch/riscv/include/asm/kprobes.h
+++ b/arch/riscv/include/asm/kprobes.h
@@ -48,10 +48,26 @@ void __kprobes *trampoline_probe_handler(struct pt_regs *regs);
 /* optinsn template addresses */
 extern __visible kprobe_opcode_t optprobe_template_entry[];
 extern __visible kprobe_opcode_t optprobe_template_end[];
+extern __visible kprobe_opcode_t optprobe_template_save[];
+extern __visible kprobe_opcode_t optprobe_template_call[];
+extern __visible kprobe_opcode_t optprobe_template_insn[];
+extern __visible kprobe_opcode_t optprobe_template_return[];
 
 #define MAX_OPTINSN_SIZE				\
 	((unsigned long)optprobe_template_end -		\
 	 (unsigned long)optprobe_template_entry)
+#define DETOUR_SAVE_OFFSET				\
+	((unsigned long)optprobe_template_save -	\
+	 (unsigned long)optprobe_template_entry)
+#define DETOUR_CALL_OFFSET				\
+	((unsigned long)optprobe_template_call -	\
+	 (unsigned long)optprobe_template_entry)
+#define DETOUR_INSN_OFFSET				\
+	((unsigned long)optprobe_template_insn -	\
+	 (unsigned long)optprobe_template_entry)
+#define DETOUR_RETURN_OFFSET				\
+	((unsigned long)optprobe_template_return -	\
+	 (unsigned long)optprobe_template_entry)
 
 /*
  * For RVI and RVC hybird encoding kernel, althought long jump just needs
diff --git a/arch/riscv/kernel/probes/opt.c b/arch/riscv/kernel/probes/opt.c
index 876bec539554..77248ed7d4e8 100644
--- a/arch/riscv/kernel/probes/opt.c
+++ b/arch/riscv/kernel/probes/opt.c
@@ -11,9 +11,37 @@
 #include <linux/kprobes.h>
 #include <asm/kprobes.h>
 #include <asm/patch.h>
+#include <asm/asm-offsets.h>
 
 #include "simulate-insn.h"
 #include "decode-insn.h"
+#include "../../net/bpf_jit.h"
+
+static void
+optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
+{
+	unsigned long flags;
+	struct kprobe_ctlblk *kcb;
+
+	/* Save skipped registers */
+	regs->epc = (unsigned long)op->kp.addr;
+	regs->orig_a0 = ~0UL;
+
+	local_irq_save(flags);
+	kcb = get_kprobe_ctlblk();
+
+	if (kprobe_running()) {
+		kprobes_inc_nmissed_count(&op->kp);
+	} else {
+		__this_cpu_write(current_kprobe, &op->kp);
+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+		opt_pre_handler(&op->kp, regs);
+		__this_cpu_write(current_kprobe, NULL);
+	}
+	local_irq_restore(flags);
+}
+
+NOKPROBE_SYMBOL(optimized_callback)
 
 static inline int in_auipc_jalr_range(long val)
 {
@@ -30,6 +58,11 @@ static inline int in_auipc_jalr_range(long val)
 #endif
 }
 
+#define DETOUR_ADDR(code, offs) \
+	((void *)((unsigned long)(code) + (offs)))
+#define DETOUR_INSN(code, offs) \
+	(*(kprobe_opcode_t *)((unsigned long)(code) + (offs)))
+
 /*
  * Copy optprobe assembly code template into detour buffer and modify some
  * instructions for each kprobe.
@@ -38,6 +71,49 @@ static void prepare_detour_buffer(kprobe_opcode_t *code, kprobe_opcode_t *slot,
 				  int rd, struct optimized_kprobe *op,
 				  kprobe_opcode_t opcode)
 {
+	long offs;
+	unsigned long data;
+
+	memcpy(code, optprobe_template_entry, MAX_OPTINSN_SIZE);
+
+	/* Step1: record optimized_kprobe pointer into detour buffer */
+	memcpy(DETOUR_ADDR(code, DETOUR_SAVE_OFFSET), &op, sizeof(op));
+
+	/*
+	 * Step2
+	 * auipc ra, 0     --> aupic ra, HI20.{optimized_callback - pc}
+	 * jalr  ra, 0(ra) --> jalr  ra, LO12.{optimized_callback - pc}(ra)
+	 */
+	offs = (unsigned long)&optimized_callback -
+	       (unsigned long)DETOUR_ADDR(slot, DETOUR_CALL_OFFSET);
+	DETOUR_INSN(code, DETOUR_CALL_OFFSET) =
+				rv_auipc(1, (offs + (1 << 11)) >> 12);
+	DETOUR_INSN(code, DETOUR_CALL_OFFSET + 0x4) =
+				rv_jalr(1, 1, offs & 0xFFF);
+
+	/* Step3: copy replaced instructions into detour buffer */
+	memcpy(DETOUR_ADDR(code, DETOUR_INSN_OFFSET), op->kp.addr,
+	       op->optinsn.length);
+	memcpy(DETOUR_ADDR(code, DETOUR_INSN_OFFSET), &opcode,
+	       GET_INSN_LENGTH(opcode));
+
+	/* Step4: record return address of long jump into detour buffer */
+	data = (unsigned long)op->kp.addr + op->optinsn.length;
+	memcpy(DETOUR_ADDR(code, DETOUR_RETURN_OFFSET), &data, sizeof(data));
+
+	/*
+	 * Step5
+	 * auipc ra, 0      --> auipc rd, 0
+	 * ld/w  ra, -4(ra) --> ld/w  rd, -8(rd)
+	 * jalr  x0,  0(ra) --> jalr  x0,  0(rd)
+	 */
+	DETOUR_INSN(code, DETOUR_RETURN_OFFSET + 0x8) = rv_auipc(rd, 0);
+#if __riscv_xlen == 32
+	DETOUR_INSN(code, DETOUR_RETURN_OFFSET + 0xC) = rv_lw(rd, -8, rd);
+#else
+	DETOUR_INSN(code, DETOUR_RETURN_OFFSET + 0xC) = rv_ld(rd, -8, rd);
+#endif
+	DETOUR_INSN(code, DETOUR_RETURN_OFFSET + 0x10) = rv_jalr(0, rd, 0);
 }
 
 /* Registers the first usage of which is the destination of instruction */
diff --git a/arch/riscv/kernel/probes/opt_trampoline.S b/arch/riscv/kernel/probes/opt_trampoline.S
index 16160c4367ff..75e34e373cf2 100644
--- a/arch/riscv/kernel/probes/opt_trampoline.S
+++ b/arch/riscv/kernel/probes/opt_trampoline.S
@@ -1,12 +1,137 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2022 Guokai Chen
+ * Copyright (C) 2022 Liao, Chang <liaochang1@huawei.com>
  */
 
 #include <linux/linkage.h>
 
+#include <asm/asm.h>
 #incldue <asm/csr.h>
 #include <asm/asm-offsets.h>
 
 SYM_ENTRY(optprobe_template_entry, SYM_L_GLOBAL, SYM_A_NONE)
+	addi  sp, sp, -(PT_SIZE_ON_STACK)
+	REG_S x1,  PT_RA(sp)
+	REG_S x2,  PT_SP(sp)
+	REG_S x3,  PT_GP(sp)
+	REG_S x4,  PT_TP(sp)
+	REG_S x5,  PT_T0(sp)
+	REG_S x6,  PT_T1(sp)
+	REG_S x7,  PT_T2(sp)
+	REG_S x8,  PT_S0(sp)
+	REG_S x9,  PT_S1(sp)
+	REG_S x10, PT_A0(sp)
+	REG_S x11, PT_A1(sp)
+	REG_S x12, PT_A2(sp)
+	REG_S x13, PT_A3(sp)
+	REG_S x14, PT_A4(sp)
+	REG_S x15, PT_A5(sp)
+	REG_S x16, PT_A6(sp)
+	REG_S x17, PT_A7(sp)
+	REG_S x18, PT_S2(sp)
+	REG_S x19, PT_S3(sp)
+	REG_S x20, PT_S4(sp)
+	REG_S x21, PT_S5(sp)
+	REG_S x22, PT_S6(sp)
+	REG_S x23, PT_S7(sp)
+	REG_S x24, PT_S8(sp)
+	REG_S x25, PT_S9(sp)
+	REG_S x26, PT_S10(sp)
+	REG_S x27, PT_S11(sp)
+	REG_S x28, PT_T3(sp)
+	REG_S x29, PT_T4(sp)
+	REG_S x30, PT_T5(sp)
+	REG_S x31, PT_T6(sp)
+	/* Update fp is friendly for stacktrace */
+	addi  s0, sp, (PT_SIZE_ON_STACK)
+	j 1f
+
+SYM_ENTRY(optprobe_template_save, SYM_L_GLOBAL, SYM_A_NONE)
+	/*
+	 * Step1:
+	 * Filled with the pointer to optimized_kprobe data
+	 */
+	.dword 0
+1:
+	/* Load optimize_kprobe pointer from .dword below */
+	auipc a0, 0
+	REG_L a0, -8(a0)
+	add   a1, sp, x0
+
+SYM_ENTRY(optprobe_template_call, SYM_L_GLOBAL, SYM_A_NONE)
+	/*
+	 * Step2:
+	 * <IMME> of AUIPC/JALR are modified to the offset to optimized_callback
+	 * jump target is loaded from above .dword.
+	 */
+	auipc ra, 0
+	jalr  ra, 0(ra)
+
+	REG_L x1,  PT_RA(sp)
+	REG_L x3,  PT_GP(sp)
+	REG_L x4,  PT_TP(sp)
+	REG_L x5,  PT_T0(sp)
+	REG_L x6,  PT_T1(sp)
+	REG_L x7,  PT_T2(sp)
+	REG_L x8,  PT_S0(sp)
+	REG_L x9,  PT_S1(sp)
+	REG_L x10, PT_A0(sp)
+	REG_L x11, PT_A1(sp)
+	REG_L x12, PT_A2(sp)
+	REG_L x13, PT_A3(sp)
+	REG_L x14, PT_A4(sp)
+	REG_L x15, PT_A5(sp)
+	REG_L x16, PT_A6(sp)
+	REG_L x17, PT_A7(sp)
+	REG_L x18, PT_S2(sp)
+	REG_L x19, PT_S3(sp)
+	REG_L x20, PT_S4(sp)
+	REG_L x21, PT_S5(sp)
+	REG_L x22, PT_S6(sp)
+	REG_L x23, PT_S7(sp)
+	REG_L x24, PT_S8(sp)
+	REG_L x25, PT_S9(sp)
+	REG_L x26, PT_S10(sp)
+	REG_L x27, PT_S11(sp)
+	REG_L x28, PT_T3(sp)
+	REG_L x29, PT_T4(sp)
+	REG_L x30, PT_T5(sp)
+	REG_L x31, PT_T6(sp)
+	REG_L x2,  PT_SP(sp)
+	addi  sp, sp, (PT_SIZE_ON_STACK)
+
+SYM_ENTRY(optprobe_template_insn, SYM_L_GLOBAL, SYM_A_NONE)
+	/*
+	 * Step3:
+	 * NOPS will be replaced by the probed instruction, at worst case 3 RVC
+	 * and 1 RVI instructions is about to execute out of line.
+	 */
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	j 2f
+
+SYM_ENTRY(optprobe_template_return, SYM_L_GLOBAL, SYM_A_NONE)
+	/*
+	 * Step4:
+	 * Filled with the return address of long jump(AUIPC/JALR)
+	 */
+	.dword 0
+2:
+	/*
+	 * Step5:
+	 * The <RA> of AUIPC/LD/JALR will be replaced for each kprobe,
+	 * used to read return address saved in .dword above.
+	 */
+	auipc ra, 0
+	REG_L ra, -8(ra)
+	jalr  x0, 0(ra)
 SYM_ENTRY(optprobe_template_end, SYM_L_GLOBAL, SYM_A_NONE)
-- 
2.25.1


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

  parent reply	other threads:[~2022-10-30  9:02 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-30  9:01 [PATCH v3 0/8] Add OPTPROBES feature on RISCV Chen Guokai
2022-10-30  9:01 ` [PATCH 1/8] riscv/kprobe: Prepare the skeleton to implement RISCV OPTPROBES feature Chen Guokai
2022-10-31 19:42   ` Conor Dooley
2022-11-01 11:07     ` liaochang (A)
2022-11-01 23:30       ` Conor Dooley
2022-11-03  1:23         ` liaochang (A)
2022-10-30  9:01 ` [PATCH 2/8] riscv/kprobe: Allocate detour buffer from module area Chen Guokai
2022-10-30  9:01 ` [PATCH 3/8] riscv/kprobe: Prepare the skeleton to prepare optimized kprobe Chen Guokai
2022-10-30  9:01 ` [PATCH 4/8] riscv/kprobe: Add common RVI and RVC instruction decoder code Chen Guokai
2022-10-30  9:01 ` [PATCH 5/8] riscv/kprobe: Search free register(s) to clobber for 'AUIPC/JALR' Chen Guokai
2022-10-30  9:01 ` [PATCH 6/8] riscv/kprobe: Add code to check if kprobe can be optimized Chen Guokai
2022-10-30  9:01 ` Chen Guokai [this message]
2022-10-30  9:01 ` [PATCH 8/8] riscv/kprobe: Patch AUIPC/JALR pair to optimize kprobe Chen Guokai

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221030090141.2550837-8-chenguokai17@mails.ucas.ac.cn \
    --to=chenguokai17@mails.ucas.ac.cn \
    --cc=aou@eecs.berkeley.edu \
    --cc=liaochang1@huawei.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=mingo@redhat.com \
    --cc=palmer@dabbelt.com \
    --cc=paul.walmsley@sifive.com \
    --cc=rostedt@goodmis.org \
    --cc=sfr@canb.auug.org.au \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).