linux-riscv.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
From: Chen Guokai <chenguokai17@mails.ucas.ac.cn>
To: paul.walmsley@sifive.com, palmer@dabbelt.com,
	aou@eecs.berkeley.edu, rostedt@goodmis.org, mingo@redhat.com,
	sfr@canb.auug.org.au
Cc: linux-riscv@lists.infradead.org, linux-kernel@vger.kernel.org,
	liaochang1@huawei.com,
	Chen Guokai <chenguokai17@mails.ucas.ac.cn>
Subject: [PATCH v6 05/13] riscv/kprobe: Introduce free register(s) searching algorithm
Date: Fri, 27 Jan 2023 21:05:33 +0800	[thread overview]
Message-ID: <20230127130541.1250865-6-chenguokai17@mails.ucas.ac.cn> (raw)
In-Reply-To: <20230127130541.1250865-1-chenguokai17@mails.ucas.ac.cn>

From: Liao Chang <liaochang1@huawei.com>

To do jump optimization, it needs to clobber two integer GPRs, the first
one is used to form AUIPC/JALR jumping to detour buffer, the second one
is used to form JR in detour buffer. Since kprobe can be installed
anywhere of kernel/module text, hence the register being clobbered needs
to be chosen carefully to avoid changing the original logic.

The algorithm for finding free register is inspired by the register
renaming in modern processors. From the perspective of register renaming,
a register could be represented as two different registers if two neighbor
instructions both write to it but no one ever reads it. Extending this
fact a register is considered to be free if it has never been read since
the first write on it in the execution flow.

Let's use the example below to explain how the algorithm work. Given
kernel is RVI and RCV hybrid binary, and one kprobe is instrumented at
the entry of function idle_dummy().

Before			Optimized		Detour buffer
<idle_dummy>:					...
 #1 add  sp,sp,-16	auipc a0, #?		add  sp,sp,-16
 #2 sd   s0,8(sp)				sd   s0,8(sp)
 #3 addi s0,sp,16	jalr  a0, #?(a0)	addi s0,sp,16
 #4 ld   s0,8(sp)				ld   s0,8(sp)
 #5 li   a0,0		li   a0,0		auipc a0, #?
 #6 addi sp,sp,16	addi sp,sp,16		jr    x0, #?(a0)
 #7 ret			ret

To optimize kprobe, it used to patch the first 8 bytes with AUIPC/JALR,
because from #1 to #7, a0 is the only register that satisfies condition:

 - Never been read before write
 - Never been updated in detour buffer

So a0 will be chosen to form AUIPC/JALR and JR.

Signed-off-by: Liao Chang <liaochang1@huawei.com>
Co-developed-by: Chen Guokai <chenguokai17@mails.ucas.ac.cn>
Signed-off-by: Chen Guokai <chenguokai17@mails.ucas.ac.cn>
---
 arch/riscv/kernel/probes/opt.c | 221 +++++++++++++++++++++++++++++++++
 1 file changed, 221 insertions(+)

diff --git a/arch/riscv/kernel/probes/opt.c b/arch/riscv/kernel/probes/opt.c
index c03cdb1512a6..d38ed1a52c93 100644
--- a/arch/riscv/kernel/probes/opt.c
+++ b/arch/riscv/kernel/probes/opt.c
@@ -12,6 +12,9 @@
 #include <asm/kprobes.h>
 #include <asm/patch.h>
 
+#include "simulate-insn.h"
+#include "decode-insn.h"
+
 static int in_auipc_jalr_range(long val)
 {
 #ifdef CONFIG_ARCH_RV32I
@@ -37,15 +40,233 @@ static void prepare_detour_buffer(kprobe_opcode_t *code, kprobe_opcode_t *slot,
 {
 }
 
+/* Registers the first usage of which is the destination of instruction */
+#define WRITE_ON(reg)	\
+	(*write |= (((*read >> (reg)) ^ 1UL) & 1) << (reg))
+/* Registers the first usage of which is the source of instruction */
+#define READ_ON(reg)	\
+	(*read |= (((*write >> (reg)) ^ 1UL) & 1) << (reg))
+
 /*
  * In RISC-V ISA, AUIPC/JALR clobber one register to form target address,
  * inspired by register renaming in OoO processor, this involves search
  * backward that is not previously used as a source register and is used
  * as a destination register before any branch or jump instruction.
  */
+static void find_register(unsigned long start, unsigned long end,
+			       unsigned long *write, unsigned long *read)
+{
+	kprobe_opcode_t insn;
+	unsigned long addr, offset = 0UL;
+
+	for (addr = start; addr < end; addr += offset) {
+		insn = *(kprobe_opcode_t *)addr;
+		offset = GET_INSN_LENGTH(insn);
+
+#ifdef CONFIG_RISCV_ISA_C
+		if (offset == RVI_INSN_LEN)
+			goto is_rvi;
+
+		insn &= __COMPRESSED_INSN_MASK;
+		/* Stop searching until any control transfer instruction */
+		if (riscv_insn_is_c_ebreak(insn) || riscv_insn_is_c_j(insn))
+			break;
+
+		if (riscv_insn_is_c_jal(insn)) {
+			/* The rd of C.JAL is x1 by default */
+			WRITE_ON(1);
+			break;
+		}
+
+		if (riscv_insn_is_c_jr(insn)) {
+			READ_ON(rvc_r_rs1(insn));
+			break;
+		}
+
+		if (riscv_insn_is_c_jalr(insn)) {
+			READ_ON(rvc_r_rs1(insn));
+			/* The rd of C.JALR is x1 by default */
+			WRITE_ON(1);
+			break;
+		}
+
+		if (riscv_insn_is_c_beqz(insn) || riscv_insn_is_c_bnez(insn)) {
+			READ_ON(rvc_b_rs(insn));
+			break;
+		}
+
+		/*
+		 * Decode RVC instructions to find out some destination
+		 * registers never be used as a source register.
+		 */
+		if (riscv_insn_is_c_sub(insn) || riscv_insn_is_c_subw(insn)) {
+			READ_ON(rvc_a_rs1(insn));
+			READ_ON(rvc_a_rs2(insn));
+			continue;
+		} else if (riscv_insn_is_c_sq(insn) ||
+			   riscv_insn_is_c_sw(insn) ||
+			   riscv_insn_is_c_sd(insn)) {
+			READ_ON(rvc_s_rs1(insn));
+			READ_ON(rvc_s_rs2(insn));
+			continue;
+		} else if (riscv_insn_is_c_addi16sp(insn) ||
+			   riscv_insn_is_c_addi(insn) ||
+			   riscv_insn_is_c_addiw(insn) ||
+			   riscv_insn_is_c_slli(insn)) {
+			READ_ON(rvc_i_rs1(insn));
+			continue;
+		} else if (riscv_insn_is_c_sri(insn) ||
+			   riscv_insn_is_c_andi(insn)) {
+			READ_ON(rvc_b_rs(insn));
+			continue;
+		} else if (riscv_insn_is_c_sqsp(insn) ||
+			   riscv_insn_is_c_swsp(insn) ||
+			   riscv_insn_is_c_sdsp(insn)) {
+			READ_ON(rvc_ss_rs2(insn));
+			/* The rs2 of C.SQSP/SWSP/SDSP are x2 by default */
+			READ_ON(2);
+			continue;
+		} else if (riscv_insn_is_c_mv(insn)) {
+			READ_ON(rvc_r_rs2(insn));
+			WRITE_ON(rvc_r_rd(insn));
+		} else if (riscv_insn_is_c_addi4spn(insn)) {
+			/* The rs of C.ADDI4SPN is x2 by default */
+			READ_ON(2);
+			WRITE_ON(rvc_l_rd(insn));
+		} else if (riscv_insn_is_c_lq(insn) ||
+			   riscv_insn_is_c_lw(insn) ||
+			   riscv_insn_is_c_ld(insn)) {
+			/* FIXME: c.lw/c.ld share opcode with c.flw/c.fld */
+			READ_ON(rvc_l_rs(insn));
+			WRITE_ON(rvc_l_rd(insn));
+		} else if (riscv_insn_is_c_lqsp(insn) ||
+			   riscv_insn_is_c_lwsp(insn) ||
+			   riscv_insn_is_c_ldsp(insn)) {
+			/*
+			 * FIXME: c.lwsp/c.ldsp share opcode with c.flwsp/c.fldsp
+			 * The rs of C.LQSP/C.LWSP/C.LDSP is x2 by default.
+			 */
+			READ_ON(2);
+			WRITE_ON(rvc_i_rd(insn));
+		} else if (riscv_insn_is_c_li(insn) ||
+			   riscv_insn_is_c_lui(insn)) {
+			WRITE_ON(rvc_i_rd(insn));
+		}
+
+		if ((*write > 1UL) && __builtin_ctzl(*write & ~1UL))
+			return;
+is_rvi:
+#endif
+		/* Stop searching until any control transfer instruction */
+		if (riscv_insn_is_branch(insn)) {
+			READ_ON(rvi_rs1(insn));
+			READ_ON(rvi_rs2(insn));
+			break;
+		}
+
+		if (riscv_insn_is_jal(insn)) {
+			WRITE_ON(rvi_rd(insn));
+			break;
+		}
+
+		if (riscv_insn_is_jalr(insn)) {
+			READ_ON(rvi_rs1(insn));
+			WRITE_ON(rvi_rd(insn));
+			break;
+		}
+
+		if (riscv_insn_is_system(insn)) {
+			/* csrrw, csrrs, csrrc */
+			if (rvi_rs1(insn))
+				READ_ON(rvi_rs1(insn));
+			/* csrrwi, csrrsi, csrrci, csrrw, csrrs, csrrc */
+			if (rvi_rd(insn))
+				WRITE_ON(rvi_rd(insn));
+			break;
+		}
+
+		/*
+		 * Decode RVI instructions to find out some destination
+		 * registers never be used as a source register.
+		 */
+		if (riscv_insn_is_lui(insn) || riscv_insn_is_auipc(insn)) {
+			WRITE_ON(rvi_rd(insn));
+		} else if (riscv_insn_is_arith_ri(insn) ||
+			   riscv_insn_is_load(insn)) {
+			READ_ON(rvi_rs1(insn));
+			WRITE_ON(rvi_rd(insn));
+		} else if (riscv_insn_is_arith_rr(insn) ||
+			   riscv_insn_is_store(insn) ||
+			   riscv_insn_is_amo(insn)) {
+			READ_ON(rvi_rs1(insn));
+			READ_ON(rvi_rs2(insn));
+			WRITE_ON(rvi_rd(insn));
+		}
+
+		if ((*write > 1UL) && __builtin_ctzl(*write & ~1UL))
+			return;
+	}
+}
+
 static void find_free_registers(struct kprobe *kp, struct optimized_kprobe *op,
 				int *rd, int *ra)
 {
+	unsigned long start, end;
+	/*
+	 * Searching algorithm explanation:
+	 *
+	 * 1. Define two types of instruction areas firstly:
+	 *
+	 * +-----+
+	 * +     +
+	 * +     + ---> instructions modified by optprobe, named 'O-Area'.
+	 * +     +
+	 * +-----+
+	 * +     +
+	 * +     + ---> instructions after optprobe, named 'K-Area'.
+	 * +     +
+	 * +  ~  +
+	 *
+	 * 2. There are two usages for each GPR in the given instruction area.
+	 *
+	 *   - W: GPR is used as the RD oprand at first emergence.
+	 *   - R: GPR is used as the RS oprand at first emergence.
+	 *
+	 * Then there are 4 different usages for each GPR total:
+	 *
+	 *   1. Used as W in O-Area, Used as W in K-Area.
+	 *   2. Used as W in O-Area, Used as R in K-Area.
+	 *   3. Used as R in O-Area, Used as W in K-Area.
+	 *   4. Used as R in O-Area, Used as R in K-Area.
+	 *
+	 * All registers satisfy #1 or #3 could be chosen to form 'AUIPC/JALR'
+	 * jumping to detour buffer.
+	 *
+	 * All registers satisfy #1 or #2, could be chosen to form 'JR' jumping
+	 * back from detour buffer.
+	 */
+	unsigned long kw = 0UL, kr = 0UL, ow = 0UL, or = 0UL;
+
+	/* Search one free register used to form AUIPC/JALR */
+	start = (unsigned long)&kp->opcode;
+	end = start + GET_INSN_LENGTH(kp->opcode);
+	find_register(start, end, &ow, &or);
+
+	start = (unsigned long)kp->addr + GET_INSN_LENGTH(kp->opcode);
+	end = (unsigned long)kp->addr + op->optinsn.length;
+	find_register(start, end, &ow, &or);
+
+	/* Search one free register used to form JR */
+	find_register(end, (unsigned long)_end, &kw, &kr);
+
+	if ((kw & ow) > 1UL) {
+		*rd = __builtin_ctzl((kw & ow) & ~1UL);
+		*ra = *rd;
+		return;
+	}
+
+	*rd = ((kw | ow) == 1UL) ? 0 : __builtin_ctzl((kw | ow) & ~1UL);
+	*ra = (kw == 1UL) ? 0 : __builtin_ctzl(kw & ~1UL);
 }
 
 /*
-- 
2.34.1


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

  parent reply	other threads:[~2023-01-27 13:45 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-01-27 13:05 [PATCH v6 00/13] Add OPTPROBES feature on RISCV Chen Guokai
2023-01-27 13:05 ` [PATCH v6 01/13] riscv/kprobe: Prepare the skeleton to implement RISCV OPTPROBES Chen Guokai
2023-01-27 13:05 ` [PATCH v6 02/13] riscv/kprobe: Allocate detour buffer from module region Chen Guokai
2023-01-27 13:05 ` [PATCH v6 03/13] riscv/kprobe: Add skeleton for preparing optimized kprobe Chen Guokai
2023-01-27 13:05 ` [PATCH v6 04/13] riscv/kprobe: Add common RVI and RVC instruction decoder code Chen Guokai
2023-02-01 13:29   ` Björn Töpel
2023-02-02 10:16   ` Conor Dooley
2023-01-27 13:05 ` Chen Guokai [this message]
2023-01-27 13:05 ` [PATCH v6 06/13] riscv/kprobe: Add code to check if kprobe can be optimized Chen Guokai
2023-02-01 13:30   ` Björn Töpel
2023-01-27 13:05 ` [PATCH v6 07/13] riscv/kprobe: Prepare detour buffer for optimized kprobe Chen Guokai
2023-02-01 13:30   ` Björn Töpel
2023-01-27 13:05 ` [PATCH v6 08/13] riscv/kprobe: Patch AUIPC/JALR pair to optimize kprobe Chen Guokai
2023-02-01 13:31   ` Björn Töpel
2023-01-27 13:05 ` [PATCH v6 09/13] riscv/kprobe: Search free registers from unused caller-saved ones Chen Guokai
2023-02-01 13:31   ` Björn Töpel
2023-02-02  9:08   ` Conor Dooley
2023-01-27 13:05 ` [PATCH v6 10/13] riscv/kprobe: Add instruction boundary check for RVI/RVC hybrid kernel Chen Guokai
2023-01-27 13:05 ` [PATCH v6 11/13] riscv/kprobe: Fix instruction simulation of JALR Chen Guokai
2023-01-31 12:51   ` Björn Töpel
2023-01-27 13:05 ` [PATCH v6 12/13] riscv/kprobe: Move exception related symbols to .kprobe_blacklist Chen Guokai
2023-02-01 13:30   ` Björn Töpel
2023-01-27 13:05 ` [PATCH v6 13/13] selftest/kprobes: Add testcase for kprobe SYM[+offs] Chen Guokai
2023-01-30 12:31 ` [PATCH v6 00/13] Add OPTPROBES feature on RISCV Björn Töpel
2023-01-30 14:38   ` Xim
2023-04-26 18:01     ` Palmer Dabbelt
2023-02-01 13:29 ` Björn Töpel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230127130541.1250865-6-chenguokai17@mails.ucas.ac.cn \
    --to=chenguokai17@mails.ucas.ac.cn \
    --cc=aou@eecs.berkeley.edu \
    --cc=liaochang1@huawei.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=mingo@redhat.com \
    --cc=palmer@dabbelt.com \
    --cc=paul.walmsley@sifive.com \
    --cc=rostedt@goodmis.org \
    --cc=sfr@canb.auug.org.au \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).