linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH v3 1/3] arch/powerpc : Add detour buffer support for optprobes
       [not found] <1464692191-1167-1-git-send-email-anju@linux.vnet.ibm.com>
@ 2016-05-31 10:56 ` Anju T
  2016-05-31 10:56 ` [RFC PATCH v3 2/3] arch/powerpc : optprobes for powerpc core Anju T
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 4+ messages in thread
From: Anju T @ 2016-05-31 10:56 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev
  Cc: anju, ananth, naveen.n.rao, paulus, srikar, benh, mpe, hemant,
	mahesh, mhiramat, anjutsudhakar

The detour buffer contains instructions to create an in-memory pt_regs.
After the pre-handler has executed, a call is made to emulate the probed
instruction. The NIP is known only after the probed instruction has been
emulated. Hence a branch instruction is created to the NIP that
emulate_step() leaves in pt_regs.

The instruction slot for the detour buffer is allocated from
a reserved area. For the time being, 64KB is reserved
in memory for this purpose.

Signed-off-by: Anju T <anju@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/kprobes.h   |  27 +++++++
 arch/powerpc/kernel/optprobes_head.S | 136 +++++++++++++++++++++++++++++++++++
 2 files changed, 163 insertions(+)
 create mode 100644 arch/powerpc/kernel/optprobes_head.S

diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index 039b583..1cb2527 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -38,7 +38,27 @@ struct pt_regs;
 struct kprobe;
 
 typedef ppc_opcode_t kprobe_opcode_t;
+
+extern kprobe_opcode_t optinsn_slot;
+/* Optinsn template address */
+extern kprobe_opcode_t optprobe_template_entry[];
+extern kprobe_opcode_t optprobe_template_call_handler[];
+extern kprobe_opcode_t optprobe_template_call_emulate[];
+extern kprobe_opcode_t optprobe_template_ret_branch[];
+extern kprobe_opcode_t optprobe_template_ret[];
+extern kprobe_opcode_t optprobe_template_insn[];
+extern kprobe_opcode_t optprobe_template_kp_addr[];
+extern kprobe_opcode_t optprobe_template_op_address1[];
+extern kprobe_opcode_t optprobe_template_op_address2[];
+extern kprobe_opcode_t optprobe_template_end[];
+
 #define MAX_INSN_SIZE 1
+#define MAX_OPTIMIZED_LENGTH    4
+#define	MAX_OPTINSN_SIZE				\
+	(((unsigned long)&optprobe_template_end -	\
+	(unsigned long)&optprobe_template_entry) /	\
+	sizeof(kprobe_opcode_t))
+#define RELATIVEJUMP_SIZE       4
 
 #ifdef CONFIG_PPC64
 #if defined(_CALL_ELF) && _CALL_ELF == 2
@@ -129,5 +149,12 @@ struct kprobe_ctlblk {
 extern int kprobe_exceptions_notify(struct notifier_block *self,
 					unsigned long val, void *data);
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+
+struct arch_optimized_insn {
+	kprobe_opcode_t copied_insn[1];
+	/* detour buffer */
+	kprobe_opcode_t *insn;
+};
+
 #endif /* __KERNEL__ */
 #endif	/* _ASM_POWERPC_KPROBES_H */
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
new file mode 100644
index 0000000..b2536bc
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -0,0 +1,136 @@
+/*
+ * Code to prepare detour buffer for optprobes in Kernel.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/ptrace.h>
+#include <asm/asm-offsets.h>
+
+#define	OPT_SLOT_SIZE	65536
+
+.align	2
+.global optinsn_slot
+optinsn_slot:
+	/* Reserve an area to allocate slots for detour buffer */
+	.space	OPT_SLOT_SIZE
+
+/* Create an in-memory pt_regs */
+.global optprobe_template_entry
+optprobe_template_entry:
+	stdu	r1,-INT_FRAME_SIZE(r1)
+	SAVE_GPR(0,r1)
+	/* Save the previous SP into stack */
+	addi	r0,r1,INT_FRAME_SIZE
+	std	0,GPR1(r1)
+	SAVE_2GPRS(2,r1)
+	SAVE_8GPRS(4,r1)
+	SAVE_10GPRS(12,r1)
+	SAVE_10GPRS(22,r1)
+	/* Save SPRS */
+	mfmsr	r5
+	std	r5,_MSR(r1)
+	li	r5,0
+	std	r5,ORIG_GPR3(r1)
+	mfctr	r5
+	std	r5,_CTR(r1)
+	mflr	r5
+	std	r5,_LINK(r1)
+	mfspr	r5,SPRN_XER
+	std	r5,_XER(r1)
+	mfcr	r5
+	std	r5,_CCR(r1)
+	lbz     r5,PACASOFTIRQEN(r13)
+	std     r5,SOFTE(r1)
+	li	r5,0
+	std	r5,_TRAP(r1)
+	mfdar	r5
+	std	r5,_DAR(r1)
+	mfdsisr	r5
+	std	r5,_DSISR(r1)
+	li	r5,0
+	std	r5,RESULT(r1)
+
+/* Save p->addr into stack */
+.global optprobe_template_kp_addr
+optprobe_template_kp_addr:
+	nop
+	nop
+	nop
+	nop
+	nop
+	std	r3,_NIP(r1)
+
+/* Pass parameters for optimized_callback */
+.global optprobe_template_op_address1
+optprobe_template_op_address1:
+	nop
+	nop
+	nop
+	nop
+	nop
+	addi	r4,r1,STACK_FRAME_OVERHEAD
+
+/* Branch to the prehandler */
+.global optprobe_template_call_handler
+optprobe_template_call_handler:
+	nop
+	/* Pass parameters for instruction emulation */
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+.global optprobe_template_insn
+optprobe_template_insn:
+	nop
+	nop
+
+/* Branch to instruction emulation  */
+.global optprobe_template_call_emulate
+optprobe_template_call_emulate:
+	nop
+.global optprobe_template_op_address2
+optprobe_template_op_address2:
+	nop
+	nop
+	nop
+	nop
+	nop
+	addi	r4,r1,STACK_FRAME_OVERHEAD
+
+/* Branch to create_return_branch() function */
+.global optprobe_template_ret_branch
+optprobe_template_ret_branch:
+	nop
+	/* Restore the registers */
+	ld	r5,_MSR(r1)
+	mtmsr	r5
+	ld	r5,_CTR(r1)
+	mtctr	r5
+	ld	r5,_LINK(r1)
+	mtlr	r5
+	ld	r5,_XER(r1)
+	mtxer	r5
+	ld	r5,_CCR(r1)
+	mtcr	r5
+	ld	r5,_DAR(r1)
+	mtdar	r5
+	ld	r5,_DSISR(r1)
+	mtdsisr	r5
+	REST_GPR(0,r1)
+	REST_2GPRS(2,r1)
+	REST_8GPRS(4,r1)
+	REST_10GPRS(12,r1)
+	REST_10GPRS(22,r1)
+	/* Restore the previous SP */
+	addi	r1,r1,INT_FRAME_SIZE
+
+/* Jump back to the normal workflow from trampoline */
+.global optprobe_template_ret
+optprobe_template_ret:
+	nop
+.global optprobe_template_end
+optprobe_template_end:
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [RFC PATCH v3 2/3] arch/powerpc : optprobes for powerpc core
       [not found] <1464692191-1167-1-git-send-email-anju@linux.vnet.ibm.com>
  2016-05-31 10:56 ` [RFC PATCH v3 1/3] arch/powerpc : Add detour buffer support for optprobes Anju T
@ 2016-05-31 10:56 ` Anju T
  2016-05-31 10:56 ` [RFC PATCH v4 3/3] arch/powerpc : Enable optprobes support in powerpc Anju T
       [not found] ` <201605311058.u4VAsdah009164@mx0a-001b2d01.pphosted.com>
  3 siblings, 0 replies; 4+ messages in thread
From: Anju T @ 2016-05-31 10:56 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev
  Cc: anju, ananth, naveen.n.rao, paulus, srikar, benh, mpe, hemant,
	mahesh, mhiramat, anjutsudhakar

Instructions which can be emulated are candidates for
optimization. Before optimizing, ensure that the distance
between the allocated detour buffer and the instruction being probed
is within +/- 32MB.

Signed-off-by: Anju T <anju@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/optprobes.c | 351 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 351 insertions(+)
 create mode 100644 arch/powerpc/kernel/optprobes.c

diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
new file mode 100644
index 0000000..c4253b6
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes.c
@@ -0,0 +1,351 @@
+/*
+ * Code for Kernel probes Jump optimization.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kprobes.h>
+#include <linux/jump_label.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <asm/kprobes.h>
+#include <asm/ptrace.h>
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+#include <asm/sstep.h>
+
+DEFINE_INSN_CACHE_OPS(ppc_optinsn)
+
+#define TMPL_CALL_HDLR_IDX	\
+	(optprobe_template_call_handler - optprobe_template_entry)
+#define TMPL_EMULATE_IDX	\
+	(optprobe_template_call_emulate - optprobe_template_entry)
+#define TMPL_RET_BRANCH_IDX	\
+	(optprobe_template_ret_branch - optprobe_template_entry)
+#define TMPL_RET_IDX	\
+	(optprobe_template_ret - optprobe_template_entry)
+#define TMPL_KP_IDX	\
+	(optprobe_template_kp_addr - optprobe_template_entry)
+#define TMPL_OP1_IDX	\
+	(optprobe_template_op_address1 - optprobe_template_entry)
+#define TMPL_OP2_IDX	\
+	(optprobe_template_op_address2 - optprobe_template_entry)
+#define TMPL_INSN_IDX	\
+	(optprobe_template_insn - optprobe_template_entry)
+#define TMPL_END_IDX	\
+	(optprobe_template_end - optprobe_template_entry)
+
+static unsigned long val_nip;
+
+static void *__ppc_alloc_insn_page(void)
+{
+	return &optinsn_slot;
+}
+
+static void *__ppc_free_insn_page(void *page __maybe_unused)
+{
+	return;
+}
+
+struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
+	.mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
+	.pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
+	/* insn_size initialized later */
+	.alloc = __ppc_alloc_insn_page,
+	.free = __ppc_free_insn_page,
+	.nr_garbage = 0,
+};
+
+kprobe_opcode_t *ppc_get_optinsn_slot(struct optimized_kprobe *op)
+{
+	/*
+	 * The insn slot is allocated from the reserved
+	 * area(ie &optinsn_slot).We are not optimizing probes
+	 * at module_addr now.
+	 */
+	kprobe_opcode_t *slot = NULL;
+
+	if (is_kernel_addr(op->kp.addr))
+		slot = get_ppc_optinsn_slot();
+	return slot;
+}
+
+static void ppc_free_optinsn_slot(struct optimized_kprobe *op)
+{
+	if (!op->optinsn.insn)
+		return;
+	if (is_kernel_addr((unsigned long)op->kp.addr))
+		free_ppc_optinsn_slot(op->optinsn.insn, 0);
+}
+
+static void
+__arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
+{
+	ppc_free_optinsn_slot(op);
+	op->optinsn.insn = NULL;
+}
+
+static int can_optimize(struct kprobe *p)
+{
+	struct pt_regs *regs;
+	unsigned int instr;
+	int r;
+
+	/*
+	 * Not optimizing the kprobe placed by
+	 * kretprobe during boot time
+	 */
+	if ((kprobe_opcode_t)p->addr == (kprobe_opcode_t)&kretprobe_trampoline)
+		return 0;
+
+	regs = kmalloc(sizeof(*regs), GFP_KERNEL);
+	if (!regs)
+		return -ENOMEM;
+	memset(regs, 0, sizeof(struct pt_regs));
+	memcpy(regs, current_pt_regs(), sizeof(struct pt_regs));
+	regs->nip = p->addr;
+	instr = *(p->ainsn.insn);
+
+	/* Ensure the instruction can be emulated*/
+	r = emulate_step(regs, instr);
+	val_nip = regs->nip;
+	if (r != 1)
+		return 0;
+
+	return 1;
+}
+
+static void
+create_return_branch(struct optimized_kprobe *op, struct pt_regs *regs)
+{
+	/*
+	 * Create a branch back to the return address
+	 * after the probed instruction is emulated
+	 */
+
+	kprobe_opcode_t branch, *buff;
+	unsigned long ret;
+
+	ret = regs->nip;
+	buff = op->optinsn.insn;
+	/*
+	 * TODO: For conditional branch instructions, the return
+	 * address may differ in SMP systems.This has to be addressed.
+	 */
+
+	branch = create_branch((unsigned int *)buff + TMPL_RET_IDX,
+			       (unsigned long)ret, 0);
+	buff[TMPL_RET_IDX] = branch;
+	isync();
+}
+
+static void
+optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	if (kprobe_running())
+		kprobes_inc_nmissed_count(&op->kp);
+	else {
+		__this_cpu_write(current_kprobe, &op->kp);
+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+		opt_pre_handler(&op->kp, regs);
+		__this_cpu_write(current_kprobe, NULL);
+	}
+	local_irq_restore(flags);
+}
+NOKPROBE_SYMBOL(optimized_callback);
+
+void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
+{
+	 __arch_remove_optimized_kprobe(op, 1);
+}
+
+void  create_insn(unsigned int insn, kprobe_opcode_t *addr)
+{
+	u32 instr, instr2;
+
+	/*
+	 * emulate_step() requires insn to be emulated as
+	 * second parameter. Hence r4 should be loaded
+	 * with 'insn'.
+	 * synthesize addis r4,0,(insn)@h
+	 */
+	instr = 0x3c000000 | 0x800000 | ((insn >> 16) & 0xffff);
+	*addr++ = instr;
+
+	/* ori r4,r4,(insn)@l */
+	instr2 = 0x60000000 | 0x40000 | 0x800000;
+	instr2 = instr2 | (insn & 0xffff);
+	*addr = instr2;
+}
+
+void create_load_address_insn(unsigned long val, kprobe_opcode_t *addr)
+{
+	u32 instr1, instr2, instr3, instr4, instr5;
+	/*
+	 * Optimized_kprobe structure is required as a parameter
+	 * for invoking optimized_callback() and create_return_branch()
+	 * from detour buffer. Hence need to have a 64bit immediate
+	 * load into r3.
+	 *
+	 * lis r3,(op)@highest
+	 */
+	instr1 = 0x3c000000 | 0x600000 | ((val >> 48) & 0xffff);
+	*addr++ = instr1;
+
+	/* ori r3,r3,(op)@higher */
+	instr2 = 0x60000000 | 0x30000 | 0x600000 | ((val >> 32) & 0xffff);
+	*addr++ = instr2;
+
+	/* rldicr r3,r3,32,31 */
+	instr3 = 0x78000004 | 0x30000 | 0x600000 | ((32 & 0x1f) << 11);
+	instr3 = instr3 | ((31 & 0x1f) << 6) | ((32 & 0x20) >> 4);
+	*addr++ = instr3;
+
+	/* oris r3,r3,(op)@h */
+	instr4 = 0x64000000 |  0x30000 | 0x600000 | ((val >> 16) & 0xffff);
+	*addr++ = instr4;
+
+	/* ori r3,r3,(op)@l */
+	instr5 =  0x60000000 | 0x30000 | 0x600000 | (val & 0xffff);
+	*addr = instr5;
+}
+
+int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
+{
+	kprobe_opcode_t *buff, branch, branch2, branch3;
+	long rel_chk, ret_chk;
+
+	kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
+	op->optinsn.insn = NULL;
+
+	if (!can_optimize(p))
+		return -EILSEQ;
+
+	/* Allocate instruction slot for detour buffer*/
+	buff = ppc_get_optinsn_slot(op);
+	if (!buff)
+		return -ENOMEM;
+
+	/*
+	 * OPTPROBE use a 'b' instruction to branch to optinsn.insn.
+	 *
+	 * The target address has to be relatively nearby, to permit use
+	 * of branch instruction in powerpc because the address is specified
+	 * in an immediate field in the instruction opcode itself, ie 24 bits
+	 * in the opcode specify the address. Therefore the address gap should
+	 * be 32MB on either side of the current instruction.
+	 */
+	rel_chk = (long)buff - (unsigned long)p->addr;
+	if (rel_chk < -0x2000000 || rel_chk > 0x1fffffc || rel_chk & 0x3) {
+		ppc_free_optinsn_slot(op);
+		return -ERANGE;
+	}
+	/* Check the return address is also within 32MB range */
+	ret_chk = (long)(buff + TMPL_RET_IDX) - (unsigned long)val_nip;
+	if (ret_chk < -0x2000000 || ret_chk > 0x1fffffc || ret_chk & 0x3) {
+		ppc_free_optinsn_slot(op);
+		return -ERANGE;
+	}
+
+	/* Do Copy arch specific instance from template*/
+	memcpy(buff, optprobe_template_entry,
+	       TMPL_END_IDX * sizeof(kprobe_opcode_t));
+	create_load_address_insn((unsigned long)p->addr, buff + TMPL_KP_IDX);
+	create_load_address_insn((unsigned long)op, buff + TMPL_OP1_IDX);
+	create_load_address_insn((unsigned long)op, buff + TMPL_OP2_IDX);
+
+	/* Create a branch to the optimized_callback function */
+	branch = create_branch((unsigned int *)buff + TMPL_CALL_HDLR_IDX,
+			       (unsigned long)optimized_callback + 8,
+				BRANCH_SET_LINK);
+
+	/* Place the branch instr into the trampoline */
+	buff[TMPL_CALL_HDLR_IDX] = branch;
+	create_insn(*(p->ainsn.insn), buff + TMPL_INSN_IDX);
+
+	/*Create a branch instruction into the emulate_step*/
+	branch3 = create_branch((unsigned int *)buff + TMPL_EMULATE_IDX,
+				(unsigned long)emulate_step + 8,
+				BRANCH_SET_LINK);
+	buff[TMPL_EMULATE_IDX] = branch3;
+
+	/* Create a branch for jumping back*/
+	branch2 = create_branch((unsigned int *)buff + TMPL_RET_BRANCH_IDX,
+				(unsigned long)create_return_branch + 8,
+				BRANCH_SET_LINK);
+	buff[TMPL_RET_BRANCH_IDX] = branch2;
+
+	op->optinsn.insn = buff;
+	smp_mb();
+	return 0;
+}
+
+int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
+{
+	return optinsn->insn;
+}
+
+/*
+ * Here,kprobe opt always replace one instruction (4 bytes
+ * aligned and 4 bytes long). It is impossible to encounter another
+ * kprobe in the address range. So always return 0.
+ */
+int arch_check_optimized_kprobe(struct optimized_kprobe *op)
+{
+	return 0;
+}
+
+void arch_optimize_kprobes(struct list_head *oplist)
+{
+	struct optimized_kprobe *op;
+	struct optimized_kprobe *tmp;
+
+	unsigned int branch;
+
+	list_for_each_entry_safe(op, tmp, oplist, list) {
+		/*
+		 * Backup instructions which will be replaced
+		 *by jump address
+		 */
+		memcpy(op->optinsn.copied_insn, op->kp.addr,
+		       RELATIVEJUMP_SIZE);
+		branch = create_branch((unsigned int *)op->kp.addr,
+					(unsigned long)op->optinsn.insn, 0);
+		*op->kp.addr = branch;
+		list_del_init(&op->list);
+	}
+}
+
+void arch_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+	arch_arm_kprobe(&op->kp);
+}
+
+void arch_unoptimize_kprobes(struct list_head *oplist,
+			     struct list_head *done_list)
+{
+	struct optimized_kprobe *op;
+	struct optimized_kprobe *tmp;
+
+	list_for_each_entry_safe(op, tmp, oplist, list) {
+		arch_unoptimize_kprobe(op);
+		list_move(&op->list, done_list);
+	}
+}
+
+int arch_within_optimized_kprobe(struct optimized_kprobe *op,
+				 unsigned long addr)
+{
+	return 0;
+}
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [RFC PATCH v4 3/3] arch/powerpc : Enable optprobes support in powerpc
       [not found] <1464692191-1167-1-git-send-email-anju@linux.vnet.ibm.com>
  2016-05-31 10:56 ` [RFC PATCH v3 1/3] arch/powerpc : Add detour buffer support for optprobes Anju T
  2016-05-31 10:56 ` [RFC PATCH v3 2/3] arch/powerpc : optprobes for powerpc core Anju T
@ 2016-05-31 10:56 ` Anju T
       [not found] ` <201605311058.u4VAsdah009164@mx0a-001b2d01.pphosted.com>
  3 siblings, 0 replies; 4+ messages in thread
From: Anju T @ 2016-05-31 10:56 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev
  Cc: anju, ananth, naveen.n.rao, paulus, srikar, benh, mpe, hemant,
	mahesh, mhiramat, anjutsudhakar

Signed-off-by: Anju T <anju@linux.vnet.ibm.com>
---
 Documentation/features/debug/optprobes/arch-support.txt | 2 +-
 arch/powerpc/Kconfig                                    | 1 +
 arch/powerpc/kernel/Makefile                            | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/features/debug/optprobes/arch-support.txt b/Documentation/features/debug/optprobes/arch-support.txt
index b8999d8..45bc99d 100644
--- a/Documentation/features/debug/optprobes/arch-support.txt
+++ b/Documentation/features/debug/optprobes/arch-support.txt
@@ -27,7 +27,7 @@
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
-    |     powerpc: | TODO |
+    |     powerpc: |  ok  |
     |        s390: | TODO |
     |       score: | TODO |
     |          sh: | TODO |
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 7cd32c0..a87c9b1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -104,6 +104,7 @@ config PPC
 	select HAVE_IOREMAP_PROT
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_LITTLE_ENDIAN
 	select HAVE_KPROBES
+	select HAVE_OPTPROBES
 	select HAVE_ARCH_KGDB
 	select HAVE_KRETPROBES
 	select HAVE_ARCH_TRACEHOOK
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2da380f..7994e22 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -99,6 +99,7 @@ endif
 obj-$(CONFIG_BOOTX_TEXT)	+= btext.o
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
+obj-$(CONFIG_OPTPROBES)		+= optprobes.o optprobes_head.o
 obj-$(CONFIG_UPROBES)		+= uprobes.o
 obj-$(CONFIG_PPC_UDBG_16550)	+= legacy_serial.o udbg_16550.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [RFC PATCH v3 2/3] arch/powerpc : optprobes for powerpc core
       [not found] ` <201605311058.u4VAsdah009164@mx0a-001b2d01.pphosted.com>
@ 2016-05-31 14:55   ` Masami Hiramatsu
  0 siblings, 0 replies; 4+ messages in thread
From: Masami Hiramatsu @ 2016-05-31 14:55 UTC (permalink / raw)
  To: Anju T
  Cc: linux-kernel, linuxppc-dev, ananth, naveen.n.rao, paulus, srikar,
	benh, mpe, hemant, mahesh, mhiramat, anjutsudhakar

On Tue, 31 May 2016 16:26:30 +0530
Anju T <anju@linux.vnet.ibm.com> wrote:

> Instructions which can be emulated are suppliants for
> optimization. Before optimization ensure that the address range
> between the detour buffer allocated and the instruction being probed
> is within +/- 32MB.
> 
> Signed-off-by: Anju T <anju@linux.vnet.ibm.com>
> ---
>  arch/powerpc/kernel/optprobes.c | 351 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 351 insertions(+)
>  create mode 100644 arch/powerpc/kernel/optprobes.c
> 
> diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
> new file mode 100644
> index 0000000..c4253b6
> --- /dev/null
> +++ b/arch/powerpc/kernel/optprobes.c
> @@ -0,0 +1,351 @@
> +/*
> + * Code for Kernel probes Jump optimization.
> + *
> + * Copyright 2016, Anju T, IBM Corp.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#include <linux/kprobes.h>
> +#include <linux/jump_label.h>
> +#include <linux/types.h>
> +#include <linux/slab.h>
> +#include <linux/list.h>
> +#include <asm/kprobes.h>
> +#include <asm/ptrace.h>
> +#include <asm/cacheflush.h>
> +#include <asm/code-patching.h>
> +#include <asm/sstep.h>
> +
> +DEFINE_INSN_CACHE_OPS(ppc_optinsn)
> +
> +#define TMPL_CALL_HDLR_IDX	\
> +	(optprobe_template_call_handler - optprobe_template_entry)
> +#define TMPL_EMULATE_IDX	\
> +	(optprobe_template_call_emulate - optprobe_template_entry)
> +#define TMPL_RET_BRANCH_IDX	\
> +	(optprobe_template_ret_branch - optprobe_template_entry)
> +#define TMPL_RET_IDX	\
> +	(optprobe_template_ret - optprobe_template_entry)
> +#define TMPL_KP_IDX	\
> +	(optprobe_template_kp_addr - optprobe_template_entry)
> +#define TMPL_OP1_IDX	\
> +	(optprobe_template_op_address1 - optprobe_template_entry)
> +#define TMPL_OP2_IDX	\
> +	(optprobe_template_op_address2 - optprobe_template_entry)
> +#define TMPL_INSN_IDX	\
> +	(optprobe_template_insn - optprobe_template_entry)
> +#define TMPL_END_IDX	\
> +	(optprobe_template_end - optprobe_template_entry)
> +
> +static unsigned long val_nip;
> +
> +static void *__ppc_alloc_insn_page(void)
> +{
> +	return &optinsn_slot;
> +}
> +
> +static void *__ppc_free_insn_page(void *page __maybe_unused)
> +{
> +	return;
> +}

Hmm, you should not return optinsn_slot more than once, because
this function doesn't actually allocate memory; it just returns the
reserved memory area. So, it should be something like this:

/* True while the single reserved detour-buffer area is handed out. */
static bool insn_page_in_use;

/*
 * Hand out the reserved optinsn_slot area as the one and only insn page.
 *
 * Nothing is really allocated here, so a request made while the area is
 * already in use gets NULL rather than the same memory a second time.
 */
static void *__ppc_alloc_insn_page(void)
{
	void *page = NULL;

	if (!insn_page_in_use) {
		insn_page_in_use = true;
		page = &optinsn_slot;
	}
	return page;
}

/*
 * Mark the reserved insn page as free so that it can be handed out again.
 *
 * @page is ignored: the flag is cleared whichever pointer is passed in.
 *
 * Declared void because the function produces no value; a "void *"
 * function that falls off its end leaves any caller that reads the
 * result with undefined behaviour.
 */
static void __ppc_free_insn_page(void *page __maybe_unused)
{
	insn_page_in_use = false;
}


> +
> +struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
> +	.mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
> +	.pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
> +	/* insn_size initialized later */
> +	.alloc = __ppc_alloc_insn_page,
> +	.free = __ppc_free_insn_page,
> +	.nr_garbage = 0,
> +};
> +
> +kprobe_opcode_t *ppc_get_optinsn_slot(struct optimized_kprobe *op)
> +{
> +	/*
> +	 * The insn slot is allocated from the reserved
> +	 * area(ie &optinsn_slot).We are not optimizing probes
> +	 * at module_addr now.
> +	 */
> +	kprobe_opcode_t *slot = NULL;
> +
> +	if (is_kernel_addr(op->kp.addr))
> +		slot = get_ppc_optinsn_slot();
> +	return slot;
> +}
> +
> +static void ppc_free_optinsn_slot(struct optimized_kprobe *op)
> +{
> +	if (!op->optinsn.insn)
> +		return;
> +	if (is_kernel_addr((unsigned long)op->kp.addr))
> +		free_ppc_optinsn_slot(op->optinsn.insn, 0);
> +}
> +
> +static void
> +__arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
> +{
> +	ppc_free_optinsn_slot(op);
> +	op->optinsn.insn = NULL;
> +}
> +
> +static int can_optimize(struct kprobe *p)
> +{
> +	struct pt_regs *regs;
> +	unsigned int instr;
> +	int r;
> +
> +	/*
> +	 * Not optimizing the kprobe placed by
> +	 * kretprobe during boot time
> +	 */
> +	if ((kprobe_opcode_t)p->addr == (kprobe_opcode_t)&kretprobe_trampoline)
> +		return 0;
> +
> +	regs = kmalloc(sizeof(*regs), GFP_KERNEL);
> +	if (!regs)
> +		return -ENOMEM;
> +	memset(regs, 0, sizeof(struct pt_regs));
> +	memcpy(regs, current_pt_regs(), sizeof(struct pt_regs));
> +	regs->nip = p->addr;
> +	instr = *(p->ainsn.insn);
> +
> +	/* Ensure the instruction can be emulated*/
> +	r = emulate_step(regs, instr);
> +	val_nip = regs->nip;
> +	if (r != 1)
> +		return 0;
> +
> +	return 1;
> +}
> +
> +static void
> +create_return_branch(struct optimized_kprobe *op, struct pt_regs *regs)
> +{
> +	/*
> +	 * Create a branch back to the return address
> +	 * after the probed instruction is emulated
> +	 */
> +
> +	kprobe_opcode_t branch, *buff;
> +	unsigned long ret;
> +
> +	ret = regs->nip;
> +	buff = op->optinsn.insn;
> +	/*
> +	 * TODO: For conditional branch instructions, the return
> +	 * address may differ in SMP systems.This has to be addressed.
> +	 */
> +
> +	branch = create_branch((unsigned int *)buff + TMPL_RET_IDX,
> +			       (unsigned long)ret, 0);
> +	buff[TMPL_RET_IDX] = branch;
> +	isync();
> +}
> +
> +static void
> +optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
> +{
> +	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
> +	unsigned long flags;
> +
> +	local_irq_save(flags);
> +
> +	if (kprobe_running())
> +		kprobes_inc_nmissed_count(&op->kp);
> +	else {
> +		__this_cpu_write(current_kprobe, &op->kp);
> +		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
> +		opt_pre_handler(&op->kp, regs);
> +		__this_cpu_write(current_kprobe, NULL);
> +	}
> +	local_irq_restore(flags);
> +}
> +NOKPROBE_SYMBOL(optimized_callback);
> +
> +void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
> +{
> +	 __arch_remove_optimized_kprobe(op, 1);
> +}
> +
> +void  create_insn(unsigned int insn, kprobe_opcode_t *addr)
> +{
> +	u32 instr, instr2;
> +
> +	/*
> +	 * emulate_step() requires insn to be emulated as
> +	 * second parameter. Hence r4 should be loaded
> +	 * with 'insn'.
> +	 * synthesize addis r4,0,(insn)@h
> +	 */
> +	instr = 0x3c000000 | 0x800000 | ((insn >> 16) & 0xffff);
> +	*addr++ = instr;
> +
> +	/* ori r4,r4,(insn)@l */
> +	instr2 = 0x60000000 | 0x40000 | 0x800000;
> +	instr2 = instr2 | (insn & 0xffff);
> +	*addr = instr2;
> +}
> +
> +void create_load_address_insn(unsigned long val, kprobe_opcode_t *addr)
> +{
> +	u32 instr1, instr2, instr3, instr4, instr5;
> +	/*
> +	 * Optimized_kprobe structure is required as a parameter
> +	 * for invoking optimized_callback() and create_return_branch()
> +	 * from detour buffer. Hence need to have a 64bit immediate
> +	 * load into r3.
> +	 *
> +	 * lis r3,(op)@highest
> +	 */
> +	instr1 = 0x3c000000 | 0x600000 | ((val >> 48) & 0xffff);
> +	*addr++ = instr1;
> +
> +	/* ori r3,r3,(op)@higher */
> +	instr2 = 0x60000000 | 0x30000 | 0x600000 | ((val >> 32) & 0xffff);
> +	*addr++ = instr2;
> +
> +	/* rldicr r3,r3,32,31 */
> +	instr3 = 0x78000004 | 0x30000 | 0x600000 | ((32 & 0x1f) << 11);
> +	instr3 = instr3 | ((31 & 0x1f) << 6) | ((32 & 0x20) >> 4);
> +	*addr++ = instr3;
> +
> +	/* oris r3,r3,(op)@h */
> +	instr4 = 0x64000000 |  0x30000 | 0x600000 | ((val >> 16) & 0xffff);
> +	*addr++ = instr4;
> +
> +	/* ori r3,r3,(op)@l */
> +	instr5 =  0x60000000 | 0x30000 | 0x600000 | (val & 0xffff);
> +	*addr = instr5;
> +}
> +
> +int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
> +{
> +	kprobe_opcode_t *buff, branch, branch2, branch3;
> +	long rel_chk, ret_chk;
> +
> +	kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
> +	op->optinsn.insn = NULL;
> +
> +	if (!can_optimize(p))
> +		return -EILSEQ;
> +
> +	/* Allocate instruction slot for detour buffer*/
> +	buff = ppc_get_optinsn_slot(op);
> +	if (!buff)
> +		return -ENOMEM;
> +
> +	/*
> +	 * OPTPROBE use a 'b' instruction to branch to optinsn.insn.
> +	 *
> +	 * The target address has to be relatively nearby, to permit use
> +	 * of branch instruction in powerpc because the address is specified
> +	 * in an immediate field in the instruction opcode itself, ie 24 bits
> +	 * in the opcode specify the address. Therefore the address gap should
> +	 * be 32MB on either side of the current instruction.
> +	 */
> +	rel_chk = (long)buff - (unsigned long)p->addr;
> +	if (rel_chk < -0x2000000 || rel_chk > 0x1fffffc || rel_chk & 0x3) {
> +		ppc_free_optinsn_slot(op);
> +		return -ERANGE;
> +	}
> +	/* Check the return address is also within 32MB range */
> +	ret_chk = (long)(buff + TMPL_RET_IDX) - (unsigned long)val_nip;

No, please don't pass "regs->nip" through a hidden channel such as this
static variable val_nip. Instead, you should run emulate_step() again here,
or at least you must keep the address and compare it. I recommend the former.


Thank you,

> +	if (ret_chk < -0x2000000 || ret_chk > 0x1fffffc || ret_chk & 0x3) {
> +		ppc_free_optinsn_slot(op);
> +		return -ERANGE;
> +	}
> +
> +	/* Do Copy arch specific instance from template*/
> +	memcpy(buff, optprobe_template_entry,
> +	       TMPL_END_IDX * sizeof(kprobe_opcode_t));
> +	create_load_address_insn((unsigned long)p->addr, buff + TMPL_KP_IDX);
> +	create_load_address_insn((unsigned long)op, buff + TMPL_OP1_IDX);
> +	create_load_address_insn((unsigned long)op, buff + TMPL_OP2_IDX);
> +
> +	/* Create a branch to the optimized_callback function */
> +	branch = create_branch((unsigned int *)buff + TMPL_CALL_HDLR_IDX,
> +			       (unsigned long)optimized_callback + 8,
> +				BRANCH_SET_LINK);
> +
> +	/* Place the branch instr into the trampoline */
> +	buff[TMPL_CALL_HDLR_IDX] = branch;
> +	create_insn(*(p->ainsn.insn), buff + TMPL_INSN_IDX);
> +
> +	/*Create a branch instruction into the emulate_step*/
> +	branch3 = create_branch((unsigned int *)buff + TMPL_EMULATE_IDX,
> +				(unsigned long)emulate_step + 8,
> +				BRANCH_SET_LINK);
> +	buff[TMPL_EMULATE_IDX] = branch3;
> +
> +	/* Create a branch for jumping back*/
> +	branch2 = create_branch((unsigned int *)buff + TMPL_RET_BRANCH_IDX,
> +				(unsigned long)create_return_branch + 8,
> +				BRANCH_SET_LINK);
> +	buff[TMPL_RET_BRANCH_IDX] = branch2;
> +
> +	op->optinsn.insn = buff;
> +	smp_mb();
> +	return 0;
> +}
> +
> +int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
> +{
> +	return optinsn->insn;
> +}
> +
> +/*
> + * Here,kprobe opt always replace one instruction (4 bytes
> + * aligned and 4 bytes long). It is impossible to encounter another
> + * kprobe in the address range. So always return 0.
> + */
> +int arch_check_optimized_kprobe(struct optimized_kprobe *op)
> +{
> +	return 0;
> +}
> +
> +void arch_optimize_kprobes(struct list_head *oplist)
> +{
> +	struct optimized_kprobe *op;
> +	struct optimized_kprobe *tmp;
> +
> +	unsigned int branch;
> +
> +	list_for_each_entry_safe(op, tmp, oplist, list) {
> +		/*
> +		 * Backup instructions which will be replaced
> +		 *by jump address
> +		 */
> +		memcpy(op->optinsn.copied_insn, op->kp.addr,
> +		       RELATIVEJUMP_SIZE);
> +		branch = create_branch((unsigned int *)op->kp.addr,
> +					(unsigned long)op->optinsn.insn, 0);
> +		*op->kp.addr = branch;
> +		list_del_init(&op->list);
> +	}
> +}
> +
> +void arch_unoptimize_kprobe(struct optimized_kprobe *op)
> +{
> +	arch_arm_kprobe(&op->kp);
> +}
> +
> +void arch_unoptimize_kprobes(struct list_head *oplist,
> +			     struct list_head *done_list)
> +{
> +	struct optimized_kprobe *op;
> +	struct optimized_kprobe *tmp;
> +
> +	list_for_each_entry_safe(op, tmp, oplist, list) {
> +		arch_unoptimize_kprobe(op);
> +		list_move(&op->list, done_list);
> +	}
> +}
> +
> +int arch_within_optimized_kprobe(struct optimized_kprobe *op,
> +				 unsigned long addr)
> +{
> +	return 0;
> +}
> -- 
> 2.1.0
> 


-- 
Masami Hiramatsu <mhiramat@kernel.org>

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-05-31 14:55 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1464692191-1167-1-git-send-email-anju@linux.vnet.ibm.com>
2016-05-31 10:56 ` [RFC PATCH v3 1/3] arch/powerpc : Add detour buffer support for optprobes Anju T
2016-05-31 10:56 ` [RFC PATCH v3 2/3] arch/powerpc : optprobes for powerpc core Anju T
2016-05-31 10:56 ` [RFC PATCH v4 3/3] arch/powerpc : Enable optprobes support in powerpc Anju T
     [not found] ` <201605311058.u4VAsdah009164@mx0a-001b2d01.pphosted.com>
2016-05-31 14:55   ` [RFC PATCH v3 2/3] arch/powerpc : optprobes for powerpc core Masami Hiramatsu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).