* [RFC PATCH v3 1/3] arch/powerpc : Add detour buffer support for optprobes
From: Anju T @ 2016-05-31 10:56 UTC (permalink / raw)
To: linux-kernel, linuxppc-dev
Cc: anju, ananth, naveen.n.rao, paulus, srikar, benh, mpe, hemant,
mahesh, mhiramat, anjutsudhakar
The detour buffer contains instructions to create an in-memory pt_regs.
After the pre-handler has been executed, a call is made to emulate the
probed instruction. The NIP is known only after the probed instruction
has been executed, so a branch instruction is created to the NIP
returned by emulate_step().

Instruction slots for detour buffers are allocated from a reserved
area; for the time being, 64KB is reserved in memory for this purpose.
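For orientation, the control flow of a filled-in detour buffer can be
sketched in C roughly as below. This is an illustration only, not code
from this series: the save/restore helpers are placeholders for the
assembly in the template, and the named calls correspond to the nop
slots that get patched in later by the optprobes core code.

	/* Sketch only: what one patched detour buffer effectively does */
	void detour_buffer_flow(struct optimized_kprobe *op)
	{
		struct pt_regs regs;	/* really built on the stack by the template */

		save_gprs_and_sprs(&regs);		/* placeholder: template entry code   */
		regs.nip = (unsigned long)op->kp.addr;	/* patched-in load of p->addr         */

		optimized_callback(op, &regs);		/* kprobe pre-handler                 */
		emulate_step(&regs, *op->kp.ainsn.insn);/* execute the probed instruction     */
		create_return_branch(op, &regs);	/* patch the final branch to regs.nip */

		restore_gprs_and_sprs(&regs);		/* placeholder: template exit code    */
		/* falls through to the branch written by create_return_branch() */
	}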
Signed-off-by: Anju T <anju@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/kprobes.h | 27 +++++++
arch/powerpc/kernel/optprobes_head.S | 136 +++++++++++++++++++++++++++++++++++
2 files changed, 163 insertions(+)
create mode 100644 arch/powerpc/kernel/optprobes_head.S
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index 039b583..1cb2527 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -38,7 +38,27 @@ struct pt_regs;
struct kprobe;
typedef ppc_opcode_t kprobe_opcode_t;
+
+extern kprobe_opcode_t optinsn_slot;
+/* Optinsn template address */
+extern kprobe_opcode_t optprobe_template_entry[];
+extern kprobe_opcode_t optprobe_template_call_handler[];
+extern kprobe_opcode_t optprobe_template_call_emulate[];
+extern kprobe_opcode_t optprobe_template_ret_branch[];
+extern kprobe_opcode_t optprobe_template_ret[];
+extern kprobe_opcode_t optprobe_template_insn[];
+extern kprobe_opcode_t optprobe_template_kp_addr[];
+extern kprobe_opcode_t optprobe_template_op_address1[];
+extern kprobe_opcode_t optprobe_template_op_address2[];
+extern kprobe_opcode_t optprobe_template_end[];
+
#define MAX_INSN_SIZE 1
+#define MAX_OPTIMIZED_LENGTH 4
+#define MAX_OPTINSN_SIZE \
+ (((unsigned long)&optprobe_template_end - \
+ (unsigned long)&optprobe_template_entry) / \
+ sizeof(kprobe_opcode_t))
+#define RELATIVEJUMP_SIZE 4
#ifdef CONFIG_PPC64
#if defined(_CALL_ELF) && _CALL_ELF == 2
@@ -129,5 +149,12 @@ struct kprobe_ctlblk {
extern int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+
+struct arch_optimized_insn {
+ kprobe_opcode_t copied_insn[1];
+ /* detour buffer */
+ kprobe_opcode_t *insn;
+};
+
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_KPROBES_H */
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
new file mode 100644
index 0000000..b2536bc
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -0,0 +1,136 @@
+/*
+ * Code to prepare detour buffer for optprobes in Kernel.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/ptrace.h>
+#include <asm/asm-offsets.h>
+
+#define OPT_SLOT_SIZE 65536
+
+.align 2
+.global optinsn_slot
+optinsn_slot:
+ /* Reserve an area to allocate slots for detour buffer */
+ .space OPT_SLOT_SIZE
+
+/* Create an in-memory pt_regs */
+.global optprobe_template_entry
+optprobe_template_entry:
+ stdu r1,-INT_FRAME_SIZE(r1)
+ SAVE_GPR(0,r1)
+ /* Save the previous SP into stack */
+ addi r0,r1,INT_FRAME_SIZE
+ std r0,GPR1(r1)
+ SAVE_2GPRS(2,r1)
+ SAVE_8GPRS(4,r1)
+ SAVE_10GPRS(12,r1)
+ SAVE_10GPRS(22,r1)
+ /* Save SPRS */
+ mfmsr r5
+ std r5,_MSR(r1)
+ li r5,0
+ std r5,ORIG_GPR3(r1)
+ mfctr r5
+ std r5,_CTR(r1)
+ mflr r5
+ std r5,_LINK(r1)
+ mfspr r5,SPRN_XER
+ std r5,_XER(r1)
+ mfcr r5
+ std r5,_CCR(r1)
+ lbz r5,PACASOFTIRQEN(r13)
+ std r5,SOFTE(r1)
+ li r5,0
+ std r5,_TRAP(r1)
+ mfdar r5
+ std r5,_DAR(r1)
+ mfdsisr r5
+ std r5,_DSISR(r1)
+ li r5,0
+ std r5,RESULT(r1)
+
+/* Save p->addr into stack */
+.global optprobe_template_kp_addr
+optprobe_template_kp_addr:
+ nop
+ nop
+ nop
+ nop
+ nop
+ std r3,_NIP(r1)
+
+/* Pass parameters for optimized_callback */
+.global optprobe_template_op_address1
+optprobe_template_op_address1:
+ nop
+ nop
+ nop
+ nop
+ nop
+ addi r4,r1,STACK_FRAME_OVERHEAD
+
+/* Branch to the prehandler */
+.global optprobe_template_call_handler
+optprobe_template_call_handler:
+ nop
+ /* Pass parameters for instruction emulation */
+ addi r3,r1,STACK_FRAME_OVERHEAD
+.global optprobe_template_insn
+optprobe_template_insn:
+ nop
+ nop
+
+/* Branch to instruction emulation */
+.global optprobe_template_call_emulate
+optprobe_template_call_emulate:
+ nop
+.global optprobe_template_op_address2
+optprobe_template_op_address2:
+ nop
+ nop
+ nop
+ nop
+ nop
+ addi r4,r1,STACK_FRAME_OVERHEAD
+
+/* Branch to create_return_branch() function */
+.global optprobe_template_ret_branch
+optprobe_template_ret_branch:
+ nop
+ /* Restore the registers */
+ ld r5,_MSR(r1)
+ mtmsr r5
+ ld r5,_CTR(r1)
+ mtctr r5
+ ld r5,_LINK(r1)
+ mtlr r5
+ ld r5,_XER(r1)
+ mtxer r5
+ ld r5,_CCR(r1)
+ mtcr r5
+ ld r5,_DAR(r1)
+ mtdar r5
+ ld r5,_DSISR(r1)
+ mtdsisr r5
+ REST_GPR(0,r1)
+ REST_2GPRS(2,r1)
+ REST_8GPRS(4,r1)
+ REST_10GPRS(12,r1)
+ REST_10GPRS(22,r1)
+ /* Restore the previous SP */
+ addi r1,r1,INT_FRAME_SIZE
+
+/* Jump back to the normal workflow from trampoline */
+.global optprobe_template_ret
+optprobe_template_ret:
+ nop
+.global optprobe_template_end
+optprobe_template_end:
--
2.1.0
* [RFC PATCH v3 2/3] arch/powerpc : optprobes for powerpc core
From: Anju T @ 2016-05-31 10:56 UTC (permalink / raw)
To: linux-kernel, linuxppc-dev
Cc: anju, ananth, naveen.n.rao, paulus, srikar, benh, mpe, hemant,
mahesh, mhiramat, anjutsudhakar
Instructions that can be emulated are candidates for optimization.
Before optimizing, ensure that the distance between the allocated
detour buffer and the instruction being probed is within +/- 32MB.
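The +/- 32MB limit comes from the 24-bit, word-aligned displacement of
the powerpc 'b' instruction. A minimal stand-alone sketch of the range
check used below (the bounds mirror the rel_chk/ret_chk tests in the
patch; the function name and sample addresses are illustrative):

	#include <stdio.h>
	#include <stdbool.h>

	/*
	 * 'b' encodes a signed 26-bit byte offset (24 bits << 2), so the
	 * target must lie within -0x2000000 .. +0x1fffffc of the branch
	 * site and must be 4-byte aligned.
	 */
	static bool within_b_range(long from, long to)
	{
		long rel = to - from;

		return rel >= -0x2000000 && rel <= 0x1fffffc && !(rel & 0x3);
	}

	int main(void)
	{
		/* a probe site and a detour buffer 16MB apart: reachable */
		printf("%d\n", within_b_range(0x100000, 0x100000 + (16 << 20)));
		/* 48MB apart: out of range, optimization must be refused */
		printf("%d\n", within_b_range(0x100000, 0x100000 + (48 << 20)));
		return 0;
	}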
Signed-off-by: Anju T <anju@linux.vnet.ibm.com>
---
arch/powerpc/kernel/optprobes.c | 351 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 351 insertions(+)
create mode 100644 arch/powerpc/kernel/optprobes.c
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
new file mode 100644
index 0000000..c4253b6
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes.c
@@ -0,0 +1,351 @@
+/*
+ * Code for Kernel probes Jump optimization.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kprobes.h>
+#include <linux/jump_label.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <asm/kprobes.h>
+#include <asm/ptrace.h>
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+#include <asm/sstep.h>
+
+DEFINE_INSN_CACHE_OPS(ppc_optinsn)
+
+#define TMPL_CALL_HDLR_IDX \
+ (optprobe_template_call_handler - optprobe_template_entry)
+#define TMPL_EMULATE_IDX \
+ (optprobe_template_call_emulate - optprobe_template_entry)
+#define TMPL_RET_BRANCH_IDX \
+ (optprobe_template_ret_branch - optprobe_template_entry)
+#define TMPL_RET_IDX \
+ (optprobe_template_ret - optprobe_template_entry)
+#define TMPL_KP_IDX \
+ (optprobe_template_kp_addr - optprobe_template_entry)
+#define TMPL_OP1_IDX \
+ (optprobe_template_op_address1 - optprobe_template_entry)
+#define TMPL_OP2_IDX \
+ (optprobe_template_op_address2 - optprobe_template_entry)
+#define TMPL_INSN_IDX \
+ (optprobe_template_insn - optprobe_template_entry)
+#define TMPL_END_IDX \
+ (optprobe_template_end - optprobe_template_entry)
+
+static unsigned long val_nip;
+
+static void *__ppc_alloc_insn_page(void)
+{
+ return &optinsn_slot;
+}
+
+static void __ppc_free_insn_page(void *page __maybe_unused)
+{
+ /* Insn slots come from the reserved area; nothing to free here */
+}
+
+struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
+ .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
+ .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
+ /* insn_size initialized later */
+ .alloc = __ppc_alloc_insn_page,
+ .free = __ppc_free_insn_page,
+ .nr_garbage = 0,
+};
+
+kprobe_opcode_t *ppc_get_optinsn_slot(struct optimized_kprobe *op)
+{
+ /*
+ * The insn slot is allocated from the reserved
+ * area (i.e. &optinsn_slot). We are not optimizing
+ * probes at module addresses for now.
+ */
+ kprobe_opcode_t *slot = NULL;
+
+ if (is_kernel_addr((unsigned long)op->kp.addr))
+ slot = get_ppc_optinsn_slot();
+ return slot;
+}
+
+static void ppc_free_optinsn_slot(struct optimized_kprobe *op)
+{
+ if (!op->optinsn.insn)
+ return;
+ if (is_kernel_addr((unsigned long)op->kp.addr))
+ free_ppc_optinsn_slot(op->optinsn.insn, 0);
+}
+
+static void
+__arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
+{
+ ppc_free_optinsn_slot(op);
+ op->optinsn.insn = NULL;
+}
+
+static int can_optimize(struct kprobe *p)
+{
+ struct pt_regs *regs;
+ unsigned int instr;
+ int r;
+
+ /*
+ * Not optimizing the kprobe placed by
+ * kretprobe during boot time
+ */
+ if ((unsigned long)p->addr == (unsigned long)&kretprobe_trampoline)
+ return 0;
+
+ regs = kmalloc(sizeof(*regs), GFP_KERNEL);
+ if (!regs)
+ return 0;
+ memcpy(regs, current_pt_regs(), sizeof(struct pt_regs));
+ regs->nip = (unsigned long)p->addr;
+ instr = *(p->ainsn.insn);
+
+ /* Ensure the instruction can be emulated */
+ r = emulate_step(regs, instr);
+ val_nip = regs->nip;
+ kfree(regs);
+ if (r != 1)
+ return 0;
+
+ return 1;
+}
+
+static void
+create_return_branch(struct optimized_kprobe *op, struct pt_regs *regs)
+{
+ /*
+ * Create a branch back to the return address
+ * after the probed instruction is emulated
+ */
+
+ kprobe_opcode_t branch, *buff;
+ unsigned long ret;
+
+ ret = regs->nip;
+ buff = op->optinsn.insn;
+ /*
+ * TODO: For conditional branch instructions, the return
+ * address may differ in SMP systems. This has to be addressed.
+ */
+
+ branch = create_branch((unsigned int *)buff + TMPL_RET_IDX,
+ (unsigned long)ret, 0);
+ buff[TMPL_RET_IDX] = branch;
+ isync();
+}
+
+static void
+optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ if (kprobe_running())
+ kprobes_inc_nmissed_count(&op->kp);
+ else {
+ __this_cpu_write(current_kprobe, &op->kp);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ opt_pre_handler(&op->kp, regs);
+ __this_cpu_write(current_kprobe, NULL);
+ }
+ local_irq_restore(flags);
+}
+NOKPROBE_SYMBOL(optimized_callback);
+
+void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
+{
+ __arch_remove_optimized_kprobe(op, 1);
+}
+
+void create_insn(unsigned int insn, kprobe_opcode_t *addr)
+{
+ u32 instr, instr2;
+
+ /*
+ * emulate_step() requires insn to be emulated as
+ * second parameter. Hence r4 should be loaded
+ * with 'insn'.
+ * synthesize addis r4,0,(insn)@h
+ */
+ instr = 0x3c000000 | 0x800000 | ((insn >> 16) & 0xffff);
+ *addr++ = instr;
+
+ /* ori r4,r4,(insn)@l */
+ instr2 = 0x60000000 | 0x40000 | 0x800000;
+ instr2 = instr2 | (insn & 0xffff);
+ *addr = instr2;
+}
+
+void create_load_address_insn(unsigned long val, kprobe_opcode_t *addr)
+{
+ u32 instr1, instr2, instr3, instr4, instr5;
+ /*
+ * Optimized_kprobe structure is required as a parameter
+ * for invoking optimized_callback() and create_return_branch()
+ * from detour buffer. Hence need to have a 64bit immediate
+ * load into r3.
+ *
+ * lis r3,(op)@highest
+ */
+ instr1 = 0x3c000000 | 0x600000 | ((val >> 48) & 0xffff);
+ *addr++ = instr1;
+
+ /* ori r3,r3,(op)@higher */
+ instr2 = 0x60000000 | 0x30000 | 0x600000 | ((val >> 32) & 0xffff);
+ *addr++ = instr2;
+
+ /* rldicr r3,r3,32,31 */
+ instr3 = 0x78000004 | 0x30000 | 0x600000 | ((32 & 0x1f) << 11);
+ instr3 = instr3 | ((31 & 0x1f) << 6) | ((32 & 0x20) >> 4);
+ *addr++ = instr3;
+
+ /* oris r3,r3,(op)@h */
+ instr4 = 0x64000000 | 0x30000 | 0x600000 | ((val >> 16) & 0xffff);
+ *addr++ = instr4;
+
+ /* ori r3,r3,(op)@l */
+ instr5 = 0x60000000 | 0x30000 | 0x600000 | (val & 0xffff);
+ *addr = instr5;
+}
+
+int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
+{
+ kprobe_opcode_t *buff, branch, branch2, branch3;
+ long rel_chk, ret_chk;
+
+ kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
+ op->optinsn.insn = NULL;
+
+ if (!can_optimize(p))
+ return -EILSEQ;
+
+ /* Allocate an instruction slot for the detour buffer */
+ buff = ppc_get_optinsn_slot(op);
+ if (!buff)
+ return -ENOMEM;
+
+ /*
+ * OPTPROBE uses a 'b' instruction to branch to optinsn.insn.
+ *
+ * The target address has to be relatively close, since the branch
+ * instruction on powerpc encodes the address in an immediate field
+ * of the opcode itself, i.e. 24 bits of the opcode specify the
+ * address. Therefore the address gap must be within 32MB on either
+ * side of the current instruction.
+ */
+ rel_chk = (long)buff - (unsigned long)p->addr;
+ if (rel_chk < -0x2000000 || rel_chk > 0x1fffffc || rel_chk & 0x3) {
+ ppc_free_optinsn_slot(op);
+ return -ERANGE;
+ }
+ /* Check the return address is also within 32MB range */
+ ret_chk = (long)(buff + TMPL_RET_IDX) - (unsigned long)val_nip;
+ if (ret_chk < -0x2000000 || ret_chk > 0x1fffffc || ret_chk & 0x3) {
+ ppc_free_optinsn_slot(op);
+ return -ERANGE;
+ }
+
+ /* Copy the arch-specific template into the detour buffer */
+ memcpy(buff, optprobe_template_entry,
+ TMPL_END_IDX * sizeof(kprobe_opcode_t));
+ create_load_address_insn((unsigned long)p->addr, buff + TMPL_KP_IDX);
+ create_load_address_insn((unsigned long)op, buff + TMPL_OP1_IDX);
+ create_load_address_insn((unsigned long)op, buff + TMPL_OP2_IDX);
+
+ /* Create a branch to the optimized_callback function */
+ branch = create_branch((unsigned int *)buff + TMPL_CALL_HDLR_IDX,
+ (unsigned long)optimized_callback + 8,
+ BRANCH_SET_LINK);
+
+ /* Place the branch instr into the trampoline */
+ buff[TMPL_CALL_HDLR_IDX] = branch;
+ create_insn(*(p->ainsn.insn), buff + TMPL_INSN_IDX);
+
+ /* Create a branch instruction to emulate_step() */
+ branch3 = create_branch((unsigned int *)buff + TMPL_EMULATE_IDX,
+ (unsigned long)emulate_step + 8,
+ BRANCH_SET_LINK);
+ buff[TMPL_EMULATE_IDX] = branch3;
+
+ /* Create a branch for jumping back */
+ branch2 = create_branch((unsigned int *)buff + TMPL_RET_BRANCH_IDX,
+ (unsigned long)create_return_branch + 8,
+ BRANCH_SET_LINK);
+ buff[TMPL_RET_BRANCH_IDX] = branch2;
+
+ op->optinsn.insn = buff;
+ smp_mb();
+ return 0;
+}
+
+int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
+{
+ return optinsn->insn != NULL;
+}
+
+/*
+ * On powerpc, kprobe optimization always replaces one instruction
+ * (4 bytes aligned and 4 bytes long), so it is impossible to
+ * encounter another kprobe in the address range. Always return 0.
+ */
+int arch_check_optimized_kprobe(struct optimized_kprobe *op)
+{
+ return 0;
+}
+
+void arch_optimize_kprobes(struct list_head *oplist)
+{
+ struct optimized_kprobe *op;
+ struct optimized_kprobe *tmp;
+
+ unsigned int branch;
+
+ list_for_each_entry_safe(op, tmp, oplist, list) {
+ /*
+ * Back up the instruction which will be
+ * replaced by the branch to the detour buffer
+ */
+ memcpy(op->optinsn.copied_insn, op->kp.addr,
+ RELATIVEJUMP_SIZE);
+ branch = create_branch((unsigned int *)op->kp.addr,
+ (unsigned long)op->optinsn.insn, 0);
+ patch_instruction(op->kp.addr, branch);
+ list_del_init(&op->list);
+ }
+}
+
+void arch_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+ arch_arm_kprobe(&op->kp);
+}
+
+void arch_unoptimize_kprobes(struct list_head *oplist,
+ struct list_head *done_list)
+{
+ struct optimized_kprobe *op;
+ struct optimized_kprobe *tmp;
+
+ list_for_each_entry_safe(op, tmp, oplist, list) {
+ arch_unoptimize_kprobe(op);
+ list_move(&op->list, done_list);
+ }
+}
+
+int arch_within_optimized_kprobe(struct optimized_kprobe *op,
+ unsigned long addr)
+{
+ return 0;
+}
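As a note on create_load_address_insn() above: the lis/ori/rldicr/oris/ori
sequence simply rebuilds a 64-bit constant from four 16-bit pieces. The
arithmetic can be checked with a small stand-alone sketch (userspace C,
illustrative only):

	#include <assert.h>
	#include <stdint.h>

	/* Model of the five-instruction 64-bit immediate load into a GPR */
	static uint64_t build_imm64(uint64_t val)
	{
		uint64_t r;

		r = (val >> 48) & 0xffff;			/* lis    rN,val@highest   */
		r = (r << 16) | ((val >> 32) & 0xffff);		/* ori    rN,rN,val@higher */
		r <<= 32;					/* rldicr rN,rN,32,31      */
		r |= ((val >> 16) & 0xffff) << 16;		/* oris   rN,rN,val@h      */
		r |= val & 0xffff;				/* ori    rN,rN,val@l      */
		return r;
	}

	int main(void)
	{
		assert(build_imm64(0xc000000012345678ULL) == 0xc000000012345678ULL);
		return 0;
	}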
--
2.1.0
* [RFC PATCH v4 3/3] arch/powerpc : Enable optprobes support in powerpc
From: Anju T @ 2016-05-31 10:56 UTC (permalink / raw)
To: linux-kernel, linuxppc-dev
Cc: anju, ananth, naveen.n.rao, paulus, srikar, benh, mpe, hemant,
mahesh, mhiramat, anjutsudhakar
Signed-off-by: Anju T <anju@linux.vnet.ibm.com>
---
Documentation/features/debug/optprobes/arch-support.txt | 2 +-
arch/powerpc/Kconfig | 1 +
arch/powerpc/kernel/Makefile | 1 +
3 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/Documentation/features/debug/optprobes/arch-support.txt b/Documentation/features/debug/optprobes/arch-support.txt
index b8999d8..45bc99d 100644
--- a/Documentation/features/debug/optprobes/arch-support.txt
+++ b/Documentation/features/debug/optprobes/arch-support.txt
@@ -27,7 +27,7 @@
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
- | powerpc: | TODO |
+ | powerpc: | ok |
| s390: | TODO |
| score: | TODO |
| sh: | TODO |
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 7cd32c0..a87c9b1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -104,6 +104,7 @@ config PPC
select HAVE_IOREMAP_PROT
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_LITTLE_ENDIAN
select HAVE_KPROBES
+ select HAVE_OPTPROBES
select HAVE_ARCH_KGDB
select HAVE_KRETPROBES
select HAVE_ARCH_TRACEHOOK
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2da380f..7994e22 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -99,6 +99,7 @@ endif
obj-$(CONFIG_BOOTX_TEXT) += btext.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_OPTPROBES) += optprobes.o optprobes_head.o
obj-$(CONFIG_UPROBES) += uprobes.o
obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
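For completeness: once the series is applied, an ordinary kprobe module is
enough to exercise the optimizer, since the core code transparently upgrades
any probe that can_optimize() accepts. The probed symbol below is an
arbitrary example; whether a probe actually got optimized can be seen from
the [OPTIMIZED] tag in /sys/kernel/debug/kprobes/list.

	#include <linux/module.h>
	#include <linux/kprobes.h>

	static int demo_pre(struct kprobe *p, struct pt_regs *regs)
	{
		pr_info("optprobe demo: hit at %pS\n", (void *)regs->nip);
		return 0;
	}

	static struct kprobe demo_kp = {
		.symbol_name	= "do_nanosleep",	/* arbitrary example target */
		.pre_handler	= demo_pre,
	};

	static int __init demo_init(void)
	{
		return register_kprobe(&demo_kp);
	}

	static void __exit demo_exit(void)
	{
		unregister_kprobe(&demo_kp);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");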
--
2.1.0
* Re: [RFC PATCH v3 2/3] arch/powerpc : optprobes for powerpc core
From: Masami Hiramatsu @ 2016-05-31 14:55 UTC (permalink / raw)
To: Anju T
Cc: linux-kernel, linuxppc-dev, ananth, naveen.n.rao, paulus, srikar,
benh, mpe, hemant, mahesh, mhiramat, anjutsudhakar
On Tue, 31 May 2016 16:26:30 +0530
Anju T <anju@linux.vnet.ibm.com> wrote:
> Instructions which can be emulated are suppliants for
> optimization. Before optimization ensure that the address range
> between the detour buffer allocated and the instruction being probed
> is within +/- 32MB.
>
> Signed-off-by: Anju T <anju@linux.vnet.ibm.com>
> ---
> arch/powerpc/kernel/optprobes.c | 351 ++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 351 insertions(+)
> create mode 100644 arch/powerpc/kernel/optprobes.c
>
> diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
> new file mode 100644
> index 0000000..c4253b6
> --- /dev/null
> +++ b/arch/powerpc/kernel/optprobes.c
> @@ -0,0 +1,351 @@
> +/*
> + * Code for Kernel probes Jump optimization.
> + *
> + * Copyright 2016, Anju T, IBM Corp.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#include <linux/kprobes.h>
> +#include <linux/jump_label.h>
> +#include <linux/types.h>
> +#include <linux/slab.h>
> +#include <linux/list.h>
> +#include <asm/kprobes.h>
> +#include <asm/ptrace.h>
> +#include <asm/cacheflush.h>
> +#include <asm/code-patching.h>
> +#include <asm/sstep.h>
> +
> +DEFINE_INSN_CACHE_OPS(ppc_optinsn)
> +
> +#define TMPL_CALL_HDLR_IDX \
> + (optprobe_template_call_handler - optprobe_template_entry)
> +#define TMPL_EMULATE_IDX \
> + (optprobe_template_call_emulate - optprobe_template_entry)
> +#define TMPL_RET_BRANCH_IDX \
> + (optprobe_template_ret_branch - optprobe_template_entry)
> +#define TMPL_RET_IDX \
> + (optprobe_template_ret - optprobe_template_entry)
> +#define TMPL_KP_IDX \
> + (optprobe_template_kp_addr - optprobe_template_entry)
> +#define TMPL_OP1_IDX \
> + (optprobe_template_op_address1 - optprobe_template_entry)
> +#define TMPL_OP2_IDX \
> + (optprobe_template_op_address2 - optprobe_template_entry)
> +#define TMPL_INSN_IDX \
> + (optprobe_template_insn - optprobe_template_entry)
> +#define TMPL_END_IDX \
> + (optprobe_template_end - optprobe_template_entry)
> +
> +static unsigned long val_nip;
> +
> +static void *__ppc_alloc_insn_page(void)
> +{
> + return &optinsn_slot;
> +}
> +
> +static void *__ppc_free_insn_page(void *page __maybe_unused)
> +{
> + return;
> +}
Hmm, you should not return optinsn_slot twice or more, because it doesn't
actually allocate memory but just returns the reserved memory area. So, it
should be something like this:
static bool insn_page_in_use;

static void *__ppc_alloc_insn_page(void)
{
	if (insn_page_in_use)
		return NULL;
	insn_page_in_use = true;
	return &optinsn_slot;
}

static void __ppc_free_insn_page(void *page __maybe_unused)
{
	insn_page_in_use = false;
}
> +
> +struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
> + .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
> + .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
> + /* insn_size initialized later */
> + .alloc = __ppc_alloc_insn_page,
> + .free = __ppc_free_insn_page,
> + .nr_garbage = 0,
> +};
> +
> +kprobe_opcode_t *ppc_get_optinsn_slot(struct optimized_kprobe *op)
> +{
> + /*
> + * The insn slot is allocated from the reserved
> + * area(ie &optinsn_slot).We are not optimizing probes
> + * at module_addr now.
> + */
> + kprobe_opcode_t *slot = NULL;
> +
> + if (is_kernel_addr(op->kp.addr))
> + slot = get_ppc_optinsn_slot();
> + return slot;
> +}
> +
> +static void ppc_free_optinsn_slot(struct optimized_kprobe *op)
> +{
> + if (!op->optinsn.insn)
> + return;
> + if (is_kernel_addr((unsigned long)op->kp.addr))
> + free_ppc_optinsn_slot(op->optinsn.insn, 0);
> +}
> +
> +static void
> +__arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
> +{
> + ppc_free_optinsn_slot(op);
> + op->optinsn.insn = NULL;
> +}
> +
> +static int can_optimize(struct kprobe *p)
> +{
> + struct pt_regs *regs;
> + unsigned int instr;
> + int r;
> +
> + /*
> + * Not optimizing the kprobe placed by
> + * kretprobe during boot time
> + */
> + if ((kprobe_opcode_t)p->addr == (kprobe_opcode_t)&kretprobe_trampoline)
> + return 0;
> +
> + regs = kmalloc(sizeof(*regs), GFP_KERNEL);
> + if (!regs)
> + return -ENOMEM;
> + memset(regs, 0, sizeof(struct pt_regs));
> + memcpy(regs, current_pt_regs(), sizeof(struct pt_regs));
> + regs->nip = p->addr;
> + instr = *(p->ainsn.insn);
> +
> + /* Ensure the instruction can be emulated*/
> + r = emulate_step(regs, instr);
> + val_nip = regs->nip;
> + if (r != 1)
> + return 0;
> +
> + return 1;
> +}
> +
> +static void
> +create_return_branch(struct optimized_kprobe *op, struct pt_regs *regs)
> +{
> + /*
> + * Create a branch back to the return address
> + * after the probed instruction is emulated
> + */
> +
> + kprobe_opcode_t branch, *buff;
> + unsigned long ret;
> +
> + ret = regs->nip;
> + buff = op->optinsn.insn;
> + /*
> + * TODO: For conditional branch instructions, the return
> + * address may differ in SMP systems.This has to be addressed.
> + */
> +
> + branch = create_branch((unsigned int *)buff + TMPL_RET_IDX,
> + (unsigned long)ret, 0);
> + buff[TMPL_RET_IDX] = branch;
> + isync();
> +}
> +
> +static void
> +optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
> +{
> + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
> + unsigned long flags;
> +
> + local_irq_save(flags);
> +
> + if (kprobe_running())
> + kprobes_inc_nmissed_count(&op->kp);
> + else {
> + __this_cpu_write(current_kprobe, &op->kp);
> + kcb->kprobe_status = KPROBE_HIT_ACTIVE;
> + opt_pre_handler(&op->kp, regs);
> + __this_cpu_write(current_kprobe, NULL);
> + }
> + local_irq_restore(flags);
> +}
> +NOKPROBE_SYMBOL(optimized_callback);
> +
> +void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
> +{
> + __arch_remove_optimized_kprobe(op, 1);
> +}
> +
> +void create_insn(unsigned int insn, kprobe_opcode_t *addr)
> +{
> + u32 instr, instr2;
> +
> + /*
> + * emulate_step() requires insn to be emulated as
> + * second parameter. Hence r4 should be loaded
> + * with 'insn'.
> + * synthesize addis r4,0,(insn)@h
> + */
> + instr = 0x3c000000 | 0x800000 | ((insn >> 16) & 0xffff);
> + *addr++ = instr;
> +
> + /* ori r4,r4,(insn)@l */
> + instr2 = 0x60000000 | 0x40000 | 0x800000;
> + instr2 = instr2 | (insn & 0xffff);
> + *addr = instr2;
> +}
> +
> +void create_load_address_insn(unsigned long val, kprobe_opcode_t *addr)
> +{
> + u32 instr1, instr2, instr3, instr4, instr5;
> + /*
> + * Optimized_kprobe structure is required as a parameter
> + * for invoking optimized_callback() and create_return_branch()
> + * from detour buffer. Hence need to have a 64bit immediate
> + * load into r3.
> + *
> + * lis r3,(op)@highest
> + */
> + instr1 = 0x3c000000 | 0x600000 | ((val >> 48) & 0xffff);
> + *addr++ = instr1;
> +
> + /* ori r3,r3,(op)@higher */
> + instr2 = 0x60000000 | 0x30000 | 0x600000 | ((val >> 32) & 0xffff);
> + *addr++ = instr2;
> +
> + /* rldicr r3,r3,32,31 */
> + instr3 = 0x78000004 | 0x30000 | 0x600000 | ((32 & 0x1f) << 11);
> + instr3 = instr3 | ((31 & 0x1f) << 6) | ((32 & 0x20) >> 4);
> + *addr++ = instr3;
> +
> + /* oris r3,r3,(op)@h */
> + instr4 = 0x64000000 | 0x30000 | 0x600000 | ((val >> 16) & 0xffff);
> + *addr++ = instr4;
> +
> + /* ori r3,r3,(op)@l */
> + instr5 = 0x60000000 | 0x30000 | 0x600000 | (val & 0xffff);
> + *addr = instr5;
> +}
> +
> +int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
> +{
> + kprobe_opcode_t *buff, branch, branch2, branch3;
> + long rel_chk, ret_chk;
> +
> + kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
> + op->optinsn.insn = NULL;
> +
> + if (!can_optimize(p))
> + return -EILSEQ;
> +
> + /* Allocate instruction slot for detour buffer*/
> + buff = ppc_get_optinsn_slot(op);
> + if (!buff)
> + return -ENOMEM;
> +
> + /*
> + * OPTPROBE use a 'b' instruction to branch to optinsn.insn.
> + *
> + * The target address has to be relatively nearby, to permit use
> + * of branch instruction in powerpc because the address is specified
> + * in an immediate field in the instruction opcode itself, ie 24 bits
> + * in the opcode specify the address. Therefore the address gap should
> + * be 32MB on either side of the current instruction.
> + */
> + rel_chk = (long)buff - (unsigned long)p->addr;
> + if (rel_chk < -0x2000000 || rel_chk > 0x1fffffc || rel_chk & 0x3) {
> + ppc_free_optinsn_slot(op);
> + return -ERANGE;
> + }
> + /* Check the return address is also within 32MB range */
> + ret_chk = (long)(buff + TMPL_RET_IDX) - (unsigned long)val_nip;
No, please don't pass regs->nip via a hidden channel like this val_nip
static variable. Instead, you should run emulate_step() again here, or at
least keep the address and compare it. I recommend the former.
Thank you,
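One way to do that, sketched against the code above (illustrative only):
let can_optimize() hand the post-emulation NIP back to its caller instead
of stashing it in the file-static val_nip, and use that value directly for
the return-address range check.

	static int can_optimize(struct kprobe *p, unsigned long *nip)
	{
		struct pt_regs *regs;
		int r = 0;

		if ((unsigned long)p->addr == (unsigned long)&kretprobe_trampoline)
			return 0;

		regs = kmalloc(sizeof(*regs), GFP_KERNEL);
		if (!regs)
			return 0;
		memcpy(regs, current_pt_regs(), sizeof(*regs));
		regs->nip = (unsigned long)p->addr;

		if (emulate_step(regs, *p->ainsn.insn) == 1) {
			*nip = regs->nip;	/* caller uses this for ret_chk */
			r = 1;
		}
		kfree(regs);
		return r;
	}

	/* ... and in arch_prepare_optimized_kprobe(): */
		unsigned long nip;

		if (!can_optimize(p, &nip))
			return -EILSEQ;
		...
		ret_chk = (long)(buff + TMPL_RET_IDX) - (long)nip;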
> + if (ret_chk < -0x2000000 || ret_chk > 0x1fffffc || ret_chk & 0x3) {
> + ppc_free_optinsn_slot(op);
> + return -ERANGE;
> + }
> +
> + /* Do Copy arch specific instance from template*/
> + memcpy(buff, optprobe_template_entry,
> + TMPL_END_IDX * sizeof(kprobe_opcode_t));
> + create_load_address_insn((unsigned long)p->addr, buff + TMPL_KP_IDX);
> + create_load_address_insn((unsigned long)op, buff + TMPL_OP1_IDX);
> + create_load_address_insn((unsigned long)op, buff + TMPL_OP2_IDX);
> +
> + /* Create a branch to the optimized_callback function */
> + branch = create_branch((unsigned int *)buff + TMPL_CALL_HDLR_IDX,
> + (unsigned long)optimized_callback + 8,
> + BRANCH_SET_LINK);
> +
> + /* Place the branch instr into the trampoline */
> + buff[TMPL_CALL_HDLR_IDX] = branch;
> + create_insn(*(p->ainsn.insn), buff + TMPL_INSN_IDX);
> +
> + /*Create a branch instruction into the emulate_step*/
> + branch3 = create_branch((unsigned int *)buff + TMPL_EMULATE_IDX,
> + (unsigned long)emulate_step + 8,
> + BRANCH_SET_LINK);
> + buff[TMPL_EMULATE_IDX] = branch3;
> +
> + /* Create a branch for jumping back*/
> + branch2 = create_branch((unsigned int *)buff + TMPL_RET_BRANCH_IDX,
> + (unsigned long)create_return_branch + 8,
> + BRANCH_SET_LINK);
> + buff[TMPL_RET_BRANCH_IDX] = branch2;
> +
> + op->optinsn.insn = buff;
> + smp_mb();
> + return 0;
> +}
> +
> +int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
> +{
> + return optinsn->insn;
> +}
> +
> +/*
> + * Here,kprobe opt always replace one instruction (4 bytes
> + * aligned and 4 bytes long). It is impossible to encounter another
> + * kprobe in the address range. So always return 0.
> + */
> +int arch_check_optimized_kprobe(struct optimized_kprobe *op)
> +{
> + return 0;
> +}
> +
> +void arch_optimize_kprobes(struct list_head *oplist)
> +{
> + struct optimized_kprobe *op;
> + struct optimized_kprobe *tmp;
> +
> + unsigned int branch;
> +
> + list_for_each_entry_safe(op, tmp, oplist, list) {
> + /*
> + * Backup instructions which will be replaced
> + *by jump address
> + */
> + memcpy(op->optinsn.copied_insn, op->kp.addr,
> + RELATIVEJUMP_SIZE);
> + branch = create_branch((unsigned int *)op->kp.addr,
> + (unsigned long)op->optinsn.insn, 0);
> + *op->kp.addr = branch;
> + list_del_init(&op->list);
> + }
> +}
> +
> +void arch_unoptimize_kprobe(struct optimized_kprobe *op)
> +{
> + arch_arm_kprobe(&op->kp);
> +}
> +
> +void arch_unoptimize_kprobes(struct list_head *oplist,
> + struct list_head *done_list)
> +{
> + struct optimized_kprobe *op;
> + struct optimized_kprobe *tmp;
> +
> + list_for_each_entry_safe(op, tmp, oplist, list) {
> + arch_unoptimize_kprobe(op);
> + list_move(&op->list, done_list);
> + }
> +}
> +
> +int arch_within_optimized_kprobe(struct optimized_kprobe *op,
> + unsigned long addr)
> +{
> + return 0;
> +}
> --
> 2.1.0
>
--
Masami Hiramatsu <mhiramat@kernel.org>