All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel
  2016-01-25 15:38 [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2) Torsten Duwe
@ 2016-01-25 15:26 ` Torsten Duwe
  2016-01-27 10:19   ` Michael Ellerman
  2016-02-03  7:23   ` AKASHI Takahiro
  2016-01-25 15:27 ` [PATCH v6 2/9] ppc64le FTRACE_WITH_REGS implementation Torsten Duwe
                   ` (8 subsequent siblings)
  9 siblings, 2 replies; 44+ messages in thread
From: Torsten Duwe @ 2016-01-25 15:26 UTC (permalink / raw)
  To: Steven Rostedt, Michael Ellerman
  Cc: Jiri Kosina, linuxppc-dev, linux-kernel, live-patching

The gcc switch -mprofile-kernel, available for ppc64 on gcc > 4.8.5,
allows calling _mcount very early in the function, which low-level
ASM code and code patching functions need to consider.
Especially the link register and the parameter registers are still
alive and not yet saved into a new stack frame.

Signed-off-by: Torsten Duwe <duwe@suse.de>
---
 arch/powerpc/kernel/entry_64.S  | 45 +++++++++++++++++++++++++++++++++++++++--
 arch/powerpc/kernel/ftrace.c    | 12 +++++++++--
 arch/powerpc/kernel/module_64.c | 14 +++++++++++++
 3 files changed, 67 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index a94f155..e7cd043 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -1206,7 +1206,12 @@ _GLOBAL(enter_prom)
 #ifdef CONFIG_DYNAMIC_FTRACE
 _GLOBAL(mcount)
 _GLOBAL(_mcount)
-	blr
+	std	r0,LRSAVE(r1) /* gcc6 does this _after_ this call _only_ */
+	mflr	r0
+	mtctr	r0
+	ld	r0,LRSAVE(r1)
+	mtlr	r0
+	bctr
 
 _GLOBAL_TOC(ftrace_caller)
 	/* Taken from output of objdump from lib64/glibc */
@@ -1262,13 +1267,28 @@ _GLOBAL(ftrace_stub)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 _GLOBAL(ftrace_graph_caller)
+#ifdef CC_USING_MPROFILE_KERNEL
+	/* with -mprofile-kernel, parameter regs are still alive at _mcount */
+	std	r10, 104(r1)
+	std	r9, 96(r1)
+	std	r8, 88(r1)
+	std	r7, 80(r1)
+	std	r6, 72(r1)
+	std	r5, 64(r1)
+	std	r4, 56(r1)
+	std	r3, 48(r1)
+	mfctr	r4		/* ftrace_caller has moved local addr here */
+	std	r4, 40(r1)
+	mflr	r3		/* ftrace_caller has restored LR from stack */
+#else
 	/* load r4 with local address */
 	ld	r4, 128(r1)
-	subi	r4, r4, MCOUNT_INSN_SIZE
 
 	/* Grab the LR out of the caller stack frame */
 	ld	r11, 112(r1)
 	ld	r3, 16(r11)
+#endif
+	subi	r4, r4, MCOUNT_INSN_SIZE
 
 	bl	prepare_ftrace_return
 	nop
@@ -1277,6 +1297,26 @@ _GLOBAL(ftrace_graph_caller)
 	 * prepare_ftrace_return gives us the address we divert to.
 	 * Change the LR in the callers stack frame to this.
 	 */
+
+#ifdef CC_USING_MPROFILE_KERNEL
+	mtlr	r3
+
+	ld	r0, 40(r1)
+	mtctr	r0
+	ld	r10, 104(r1)
+	ld	r9, 96(r1)
+	ld	r8, 88(r1)
+	ld	r7, 80(r1)
+	ld	r6, 72(r1)
+	ld	r5, 64(r1)
+	ld	r4, 56(r1)
+	ld	r3, 48(r1)
+
+	addi	r1, r1, 112
+	mflr	r0
+	std	r0, LRSAVE(r1)
+	bctr
+#else
 	ld	r11, 112(r1)
 	std	r3, 16(r11)
 
@@ -1284,6 +1324,7 @@ _GLOBAL(ftrace_graph_caller)
 	mtlr	r0
 	addi	r1, r1, 112
 	blr
+#endif
 
 _GLOBAL(return_to_handler)
 	/* need to save return values */
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 44d4d8e..080c525 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -306,11 +306,19 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	 * The load offset is different depending on the ABI. For simplicity
 	 * just mask it out when doing the compare.
 	 */
+#ifndef CC_USING_MPROFILE_KERNEL
 	if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) {
-		pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]);
+		pr_err("Unexpected call sequence at %p: %x %x\n",
+		ip, op[0], op[1]);
 		return -EINVAL;
 	}
-
+#else
+	/* look for patched "NOP" on ppc64 with -mprofile-kernel */
+	if (op[0] != 0x60000000) {
+		pr_err("Unexpected call at %p: %x\n", ip, op[0]);
+		return -EINVAL;
+	}
+#endif
 	/* If we never set up a trampoline to ftrace_caller, then bail */
 	if (!rec->arch.mod->arch.tramp) {
 		pr_err("No ftrace trampoline\n");
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 6838451..30f6be1 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -475,6 +475,20 @@ static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
 static int restore_r2(u32 *instruction, struct module *me)
 {
 	if (*instruction != PPC_INST_NOP) {
+#ifdef CC_USING_MPROFILE_KERNEL
+		/* -mprofile_kernel sequence starting with
+		 * mflr r0 and maybe std r0, LRSAVE(r1)
+		 */
+		if ((instruction[-3] == 0x7c0802a6 &&
+		    instruction[-2] == 0xf8010010) ||
+		    instruction[-2] == 0x7c0802a6) {
+			/* Nothing to be done here, it's an _mcount
+			 * call location and r2 will have to be
+			 * restored in the _mcount function.
+			 */
+			return 1;
+		};
+#endif
 		pr_err("%s: Expect noop after relocate, got %08x\n",
 		       me->name, *instruction);
 		return 0;
-- 
1.8.5.6

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH v6 2/9] ppc64le FTRACE_WITH_REGS implementation
  2016-01-25 15:38 [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2) Torsten Duwe
  2016-01-25 15:26 ` [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel Torsten Duwe
@ 2016-01-25 15:27 ` Torsten Duwe
  2016-01-25 15:29 ` [PATCH v6 3/9] ppc use ftrace_modify_all_code default Torsten Duwe
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 44+ messages in thread
From: Torsten Duwe @ 2016-01-25 15:27 UTC (permalink / raw)
  To: Steven Rostedt, Michael Ellerman
  Cc: Jiri Kosina, linuxppc-dev, linux-kernel, live-patching

Implement FTRACE_WITH_REGS for powerpc64, on ELF ABI v2.
Initial work started by Vojtech Pavlik, used with permission.

  * arch/powerpc/kernel/entry_64.S:
    - Implement an effective ftrace_caller that works from
      within the kernel binary as well as from modules.
  * arch/powerpc/kernel/ftrace.c:
    - be prepared to deal with ppc64 ELF ABI v2, especially
      calls to _mcount that result from gcc -mprofile-kernel
    - a little more error verbosity
  * arch/powerpc/kernel/module_64.c:
    - do not save the TOC pointer on the trampoline when the
      destination is ftrace_caller. This trampoline jump happens from
      a function prologue before a new stack frame is set up, so bad
      things may happen otherwise...
    - relax is_module_trampoline() to recognise the modified
      trampoline.

Signed-off-by: Torsten Duwe <duwe@suse.de>
---
 arch/powerpc/include/asm/ftrace.h |  5 +++
 arch/powerpc/kernel/entry_64.S    | 78 +++++++++++++++++++++++++++++++++++++++
 arch/powerpc/kernel/ftrace.c      | 60 +++++++++++++++++++++++++++---
 arch/powerpc/kernel/module_64.c   | 25 ++++++++++++-
 4 files changed, 161 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index ef89b14..50ca758 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -46,6 +46,8 @@
 extern void _mcount(void);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
+# define FTRACE_ADDR ((unsigned long)ftrace_caller)
+# define FTRACE_REGS_ADDR FTRACE_ADDR
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
        /* reloction of mcount call site is the same as the address */
@@ -58,6 +60,9 @@ struct dyn_arch_ftrace {
 #endif /*  CONFIG_DYNAMIC_FTRACE */
 #endif /* __ASSEMBLY__ */
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+#endif
 #endif
 
 #if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) && !defined(__ASSEMBLY__)
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index e7cd043..9e98aa1 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -1213,6 +1213,7 @@ _GLOBAL(_mcount)
 	mtlr	r0
 	bctr
 
+#ifndef CC_USING_MPROFILE_KERNEL
 _GLOBAL_TOC(ftrace_caller)
 	/* Taken from output of objdump from lib64/glibc */
 	mflr	r3
@@ -1234,6 +1235,83 @@ _GLOBAL(ftrace_graph_stub)
 	ld	r0, 128(r1)
 	mtlr	r0
 	addi	r1, r1, 112
+#else
+_GLOBAL(ftrace_caller)
+	std	r0,LRSAVE(r1)
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+	mflr	r0
+	bl	2f
+2:	mflr	r12
+	mtlr	r0
+	mr      r0,r2   /* save callee's TOC */
+	addis	r2,r12,(.TOC.-ftrace_caller-12)@ha
+	addi    r2,r2,(.TOC.-ftrace_caller-12)@l
+#else
+	mr	r0,r2
+#endif
+	ld	r12,LRSAVE(r1)	/* get caller's address */
+
+	stdu	r1,-SWITCH_FRAME_SIZE(r1)
+
+	std     r12, _LINK(r1)
+	SAVE_8GPRS(0,r1)
+	std	r0, 24(r1)	/* save TOC */
+	SAVE_8GPRS(8,r1)
+	SAVE_8GPRS(16,r1)
+	SAVE_8GPRS(24,r1)
+
+	addis	r3,r2,function_trace_op@toc@ha
+	addi	r3,r3,function_trace_op@toc@l
+	ld	r5,0(r3)
+
+	mflr    r3
+	std     r3, _NIP(r1)
+	std	r3, 16(r1)
+	subi    r3, r3, MCOUNT_INSN_SIZE
+	mfmsr   r4
+	std     r4, _MSR(r1)
+	mfctr   r4
+	std     r4, _CTR(r1)
+	mfxer   r4
+	std     r4, _XER(r1)
+	mr	r4, r12
+	addi    r6, r1 ,STACK_FRAME_OVERHEAD
+
+.globl ftrace_call
+ftrace_call:
+	bl	ftrace_stub
+	nop
+
+	ld	r3, _NIP(r1)
+	mtlr	r3
+
+	REST_8GPRS(0,r1)
+	REST_8GPRS(8,r1)
+	REST_8GPRS(16,r1)
+	REST_8GPRS(24,r1)
+
+	addi r1, r1, SWITCH_FRAME_SIZE
+
+	ld	r12, LRSAVE(r1)  /* get caller's address */
+	mtlr	r12
+	mr	r2,r0		/* restore callee's TOC */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	stdu	r1, -112(r1)
+.globl ftrace_graph_call
+ftrace_graph_call:
+	b	ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+	addi	r1, r1, 112
+#endif
+
+	mflr	r0		/* move this LR to CTR */
+	mtctr	r0
+
+	ld	r0,LRSAVE(r1)	/* restore callee's lr at _mcount site */
+	mtlr	r0
+	bctr			/* jump after _mcount site */
+#endif /* CC_USING_MPROFILE_KERNEL */
 _GLOBAL(ftrace_stub)
 	blr
 #else
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 080c525..310137f 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -61,8 +61,11 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
 		return -EFAULT;
 
 	/* Make sure it is what we expect it to be */
-	if (replaced != old)
+	if (replaced != old) {
+		pr_err("%p: replaced (%#x) != old (%#x)",
+		(void *)ip, replaced, old);
 		return -EINVAL;
+	}
 
 	/* replace the text with the new text */
 	if (patch_instruction((unsigned int *)ip, new))
@@ -106,14 +109,16 @@ static int
 __ftrace_make_nop(struct module *mod,
 		  struct dyn_ftrace *rec, unsigned long addr)
 {
-	unsigned int op;
+	unsigned int op, op0, op1, pop;
 	unsigned long entry, ptr;
 	unsigned long ip = rec->ip;
 	void *tramp;
 
 	/* read where this goes */
-	if (probe_kernel_read(&op, (void *)ip, sizeof(int)))
+	if (probe_kernel_read(&op, (void *)ip, sizeof(int))) {
+		pr_err("Fetching opcode failed.\n");
 		return -EFAULT;
+	}
 
 	/* Make sure that that this is still a 24bit jump */
 	if (!is_bl_op(op)) {
@@ -158,10 +163,46 @@ __ftrace_make_nop(struct module *mod,
 	 *
 	 * Use a b +8 to jump over the load.
 	 */
-	op = 0x48000008;	/* b +8 */
 
-	if (patch_instruction((unsigned int *)ip, op))
+	pop = 0x48000008;	/* b +8 */
+
+	/*
+	 * Check what is in the next instruction. We can see ld r2,40(r1), but
+	 * on first pass after boot we will see mflr r0.
+	 */
+	if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE)) {
+		pr_err("Fetching op failed.\n");
+		return -EFAULT;
+	}
+
+	if (op != 0xe8410028) { /* ld r2,STACK_OFFSET(r1) */
+
+		if (probe_kernel_read(&op0, (void *)(ip-8), MCOUNT_INSN_SIZE)) {
+			pr_err("Fetching op0 failed.\n");
+			return -EFAULT;
+		}
+
+		if (probe_kernel_read(&op1, (void *)(ip-4), MCOUNT_INSN_SIZE)) {
+			pr_err("Fetching op1 failed.\n");
+			return -EFAULT;
+		}
+
+		/* mflr r0 ; std r0,LRSAVE(r1) */
+		if (op0 != 0x7c0802a6 && op1 != 0xf8010010) {
+			pr_err("Unexpected instructions around bl\n"
+				"when enabling dynamic ftrace!\t"
+				"(%08x,%08x,bl,%08x)\n", op0, op1, op);
+			return -EINVAL;
+		}
+
+		/* When using -mkernel_profile there is no load to jump over */
+		pop = PPC_INST_NOP;
+	}
+
+	if (patch_instruction((unsigned int *)ip, pop)) {
+		pr_err("Patching NOP failed.\n");
 		return -EPERM;
+	}
 
 	return 0;
 }
@@ -287,6 +328,13 @@ int ftrace_make_nop(struct module *mod,
 
 #ifdef CONFIG_MODULES
 #ifdef CONFIG_PPC64
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+			unsigned long addr)
+{
+	return ftrace_make_call(rec, addr);
+}
+#endif
 static int
 __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
@@ -338,7 +386,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 
 	return 0;
 }
-#else
+#else  /* !CONFIG_PPC64: */
 static int
 __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 30f6be1..a8facb6 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -138,12 +138,25 @@ static u32 ppc64_stub_insns[] = {
 	0x4e800420			/* bctr */
 };
 
+#ifdef CC_USING_MPROFILE_KERNEL
+/* In case of _mcount calls or dynamic ftracing, Do not save the
+ * current callee's TOC (in R2) again into the original caller's stack
+ * frame during this trampoline hop. The stack frame already holds
+ * that of the original caller.  _mcount and ftrace_caller will take
+ * care of this TOC value themselves.
+ */
+#define SQUASH_TOC_SAVE_INSN(trampoline_addr) \
+	(((struct ppc64_stub_entry *)(trampoline_addr))->jump[2] = PPC_INST_NOP)
+#else
+#define SQUASH_TOC_SAVE_INSN(trampoline_addr)
+#endif
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 static u32 ppc64_stub_mask[] = {
 	0xffff0000,
 	0xffff0000,
-	0xffffffff,
+	0x00000000,
 	0xffffffff,
 #if !defined(_CALL_ELF) || _CALL_ELF != 2
 	0xffffffff,
@@ -170,6 +183,9 @@ bool is_module_trampoline(u32 *p)
 		if ((insna & mask) != (insnb & mask))
 			return false;
 	}
+	if (insns[2] != ppc64_stub_insns[2] &&
+	    insns[2] != PPC_INST_NOP)
+		return false;
 
 	return true;
 }
@@ -619,6 +635,9 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 					return -ENOENT;
 				if (!restore_r2((u32 *)location + 1, me))
 					return -ENOEXEC;
+				/* Squash the TOC saver for profiler calls */
+				if (!strcmp("_mcount", strtab+sym->st_name))
+					SQUASH_TOC_SAVE_INSN(value);
 			} else
 				value += local_entry_offset(sym);
 
@@ -679,6 +698,10 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 	me->arch.tramp = stub_for_addr(sechdrs,
 				       (unsigned long)ftrace_caller,
 				       me);
+	/* ftrace_caller will take care of the TOC;
+	 * do not clobber original caller's value.
+	 */
+	SQUASH_TOC_SAVE_INSN(me->arch.tramp);
 #endif
 
 	return 0;
-- 
1.8.5.6

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH v6 3/9] ppc use ftrace_modify_all_code default
  2016-01-25 15:38 [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2) Torsten Duwe
  2016-01-25 15:26 ` [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel Torsten Duwe
  2016-01-25 15:27 ` [PATCH v6 2/9] ppc64le FTRACE_WITH_REGS implementation Torsten Duwe
@ 2016-01-25 15:29 ` Torsten Duwe
  2016-01-25 15:29 ` [PATCH v6 4/9] ppc64 ftrace_with_regs configuration variables Torsten Duwe
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 44+ messages in thread
From: Torsten Duwe @ 2016-01-25 15:29 UTC (permalink / raw)
  To: Steven Rostedt, Michael Ellerman
  Cc: Jiri Kosina, linuxppc-dev, linux-kernel, live-patching

Convert ppc's arch_ftrace_update_code from its own function copy
to use the generic default functionality (without stop_machine --
our instructions are properly aligned and the replacements atomic ;)

With this we gain error checking and the much-needed function_trace_op
handling.

Signed-off-by: Torsten Duwe <duwe@suse.de>
---
 arch/powerpc/kernel/ftrace.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 310137f..e419c7b 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -511,20 +511,12 @@ void ftrace_replace_code(int enable)
 	}
 }
 
+/* Use the default ftrace_modify_all_code, but without
+ * stop_machine().
+ */
 void arch_ftrace_update_code(int command)
 {
-	if (command & FTRACE_UPDATE_CALLS)
-		ftrace_replace_code(1);
-	else if (command & FTRACE_DISABLE_CALLS)
-		ftrace_replace_code(0);
-
-	if (command & FTRACE_UPDATE_TRACE_FUNC)
-		ftrace_update_ftrace_func(ftrace_trace_function);
-
-	if (command & FTRACE_START_FUNC_RET)
-		ftrace_enable_ftrace_graph_caller();
-	else if (command & FTRACE_STOP_FUNC_RET)
-		ftrace_disable_ftrace_graph_caller();
+	ftrace_modify_all_code(command);
 }
 
 int __init ftrace_dyn_arch_init(void)
-- 
1.8.5.6

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH v6 4/9] ppc64 ftrace_with_regs configuration variables
  2016-01-25 15:38 [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2) Torsten Duwe
                   ` (2 preceding siblings ...)
  2016-01-25 15:29 ` [PATCH v6 3/9] ppc use ftrace_modify_all_code default Torsten Duwe
@ 2016-01-25 15:29 ` Torsten Duwe
  2016-01-25 15:30 ` [PATCH v6 5/9] ppc64 ftrace_with_regs: spare early boot and low level Torsten Duwe
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 44+ messages in thread
From: Torsten Duwe @ 2016-01-25 15:29 UTC (permalink / raw)
  To: Steven Rostedt, Michael Ellerman
  Cc: Jiri Kosina, linuxppc-dev, linux-kernel, live-patching

  * Makefile:
    - globally use -mprofile-kernel in case it's configured
      and available.
  * arch/powerpc/Kconfig / kernel/trace/Kconfig:
    - declare that ppc64le HAVE_MPROFILE_KERNEL and
      HAVE_DYNAMIC_FTRACE_WITH_REGS, and use it.

Signed-off-by: Torsten Duwe <duwe@suse.de>
---
 arch/powerpc/Kconfig  |  2 ++
 arch/powerpc/Makefile | 10 ++++++++++
 kernel/trace/Kconfig  |  5 +++++
 3 files changed, 17 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index db49e0d..89b1a2a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -97,8 +97,10 @@ config PPC
 	select OF_RESERVED_MEM
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_DYNAMIC_FTRACE
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS if PPC64 && CPU_LITTLE_ENDIAN
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_MPROFILE_KERNEL if PPC64 && CPU_LITTLE_ENDIAN
 	select SYSCTL_EXCEPTION_TRACE
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select VIRT_TO_BUS if !PPC64
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 96efd82..2f9b527 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -133,6 +133,16 @@ else
 CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64
 endif
 
+ifeq ($(CONFIG_PPC64),y)
+ifdef CONFIG_HAVE_MPROFILE_KERNEL
+CC_USING_MPROFILE_KERNEL := $(call cc-option,-mprofile-kernel)
+ifdef CC_USING_MPROFILE_KERNEL
+CC_FLAGS_FTRACE	:= -pg $(CC_USING_MPROFILE_KERNEL)
+KBUILD_CPPFLAGS	+= -DCC_USING_MPROFILE_KERNEL
+endif
+endif
+endif
+
 CFLAGS-$(CONFIG_CELL_CPU) += $(call cc-option,-mcpu=cell)
 CFLAGS-$(CONFIG_POWER4_CPU) += $(call cc-option,-mcpu=power4)
 CFLAGS-$(CONFIG_POWER5_CPU) += $(call cc-option,-mcpu=power5)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e45db6b..a138f6d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -52,6 +52,11 @@ config HAVE_FENTRY
 	help
 	  Arch supports the gcc options -pg with -mfentry
 
+config HAVE_MPROFILE_KERNEL
+	bool
+	help
+	  Arch supports the gcc options -pg with -mprofile-kernel
+
 config HAVE_C_RECORDMCOUNT
 	bool
 	help
-- 
1.8.5.6

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH v6 5/9] ppc64 ftrace_with_regs: spare early boot and low level
  2016-01-25 15:38 [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2) Torsten Duwe
                   ` (3 preceding siblings ...)
  2016-01-25 15:29 ` [PATCH v6 4/9] ppc64 ftrace_with_regs configuration variables Torsten Duwe
@ 2016-01-25 15:30 ` Torsten Duwe
  2016-01-25 15:31 ` [PATCH v6 6/9] ppc64 ftrace: disable profiling for some functions Torsten Duwe
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 44+ messages in thread
From: Torsten Duwe @ 2016-01-25 15:30 UTC (permalink / raw)
  To: Steven Rostedt, Michael Ellerman
  Cc: Jiri Kosina, linuxppc-dev, linux-kernel, live-patching

Using -mprofile-kernel on early boot code not only confuses the
checker but is also useless, as the infrastructure is not yet in
place. Handle it the same way as -pg (remove it from CFLAGS); do the
same for time.o and ftrace itself.

  * arch/powerpc/kernel/Makefile:
    - remove -mprofile-kernel from low level and boot code objects'
      CFLAGS for FUNCTION_TRACER configurations.

Signed-off-by: Torsten Duwe <duwe@suse.de>
---
 arch/powerpc/kernel/Makefile | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index ba33693..0f417d5 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -16,14 +16,14 @@ endif
 
 ifdef CONFIG_FUNCTION_TRACER
 # Do not trace early boot code
-CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog
-CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog
-CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog
-CFLAGS_REMOVE_prom.o = -pg -mno-sched-epilog
+CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog -mprofile-kernel
+CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog -mprofile-kernel
+CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog -mprofile-kernel
+CFLAGS_REMOVE_prom.o = -pg -mno-sched-epilog -mprofile-kernel
 # do not trace tracer code
-CFLAGS_REMOVE_ftrace.o = -pg -mno-sched-epilog
+CFLAGS_REMOVE_ftrace.o = -pg -mno-sched-epilog -mprofile-kernel
 # timers used by tracing
-CFLAGS_REMOVE_time.o = -pg -mno-sched-epilog
+CFLAGS_REMOVE_time.o = -pg -mno-sched-epilog -mprofile-kernel
 endif
 
 obj-y				:= cputable.o ptrace.o syscalls.o \
-- 
1.8.5.6

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH v6 6/9] ppc64 ftrace: disable profiling for some functions
  2016-01-25 15:38 [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2) Torsten Duwe
                   ` (4 preceding siblings ...)
  2016-01-25 15:30 ` [PATCH v6 5/9] ppc64 ftrace_with_regs: spare early boot and low level Torsten Duwe
@ 2016-01-25 15:31 ` Torsten Duwe
  2016-01-25 15:31 ` [PATCH v6 7/9] ppc64 ftrace: disable profiling for some files Torsten Duwe
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 44+ messages in thread
From: Torsten Duwe @ 2016-01-25 15:31 UTC (permalink / raw)
  To: Steven Rostedt, Michael Ellerman
  Cc: Jiri Kosina, linuxppc-dev, linux-kernel, live-patching

At least POWER7/8 have MMUs that don't completely autoload;
a normal, recoverable memory fault might pass through these functions.
If a dynamic tracer function causes such a fault, any of these functions
being traced with -mprofile-kernel may cause an endless recursion.

Signed-off-by: Torsten Duwe <duwe@suse.de>
---
 arch/powerpc/kernel/process.c        |  2 +-
 arch/powerpc/mm/fault.c              |  2 +-
 arch/powerpc/mm/hash_utils_64.c      | 18 +++++++++---------
 arch/powerpc/mm/hugetlbpage-hash64.c |  2 +-
 arch/powerpc/mm/hugetlbpage.c        |  4 ++--
 arch/powerpc/mm/mem.c                |  2 +-
 arch/powerpc/mm/pgtable_64.c         |  2 +-
 arch/powerpc/mm/slb.c                |  6 +++---
 arch/powerpc/mm/slice.c              |  8 ++++----
 9 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 646bf4d..5b3c19d 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -733,7 +733,7 @@ static inline void __switch_to_tm(struct task_struct *prev)
  * don't know which of the checkpointed state and the transactional
  * state to use.
  */
-void restore_tm_state(struct pt_regs *regs)
+notrace void restore_tm_state(struct pt_regs *regs)
 {
 	unsigned long msr_diff;
 
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index a67c6d7..125be37 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -205,7 +205,7 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
  * The return value is 0 if the fault was handled, or the signal
  * number if this is a kernel fault that can't be handled here.
  */
-int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
+notrace int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 			    unsigned long error_code)
 {
 	enum ctx_state prev_state = exception_enter();
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 7f9616f..64f5b40 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -849,7 +849,7 @@ void early_init_mmu_secondary(void)
 /*
  * Called by asm hashtable.S for doing lazy icache flush
  */
-unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
+notrace unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
 {
 	struct page *page;
 
@@ -870,7 +870,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
 }
 
 #ifdef CONFIG_PPC_MM_SLICES
-static unsigned int get_paca_psize(unsigned long addr)
+static notrace unsigned int get_paca_psize(unsigned long addr)
 {
 	u64 lpsizes;
 	unsigned char *hpsizes;
@@ -899,7 +899,7 @@ unsigned int get_paca_psize(unsigned long addr)
  * For now this makes the whole process use 4k pages.
  */
 #ifdef CONFIG_PPC_64K_PAGES
-void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
+notrace void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
 {
 	if (get_slice_psize(mm, addr) == MMU_PAGE_4K)
 		return;
@@ -920,7 +920,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
  * Result is 0: full permissions, _PAGE_RW: read-only,
  * _PAGE_USER or _PAGE_USER|_PAGE_RW: no access.
  */
-static int subpage_protection(struct mm_struct *mm, unsigned long ea)
+static notrace int subpage_protection(struct mm_struct *mm, unsigned long ea)
 {
 	struct subpage_prot_table *spt = &mm->context.spt;
 	u32 spp = 0;
@@ -968,7 +968,7 @@ void hash_failure_debug(unsigned long ea, unsigned long access,
 		trap, vsid, ssize, psize, lpsize, pte);
 }
 
-static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
+static notrace void check_paca_psize(unsigned long ea, struct mm_struct *mm,
 			     int psize, bool user_region)
 {
 	if (user_region) {
@@ -990,7 +990,7 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
  * -1 - critical hash insertion error
  * -2 - access not permitted by subpage protection mechanism
  */
-int hash_page_mm(struct mm_struct *mm, unsigned long ea,
+notrace int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 		 unsigned long access, unsigned long trap,
 		 unsigned long flags)
 {
@@ -1187,7 +1187,7 @@ bail:
 }
 EXPORT_SYMBOL_GPL(hash_page_mm);
 
-int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
+notrace int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
 	      unsigned long dsisr)
 {
 	unsigned long flags = 0;
@@ -1289,7 +1289,7 @@ out_exit:
 /* WARNING: This is called from hash_low_64.S, if you change this prototype,
  *          do not forget to update the assembly call site !
  */
-void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
+notrace void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
 		     unsigned long flags)
 {
 	unsigned long hash, index, shift, hidx, slot;
@@ -1437,7 +1437,7 @@ void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc)
 	exception_exit(prev_state);
 }
 
-long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
+notrace long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
 			   unsigned long pa, unsigned long rflags,
 			   unsigned long vflags, int psize, int ssize)
 {
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index d94b1af..50b8c6f 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -18,7 +18,7 @@ extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
 				  unsigned long pa, unsigned long rlags,
 				  unsigned long vflags, int psize, int ssize);
 
-int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
+notrace int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		     pte_t *ptep, unsigned long trap, unsigned long flags,
 		     int ssize, unsigned int shift, unsigned int mmu_psize)
 {
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 9833fee..00c4b03 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -942,7 +942,7 @@ static int __init hugetlbpage_init(void)
 #endif
 arch_initcall(hugetlbpage_init);
 
-void flush_dcache_icache_hugepage(struct page *page)
+notrace void flush_dcache_icache_hugepage(struct page *page)
 {
 	int i;
 	void *start;
@@ -975,7 +975,7 @@ void flush_dcache_icache_hugepage(struct page *page)
  * when we have MSR[EE] = 0 but the paca->soft_enabled = 1
  */
 
-pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+notrace pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
 				   bool *is_thp, unsigned *shift)
 {
 	pgd_t pgd, *pgdp;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 22d94c3..f690e8a 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -406,7 +406,7 @@ void flush_dcache_page(struct page *page)
 }
 EXPORT_SYMBOL(flush_dcache_page);
 
-void flush_dcache_icache_page(struct page *page)
+notrace void flush_dcache_icache_page(struct page *page)
 {
 #ifdef CONFIG_HUGETLB_PAGE
 	if (PageCompound(page)) {
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index e92cb21..c74050b 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -442,7 +442,7 @@ static void page_table_free_rcu(void *table)
 	}
 }
 
-void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+notrace void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
 {
 	unsigned long pgf = (unsigned long)table;
 
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 515730e..3e9be5d 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -96,7 +96,7 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize,
 		     : "memory" );
 }
 
-static void __slb_flush_and_rebolt(void)
+static notrace void __slb_flush_and_rebolt(void)
 {
 	/* If you change this make sure you change SLB_NUM_BOLTED
 	 * and PR KVM appropriately too. */
@@ -136,7 +136,7 @@ static void __slb_flush_and_rebolt(void)
 		     : "memory");
 }
 
-void slb_flush_and_rebolt(void)
+notrace void slb_flush_and_rebolt(void)
 {
 
 	WARN_ON(!irqs_disabled());
@@ -151,7 +151,7 @@ void slb_flush_and_rebolt(void)
 	get_paca()->slb_cache_ptr = 0;
 }
 
-void slb_vmalloc_update(void)
+notrace void slb_vmalloc_update(void)
 {
 	unsigned long vflags;
 
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 0f432a7..f92f0f0 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -76,8 +76,8 @@ static void slice_print_mask(const char *label, struct slice_mask mask) {}
 
 #endif
 
-static struct slice_mask slice_range_to_mask(unsigned long start,
-					     unsigned long len)
+static notrace struct slice_mask slice_range_to_mask(unsigned long start,
+						     unsigned long len)
 {
 	unsigned long end = start + len - 1;
 	struct slice_mask ret = { 0, 0 };
@@ -564,7 +564,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
 				       current->mm->context.user_psize, 1);
 }
 
-unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
+notrace unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
 {
 	unsigned char *hpsizes;
 	int index, mask_index;
@@ -645,7 +645,7 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
 	spin_unlock_irqrestore(&slice_convert_lock, flags);
 }
 
-void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
+notrace void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
 			   unsigned long len, unsigned int psize)
 {
 	struct slice_mask mask = slice_range_to_mask(start, len);
-- 
1.8.5.6

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH v6 7/9] ppc64 ftrace: disable profiling for some files
  2016-01-25 15:38 [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2) Torsten Duwe
                   ` (5 preceding siblings ...)
  2016-01-25 15:31 ` [PATCH v6 6/9] ppc64 ftrace: disable profiling for some functions Torsten Duwe
@ 2016-01-25 15:31 ` Torsten Duwe
  2016-01-25 15:33 ` [PATCH v6 8/9] Implement kernel live patching for ppc64le (ABIv2) Torsten Duwe
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 44+ messages in thread
From: Torsten Duwe @ 2016-01-25 15:31 UTC (permalink / raw)
  To: Steven Rostedt, Michael Ellerman
  Cc: Jiri Kosina, linuxppc-dev, linux-kernel, live-patching

This patch complements the "notrace" attribute for selected functions.
It adds -mprofile-kernel to the cc flags to be stripped from the command
line for code-patching.o and feature-fixups.o, in addition to "-pg"

Signed-off-by: Torsten Duwe <duwe@suse.de>
---
 arch/powerpc/lib/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index a47e142..98e22b2 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -6,8 +6,8 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
 ccflags-$(CONFIG_PPC64)	:= $(NO_MINIMAL_TOC)
 
-CFLAGS_REMOVE_code-patching.o = -pg
-CFLAGS_REMOVE_feature-fixups.o = -pg
+CFLAGS_REMOVE_code-patching.o = -pg -mprofile-kernel
+CFLAGS_REMOVE_feature-fixups.o = -pg -mprofile-kernel
 
 obj-y += string.o alloc.o crtsavres.o ppc_ksyms.o code-patching.o \
 	 feature-fixups.o
-- 
1.8.5.6

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH v6 8/9] Implement kernel live patching for ppc64le (ABIv2)
  2016-01-25 15:38 [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2) Torsten Duwe
                   ` (6 preceding siblings ...)
  2016-01-25 15:31 ` [PATCH v6 7/9] ppc64 ftrace: disable profiling for some files Torsten Duwe
@ 2016-01-25 15:33 ` Torsten Duwe
  2016-01-26 10:50   ` Miroslav Benes
  2016-02-02 13:46   ` [PATCH v6 8/9] " Denis Kirjanov
  2016-01-25 15:33 ` [PATCH v6 9/9] Enable LIVEPATCH to be configured on ppc64le and add livepatch.o if it is selected Torsten Duwe
  2016-01-27 10:51 ` [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2) Balbir Singh
  9 siblings, 2 replies; 44+ messages in thread
From: Torsten Duwe @ 2016-01-25 15:33 UTC (permalink / raw)
  To: Steven Rostedt, Michael Ellerman
  Cc: Jiri Kosina, linuxppc-dev, linux-kernel, live-patching

  * create the appropriate files+functions
    arch/powerpc/include/asm/livepatch.h
        klp_check_compiler_support,
        klp_arch_set_pc
    arch/powerpc/kernel/livepatch.c with a stub for
        klp_write_module_reloc
    This is architecture-independent work in progress.
  * introduce a fixup in arch/powerpc/kernel/entry_64.S
    for local calls that are becoming global due to live patching.
    And of course do the main KLP thing: return to a maybe different
    address, possibly altered by the live patching ftrace op.

Signed-off-by: Torsten Duwe <duwe@suse.de>
---
 arch/powerpc/include/asm/livepatch.h | 45 +++++++++++++++++++++++++++++++
 arch/powerpc/kernel/entry_64.S       | 51 +++++++++++++++++++++++++++++++++---
 arch/powerpc/kernel/livepatch.c      | 38 +++++++++++++++++++++++++++
 3 files changed, 130 insertions(+), 4 deletions(-)
 create mode 100644 arch/powerpc/include/asm/livepatch.h
 create mode 100644 arch/powerpc/kernel/livepatch.c

diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h
new file mode 100644
index 0000000..44e8a2d
--- /dev/null
+++ b/arch/powerpc/include/asm/livepatch.h
@@ -0,0 +1,45 @@
+/*
+ * livepatch.h - powerpc-specific Kernel Live Patching Core
+ *
+ * Copyright (C) 2015 SUSE
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _ASM_POWERPC64_LIVEPATCH_H
+#define _ASM_POWERPC64_LIVEPATCH_H
+
+#include <linux/module.h>
+#include <linux/ftrace.h>
+
+#ifdef CONFIG_LIVEPATCH
+static inline int klp_check_compiler_support(void)
+{
+#if !defined(_CALL_ELF) || _CALL_ELF != 2 || !defined(CC_USING_MPROFILE_KERNEL)
+	return 1;
+#endif
+	return 0;
+}
+
+extern int klp_write_module_reloc(struct module *mod, unsigned long type,
+				   unsigned long loc, unsigned long value);
+
+static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
+{
+	regs->nip = ip;
+}
+#else
+#error Live patching support is disabled; check CONFIG_LIVEPATCH
+#endif
+
+#endif /* _ASM_POWERPC64_LIVEPATCH_H */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 9e98aa1..f6e3ee7 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -1267,6 +1267,9 @@ _GLOBAL(ftrace_caller)
 	mflr    r3
 	std     r3, _NIP(r1)
 	std	r3, 16(r1)
+#ifdef CONFIG_LIVEPATCH
+	mr	r14,r3		/* remember old NIP */
+#endif
 	subi    r3, r3, MCOUNT_INSN_SIZE
 	mfmsr   r4
 	std     r4, _MSR(r1)
@@ -1283,7 +1286,10 @@ ftrace_call:
 	nop
 
 	ld	r3, _NIP(r1)
-	mtlr	r3
+	mtctr	r3		/* prepare to jump there */
+#ifdef CONFIG_LIVEPATCH
+	cmpd	r14,r3		/* has NIP been altered? */
+#endif
 
 	REST_8GPRS(0,r1)
 	REST_8GPRS(8,r1)
@@ -1296,6 +1302,27 @@ ftrace_call:
 	mtlr	r12
 	mr	r2,r0		/* restore callee's TOC */
 
+#ifdef CONFIG_LIVEPATCH
+	beq+	4f		/* likely(old_NIP == new_NIP) */
+
+	/* For a local call, restore this TOC after calling the patch function.
+	 * For a global call, it does not matter what we restore here,
+	 * since the global caller does its own restore right afterwards,
+	 * anyway. Just insert a KLP_return_helper frame in any case,
+	 * so a patch function can always count on the changed stack offsets.
+	 */
+	stdu	r1,-32(r1)	/* open new mini stack frame */
+	std	r0,24(r1)	/* save TOC now, unconditionally. */
+	bl	5f
+5:	mflr	r12
+	addi	r12,r12,(KLP_return_helper+4-.)@l
+	std	r12,LRSAVE(r1)
+	mtlr	r12
+	mfctr	r12		/* allow for TOC calculation in newfunc */
+	bctr
+4:
+#endif
+
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	stdu	r1, -112(r1)
 .globl ftrace_graph_call
@@ -1305,15 +1332,31 @@ _GLOBAL(ftrace_graph_stub)
 	addi	r1, r1, 112
 #endif
 
-	mflr	r0		/* move this LR to CTR */
-	mtctr	r0
-
 	ld	r0,LRSAVE(r1)	/* restore callee's lr at _mcount site */
 	mtlr	r0
 	bctr			/* jump after _mcount site */
 #endif /* CC_USING_MPROFILE_KERNEL */
 _GLOBAL(ftrace_stub)
 	blr
+
+#ifdef CONFIG_LIVEPATCH
+/* Helper function for local calls that are becoming global
+   due to live patching.
+   We can't simply patch the NOP after the original call,
+   because, depending on the consistency model, some kernel
+   threads may still have called the original, local function
+   *without* saving their TOC in the respective stack frame slot,
+   so the decision is made per-thread during function return by
+   maybe inserting a KLP_return_helper frame or not.
+*/
+KLP_return_helper:
+	ld	r2,24(r1)	/* restore TOC (saved by ftrace_caller) */
+	addi r1, r1, 32		/* destroy mini stack frame */
+	ld	r0,LRSAVE(r1)	/* get the real return address */
+	mtlr	r0
+	blr
+#endif
+
 #else
 _GLOBAL_TOC(_mcount)
 	/* Taken from output of objdump from lib64/glibc */
diff --git a/arch/powerpc/kernel/livepatch.c b/arch/powerpc/kernel/livepatch.c
new file mode 100644
index 0000000..564eafa
--- /dev/null
+++ b/arch/powerpc/kernel/livepatch.c
@@ -0,0 +1,38 @@
+/*
+ * livepatch.c - powerpc-specific Kernel Live Patching Core
+ *
+ * Copyright (C) 2015 SUSE
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/module.h>
+#include <asm/livepatch.h>
+
+/**
+ * klp_write_module_reloc() - write a relocation in a module
+ * @mod:       module in which the section to be modified is found
+ * @type:      ELF relocation type (see asm/elf.h)
+ * @loc:       address that the relocation should be written to
+ * @value:     relocation value (sym address + addend)
+ *
+ * This function writes a relocation to the specified location for
+ * a particular module.
+ */
+int klp_write_module_reloc(struct module *mod, unsigned long type,
+			    unsigned long loc, unsigned long value)
+{
+	/* This requires infrastructure changes; we need the loadinfos. */
+	pr_err("lpc_write_module_reloc not yet supported\n");
+	return -ENOSYS;
+}
-- 
1.8.5.6

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH v6 9/9] Enable LIVEPATCH to be configured on ppc64le and add livepatch.o if it is selected.
  2016-01-25 15:38 [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2) Torsten Duwe
                   ` (7 preceding siblings ...)
  2016-01-25 15:33 ` [PATCH v6 8/9] Implement kernel live patching for ppc64le (ABIv2) Torsten Duwe
@ 2016-01-25 15:33 ` Torsten Duwe
  2016-01-27 10:51 ` [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2) Balbir Singh
  9 siblings, 0 replies; 44+ messages in thread
From: Torsten Duwe @ 2016-01-25 15:33 UTC (permalink / raw)
  To: Steven Rostedt, Michael Ellerman
  Cc: Jiri Kosina, linuxppc-dev, linux-kernel, live-patching

Signed-off-by: Torsten Duwe <duwe@suse.de>
---
 arch/powerpc/Kconfig         |    3 +++
 arch/powerpc/kernel/Makefile |    1 +
 2 files changed, 4 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 89b1a2a..62a3f54 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -162,6 +162,7 @@ config PPC
 	select EDAC_ATOMIC_SCRUB
 	select ARCH_HAS_DMA_SET_COHERENT_MASK
 	select HAVE_ARCH_SECCOMP_FILTER
+	select HAVE_LIVEPATCH if PPC64 && CPU_LITTLE_ENDIAN
 
 config GENERIC_CSUM
 	def_bool CPU_LITTLE_ENDIAN
@@ -1095,3 +1098,5 @@ config PPC_LIB_RHEAP
 	bool
 
 source "arch/powerpc/kvm/Kconfig"
+
+source "kernel/livepatch/Kconfig"
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 0f417d5..f9a2925 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -119,6 +119,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
 obj-$(CONFIG_FTRACE_SYSCALLS)	+= ftrace.o
 obj-$(CONFIG_TRACING)		+= trace_clock.o
+obj-$(CONFIG_LIVEPATCH)		+= livepatch.o
 
 ifneq ($(CONFIG_PPC_INDIRECT_PIO),y)
 obj-y				+= iomap.o
-- 
1.8.5.6

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2)
@ 2016-01-25 15:38 Torsten Duwe
  2016-01-25 15:26 ` [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel Torsten Duwe
                   ` (9 more replies)
  0 siblings, 10 replies; 44+ messages in thread
From: Torsten Duwe @ 2016-01-25 15:38 UTC (permalink / raw)
  To: Steven Rostedt, Michael Ellerman
  Cc: Jiri Kosina, linuxppc-dev, linux-kernel, live-patching

Changes since v5:
  * extra "std r0,LRSAVE(r1)" for gcc-6
    This makes the code compiler-agnostic.
  * Follow Petr Mladek's suggestion to avoid
    redefinition of HAVE_LIVEPATCH

Changes since v4:
  * change comment style in entry_64.S to C89
    (nobody is using assembler syntax comments there).
  * the bool function restore_r2 shouldn't return 2,
    that's a little confusing.
  * Test whether the compiler supports -mprofile-kernel
    and only then define CC_USING_MPROFILE_KERNEL
  * also make the return value of klp_check_compiler_support
    depend on that.

Major changes since v3:
  * the graph tracer works now.
    It turned out the stack frame it tried to manipulate does not
    exist at that point.
  * changes only needed in order to support -mprofile-kernel are now
    in a separate patch, prepended.
  * Kconfig cleanup so this is only selectable on ppc64le.


Torsten Duwe (9):
  ppc64 (le): prepare for -mprofile-kernel
  ppc64le FTRACE_WITH_REGS implementation
  ppc use ftrace_modify_all_code default
  ppc64 ftrace_with_regs configuration variables
  ppc64 ftrace_with_regs: spare early boot and low level
  ppc64 ftrace: disable profiling for some functions
  ppc64 ftrace: disable profiling for some files
  Implement kernel live patching for ppc64le (ABIv2)
  Enable LIVEPATCH to be configured on ppc64le and add livepatch.o if it
    is selected.

 arch/powerpc/Kconfig                 |   7 ++
 arch/powerpc/Makefile                |  10 +++
 arch/powerpc/include/asm/ftrace.h    |   5 ++
 arch/powerpc/include/asm/livepatch.h |  45 ++++++++++
 arch/powerpc/kernel/Makefile         |  13 +--
 arch/powerpc/kernel/entry_64.S       | 166 ++++++++++++++++++++++++++++++++++-
 arch/powerpc/kernel/ftrace.c         |  88 ++++++++++++++-----
 arch/powerpc/kernel/livepatch.c      |  38 ++++++++
 arch/powerpc/kernel/module_64.c      |  39 +++++++-
 arch/powerpc/kernel/process.c        |   2 +-
 arch/powerpc/lib/Makefile            |   4 +-
 arch/powerpc/mm/fault.c              |   2 +-
 arch/powerpc/mm/hash_utils_64.c      |  18 ++--
 arch/powerpc/mm/hugetlbpage-hash64.c |   2 +-
 arch/powerpc/mm/hugetlbpage.c        |   4 +-
 arch/powerpc/mm/mem.c                |   2 +-
 arch/powerpc/mm/pgtable_64.c         |   2 +-
 arch/powerpc/mm/slb.c                |   6 +-
 arch/powerpc/mm/slice.c              |   8 +-
 kernel/trace/Kconfig                 |   5 ++
 20 files changed, 412 insertions(+), 54 deletions(-)
 create mode 100644 arch/powerpc/include/asm/livepatch.h
 create mode 100644 arch/powerpc/kernel/livepatch.c

-- 
1.8.5.6

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 8/9] Implement kernel live patching for ppc64le (ABIv2)
  2016-01-25 15:33 ` [PATCH v6 8/9] Implement kernel live patching for ppc64le (ABIv2) Torsten Duwe
@ 2016-01-26 10:50   ` Miroslav Benes
  2016-01-26 12:48     ` Petr Mladek
  2016-01-26 14:00     ` Torsten Duwe
  2016-02-02 13:46   ` [PATCH v6 8/9] " Denis Kirjanov
  1 sibling, 2 replies; 44+ messages in thread
From: Miroslav Benes @ 2016-01-26 10:50 UTC (permalink / raw)
  To: Torsten Duwe
  Cc: Steven Rostedt, Michael Ellerman, Jiri Kosina, linuxppc-dev,
	Linux Kernel Mailing List, live-patching, pmladek


[ added Petr to CC list ]

On Mon, 25 Jan 2016, Torsten Duwe wrote:

>   * create the appropriate files+functions
>     arch/powerpc/include/asm/livepatch.h
>         klp_check_compiler_support,
>         klp_arch_set_pc
>     arch/powerpc/kernel/livepatch.c with a stub for
>         klp_write_module_reloc
>     This is architecture-independent work in progress.
>   * introduce a fixup in arch/powerpc/kernel/entry_64.S
>     for local calls that are becoming global due to live patching.
>     And of course do the main KLP thing: return to a maybe different
>     address, possibly altered by the live patching ftrace op.
> 
> Signed-off-by: Torsten Duwe <duwe@suse.de>

Hi,

I have a few questions...

We still need Petr's patch from [1] to make livepatch work, right? Could 
you, please, add it to this patch set to make it self-sufficient?

Second, what is the situation with mcount prologue between gcc < 6 and 
gcc-6? Are there only 12 bytes in gcc-6 prologue? If yes, we need to 
change Petr's patch to make it more general and to be able to cope with 
different prologues. This is unfortunate. Either way, please mention it 
somewhere in a changelog.

I haven't reviewed the patch properly yet, but there is a comment below.

[1] http://lkml.kernel.org/g/20151203160004.GE8047@pathway.suse.cz

> +/**
> + * klp_write_module_reloc() - write a relocation in a module
> + * @mod:       module in which the section to be modified is found
> + * @type:      ELF relocation type (see asm/elf.h)
> + * @loc:       address that the relocation should be written to
> + * @value:     relocation value (sym address + addend)
> + *
> + * This function writes a relocation to the specified location for
> + * a particular module.
> + */
> +int klp_write_module_reloc(struct module *mod, unsigned long type,
> +			    unsigned long loc, unsigned long value)
> +{
> +	/* This requires infrastructure changes; we need the loadinfos. */
> +	pr_err("lpc_write_module_reloc not yet supported\n");

This is a nit, but there is no lpc_write_module_reloc. It should be 
klp_write_module_reloc.

Thanks,
Miroslav

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 8/9] Implement kernel live patching for ppc64le (ABIv2)
  2016-01-26 10:50   ` Miroslav Benes
@ 2016-01-26 12:48     ` Petr Mladek
  2016-01-26 13:56       ` Torsten Duwe
  2016-02-02 12:12       ` Petr Mladek
  2016-01-26 14:00     ` Torsten Duwe
  1 sibling, 2 replies; 44+ messages in thread
From: Petr Mladek @ 2016-01-26 12:48 UTC (permalink / raw)
  To: Miroslav Benes
  Cc: Torsten Duwe, Steven Rostedt, Michael Ellerman, Jiri Kosina,
	linuxppc-dev, Linux Kernel Mailing List, live-patching

On Tue 2016-01-26 11:50:25, Miroslav Benes wrote:
> 
> [ added Petr to CC list ]
> 
> On Mon, 25 Jan 2016, Torsten Duwe wrote:
> 
> >   * create the appropriate files+functions
> >     arch/powerpc/include/asm/livepatch.h
> >         klp_check_compiler_support,
> >         klp_arch_set_pc
> >     arch/powerpc/kernel/livepatch.c with a stub for
> >         klp_write_module_reloc
> >     This is architecture-independent work in progress.
> >   * introduce a fixup in arch/powerpc/kernel/entry_64.S
> >     for local calls that are becoming global due to live patching.
> >     And of course do the main KLP thing: return to a maybe different
> >     address, possibly altered by the live patching ftrace op.
> > 
> > Signed-off-by: Torsten Duwe <duwe@suse.de>
> 
> Hi,
> 
> I have a few questions...
> 
> We still need Petr's patch from [1] to make livepatch work, right? Could 
> you, please, add it to this patch set to make it self-sufficient?
> 
> Second, what is the situation with mcount prologue between gcc < 6 and 
> gcc-6? Are there only 12 bytes in gcc-6 prologue? If yes, we need to 
> change Petr's patch to make it more general and to be able to cope with 
> different prologues. This is unfortunate. Either way, please mention it 
> somewhere in a changelog.

I am going to update the extra patch. There is an idea to detect the
offset during build by scripts/recordmcount. This tool looks for the
ftrace locations. The offset should always be a constant that depends
on the used architecture, compiler, and compiler flags.

The tool is called post build. We might need to pass the constant
as a symbol added to the binary. The tool already adds some symbols.


Best Regards,
Petr

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 8/9] Implement kernel live patching for ppc64le (ABIv2)
  2016-01-26 12:48     ` Petr Mladek
@ 2016-01-26 13:56       ` Torsten Duwe
  2016-02-02 12:12       ` Petr Mladek
  1 sibling, 0 replies; 44+ messages in thread
From: Torsten Duwe @ 2016-01-26 13:56 UTC (permalink / raw)
  To: Petr Mladek
  Cc: Miroslav Benes, Steven Rostedt, Michael Ellerman, Jiri Kosina,
	linuxppc-dev, Linux Kernel Mailing List, live-patching

On Tue, Jan 26, 2016 at 01:48:53PM +0100, Petr Mladek wrote:
> On Tue 2016-01-26 11:50:25, Miroslav Benes wrote:
> > 
> > We still need Petr's patch from [1] to make livepatch work, right? Could 
> > you, please, add it to this patch set to make it self-sufficient?

It's Petr's patch, I don't want to decide how to best tackle this, see below.
I think Michael is already aware that it is needed, too.

> > Second, what is the situation with mcount prologue between gcc < 6 and 
> > gcc-6? Are there only 12 bytes in gcc-6 prologue? If yes, we need to 

Precisely, it's commit e95d0248daced44 (in http://repo.or.cz/official-gcc.git)
or svn trunk change 222352 "No need for -mprofile-kernel to save LR to stack."
It's efficient, I like it.

> I am going to update the extra patch. There is an idea to detect the
> offset during build by scrips/recordmcount. This tool looks for the
> ftrace locations. The offset should always be a constant that depends
> on the used architecture, compiler, and compiler flags.

My first idea was to check for compiler version defines, but some vendors
are rumoured to patch their compilers ;-)

> The tool is called post build. We might need to pass the constant
> as a symbol added to the binary. The tool already adds some symbols.

That's even better.
Thanks!
	Torsten

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 8/9] Implement kernel live patching for ppc64le (ABIv2)
  2016-01-26 10:50   ` Miroslav Benes
  2016-01-26 12:48     ` Petr Mladek
@ 2016-01-26 14:00     ` Torsten Duwe
  2016-01-26 14:14       ` Miroslav Benes
  1 sibling, 1 reply; 44+ messages in thread
From: Torsten Duwe @ 2016-01-26 14:00 UTC (permalink / raw)
  To: Miroslav Benes
  Cc: Steven Rostedt, Michael Ellerman, Jiri Kosina, linuxppc-dev,
	Linux Kernel Mailing List, live-patching, pmladek

On Tue, Jan 26, 2016 at 11:50:25AM +0100, Miroslav Benes wrote:
> > + */
> > +int klp_write_module_reloc(struct module *mod, unsigned long type,
> > +			    unsigned long loc, unsigned long value)
> > +{
> > +	/* This requires infrastructure changes; we need the loadinfos. */
> > +	pr_err("lpc_write_module_reloc not yet supported\n");
> 
> This is a nit, but there is no lpc_write_module_reloc. It should be 
> klp_write_module_reloc.

Indeed. Michael, feel free to fix this on the fly or not. It needs to
disappear anyway and be replaced with functionality.

	Torsten

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 8/9] Implement kernel live patching for ppc64le (ABIv2)
  2016-01-26 14:00     ` Torsten Duwe
@ 2016-01-26 14:14       ` Miroslav Benes
  2016-01-27  1:53         ` Jessica Yu
  0 siblings, 1 reply; 44+ messages in thread
From: Miroslav Benes @ 2016-01-26 14:14 UTC (permalink / raw)
  To: Torsten Duwe
  Cc: Steven Rostedt, Michael Ellerman, Jiri Kosina, linuxppc-dev,
	Linux Kernel Mailing List, live-patching, pmladek, jeyu


[ Jessica added to CC list so she is aware that there are plans to 
implement livepatch on ppc64le ]

On Tue, 26 Jan 2016, Torsten Duwe wrote:

> On Tue, Jan 26, 2016 at 11:50:25AM +0100, Miroslav Benes wrote:
> > > + */
> > > +int klp_write_module_reloc(struct module *mod, unsigned long type,
> > > +			    unsigned long loc, unsigned long value)
> > > +{
> > > +	/* This requires infrastructure changes; we need the loadinfos. */
> > > +	pr_err("lpc_write_module_reloc not yet supported\n");
> > 
> > This is a nit, but there is no lpc_write_module_reloc. It should be 
> > klp_write_module_reloc.
> 
> Indeed. Michael, feel free to fix this on the fly or not. It needs to
> disappear anyway and be replaced with functionality.

Or not at all thanks to Jessica's effort [1].

Miroslav

[1] http://lkml.kernel.org/g/1452281304-28618-1-git-send-email-jeyu@redhat.com

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: Implement kernel live patching for ppc64le (ABIv2)
  2016-01-26 14:14       ` Miroslav Benes
@ 2016-01-27  1:53         ` Jessica Yu
  0 siblings, 0 replies; 44+ messages in thread
From: Jessica Yu @ 2016-01-27  1:53 UTC (permalink / raw)
  To: Miroslav Benes
  Cc: Torsten Duwe, Steven Rostedt, Michael Ellerman, Jiri Kosina,
	linuxppc-dev, Linux Kernel Mailing List, live-patching, pmladek

+++ Miroslav Benes [26/01/16 15:14 +0100]:
>
>[ Jessica added to CC list so she is aware that there are plans to
>implement livepatch on ppc64le ]
>
>On Tue, 26 Jan 2016, Torsten Duwe wrote:
>
>> On Tue, Jan 26, 2016 at 11:50:25AM +0100, Miroslav Benes wrote:
>> > > + */
>> > > +int klp_write_module_reloc(struct module *mod, unsigned long type,
>> > > +			    unsigned long loc, unsigned long value)
>> > > +{
>> > > +	/* This requires infrastructure changes; we need the loadinfos. */
>> > > +	pr_err("lpc_write_module_reloc not yet supported\n");
>> >
>> > This is a nit, but there is no lpc_write_module_reloc. It should be
>> > klp_write_module_reloc.
>>
>> Indeed. Michael, feel free to fix this on the fly or not. It needs to
>> disappear anyway and be replaced with functionality.
>
>Or not at all thanks to Jessica's effort [1].
>
>Miroslav
>
>[1] http://lkml.kernel.org/g/1452281304-28618-1-git-send-email-jeyu@redhat.com

Miroslav, thanks for the CC. Indeed, if things go well, there may be
no need to implement klp_write_module_reloc() anymore in the near
future. :-)

Jessica

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel
  2016-01-25 15:26 ` [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel Torsten Duwe
@ 2016-01-27 10:19   ` Michael Ellerman
  2016-01-27 10:44     ` Torsten Duwe
  2016-01-27 12:58     ` Alan Modra
  2016-02-03  7:23   ` AKASHI Takahiro
  1 sibling, 2 replies; 44+ messages in thread
From: Michael Ellerman @ 2016-01-27 10:19 UTC (permalink / raw)
  To: Torsten Duwe, Steven Rostedt, Anton Blanchard, amodra
  Cc: Jiri Kosina, linuxppc-dev, linux-kernel, live-patching

Hi Torsten,

On Mon, 2016-01-25 at 16:26 +0100, Torsten Duwe wrote:
> The gcc switch -mprofile-kernel, available for ppc64 on gcc > 4.8.5,
> allows to call _mcount very early in the function, which low-level
> ASM code and code patching functions need to consider.
> Especially the link register and the parameter registers are still
> alive and not yet saved into a new stack frame.
> 
> Signed-off-by: Torsten Duwe <duwe@suse.de>
> ---
>  arch/powerpc/kernel/entry_64.S  | 45 +++++++++++++++++++++++++++++++++++++++--
>  arch/powerpc/kernel/ftrace.c    | 12 +++++++++--
>  arch/powerpc/kernel/module_64.c | 14 +++++++++++++
>  3 files changed, 67 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index a94f155..e7cd043 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -1206,7 +1206,12 @@ _GLOBAL(enter_prom)
>  #ifdef CONFIG_DYNAMIC_FTRACE
>  _GLOBAL(mcount)
>  _GLOBAL(_mcount)
> -	blr
> +	std	r0,LRSAVE(r1) /* gcc6 does this _after_ this call _only_ */
> +	mflr	r0
> +	mtctr	r0
> +	ld	r0,LRSAVE(r1)
> +	mtlr	r0
> +	bctr

Can we use r11 instead? eg:

_GLOBAL(_mcount)
	mflr	r11
	mtctr	r11
	mtlr	r0
	bctr

Otherwise I worry the std/ld is going to cause a load-hit-store. And it's just
plain more instructions too.

I don't quite grok the gcc code enough to tell if that's always safe, GCC does
use r11 sometimes, but I don't think it ever expects it to survive across
_mcount()?


> @@ -1262,13 +1267,28 @@ _GLOBAL(ftrace_stub)
>  
>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
>  _GLOBAL(ftrace_graph_caller)
> +#ifdef CC_USING_MPROFILE_KERNEL
> +	/* with -mprofile-kernel, parameter regs are still alive at _mcount */
> +	std	r10, 104(r1)
> +	std	r9, 96(r1)
> +	std	r8, 88(r1)
> +	std	r7, 80(r1)
> +	std	r6, 72(r1)
> +	std	r5, 64(r1)
> +	std	r4, 56(r1)
> +	std	r3, 48(r1)
> +	mfctr	r4		/* ftrace_caller has moved local addr here */
> +	std	r4, 40(r1)
> +	mflr	r3		/* ftrace_caller has restored LR from stack */
> +#else
>  	/* load r4 with local address */
>  	ld	r4, 128(r1)
> -	subi	r4, r4, MCOUNT_INSN_SIZE
>  
>  	/* Grab the LR out of the caller stack frame */
>  	ld	r11, 112(r1)
>  	ld	r3, 16(r11)
> +#endif
> +	subi	r4, r4, MCOUNT_INSN_SIZE
>  
>  	bl	prepare_ftrace_return
>  	nop

AFAICS these end up being the only instructions shared between the two
versions. Which I don't think is worth the semantic burden of all the #ifdefs.
So please just write it as two separate functions, one for
CC_USING_MPROFILE_KERNEL and one for not.

> @@ -1277,6 +1297,26 @@ _GLOBAL(ftrace_graph_caller)
>  	 * prepare_ftrace_return gives us the address we divert to.
>  	 * Change the LR in the callers stack frame to this.
>  	 */
> +
> +#ifdef CC_USING_MPROFILE_KERNEL
> +	mtlr	r3
> +
> +	ld	r0, 40(r1)
> +	mtctr	r0
> +	ld	r10, 104(r1)
> +	ld	r9, 96(r1)
> +	ld	r8, 88(r1)
> +	ld	r7, 80(r1)
> +	ld	r6, 72(r1)
> +	ld	r5, 64(r1)
> +	ld	r4, 56(r1)
> +	ld	r3, 48(r1)
> +
> +	addi	r1, r1, 112
> +	mflr	r0
> +	std	r0, LRSAVE(r1)
> +	bctr
> +#else
>  	ld	r11, 112(r1)
>  	std	r3, 16(r11)
>  
> @@ -1284,6 +1324,7 @@ _GLOBAL(ftrace_graph_caller)
>  	mtlr	r0
>  	addi	r1, r1, 112
>  	blr
> +#endif
>  
>  _GLOBAL(return_to_handler)
>  	/* need to save return values */
> diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
> index 44d4d8e..080c525 100644
> --- a/arch/powerpc/kernel/ftrace.c
> +++ b/arch/powerpc/kernel/ftrace.c
> @@ -306,11 +306,19 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
>  	 * The load offset is different depending on the ABI. For simplicity
>  	 * just mask it out when doing the compare.
>  	 */
> +#ifndef CC_USING_MPROFILE_KERNEL
>  	if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) {
> -		pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]);
> +		pr_err("Unexpected call sequence at %p: %x %x\n",
> +		ip, op[0], op[1]);
>  		return -EINVAL;
>  	}
> -
> +#else
> +	/* look for patched "NOP" on ppc64 with -mprofile-kernel */
> +	if (op[0] != 0x60000000) {

That is "PPC_INST_NOP".

> +		pr_err("Unexpected call at %p: %x\n", ip, op[0]);
> +		return -EINVAL;
> +	}
> +#endif

Can you please break that out into a static inline, with separate versions for
the two cases.

We should aim for no #ifdefs inside functions.

> diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
> index 6838451..30f6be1 100644
> --- a/arch/powerpc/kernel/module_64.c
> +++ b/arch/powerpc/kernel/module_64.c
> @@ -475,6 +475,20 @@ static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
>  static int restore_r2(u32 *instruction, struct module *me)
>  {
>  	if (*instruction != PPC_INST_NOP) {
> +#ifdef CC_USING_MPROFILE_KERNEL
> +		/* -mprofile_kernel sequence starting with
> +		 * mflr r0 and maybe std r0, LRSAVE(r1)
> +		 */
> +		if ((instruction[-3] == 0x7c0802a6 &&
> +		    instruction[-2] == 0xf8010010) ||
> +		    instruction[-2] == 0x7c0802a6) {
> +			/* Nothing to be done here, it's an _mcount
> +			 * call location and r2 will have to be
> +			 * restored in the _mcount function.
> +			 */
> +			return 1;
> +		};
> +#endif

Again I'd rather that was in a helper static inline.

And some #defines for the instructions would also help.

>  		pr_err("%s: Expect noop after relocate, got %08x\n",
>  		       me->name, *instruction);
>  		return 0;


cheers

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel
  2016-01-27 10:19   ` Michael Ellerman
@ 2016-01-27 10:44     ` Torsten Duwe
  2016-01-28  4:26         ` Michael Ellerman
  2016-01-27 12:58     ` Alan Modra
  1 sibling, 1 reply; 44+ messages in thread
From: Torsten Duwe @ 2016-01-27 10:44 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Steven Rostedt, Anton Blanchard, amodra, Jiri Kosina,
	linuxppc-dev, linux-kernel, live-patching

On Wed, Jan 27, 2016 at 09:19:27PM +1100, Michael Ellerman wrote:
> Hi Torsten,
> 
> > +++ b/arch/powerpc/kernel/entry_64.S
> > @@ -1206,7 +1206,12 @@ _GLOBAL(enter_prom)
> >  #ifdef CONFIG_DYNAMIC_FTRACE
> >  _GLOBAL(mcount)
> >  _GLOBAL(_mcount)
> > -	blr
> > +	std	r0,LRSAVE(r1) /* gcc6 does this _after_ this call _only_ */
> > +	mflr	r0
> > +	mtctr	r0
> > +	ld	r0,LRSAVE(r1)
> > +	mtlr	r0
> > +	bctr
> 
> Can we use r11 instead? eg:
> 
> _GLOBAL(_mcount)
> 	mflr	r11
> 	mtctr	r11
> 	mtlr	r0
> 	bctr
> 
> Otherwise I worry the std/ld is going to cause a load-hit-store. And it's just
> plain more instructions too.
> 
> I don't quite grok the gcc code enough to tell if that's always safe, GCC does
> use r11 sometimes, but I don't think it ever expects it to survive across
> _mcount()?

I used r11 in that area once, and it crashed, but I don't recall the details.
We'll see. The performance shouldn't be critical, as the code is only used
during boot-up. With DYNAMIC_FTRACE, the calls will be replaced by
0x600000^W PPC_INST_NOP :)

> >  
> >  	bl	prepare_ftrace_return
> >  	nop
> 
> AFAICS these end up being the only instructions shared between the two
> versions. Which I don't think is worth the semantic burden of all the #ifdefs.
> So please just write it as two separate functions, one for
> CC_USING_MPROFILE_KERNEL and one for not.
> 
> > index 44d4d8e..080c525 100644
> > --- a/arch/powerpc/kernel/ftrace.c
> > +++ b/arch/powerpc/kernel/ftrace.c
> > @@ -306,11 +306,19 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
> >  	 * The load offset is different depending on the ABI. For simplicity
> >  	 * just mask it out when doing the compare.
> >  	 */
> > +#ifndef CC_USING_MPROFILE_KERNEL
> >  	if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) {
> > -		pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]);
> > +		pr_err("Unexpected call sequence at %p: %x %x\n",
> > +		ip, op[0], op[1]);
> >  		return -EINVAL;
> >  	}
> > -
> > +#else
> > +	/* look for patched "NOP" on ppc64 with -mprofile-kernel */
> > +	if (op[0] != 0x60000000) {
> 
> That is "PPC_INST_NOP".
> 
> > +		pr_err("Unexpected call at %p: %x\n", ip, op[0]);
> > +		return -EINVAL;
> > +	}
> > +#endif
> 
> Can you please break that out into a static inline, with separate versions for
> the two cases.
> 
> We should aim for no #ifdefs inside functions.

Points taken.

Does this set _work_ for you now? That'd be great to hear.

Stay tuned for v7...

	Torsten

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2)
  2016-01-25 15:38 [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2) Torsten Duwe
                   ` (8 preceding siblings ...)
  2016-01-25 15:33 ` [PATCH v6 9/9] Enable LIVEPATCH to be configured on ppc64le and add livepatch.o if it is selected Torsten Duwe
@ 2016-01-27 10:51 ` Balbir Singh
  2016-01-27 12:19   ` Torsten Duwe
  9 siblings, 1 reply; 44+ messages in thread
From: Balbir Singh @ 2016-01-27 10:51 UTC (permalink / raw)
  To: Torsten Duwe
  Cc: Steven Rostedt, Michael Ellerman, Jiri Kosina, linuxppc-dev,
	linux-kernel, live-patching

On Mon, 25 Jan 2016 16:38:48 +0100
Torsten Duwe <duwe@lst.de> wrote:

> Changes since v5:
>   * extra "std r0,LRSAVE(r1)" for gcc-6
>     This makes the code compiler-agnostic.
>   * Follow Petr Mladek's suggestion to avoid
>     redefinition of HAVE_LIVEPATCH

I looked at the patches - well mostly patches 1 and 2, some quick questions

1. I know -mprofile-kernel is a big optimization win, do we need it or can
we incrementally add it?
2. Some of the hardcoded checks for opcode are hard to review, I know they've
been there in similar forms for a while. May be as an iterative step we should
give the numbers some meaning and use proper helpers for it.

I am going to give the patches a spin

Balbir Singh.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2)
  2016-01-27 10:51 ` [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2) Balbir Singh
@ 2016-01-27 12:19   ` Torsten Duwe
  2016-01-28  2:41     ` Balbir Singh
  0 siblings, 1 reply; 44+ messages in thread
From: Torsten Duwe @ 2016-01-27 12:19 UTC (permalink / raw)
  To: Balbir Singh
  Cc: Steven Rostedt, Michael Ellerman, Jiri Kosina, linuxppc-dev,
	linux-kernel, live-patching

On Wed, Jan 27, 2016 at 09:51:12PM +1100, Balbir Singh wrote:
> On Mon, 25 Jan 2016 16:38:48 +0100
> Torsten Duwe <duwe@lst.de> wrote:
> 
> > Changes since v5:
> >   * extra "std r0,LRSAVE(r1)" for gcc-6
> >     This makes the code compiler-agnostic.
> >   * Follow Petr Mladek's suggestion to avoid
> >     redefinition of HAVE_LIVEPATCH
> 
> I looked at the patches - well mostly patches 1 and 2, some quick questions
> 
> 1. I know -mprofile-kernel is a big optimization win, do we need it or can
> we incrementally add it?

There's a reason why these are first ;-)
The following ones assume -mprofile-kernel is used.
The disadvantage is all relevant registers need to be saved before calling
further C code in between functions. On the Pro side, no stack frame has been
created at that point. These are assumptions made all over the ftrace-with-regs
and live patching code here.

> 2. Some of the hardcoded checks for opcode are hard to review, I know they've
> been there in similar forms for a while. May be as an iterative step we should
> give the numbers some meaning and use proper helpers for it.

Yes, Michael has already criticised that. No further literal hex constants, I promise.

> I am going to give the patches a spin

Thanks! Make sure you use a compiler that can disable -mprofile-kernel with "notrace".

	Torsten

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel
  2016-01-27 10:19   ` Michael Ellerman
  2016-01-27 10:44     ` Torsten Duwe
@ 2016-01-27 12:58     ` Alan Modra
  2016-01-27 13:45       ` Torsten Duwe
  2016-01-28  3:39         ` Michael Ellerman
  1 sibling, 2 replies; 44+ messages in thread
From: Alan Modra @ 2016-01-27 12:58 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Torsten Duwe, Steven Rostedt, Anton Blanchard, Jiri Kosina,
	linuxppc-dev, linux-kernel, live-patching

On Wed, Jan 27, 2016 at 09:19:27PM +1100, Michael Ellerman wrote:
> Hi Torsten,
> 
> On Mon, 2016-01-25 at 16:26 +0100, Torsten Duwe wrote:
> > The gcc switch -mprofile-kernel, available for ppc64 on gcc > 4.8.5,
> > allows to call _mcount very early in the function, which low-level
> > ASM code and code patching functions need to consider.
> > Especially the link register and the parameter registers are still
> > alive and not yet saved into a new stack frame.
> > 
> > Signed-off-by: Torsten Duwe <duwe@suse.de>
> > ---
> >  arch/powerpc/kernel/entry_64.S  | 45 +++++++++++++++++++++++++++++++++++++++--
> >  arch/powerpc/kernel/ftrace.c    | 12 +++++++++--
> >  arch/powerpc/kernel/module_64.c | 14 +++++++++++++
> >  3 files changed, 67 insertions(+), 4 deletions(-)
> > 
> > diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> > index a94f155..e7cd043 100644
> > --- a/arch/powerpc/kernel/entry_64.S
> > +++ b/arch/powerpc/kernel/entry_64.S
> > @@ -1206,7 +1206,12 @@ _GLOBAL(enter_prom)
> >  #ifdef CONFIG_DYNAMIC_FTRACE
> >  _GLOBAL(mcount)
> >  _GLOBAL(_mcount)
> > -	blr
> > +	std	r0,LRSAVE(r1) /* gcc6 does this _after_ this call _only_ */
> > +	mflr	r0
> > +	mtctr	r0
> > +	ld	r0,LRSAVE(r1)
> > +	mtlr	r0
> > +	bctr
> 
> Can we use r11 instead? eg:
> 
> _GLOBAL(_mcount)
> 	mflr	r11
> 	mtctr	r11
> 	mtlr	r0
> 	bctr

Depends on what you need to support.  As Torsten says, the code to
call _mcount when -mprofile-kernel is emitted before the prologue of a
function (similar to -m32), but after the ELFv2 global entry point
code.  If you trash r11 here you're killing the static chain pointer,
used by C nested functions or other languages that use a static chain,
eg. Pascal.  r11 has *not* been saved for ELFv2.

r12 might be a better choice for a temp reg.

-- 
Alan Modra
Australia Development Lab, IBM

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel
  2016-01-27 12:58     ` Alan Modra
@ 2016-01-27 13:45       ` Torsten Duwe
  2016-01-28  3:39         ` Michael Ellerman
  1 sibling, 0 replies; 44+ messages in thread
From: Torsten Duwe @ 2016-01-27 13:45 UTC (permalink / raw)
  To: Alan Modra
  Cc: Michael Ellerman, Steven Rostedt, Anton Blanchard, Jiri Kosina,
	linuxppc-dev, linux-kernel, live-patching

On Wed, Jan 27, 2016 at 11:28:09PM +1030, Alan Modra wrote:
> On Wed, Jan 27, 2016 at 09:19:27PM +1100, Michael Ellerman wrote:
> > 
> > Can we use r11 instead? eg:
> > 
> > _GLOBAL(_mcount)
> > 	mflr	r11
> > 	mtctr	r11
> > 	mtlr	r0
> > 	bctr
> 
> Depends on what you need to support.  As Torsten says, the code to
> call _mcount when -mprofile-kernel is emitted before the prologue of a
> function (similar to -m32), but after the ELFv2 global entry point
> code.  If you trash r11 here you're killing the static chain pointer,
> used by C nested functions or other languages that use a static chain,
> eg. Pascal.  r11 has *not* been saved for ELFv2.

Even if nested functions aren't supported in the Linux kernel(?), I think
it was an earlier version of mcount when r11 usage ruined my day.

> r12 might be a better choice for a temp reg.

Good idea. r12 holds the function entry point and is used to calculate the
new TOC value just _before_ the call. It should be available.
I'll try, thanks for the hint.

	Torsten

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2)
  2016-01-27 12:19   ` Torsten Duwe
@ 2016-01-28  2:41     ` Balbir Singh
  2016-01-28  3:31       ` Michael Ellerman
  0 siblings, 1 reply; 44+ messages in thread
From: Balbir Singh @ 2016-01-28  2:41 UTC (permalink / raw)
  To: Torsten Duwe
  Cc: Steven Rostedt, Michael Ellerman, Jiri Kosina, linuxppc-dev,
	linux-kernel, live-patching

On Wed, 27 Jan 2016 13:19:04 +0100
Torsten Duwe <duwe@lst.de> wrote:

> Thanks! Make sure you use a compiler that can disable -mprofile-kernel with "notrace".

gcc-6? I have gcc-5.2.1

Balbir Singh.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2)
  2016-01-28  2:41     ` Balbir Singh
@ 2016-01-28  3:31       ` Michael Ellerman
  2016-01-28 11:19         ` Torsten Duwe
  0 siblings, 1 reply; 44+ messages in thread
From: Michael Ellerman @ 2016-01-28  3:31 UTC (permalink / raw)
  To: Balbir Singh, Torsten Duwe
  Cc: Steven Rostedt, Jiri Kosina, linuxppc-dev, linux-kernel, live-patching

On Thu, 2016-01-28 at 13:41 +1100, Balbir Singh wrote:
> On Wed, 27 Jan 2016 13:19:04 +0100
> Torsten Duwe <duwe@lst.de> wrote:
>
> > Thanks! Make sure you use a compiler that can disable -mprofile-kernel with "notrace".
> 
> gcc-6? I have gcc-5.2.1

That should work.

But that's a good point.

We need to have some Makefile logic to only enable -mprofile-kernel when it's
known to work, ie. for versions where the notrace fix is in.

Looking at GCC history it looks like the fix is in 4.9.0 and anything later.

But a version check doesn't work with patched distro/vendor toolchains. So we
probably need some sort of runtime check.

cheers

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel
  2016-01-27 12:58     ` Alan Modra
@ 2016-01-28  3:39         ` Michael Ellerman
  2016-01-28  3:39         ` Michael Ellerman
  1 sibling, 0 replies; 44+ messages in thread
From: Michael Ellerman @ 2016-01-28  3:39 UTC (permalink / raw)
  To: Alan Modra
  Cc: Torsten Duwe, Steven Rostedt, Anton Blanchard, Jiri Kosina,
	linuxppc-dev, linux-kernel, live-patching

On Wed, 2016-01-27 at 23:28 +1030, Alan Modra wrote:
> On Wed, Jan 27, 2016 at 09:19:27PM +1100, Michael Ellerman wrote:
> > Hi Torsten,
> > 
> > On Mon, 2016-01-25 at 16:26 +0100, Torsten Duwe wrote:
> > > The gcc switch -mprofile-kernel, available for ppc64 on gcc > 4.8.5,
> > > allows to call _mcount very early in the function, which low-level
> > > ASM code and code patching functions need to consider.
> > > Especially the link register and the parameter registers are still
> > > alive and not yet saved into a new stack frame.
> > > 
> > > Signed-off-by: Torsten Duwe <duwe@suse.de>
> > > ---
> > >  arch/powerpc/kernel/entry_64.S  | 45 +++++++++++++++++++++++++++++++++++++++--
> > >  arch/powerpc/kernel/ftrace.c    | 12 +++++++++--
> > >  arch/powerpc/kernel/module_64.c | 14 +++++++++++++
> > >  3 files changed, 67 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> > > index a94f155..e7cd043 100644
> > > --- a/arch/powerpc/kernel/entry_64.S
> > > +++ b/arch/powerpc/kernel/entry_64.S
> > > @@ -1206,7 +1206,12 @@ _GLOBAL(enter_prom)
> > >  #ifdef CONFIG_DYNAMIC_FTRACE
> > >  _GLOBAL(mcount)
> > >  _GLOBAL(_mcount)
> > > -	blr
> > > +	std	r0,LRSAVE(r1) /* gcc6 does this _after_ this call _only_ */
> > > +	mflr	r0
> > > +	mtctr	r0
> > > +	ld	r0,LRSAVE(r1)
> > > +	mtlr	r0
> > > +	bctr
> > 
> > Can we use r11 instead? eg:
> > 
> > _GLOBAL(_mcount)
> > 	mflr	r11
> > 	mtctr	r11
> > 	mtlr	r0
> > 	bctr
> 
> Depends on what you need to support.  As Torsten says, the code to
> call _mcount when -mprofile-kernel is emitted before the prologue of a
> function (similar to -m32), but after the ELFv2 global entry point
> code.  If you trash r11 here you're killing the static chain pointer,
> used by C nested functions or other languages that use a static chain,
> eg. Pascal.  r11 has *not* been saved for ELFv2.

OK, thanks for clarifying. Pascal is not a big concern :D. But although I
don't think we use nested functions anywhere, someone somewhere could be, or at
least might one day.

> r12 might be a better choice for a temp reg.

Even better.

cheers

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel
@ 2016-01-28  3:39         ` Michael Ellerman
  0 siblings, 0 replies; 44+ messages in thread
From: Michael Ellerman @ 2016-01-28  3:39 UTC (permalink / raw)
  To: Alan Modra
  Cc: Torsten Duwe, Steven Rostedt, Anton Blanchard, Jiri Kosina,
	linuxppc-dev, linux-kernel, live-patching

On Wed, 2016-01-27 at 23:28 +1030, Alan Modra wrote:
> On Wed, Jan 27, 2016 at 09:19:27PM +1100, Michael Ellerman wrote:
> > Hi Torsten,
> > 
> > On Mon, 2016-01-25 at 16:26 +0100, Torsten Duwe wrote:
> > > The gcc switch -mprofile-kernel, available for ppc64 on gcc > 4.8.5,
> > > allows to call _mcount very early in the function, which low-level
> > > ASM code and code patching functions need to consider.
> > > Especially the link register and the parameter registers are still
> > > alive and not yet saved into a new stack frame.
> > > 
> > > Signed-off-by: Torsten Duwe <duwe@suse.de>
> > > ---
> > >  arch/powerpc/kernel/entry_64.S  | 45 +++++++++++++++++++++++++++++++++++++++--
> > >  arch/powerpc/kernel/ftrace.c    | 12 +++++++++--
> > >  arch/powerpc/kernel/module_64.c | 14 +++++++++++++
> > >  3 files changed, 67 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> > > index a94f155..e7cd043 100644
> > > --- a/arch/powerpc/kernel/entry_64.S
> > > +++ b/arch/powerpc/kernel/entry_64.S
> > > @@ -1206,7 +1206,12 @@ _GLOBAL(enter_prom)
> > >  #ifdef CONFIG_DYNAMIC_FTRACE
> > >  _GLOBAL(mcount)
> > >  _GLOBAL(_mcount)
> > > -	blr
> > > +	std	r0,LRSAVE(r1) /* gcc6 does this _after_ this call _only_ */
> > > +	mflr	r0
> > > +	mtctr	r0
> > > +	ld	r0,LRSAVE(r1)
> > > +	mtlr	r0
> > > +	bctr
> > 
> > Can we use r11 instead? eg:
> > 
> > _GLOBAL(_mcount)
> > 	mflr	r11
> > 	mtctr	r11
> > 	mtlr	r0
> > 	bctr
> 
> Depends on what you need to support.  As Torsten says, the code to
> call _mcount when -mprofile-kernel is emitted before the prologue of a
> function (similar to -m32), but after the ELFv2 global entry point
> code.  If you trash r11 here you're killing the static chain pointer,
> used by C nested functions or other languages that use a static chain,
> eg. Pascal.  r11 has *not* been saved for ELFv2.

OK, thanks for clarifying. Pascal is not a big concern :D. But although I
don't think we use nested functions anywhere, someone somewhere could be, or at
least might one day.

> r12 might be a better choice for a temp reg.

Even better.

cheers

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel
  2016-01-27 10:44     ` Torsten Duwe
@ 2016-01-28  4:26         ` Michael Ellerman
  0 siblings, 0 replies; 44+ messages in thread
From: Michael Ellerman @ 2016-01-28  4:26 UTC (permalink / raw)
  To: Torsten Duwe
  Cc: Steven Rostedt, Anton Blanchard, amodra, Jiri Kosina,
	linuxppc-dev, linux-kernel, live-patching

On Wed, 2016-01-27 at 11:44 +0100, Torsten Duwe wrote:
> On Wed, Jan 27, 2016 at 09:19:27PM +1100, Michael Ellerman wrote:
> > Hi Torsten,
> >
> > > +++ b/arch/powerpc/kernel/entry_64.S
> > > @@ -1206,7 +1206,12 @@ _GLOBAL(enter_prom)
> > >  #ifdef CONFIG_DYNAMIC_FTRACE
> > >  _GLOBAL(mcount)
> > >  _GLOBAL(_mcount)
> > > -	blr
> > > +	std	r0,LRSAVE(r1) /* gcc6 does this _after_ this call _only_ */
> > > +	mflr	r0
> > > +	mtctr	r0
> > > +	ld	r0,LRSAVE(r1)
> > > +	mtlr	r0
> > > +	bctr
> >
> > Can we use r11 instead? eg:
> >
> > _GLOBAL(_mcount)
> > 	mflr	r11
> > 	mtctr	r11
> > 	mtlr	r0
> > 	bctr
> >
> > Otherwise I worry the std/ld is going to cause a load-hit-store. And it's just
> > plain more instructions too.
> >
> > I don't quite grok the gcc code enough to tell if that's always safe, GCC does
> > use r11 sometimes, but I don't think it ever expects it to survive across
> > _mcount()?
>
> I used r11 in that area once, and it crashed, but I don't recall the details.
> We'll see. The performance shouldn't be critical, as the code is only used
> during boot-up. With DYNAMIC_FTRACE, the calls will be replaced by
> 0x600000^W PPC_INST_NOP :)

True.

That raises an interesting question, how does it work *without* DYNAMIC_FTRACE?

It looks like you haven't updated that version of _mcount at all? Or maybe I'm
missing an #ifdef somewhere?

_GLOBAL_TOC(_mcount)
	/* Taken from output of objdump from lib64/glibc */
	mflr	r3
	ld	r11, 0(r1)
	stdu	r1, -112(r1)
	std	r3, 128(r1)
	ld	r4, 16(r11)

	subi	r3, r3, MCOUNT_INSN_SIZE
	LOAD_REG_ADDR(r5,ftrace_trace_function)
	ld	r5,0(r5)
	ld	r5,0(r5)
	mtctr	r5
	bctrl
	nop

It doesn't look like that will work right with the -mprofile-kernel ABI. And
indeed it doesn't boot.

So we'll need to work that out. I guess the minimum would be to disable
-mprofile-kernel if DYNAMIC_FTRACE is disabled.

Frankly I think we'd be happy to *only* support DYNAMIC_FTRACE, but the generic
code doesn't let us do that at the moment.

> > > index 44d4d8e..080c525 100644
> > > --- a/arch/powerpc/kernel/ftrace.c
> > > +++ b/arch/powerpc/kernel/ftrace.c
> > > @@ -306,11 +306,19 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
> > >  	 * The load offset is different depending on the ABI. For simplicity
> > >  	 * just mask it out when doing the compare.
> > >  	 */
> > > +#ifndef CC_USING_MPROFILE_KERNEL
> > >  	if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) {
> > > -		pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]);
> > > +		pr_err("Unexpected call sequence at %p: %x %x\n",
> > > +		ip, op[0], op[1]);
> > >  		return -EINVAL;
> > >  	}
> > > -
> > > +#else
> > > +	/* look for patched "NOP" on ppc64 with -mprofile-kernel */
> > > +	if (op[0] != 0x60000000) {
> >
> > That is "PPC_INST_NOP".
> >

> > > +		pr_err("Unexpected call at %p: %x\n", ip, op[0]);
> > > +		return -EINVAL;
> > > +	}
> > > +#endif
> >
> > Can you please break that out into a static inline, with separate versions for
> > the two cases.
> >
> > We should aim for no #ifdefs inside functions.
>
> Points taken.

Thanks.

> Does this set _work_ for you now? That'd be great to hear.

Sort of, see previous comments.

But it's better than the previous version which didn't boot :)

Also ftracetest fails at step 8:

  ...
  [8] ftrace - function graph filters with stack tracer
  Unable to handle kernel paging request for data at address 0xd0000000033d7f70
  Faulting instruction address: 0xc0000000001b16ec
  Oops: Kernel access of bad area, sig: 11 [#1]
  SMP NR_CPUS=2048 NUMA pSeries
  Modules linked in: virtio_balloon fuse autofs4 virtio_net virtio_pci virtio_ring virtio
  CPU: 15 PID: 0 Comm: swapper/15 Not tainted 4.5.0-rc1-00009-g325e167adf2b #4
  task: c0000001fefe0400 ti: c0000001fff74000 task.ti: c0000001fb0e0000
  NIP: c0000000001b16ec LR: c000000000048abc CTR: d0000000032d0424
  REGS: c0000001fff77aa0 TRAP: 0300   Not tainted  (4.5.0-rc1-00009-g325e167adf2b)
  MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE>  CR: 28002422  XER: 20000000
  CFAR: c0000000004ebbf0 DAR: d0000000033d7f70 DSISR: 40000000 SOFTE: 0
  GPR00: c000000000009f84 c0000001fff77d20 d0000000032d9fb0 c000000000118d70
  GPR04: d0000000032d0420 0000000000000000 0000000000000000 00000001ff170000
  GPR08: 0000000000000000 d0000000033d9fb0 c0000001f36f2c00 d0000000032d1898
  GPR12: c000000000118d70 c00000000fe03c00 c0000001fb0e0000 c000000000d6d3c8
  GPR16: c000000000d59c28 c0000001fb0e0000 0000000000000001 0000000000000008
  GPR20: c0000001fb0e0080 0000000000000001 0000000000000002 0000000000000019
  GPR24: c0000001f36f2c00 0000000000000000 0000000000000000 0000000000000000
  GPR28: 0000000000000000 d0000000032d0420 c000000000118d70 c0000001f3570680
  NIP [c0000000001b16ec] ftrace_graph_is_dead+0xc/0x20
  LR [c000000000048abc] prepare_ftrace_return+0x2c/0x150
  Call Trace:
  [c0000001fff77d20] [0000000000000002] 0x2 (unreliable)
  [c0000001fff77d70] [c000000000009f84] ftrace_graph_caller+0x34/0x74
  [c0000001fff77de0] [c000000000118d70] handle_irq_event_percpu+0x90/0x2b0
  [c0000001fff77ea0] [c000000000118ffc] handle_irq_event+0x6c/0xd0
  [c0000001fff77ed0] [c00000000011e280] handle_fasteoi_irq+0xf0/0x2a0
  [c0000001fff77f00] [c000000000117f40] generic_handle_irq+0x50/0x80
  [c0000001fff77f20] [c000000000011228] __do_irq+0x98/0x1d0
  [c0000001fff77f90] [c000000000024074] call_do_irq+0x14/0x24
  [c0000001fb0e3a20] [c0000000000113f8] do_IRQ+0x98/0x140
  [c0000001fb0e3a60] [c0000000000025d0] hardware_interrupt_common+0x150/0x180
  --- interrupt: 501 at plpar_hcall_norets+0x1c/0x28
      LR = check_and_cede_processor+0x38/0x50
  [c0000001fb0e3d50] [c0000000008008c4] check_and_cede_processor+0x24/0x50 (unreliable)
  [c0000001fb0e3db0] [c000000000800aec] shared_cede_loop+0x6c/0x180
  [c0000001fb0e3df0] [c0000000007fdca4] cpuidle_enter_state+0x174/0x400
  [c0000001fb0e3e50] [c000000000104550] call_cpuidle+0x50/0xa0
  [c0000001fb0e3e70] [c000000000104b58] cpu_startup_entry+0x338/0x440
  [c0000001fb0e3f30] [c00000000004090c] start_secondary+0x35c/0x3a0
  [c0000001fb0e3f90] [c000000000008b6c] start_secondary_prolog+0x10/0x14
  Instruction dump:
  60000000 4bfe6c79 60000000 e8610020 38210030 e8010010 7c0803a6 4bffff30
  60420000 3c4c00ca 3842ff20 3d220010 <8869dfc0> 4e800020 60000000 60000000
  ---[ end trace 129c2895cb584df3 ]---

  Kernel panic - not syncing: Fatal exception in interrupt


That doesn't happen without your series applied, though that doesn't 100% mean
it's your bug. I haven't had time to dig any deeper.

> Stay tuned for v7...

Thanks.

cheers

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel
@ 2016-01-28  4:26         ` Michael Ellerman
  0 siblings, 0 replies; 44+ messages in thread
From: Michael Ellerman @ 2016-01-28  4:26 UTC (permalink / raw)
  To: Torsten Duwe
  Cc: Steven Rostedt, Anton Blanchard, amodra, Jiri Kosina,
	linuxppc-dev, linux-kernel, live-patching

On Wed, 2016-01-27 at 11:44 +0100, Torsten Duwe wrote:
> On Wed, Jan 27, 2016 at 09:19:27PM +1100, Michael Ellerman wrote:
> > Hi Torsten,
> >
> > > +++ b/arch/powerpc/kernel/entry_64.S
> > > @@ -1206,7 +1206,12 @@ _GLOBAL(enter_prom)
> > >  #ifdef CONFIG_DYNAMIC_FTRACE
> > >  _GLOBAL(mcount)
> > >  _GLOBAL(_mcount)
> > > -	blr
> > > +	std	r0,LRSAVE(r1) /* gcc6 does this _after_ this call _only_ */
> > > +	mflr	r0
> > > +	mtctr	r0
> > > +	ld	r0,LRSAVE(r1)
> > > +	mtlr	r0
> > > +	bctr
> >
> > Can we use r11 instead? eg:
> >
> > _GLOBAL(_mcount)
> > 	mflr	r11
> > 	mtctr	r11
> > 	mtlr	r0
> > 	bctr
> >
> > Otherwise I worry the std/ld is going to cause a load-hit-store. And it's just
> > plain more instructions too.
> >
> > I don't quite grok the gcc code enough to tell if that's always safe, GCC does
> > use r11 sometimes, but I don't think it ever expects it to survive across
> > _mcount()?
>
> I used r11 in that area once, and it crashed, but I don't recall the details.
> We'll see. The performance shouldn't be critical, as the code is only used
> during boot-up. With DYNAMIC_FTRACE, the calls will be replaced by
> 0x600000^W PPC_INST_NOP :)

True.

That raises an interesting question, how does it work *without* DYNAMIC_FTRACE?

It looks like you haven't updated that version of _mcount at all? Or maybe I'm
missing an #ifdef somewhere?

_GLOBAL_TOC(_mcount)
	/* Taken from output of objdump from lib64/glibc */
	mflr	r3
	ld	r11, 0(r1)
	stdu	r1, -112(r1)
	std	r3, 128(r1)
	ld	r4, 16(r11)

	subi	r3, r3, MCOUNT_INSN_SIZE
	LOAD_REG_ADDR(r5,ftrace_trace_function)
	ld	r5,0(r5)
	ld	r5,0(r5)
	mtctr	r5
	bctrl
	nop

It doesn't look like that will work right with the -mprofile-kernel ABI. And
indeed it doesn't boot.

So we'll need to work that out. I guess the minimum would be to disable
-mprofile-kernel if DYNAMIC_FTRACE is disabled.

Frankly I think we'd be happy to *only* support DYNAMIC_FTRACE, but the generic
code doesn't let us do that at the moment.

> > > index 44d4d8e..080c525 100644
> > > --- a/arch/powerpc/kernel/ftrace.c
> > > +++ b/arch/powerpc/kernel/ftrace.c
> > > @@ -306,11 +306,19 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
> > >  	 * The load offset is different depending on the ABI. For simplicity
> > >  	 * just mask it out when doing the compare.
> > >  	 */
> > > +#ifndef CC_USING_MPROFILE_KERNEL
> > >  	if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) {
> > > -		pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]);
> > > +		pr_err("Unexpected call sequence at %p: %x %x\n",
> > > +		ip, op[0], op[1]);
> > >  		return -EINVAL;
> > >  	}
> > > -
> > > +#else
> > > +	/* look for patched "NOP" on ppc64 with -mprofile-kernel */
> > > +	if (op[0] != 0x60000000) {
> >
> > That is "PPC_INST_NOP".
> >

> > > +		pr_err("Unexpected call at %p: %x\n", ip, op[0]);
> > > +		return -EINVAL;
> > > +	}
> > > +#endif
> >
> > Can you please break that out into a static inline, with separate versions for
> > the two cases.
> >
> > We should aim for no #ifdefs inside functions.
>
> Points taken.

Thanks.

> Does this set _work_ for you now? That'd be great to hear.

Sort of, see previous comments.

But it's better than the previous version which didn't boot :)

Also ftracetest fails at step 8:

  ...
  [8] ftrace - function graph filters with stack tracer
  Unable to handle kernel paging request for data at address 0xd0000000033d7f70
  Faulting instruction address: 0xc0000000001b16ec
  Oops: Kernel access of bad area, sig: 11 [#1]
  SMP NR_CPUS=2048 NUMA pSeries
  Modules linked in: virtio_balloon fuse autofs4 virtio_net virtio_pci virtio_ring virtio
  CPU: 15 PID: 0 Comm: swapper/15 Not tainted 4.5.0-rc1-00009-g325e167adf2b #4
  task: c0000001fefe0400 ti: c0000001fff74000 task.ti: c0000001fb0e0000
  NIP: c0000000001b16ec LR: c000000000048abc CTR: d0000000032d0424
  REGS: c0000001fff77aa0 TRAP: 0300   Not tainted  (4.5.0-rc1-00009-g325e167adf2b)
  MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE>  CR: 28002422  XER: 20000000
  CFAR: c0000000004ebbf0 DAR: d0000000033d7f70 DSISR: 40000000 SOFTE: 0
  GPR00: c000000000009f84 c0000001fff77d20 d0000000032d9fb0 c000000000118d70
  GPR04: d0000000032d0420 0000000000000000 0000000000000000 00000001ff170000
  GPR08: 0000000000000000 d0000000033d9fb0 c0000001f36f2c00 d0000000032d1898
  GPR12: c000000000118d70 c00000000fe03c00 c0000001fb0e0000 c000000000d6d3c8
  GPR16: c000000000d59c28 c0000001fb0e0000 0000000000000001 0000000000000008
  GPR20: c0000001fb0e0080 0000000000000001 0000000000000002 0000000000000019
  GPR24: c0000001f36f2c00 0000000000000000 0000000000000000 0000000000000000
  GPR28: 0000000000000000 d0000000032d0420 c000000000118d70 c0000001f3570680
  NIP [c0000000001b16ec] ftrace_graph_is_dead+0xc/0x20
  LR [c000000000048abc] prepare_ftrace_return+0x2c/0x150
  Call Trace:
  [c0000001fff77d20] [0000000000000002] 0x2 (unreliable)
  [c0000001fff77d70] [c000000000009f84] ftrace_graph_caller+0x34/0x74
  [c0000001fff77de0] [c000000000118d70] handle_irq_event_percpu+0x90/0x2b0
  [c0000001fff77ea0] [c000000000118ffc] handle_irq_event+0x6c/0xd0
  [c0000001fff77ed0] [c00000000011e280] handle_fasteoi_irq+0xf0/0x2a0
  [c0000001fff77f00] [c000000000117f40] generic_handle_irq+0x50/0x80
  [c0000001fff77f20] [c000000000011228] __do_irq+0x98/0x1d0
  [c0000001fff77f90] [c000000000024074] call_do_irq+0x14/0x24
  [c0000001fb0e3a20] [c0000000000113f8] do_IRQ+0x98/0x140
  [c0000001fb0e3a60] [c0000000000025d0] hardware_interrupt_common+0x150/0x180
  --- interrupt: 501 at plpar_hcall_norets+0x1c/0x28
      LR = check_and_cede_processor+0x38/0x50
  [c0000001fb0e3d50] [c0000000008008c4] check_and_cede_processor+0x24/0x50 (unreliable)
  [c0000001fb0e3db0] [c000000000800aec] shared_cede_loop+0x6c/0x180
  [c0000001fb0e3df0] [c0000000007fdca4] cpuidle_enter_state+0x174/0x400
  [c0000001fb0e3e50] [c000000000104550] call_cpuidle+0x50/0xa0
  [c0000001fb0e3e70] [c000000000104b58] cpu_startup_entry+0x338/0x440
  [c0000001fb0e3f30] [c00000000004090c] start_secondary+0x35c/0x3a0
  [c0000001fb0e3f90] [c000000000008b6c] start_secondary_prolog+0x10/0x14
  Instruction dump:
  60000000 4bfe6c79 60000000 e8610020 38210030 e8010010 7c0803a6 4bffff30
  60420000 3c4c00ca 3842ff20 3d220010 <8869dfc0> 4e800020 60000000 60000000
  ---[ end trace 129c2895cb584df3 ]---

  Kernel panic - not syncing: Fatal exception in interrupt


That doesn't happen without your series applied, though that doesn't 100% mean
it's your bug. I haven't had time to dig any deeper.

> Stay tuned for v7...

Thanks.

cheers

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2)
  2016-01-28  3:31       ` Michael Ellerman
@ 2016-01-28 11:19         ` Torsten Duwe
  0 siblings, 0 replies; 44+ messages in thread
From: Torsten Duwe @ 2016-01-28 11:19 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Balbir Singh, Steven Rostedt, Jiri Kosina, linuxppc-dev,
	linux-kernel, live-patching

On Thu, Jan 28, 2016 at 02:31:58PM +1100, Michael Ellerman wrote:
> 
> Looking at GCC history it looks like the fix is in 4.9.0 and anything later.

Good. But 4.8.5 has a buggy -mprofile-kernel, and there will be no 4.8.6, Bad.

> But a version check doesn't work with patched distro/vendor toolchains. So we
> probably need some sort of runtime check.

Agreed.

/bin/echo -e '#include <linux/compiler.h>\nnotrace int func() { return 0; }' |
 gcc -D__KERNEL__ -Iinclude -p -mprofile-kernel -x c -O2 - -S -o - | grep mcount

should be empty. If it yields "bl _mcount" your compiler is buggy.
I haven't looked at the kernel's "autoconf" yet, but it's probably capable
of testing this.

	Torsten

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel
  2016-01-28  4:26         ` Michael Ellerman
  (?)
@ 2016-01-28 11:50         ` Torsten Duwe
  -1 siblings, 0 replies; 44+ messages in thread
From: Torsten Duwe @ 2016-01-28 11:50 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Steven Rostedt, Anton Blanchard, amodra, Jiri Kosina,
	linuxppc-dev, linux-kernel, live-patching

On Thu, Jan 28, 2016 at 03:26:59PM +1100, Michael Ellerman wrote:
> 
> That raises an interesting question, how does it work *without* DYNAMIC_FTRACE?
> 
> It looks like you haven't updated that version of _mcount at all? Or maybe I'm
> missing an #ifdef somewhere?

You didn't, I did. I haven't considered that combination.

> It doesn't look like that will work right with the -mprofile-kernel ABI. And
> indeed it doesn't boot.

The lean _mcount should handle it and boot, had I not misplaced it in
the #ifdefs, but then of course profiling wouldn't work.

> So we'll need to work that out. I guess the minimum would be to disable
> -mprofile-kernel if DYNAMIC_FTRACE is disabled.

I feel that supporting all combinations of ABIv1/ABIv2, FTRACE/DYNAMIC_FTRACE,
-p/-mprofile-kernel will get us into #ifdef hell, and at least one kernel
developer will go insane. That will probably be the one porting this
to ppc64be (ABIv1).

> Frankly I think we'd be happy to *only* support DYNAMIC_FTRACE, but the generic
> code doesn't let us do that at the moment.

Seconded.
I'll have a look at the Kconfigs.

> But it's better than the previous version which didn't boot :)

That's your fault, you picked the wrong compiler ;-)

> Also ftracetest fails at step 8:
>   ...
>   [8] ftrace - function graph filters with stack tracer
>   Unable to handle kernel paging request for data at address 0xd0000000033d7f70
[...]
> That doesn't happen without your series applied, though that doesn't 100% mean
> it's your bug. I haven't had time to dig any deeper.

Will check as well...

	Torsten

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 8/9] Implement kernel live patching for ppc64le (ABIv2)
  2016-01-26 12:48     ` Petr Mladek
  2016-01-26 13:56       ` Torsten Duwe
@ 2016-02-02 12:12       ` Petr Mladek
  2016-02-02 15:45         ` Torsten Duwe
  1 sibling, 1 reply; 44+ messages in thread
From: Petr Mladek @ 2016-02-02 12:12 UTC (permalink / raw)
  To: Miroslav Benes
  Cc: Torsten Duwe, Steven Rostedt, Michael Ellerman, Jiri Kosina,
	linuxppc-dev, Linux Kernel Mailing List, live-patching

On Tue 2016-01-26 13:48:53, Petr Mladek wrote:
> On Tue 2016-01-26 11:50:25, Miroslav Benes wrote:
> > 
> > [ added Petr to CC list ]
> > 
> > On Mon, 25 Jan 2016, Torsten Duwe wrote:
> > 
> > >   * create the appropriate files+functions
> > >     arch/powerpc/include/asm/livepatch.h
> > >         klp_check_compiler_support,
> > >         klp_arch_set_pc
> > >     arch/powerpc/kernel/livepatch.c with a stub for
> > >         klp_write_module_reloc
> > >     This is architecture-independent work in progress.
> > >   * introduce a fixup in arch/powerpc/kernel/entry_64.S
> > >     for local calls that are becoming global due to live patching.
> > >     And of course do the main KLP thing: return to a maybe different
> > >     address, possibly altered by the live patching ftrace op.
> > > 
> > > Signed-off-by: Torsten Duwe <duwe@suse.de>
> > 
> > Hi,
> > 
> > I have a few questions...
> > 
> > We still need Petr's patch from [1] to make livepatch work, right? Could 
> > you, please, add it to this patch set to make it self-sufficient?
> > 
> > Second, what is the situation with mcount prologue between gcc < 6 and 
> > gcc-6? Are there only 12 bytes in gcc-6 prologue? If yes, we need to 
> > change Petr's patch to make it more general and to be able to cope with 
> > different prologues. This is unfortunate. Either way, please mention it 
> > somewhere in a changelog.
> 
> I am going to update the extra patch. There is an idea to detect the
> offset during build by scrips/recordmcount. This tool looks for the
> ftrace locations. The offset should always be a constant that depends
> on the used architecture, compiler, and compiler flags.
> 
> The tool is called post build. We might need to pass the constant
> as a symbol added to the binary. The tool already adds some symbols.

Hmm, the size of the offset is not a constant. In particular, leaf
functions do not set TOC before the mcount location.

For example, the code generated for int_to_scsilun() looks like:


00000000000002d0 <int_to_scsilun>:
 2d0:   a6 02 08 7c     mflr    r0
 2d4:   10 00 01 f8     std     r0,16(r1)
 2d8:   01 00 00 48     bl      2d8 <int_to_scsilun+0x8>
                        2d8: R_PPC64_REL24      _mcount
 2dc:   a6 02 08 7c     mflr    r0
 2e0:   10 00 01 f8     std     r0,16(r1)
 2e4:   e1 ff 21 f8     stdu    r1,-32(r1)
 2e8:   00 00 20 39     li      r9,0
 2ec:   00 00 24 f9     std     r9,0(r4)
 2f0:   04 00 20 39     li      r9,4
 2f4:   a6 03 29 7d     mtctr   r9
 2f8:   00 00 40 39     li      r10,0
 2fc:   02 c2 68 78     rldicl  r8,r3,56,8
 300:   78 23 89 7c     mr      r9,r4
 304:   ee 51 09 7d     stbux   r8,r9,r10
 308:   02 00 4a 39     addi    r10,r10,2
 30c:   01 00 69 98     stb     r3,1(r9)
 310:   02 84 63 78     rldicl  r3,r3,48,16
 314:   e8 ff 00 42     bdnz    2fc <int_to_scsilun+0x2c>
 318:   20 00 21 38     addi    r1,r1,32
 31c:   10 00 01 e8     ld      r0,16(r1)
 320:   a6 03 08 7c     mtlr    r0
 324:   20 00 80 4e     blr
 328:   00 00 00 60     nop
 32c:   00 00 42 60     ori     r2,r2,0


Note that non-leaf functions starts with

0000000000000330 <scsi_set_sense_information>:
 330:   00 00 4c 3c     addis   r2,r12,0
                        330: R_PPC64_REL16_HA   .TOC.
 334:   00 00 42 38     addi    r2,r2,0
                        334: R_PPC64_REL16_LO   .TOC.+0x4
 338:   a6 02 08 7c     mflr    r0
 33c:   10 00 01 f8     std     r0,16(r1)
 340:   01 00 00 48     bl      340 <scsi_set_sense_information+0x10>
                        340: R_PPC64_REL24      _mcount


The above code is generated from kernel-4.5-rc1 sources using

$> gcc --version
gcc (SUSE Linux) 4.8.5
Copyright (C) 2015 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.


But I get similar code also with

$> gcc-6 --version
gcc-6 (SUSE Linux) 6.0.0 20160121 (experimental) [trunk revision 232670]
Copyright (C) 2016 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.



The result is that kernel crashes when trying to trace leaf function
from modules. The mcount location is replaced with a call (branch)
that does not work without the TOC stuff.

By other words, it seems that the code generated with -mprofile-kernel
option has been buggy in all gcc versions.

I am curious that nobody found this earlier. Am I doing something
wrong, please?


Best Regards,
Petr

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 8/9] Implement kernel live patching for ppc64le (ABIv2)
  2016-01-25 15:33 ` [PATCH v6 8/9] Implement kernel live patching for ppc64le (ABIv2) Torsten Duwe
  2016-01-26 10:50   ` Miroslav Benes
@ 2016-02-02 13:46   ` Denis Kirjanov
  2016-02-10 18:03     ` Torsten Duwe
  1 sibling, 1 reply; 44+ messages in thread
From: Denis Kirjanov @ 2016-02-02 13:46 UTC (permalink / raw)
  To: Torsten Duwe
  Cc: Steven Rostedt, Michael Ellerman, Jiri Kosina, linuxppc-dev,
	linux-kernel, live-patching

On 1/25/16, Torsten Duwe <duwe@lst.de> wrote:
>   * create the appropriate files+functions
>     arch/powerpc/include/asm/livepatch.h
>         klp_check_compiler_support,
>         klp_arch_set_pc
>     arch/powerpc/kernel/livepatch.c with a stub for
>         klp_write_module_reloc
>     This is architecture-independent work in progress.
>   * introduce a fixup in arch/powerpc/kernel/entry_64.S
>     for local calls that are becoming global due to live patching.
>     And of course do the main KLP thing: return to a maybe different
>     address, possibly altered by the live patching ftrace op.
>
> Signed-off-by: Torsten Duwe <duwe@suse.de>
> ---
>  arch/powerpc/include/asm/livepatch.h | 45 +++++++++++++++++++++++++++++++
>  arch/powerpc/kernel/entry_64.S       | 51
> +++++++++++++++++++++++++++++++++---
>  arch/powerpc/kernel/livepatch.c      | 38 +++++++++++++++++++++++++++
>  3 files changed, 130 insertions(+), 4 deletions(-)
>  create mode 100644 arch/powerpc/include/asm/livepatch.h
>  create mode 100644 arch/powerpc/kernel/livepatch.c
>
> diff --git a/arch/powerpc/include/asm/livepatch.h
> b/arch/powerpc/include/asm/livepatch.h
> new file mode 100644
> index 0000000..44e8a2d
> --- /dev/null
> +++ b/arch/powerpc/include/asm/livepatch.h
> @@ -0,0 +1,45 @@
> +/*
> + * livepatch.h - powerpc-specific Kernel Live Patching Core
> + *
> + * Copyright (C) 2015 SUSE
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, see <http://www.gnu.org/licenses/>.
> + */
> +#ifndef _ASM_POWERPC64_LIVEPATCH_H
> +#define _ASM_POWERPC64_LIVEPATCH_H
> +
> +#include <linux/module.h>
> +#include <linux/ftrace.h>
> +
> +#ifdef CONFIG_LIVEPATCH
> +static inline int klp_check_compiler_support(void)
> +{
> +#if !defined(_CALL_ELF) || _CALL_ELF != 2 ||
> !defined(CC_USING_MPROFILE_KERNEL)
> +	return 1;
> +#endif
> +	return 0;
> +}
This function can be boolean.
> +
> +extern int klp_write_module_reloc(struct module *mod, unsigned long type,
> +				   unsigned long loc, unsigned long value);
> +
> +static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
> +{
> +	regs->nip = ip;
> +}
> +#else
> +#error Live patching support is disabled; check CONFIG_LIVEPATCH
> +#endif
> +
> +#endif /* _ASM_POWERPC64_LIVEPATCH_H */
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index 9e98aa1..f6e3ee7 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -1267,6 +1267,9 @@ _GLOBAL(ftrace_caller)
>  	mflr    r3
>  	std     r3, _NIP(r1)
>  	std	r3, 16(r1)
> +#ifdef CONFIG_LIVEPATCH
> +	mr	r14,r3		/* remember old NIP */
> +#endif
>  	subi    r3, r3, MCOUNT_INSN_SIZE
>  	mfmsr   r4
>  	std     r4, _MSR(r1)
> @@ -1283,7 +1286,10 @@ ftrace_call:
>  	nop
>
>  	ld	r3, _NIP(r1)
> -	mtlr	r3
> +	mtctr	r3		/* prepare to jump there */
> +#ifdef CONFIG_LIVEPATCH
> +	cmpd	r14,r3		/* has NIP been altered? */
> +#endif
>
>  	REST_8GPRS(0,r1)
>  	REST_8GPRS(8,r1)
> @@ -1296,6 +1302,27 @@ ftrace_call:
>  	mtlr	r12
>  	mr	r2,r0		/* restore callee's TOC */
>
> +#ifdef CONFIG_LIVEPATCH
> +	beq+	4f		/* likely(old_NIP == new_NIP) */
> +
> +	/* For a local call, restore this TOC after calling the patch function.
> +	 * For a global call, it does not matter what we restore here,
> +	 * since the global caller does its own restore right afterwards,
> +	 * anyway. Just insert a KLP_return_helper frame in any case,
> +	 * so a patch function can always count on the changed stack offsets.
> +	 */
> +	stdu	r1,-32(r1)	/* open new mini stack frame */
> +	std	r0,24(r1)	/* save TOC now, unconditionally. */
> +	bl	5f
> +5:	mflr	r12
> +	addi	r12,r12,(KLP_return_helper+4-.)@l
> +	std	r12,LRSAVE(r1)
> +	mtlr	r12
> +	mfctr	r12		/* allow for TOC calculation in newfunc */
> +	bctr
> +4:
> +#endif
> +
>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
>  	stdu	r1, -112(r1)
>  .globl ftrace_graph_call
> @@ -1305,15 +1332,31 @@ _GLOBAL(ftrace_graph_stub)
>  	addi	r1, r1, 112
>  #endif
>
> -	mflr	r0		/* move this LR to CTR */
> -	mtctr	r0
> -
>  	ld	r0,LRSAVE(r1)	/* restore callee's lr at _mcount site */
>  	mtlr	r0
>  	bctr			/* jump after _mcount site */
>  #endif /* CC_USING_MPROFILE_KERNEL */
>  _GLOBAL(ftrace_stub)
>  	blr
> +
> +#ifdef CONFIG_LIVEPATCH
> +/* Helper function for local calls that are becoming global
> +   due to live patching.
> +   We can't simply patch the NOP after the original call,
> +   because, depending on the consistency model, some kernel
> +   threads may still have called the original, local function
> +   *without* saving their TOC in the respective stack frame slot,
> +   so the decision is made per-thread during function return by
> +   maybe inserting a KLP_return_helper frame or not.
> +*/
> +KLP_return_helper:
> +	ld	r2,24(r1)	/* restore TOC (saved by ftrace_caller) */
> +	addi r1, r1, 32		/* destroy mini stack frame */
> +	ld	r0,LRSAVE(r1)	/* get the real return address */
> +	mtlr	r0
> +	blr
> +#endif
> +
>  #else
>  _GLOBAL_TOC(_mcount)
>  	/* Taken from output of objdump from lib64/glibc */
> diff --git a/arch/powerpc/kernel/livepatch.c
> b/arch/powerpc/kernel/livepatch.c
> new file mode 100644
> index 0000000..564eafa
> --- /dev/null
> +++ b/arch/powerpc/kernel/livepatch.c
> @@ -0,0 +1,38 @@
> +/*
> + * livepatch.c - powerpc-specific Kernel Live Patching Core
> + *
> + * Copyright (C) 2015 SUSE
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, see <http://www.gnu.org/licenses/>.
> + */
> +#include <linux/module.h>
> +#include <asm/livepatch.h>
> +
> +/**
> + * klp_write_module_reloc() - write a relocation in a module
> + * @mod:       module in which the section to be modified is found
> + * @type:      ELF relocation type (see asm/elf.h)
> + * @loc:       address that the relocation should be written to
> + * @value:     relocation value (sym address + addend)
> + *
> + * This function writes a relocation to the specified location for
> + * a particular module.
> + */
> +int klp_write_module_reloc(struct module *mod, unsigned long type,
> +			    unsigned long loc, unsigned long value)
> +{
> +	/* This requires infrastructure changes; we need the loadinfos. */
> +	pr_err("lpc_write_module_reloc not yet supported\n");
> +	return -ENOSYS;
> +}
> --
> 1.8.5.6
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 8/9] Implement kernel live patching for ppc64le (ABIv2)
  2016-02-02 12:12       ` Petr Mladek
@ 2016-02-02 15:45         ` Torsten Duwe
  2016-02-02 16:47           ` Petr Mladek
  0 siblings, 1 reply; 44+ messages in thread
From: Torsten Duwe @ 2016-02-02 15:45 UTC (permalink / raw)
  To: Petr Mladek
  Cc: Miroslav Benes, Steven Rostedt, Michael Ellerman, anton,
	Jiri Kosina, linuxppc-dev, Linux Kernel Mailing List,
	live-patching

On Tue, Feb 02, 2016 at 01:12:24PM +0100, Petr Mladek wrote:
> 
> Hmm, the size of the offset is not a constant. In particular, leaf
> functions do not set TOC before the mcount location.

To be slightly more precise, a leaf function that additionally uses
no global data. No global function calls, no global data access =>
no need to load the TOC.

> For example, the code generated for int_to_scsilun() looks like:
> 
> 
> 00000000000002d0 <int_to_scsilun>:
>  2d0:   a6 02 08 7c     mflr    r0
>  2d4:   10 00 01 f8     std     r0,16(r1)
>  2d8:   01 00 00 48     bl      2d8 <int_to_scsilun+0x8>
>                         2d8: R_PPC64_REL24      _mcount
[...]
> The above code is generated from kernel-4.5-rc1 sources using
> 
> $> gcc --version
> gcc (SUSE Linux) 4.8.5
> 
> But I get similar code also with
> 
> $> gcc-6 --version
> gcc-6 (SUSE Linux) 6.0.0 20160121 (experimental) [trunk revision 232670]
> 
> 
> The result is that kernel crashes when trying to trace leaf function

The trampoline *requires* a proper TOC pointer to find the remote function
entry point. If you jump onto the trampoline with the TOC from the caller's
caller you'll grab some address from somewhere and jump into nirvana.

> By other words, it seems that the code generated with -mprofile-kernel
> option has been buggy in all gcc versions.

Either that or we need bigger trampolines for everybody.

Michael, should we grow every module trampoline to always load R2,
or fix GCC to recognise the generated bl _mcount as a global function call?
Anton, what do you think?

	Torsten

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 8/9] Implement kernel live patching for ppc64le (ABIv2)
  2016-02-02 15:45         ` Torsten Duwe
@ 2016-02-02 16:47           ` Petr Mladek
  2016-02-02 20:39             ` Jiri Kosina
  0 siblings, 1 reply; 44+ messages in thread
From: Petr Mladek @ 2016-02-02 16:47 UTC (permalink / raw)
  To: Torsten Duwe
  Cc: Miroslav Benes, Steven Rostedt, Michael Ellerman, anton,
	Jiri Kosina, linuxppc-dev, Linux Kernel Mailing List,
	live-patching

On Tue 2016-02-02 16:45:23, Torsten Duwe wrote:
> On Tue, Feb 02, 2016 at 01:12:24PM +0100, Petr Mladek wrote:
> > 
> > Hmm, the size of the offset is not a constant. In particular, leaf
> > functions do not set TOC before the mcount location.
> 
> To be slightly more precise, a leaf function that additionally uses
> no global data. No global function calls, no global data access =>
> no need to load the TOC.

Thanks for explanation.
 
> > The result is that kernel crashes when trying to trace leaf function
> 
> The trampoline *requires* a proper TOC pointer to find the remote function
> entry point. If you jump onto the trampoline with the TOC from the caller's
> caller you'll grab some address from somewhere and jump into nirvana.

The dmesg messages suggested someting like this.


> > By other words, it seems that the code generated with -mprofile-kernel
> > option has been buggy in all gcc versions.
> 
> Either that or we need bigger trampolines for everybody.
> 
> Michael, should we grow every module trampoline to always load R2,
> or fix GCC to recognise the generated bl _mcount as a global function call?
> Anton, what do you think?

BTW: Is the trampoline used also for classic probes? If not, we might need
a trampoline for them as well.

Note that TOC is not set only when the problematic functions are
compiled with -mprofile-kernel. I still see the TOC stuff when
compiling only with -pg.


Best Regards,
Petr

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 8/9] Implement kernel live patching for ppc64le (ABIv2)
  2016-02-02 16:47           ` Petr Mladek
@ 2016-02-02 20:39             ` Jiri Kosina
  0 siblings, 0 replies; 44+ messages in thread
From: Jiri Kosina @ 2016-02-02 20:39 UTC (permalink / raw)
  To: Petr Mladek
  Cc: Torsten Duwe, Miroslav Benes, Steven Rostedt, Michael Ellerman,
	anton, linuxppc-dev, Linux Kernel Mailing List, live-patching

On Tue, 2 Feb 2016, Petr Mladek wrote:

> Note that TOC is not set only when the problematic functions are 
> compiled with -mprofile-kernel. I still see the TOC stuff when 
> compiling only with -pg.

I don't see how this wouldn't be a gcc bug.

No matter whether it's plain profiling call (-pg) or kernel profiling call 
(-mprofile-kernel), gcc must always assume that global function (that will 
typically have just one instance for the whole address space) will be 
called.

-- 
Jiri Kosina
SUSE Labs

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel
  2016-01-25 15:26 ` [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel Torsten Duwe
  2016-01-27 10:19   ` Michael Ellerman
@ 2016-02-03  7:23   ` AKASHI Takahiro
  2016-02-03  8:55     ` Jiri Kosina
  1 sibling, 1 reply; 44+ messages in thread
From: AKASHI Takahiro @ 2016-02-03  7:23 UTC (permalink / raw)
  To: Torsten Duwe, Steven Rostedt, Michael Ellerman
  Cc: Jiri Kosina, linuxppc-dev, linux-kernel, live-patching

Hi,

On 01/26/2016 12:26 AM, Torsten Duwe wrote:
> The gcc switch -mprofile-kernel, available for ppc64 on gcc > 4.8.5,
> allows to call _mcount very early in the function, which low-level
> ASM code and code patching functions need to consider.
> Especially the link register and the parameter registers are still
> alive and not yet saved into a new stack frame.

I'm thinking of implementing live patch support *for arm64*, and as part of
those efforts, we are proposing[1] a new *generic* gcc option, -fprolog-add=N.
This option will insert N nop instructions at the beginning of each function.
So we have to initialize those codes at the boot time to later utilize
them for FTRACE_WITH_REGS. Other than that, it will work similarly
with -mfentry on x86 (and -mprofile-kernel?).

I'm totally unfamiliar with ppc architecture, but just wondering
whether this option will also be useful for other architectures.

I will really appreciate you if you share your thoughts with me, please?

[1]  https://gcc.gnu.org/ml/gcc/2015-05/msg00267.html, and
      https://gcc.gnu.org/ml/gcc/2015-10/msg00090.html

Thanks,
-Takahiro AKASHI

> Signed-off-by: Torsten Duwe <duwe@suse.de>
> ---
>   arch/powerpc/kernel/entry_64.S  | 45 +++++++++++++++++++++++++++++++++++++++--
>   arch/powerpc/kernel/ftrace.c    | 12 +++++++++--
>   arch/powerpc/kernel/module_64.c | 14 +++++++++++++
>   3 files changed, 67 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index a94f155..e7cd043 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -1206,7 +1206,12 @@ _GLOBAL(enter_prom)
>   #ifdef CONFIG_DYNAMIC_FTRACE
>   _GLOBAL(mcount)
>   _GLOBAL(_mcount)
> -	blr
> +	std	r0,LRSAVE(r1) /* gcc6 does this _after_ this call _only_ */
> +	mflr	r0
> +	mtctr	r0
> +	ld	r0,LRSAVE(r1)
> +	mtlr	r0
> +	bctr
>
>   _GLOBAL_TOC(ftrace_caller)
>   	/* Taken from output of objdump from lib64/glibc */
> @@ -1262,13 +1267,28 @@ _GLOBAL(ftrace_stub)
>
>   #ifdef CONFIG_FUNCTION_GRAPH_TRACER
>   _GLOBAL(ftrace_graph_caller)
> +#ifdef CC_USING_MPROFILE_KERNEL
> +	/* with -mprofile-kernel, parameter regs are still alive at _mcount */
> +	std	r10, 104(r1)
> +	std	r9, 96(r1)
> +	std	r8, 88(r1)
> +	std	r7, 80(r1)
> +	std	r6, 72(r1)
> +	std	r5, 64(r1)
> +	std	r4, 56(r1)
> +	std	r3, 48(r1)
> +	mfctr	r4		/* ftrace_caller has moved local addr here */
> +	std	r4, 40(r1)
> +	mflr	r3		/* ftrace_caller has restored LR from stack */
> +#else
>   	/* load r4 with local address */
>   	ld	r4, 128(r1)
> -	subi	r4, r4, MCOUNT_INSN_SIZE
>
>   	/* Grab the LR out of the caller stack frame */
>   	ld	r11, 112(r1)
>   	ld	r3, 16(r11)
> +#endif
> +	subi	r4, r4, MCOUNT_INSN_SIZE
>
>   	bl	prepare_ftrace_return
>   	nop
> @@ -1277,6 +1297,26 @@ _GLOBAL(ftrace_graph_caller)
>   	 * prepare_ftrace_return gives us the address we divert to.
>   	 * Change the LR in the callers stack frame to this.
>   	 */
> +
> +#ifdef CC_USING_MPROFILE_KERNEL
> +	mtlr	r3
> +
> +	ld	r0, 40(r1)
> +	mtctr	r0
> +	ld	r10, 104(r1)
> +	ld	r9, 96(r1)
> +	ld	r8, 88(r1)
> +	ld	r7, 80(r1)
> +	ld	r6, 72(r1)
> +	ld	r5, 64(r1)
> +	ld	r4, 56(r1)
> +	ld	r3, 48(r1)
> +
> +	addi	r1, r1, 112
> +	mflr	r0
> +	std	r0, LRSAVE(r1)
> +	bctr
> +#else
>   	ld	r11, 112(r1)
>   	std	r3, 16(r11)
>
> @@ -1284,6 +1324,7 @@ _GLOBAL(ftrace_graph_caller)
>   	mtlr	r0
>   	addi	r1, r1, 112
>   	blr
> +#endif
>
>   _GLOBAL(return_to_handler)
>   	/* need to save return values */
> diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
> index 44d4d8e..080c525 100644
> --- a/arch/powerpc/kernel/ftrace.c
> +++ b/arch/powerpc/kernel/ftrace.c
> @@ -306,11 +306,19 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
>   	 * The load offset is different depending on the ABI. For simplicity
>   	 * just mask it out when doing the compare.
>   	 */
> +#ifndef CC_USING_MPROFILE_KERNEL
>   	if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) {
> -		pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]);
> +		pr_err("Unexpected call sequence at %p: %x %x\n",
> +		ip, op[0], op[1]);
>   		return -EINVAL;
>   	}
> -
> +#else
> +	/* look for patched "NOP" on ppc64 with -mprofile-kernel */
> +	if (op[0] != 0x60000000) {
> +		pr_err("Unexpected call at %p: %x\n", ip, op[0]);
> +		return -EINVAL;
> +	}
> +#endif
>   	/* If we never set up a trampoline to ftrace_caller, then bail */
>   	if (!rec->arch.mod->arch.tramp) {
>   		pr_err("No ftrace trampoline\n");
> diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
> index 6838451..30f6be1 100644
> --- a/arch/powerpc/kernel/module_64.c
> +++ b/arch/powerpc/kernel/module_64.c
> @@ -475,6 +475,20 @@ static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
>   static int restore_r2(u32 *instruction, struct module *me)
>   {
>   	if (*instruction != PPC_INST_NOP) {
> +#ifdef CC_USING_MPROFILE_KERNEL
> +		/* -mprofile_kernel sequence starting with
> +		 * mflr r0 and maybe std r0, LRSAVE(r1)
> +		 */
> +		if ((instruction[-3] == 0x7c0802a6 &&
> +		    instruction[-2] == 0xf8010010) ||
> +		    instruction[-2] == 0x7c0802a6) {
> +			/* Nothing to be done here, it's an _mcount
> +			 * call location and r2 will have to be
> +			 * restored in the _mcount function.
> +			 */
> +			return 1;
> +		};
> +#endif
>   		pr_err("%s: Expect noop after relocate, got %08x\n",
>   		       me->name, *instruction);
>   		return 0;
>

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel
  2016-02-03  7:23   ` AKASHI Takahiro
@ 2016-02-03  8:55     ` Jiri Kosina
  2016-02-03 11:24       ` Torsten Duwe
  0 siblings, 1 reply; 44+ messages in thread
From: Jiri Kosina @ 2016-02-03  8:55 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: Torsten Duwe, Steven Rostedt, Michael Ellerman, linuxppc-dev,
	linux-kernel, live-patching

On Wed, 3 Feb 2016, AKASHI Takahiro wrote:

> > The gcc switch -mprofile-kernel, available for ppc64 on gcc > 4.8.5,
> > allows to call _mcount very early in the function, which low-level
> > ASM code and code patching functions need to consider.
> > Especially the link register and the parameter registers are still
> > alive and not yet saved into a new stack frame.
> 
> I'm thinking of implementing live patch support *for arm64*, and as part of
> those efforts, we are proposing[1] a new *generic* gcc option, -fprolog-add=N.
> This option will insert N nop instructions at the beginning of each function.
> So we have to initialize those codes at the boot time to later utilize
> them for FTRACE_WITH_REGS. Other than that, it will work similarly
> with -mfentry on x86 (and -mprofile-kernel?).
> 
> I'm totally unfamiliar with ppc architecture, but just wondering
> whether this option will also be useful for other architectures.

The interesting part of the story with ppc64 is that you indeed want to 
create the callsite before the *most* of the prologue, but not really :) 
The part of the prologue where TOC pointer is saved needs to happen before 
the fentry/profiling call.

I don't think this will be an issue on ARM64, but it definitely is 
something that should be taken into account in case the gcc option is 
meant to be really generic.

Thanks,

-- 
Jiri Kosina
SUSE Labs

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel
  2016-02-03  8:55     ` Jiri Kosina
@ 2016-02-03 11:24       ` Torsten Duwe
  2016-02-04  9:31         ` AKASHI Takahiro
  0 siblings, 1 reply; 44+ messages in thread
From: Torsten Duwe @ 2016-02-03 11:24 UTC (permalink / raw)
  To: Jiri Kosina
  Cc: AKASHI Takahiro, Steven Rostedt, Michael Ellerman, linuxppc-dev,
	linux-kernel, live-patching

On Wed, Feb 03, 2016 at 09:55:11AM +0100, Jiri Kosina wrote:
> On Wed, 3 Feb 2016, AKASHI Takahiro wrote:
> > those efforts, we are proposing[1] a new *generic* gcc option, -fprolog-add=N.
> > This option will insert N nop instructions at the beginning of each function.

> The interesting part of the story with ppc64 is that you indeed want to 
> create the callsite before the *most* of the prologue, but not really :) 

I was silently assuming that GCC would do this right on ppc64le; add the NOPs
right after the TOC load. Or after TOC load and LR save? ...

> The part of the prologue where TOC pointer is saved needs to happen before 
> the fentry/profiling call.

Yes, any call, to any profiler/tracer/live patcher is potentially global
and needs the _new_ TOC value.

This proposal, if implemented in a too naive fashion, will worsen the problem
we currently discuss: a few NOPs _never_ cause any global reference. GCC might
be even more inclined to not load a new TOC value. That change would need to be
fairly smart on ppc64le.

	Torsten

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel
  2016-02-03 11:24       ` Torsten Duwe
@ 2016-02-04  9:31         ` AKASHI Takahiro
  2016-02-04 11:02           ` Petr Mladek
  2016-02-04 21:47           ` Jiri Kosina
  0 siblings, 2 replies; 44+ messages in thread
From: AKASHI Takahiro @ 2016-02-04  9:31 UTC (permalink / raw)
  To: Torsten Duwe, Jiri Kosina
  Cc: Steven Rostedt, Michael Ellerman, linuxppc-dev, linux-kernel,
	live-patching

Jiri, Torsten

Thank you for your explanation.

On 02/03/2016 08:24 PM, Torsten Duwe wrote:
> On Wed, Feb 03, 2016 at 09:55:11AM +0100, Jiri Kosina wrote:
>> On Wed, 3 Feb 2016, AKASHI Takahiro wrote:
>>> those efforts, we are proposing[1] a new *generic* gcc option, -fprolog-add=N.
>>> This option will insert N nop instructions at the beginning of each function.
>
>> The interesting part of the story with ppc64 is that you indeed want to
>> create the callsite before the *most* of the prologue, but not really :)
>
> I was silently assuming that GCC would do this right on ppc64le; add the NOPs
> right after the TOC load. Or after TOC load and LR save? ...

On arm/arm64, link register must be saved before any function call. So anyhow
we will have to add something, 3 instructions at the minimum, like:
    save lr
    branch _mcount
    restore lr
    <prologue>
    ...
    <body>
    ...

>> The part of the prologue where TOC pointer is saved needs to happen before
>> the fentry/profiling call.
>
> Yes, any call, to any profiler/tracer/live patcher is potentially global
> and needs the _new_ TOC value.

I don't want to bother you, but for my better understanding, could you show me
an example of the asm instructions for a function prologue under -mprofile-kernel, please?

-Takahiro AKASHI

> This proposal, if implemented in a too naive fashion, will worsen the problem
> we currently discuss: a few NOPs _never_ cause any global reference. GCC might
> be even more inclined to not load a new TOC value. That change would need to be
> fairly smart on ppc64le.
>
> 	Torsten
>

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel
  2016-02-04  9:31         ` AKASHI Takahiro
@ 2016-02-04 11:02           ` Petr Mladek
  2016-02-05  4:40             ` Balbir Singh
  2016-02-04 21:47           ` Jiri Kosina
  1 sibling, 1 reply; 44+ messages in thread
From: Petr Mladek @ 2016-02-04 11:02 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: Torsten Duwe, Jiri Kosina, Steven Rostedt, Michael Ellerman,
	linuxppc-dev, linux-kernel, live-patching

On Thu 2016-02-04 18:31:40, AKASHI Takahiro wrote:
> Jiri, Torsten
> 
> Thank you for your explanation.
> 
> On 02/03/2016 08:24 PM, Torsten Duwe wrote:
> >On Wed, Feb 03, 2016 at 09:55:11AM +0100, Jiri Kosina wrote:
> >>On Wed, 3 Feb 2016, AKASHI Takahiro wrote:
> >>>those efforts, we are proposing[1] a new *generic* gcc option, -fprolog-add=N.
> >>>This option will insert N nop instructions at the beginning of each function.
> >
> >>The interesting part of the story with ppc64 is that you indeed want to
> >>create the callsite before the *most* of the prologue, but not really :)
> >
> >I was silently assuming that GCC would do this right on ppc64le; add the NOPs
> >right after the TOC load. Or after TOC load and LR save? ...
> 
> On arm/arm64, link register must be saved before any function call. So anyhow
> we will have to add something, 3 instructions at the minimum, like:
>    save lr
>    branch _mcount
>    restore lr
>    <prologue>
>    ...
>    <body>
>    ...

So, it is similar to PPC that has to handle LR as well.


> >>The part of the prologue where TOC pointer is saved needs to happen before
> >>the fentry/profiling call.
> >
> >Yes, any call, to any profiler/tracer/live patcher is potentially global
> >and needs the _new_ TOC value.

The code below is generated for PPC64LE with -mprofile-kernel using:

$> gcc --version
gcc (SUSE Linux) 6.0.0 20160121 (experimental) [trunk revision 232670]
Copyright (C) 2016 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.


0000000000000050 <cmdline_proc_show>:
  50:   00 00 4c 3c     addis   r2,r12,0
                        50: R_PPC64_REL16_HA    .TOC.
  54:   00 00 42 38     addi    r2,r2,0
                        54: R_PPC64_REL16_LO    .TOC.+0x4
  58:   a6 02 08 7c     mflr    r0
  5c:   01 00 00 48     bl      5c <cmdline_proc_show+0xc>
                        5c: R_PPC64_REL24       _mcount
  60:   a6 02 08 7c     mflr    r0
  64:   10 00 01 f8     std     r0,16(r1)
  68:   a1 ff 21 f8     stdu    r1,-96(r1)
  6c:   00 00 22 3d     addis   r9,r2,0
                        6c: R_PPC64_TOC16_HA    .toc
  70:   00 00 82 3c     addis   r4,r2,0
                        70: R_PPC64_TOC16_HA    .rodata.str1.8
  74:   00 00 29 e9     ld      r9,0(r9)
                        74: R_PPC64_TOC16_LO_DS .toc
  78:   00 00 84 38     addi    r4,r4,0
                        78: R_PPC64_TOC16_LO    .rodata.str1.8
  7c:   00 00 a9 e8     ld      r5,0(r9)
  80:   01 00 00 48     bl      80 <cmdline_proc_show+0x30>
                        80: R_PPC64_REL24       seq_printf
  84:   00 00 00 60     nop
  88:   00 00 60 38     li      r3,0
  8c:   60 00 21 38     addi    r1,r1,96
  90:   10 00 01 e8     ld      r0,16(r1)
  94:   a6 03 08 7c     mtlr    r0
  98:   20 00 80 4e     blr


And the same function compiled using:

$> gcc --version
gcc (SUSE Linux) 4.8.5
Copyright (C) 2015 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.


0000000000000050 <cmdline_proc_show>:
  50:   00 00 4c 3c     addis   r2,r12,0
                        50: R_PPC64_REL16_HA    .TOC.
  54:   00 00 42 38     addi    r2,r2,0
                        54: R_PPC64_REL16_LO    .TOC.+0x4
  58:   a6 02 08 7c     mflr    r0
  5c:   10 00 01 f8     std     r0,16(r1)
  60:   01 00 00 48     bl      60 <cmdline_proc_show+0x10>
                        60: R_PPC64_REL24       _mcount
  64:   a6 02 08 7c     mflr    r0
  68:   10 00 01 f8     std     r0,16(r1)
  6c:   a1 ff 21 f8     stdu    r1,-96(r1)
  70:   00 00 42 3d     addis   r10,r2,0
                        70: R_PPC64_TOC16_HA    .toc
  74:   00 00 82 3c     addis   r4,r2,0
                        74: R_PPC64_TOC16_HA    .rodata.str1.8
  78:   00 00 2a e9     ld      r9,0(r10)
                        78: R_PPC64_TOC16_LO_DS .toc
  7c:   00 00 84 38     addi    r4,r4,0
                        7c: R_PPC64_TOC16_LO    .rodata.str1.8
  80:   00 00 a9 e8     ld      r5,0(r9)
  84:   01 00 00 48     bl      84 <cmdline_proc_show+0x34>
                        84: R_PPC64_REL24       seq_printf
  88:   00 00 00 60     nop
  8c:   00 00 60 38     li      r3,0
  90:   60 00 21 38     addi    r1,r1,96
  94:   10 00 01 e8     ld      r0,16(r1)
  98:   a6 03 08 7c     mtlr    r0
  9c:   20 00 80 4e     blr


Please note that either 3 or 4 instructions are used before the
mcount location, depending on the compiler version.

Best Regards,
Petr

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel
  2016-02-04  9:31         ` AKASHI Takahiro
  2016-02-04 11:02           ` Petr Mladek
@ 2016-02-04 21:47           ` Jiri Kosina
  1 sibling, 0 replies; 44+ messages in thread
From: Jiri Kosina @ 2016-02-04 21:47 UTC (permalink / raw)
  To: AKASHI Takahiro
  Cc: Torsten Duwe, Steven Rostedt, Michael Ellerman, linuxppc-dev,
	linux-kernel, live-patching

On Thu, 4 Feb 2016, AKASHI Takahiro wrote:

> On arm/arm64, link register must be saved before any function call. So anyhow
> we will have to add something, 3 instructions at the minimum, like:
>    save lr
>    branch _mcount
>    restore lr
>    <prologue>
>    ...
>    <body>
>    ...

This means that we have at least two architectures that need one 
instruction before the mcount/mfentry call, and the rest of the prologue 
to follow afterwards. On x86, we don't need any "pre-prologue".

Presumably the corresponding opcodes have different sizes. This nicely 
demonstrates my point -- if this one-gcc-option-to-rule-them-all would 
exist, it needs to be generic enough to describe these kinds of 
constraints (who knows what other restrictions will pop up when exploring 
other, more exotic, architectures later).

Thanks,

-- 
Jiri Kosina
SUSE Labs

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel
  2016-02-04 11:02           ` Petr Mladek
@ 2016-02-05  4:40             ` Balbir Singh
  2016-02-05 10:22               ` Petr Mladek
  0 siblings, 1 reply; 44+ messages in thread
From: Balbir Singh @ 2016-02-05  4:40 UTC (permalink / raw)
  To: Petr Mladek
  Cc: AKASHI Takahiro, Jiri Kosina, linux-kernel, Steven Rostedt,
	Torsten Duwe, live-patching, linuxppc-dev

On Thu, Feb 4, 2016 at 10:02 PM, Petr Mladek <pmladek@suse.com> wrote:
> On Thu 2016-02-04 18:31:40, AKASHI Takahiro wrote:
>> Jiri, Torsten
>>
>> Thank you for your explanation.
>>
>> On 02/03/2016 08:24 PM, Torsten Duwe wrote:
>> >On Wed, Feb 03, 2016 at 09:55:11AM +0100, Jiri Kosina wrote:
>> >>On Wed, 3 Feb 2016, AKASHI Takahiro wrote:
>> >>>those efforts, we are proposing[1] a new *generic* gcc option, -fprolog-add=N.
>> >>>This option will insert N nop instructions at the beginning of each function.
>> >
>> >>The interesting part of the story with ppc64 is that you indeed want to
>> >>create the callsite before the *most* of the prologue, but not really :)
>> >
>> >I was silently assuming that GCC would do this right on ppc64le; add the NOPs
>> >right after the TOC load. Or after TOC load and LR save? ...
>>
>> On arm/arm64, link register must be saved before any function call. So anyhow
>> we will have to add something, 3 instructions at the minimum, like:
>>    save lr
>>    branch _mcount
>>    restore lr
>>    <prologue>
>>    ...
>>    <body>
>>    ...
>
> So, it is similar to PPC that has to handle LR as well.
>
>
>> >>The part of the prologue where TOC pointer is saved needs to happen before
>> >>the fentry/profiling call.
>> >
>> >Yes, any call, to any profiler/tracer/live patcher is potentially global
>> >and needs the _new_ TOC value.
>
> The code below is generated for PPC64LE with -mprofile-kernel using:
>
> $> gcc --version
> gcc (SUSE Linux) 6.0.0 20160121 (experimental) [trunk revision 232670]
> Copyright (C) 2016 Free Software Foundation, Inc.
> This is free software; see the source for copying conditions.  There is NO
> warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
>
>
> 0000000000000050 <cmdline_proc_show>:
>   50:   00 00 4c 3c     addis   r2,r12,0
>                         50: R_PPC64_REL16_HA    .TOC.
>   54:   00 00 42 38     addi    r2,r2,0
>                         54: R_PPC64_REL16_LO    .TOC.+0x4
>   58:   a6 02 08 7c     mflr    r0
>   5c:   01 00 00 48     bl      5c <cmdline_proc_show+0xc>
>                         5c: R_PPC64_REL24       _mcount
>   60:   a6 02 08 7c     mflr    r0
>   64:   10 00 01 f8     std     r0,16(r1)
>   68:   a1 ff 21 f8     stdu    r1,-96(r1)
>   6c:   00 00 22 3d     addis   r9,r2,0
>                         6c: R_PPC64_TOC16_HA    .toc
>   70:   00 00 82 3c     addis   r4,r2,0
>                         70: R_PPC64_TOC16_HA    .rodata.str1.8
>   74:   00 00 29 e9     ld      r9,0(r9)
>                         74: R_PPC64_TOC16_LO_DS .toc
>   78:   00 00 84 38     addi    r4,r4,0
>                         78: R_PPC64_TOC16_LO    .rodata.str1.8
>   7c:   00 00 a9 e8     ld      r5,0(r9)
>   80:   01 00 00 48     bl      80 <cmdline_proc_show+0x30>
>                         80: R_PPC64_REL24       seq_printf
>   84:   00 00 00 60     nop
>   88:   00 00 60 38     li      r3,0
>   8c:   60 00 21 38     addi    r1,r1,96
>   90:   10 00 01 e8     ld      r0,16(r1)
>   94:   a6 03 08 7c     mtlr    r0
>   98:   20 00 80 4e     blr
>
>
> And the same function compiled using:
>
> $> gcc --version
> gcc (SUSE Linux) 4.8.5
> Copyright (C) 2015 Free Software Foundation, Inc.
> This is free software; see the source for copying conditions.  There is NO
> warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
>
>
> 0000000000000050 <cmdline_proc_show>:
>   50:   00 00 4c 3c     addis   r2,r12,0
>                         50: R_PPC64_REL16_HA    .TOC.
>   54:   00 00 42 38     addi    r2,r2,0
>                         54: R_PPC64_REL16_LO    .TOC.+0x4
>   58:   a6 02 08 7c     mflr    r0
>   5c:   10 00 01 f8     std     r0,16(r1)
>   60:   01 00 00 48     bl      60 <cmdline_proc_show+0x10>
>                         60: R_PPC64_REL24       _mcount
>   64:   a6 02 08 7c     mflr    r0
>   68:   10 00 01 f8     std     r0,16(r1)
>   6c:   a1 ff 21 f8     stdu    r1,-96(r1)
>   70:   00 00 42 3d     addis   r10,r2,0
>                         70: R_PPC64_TOC16_HA    .toc
>   74:   00 00 82 3c     addis   r4,r2,0
>                         74: R_PPC64_TOC16_HA    .rodata.str1.8
>   78:   00 00 2a e9     ld      r9,0(r10)
>                         78: R_PPC64_TOC16_LO_DS .toc
>   7c:   00 00 84 38     addi    r4,r4,0
>                         7c: R_PPC64_TOC16_LO    .rodata.str1.8
>   80:   00 00 a9 e8     ld      r5,0(r9)
>   84:   01 00 00 48     bl      84 <cmdline_proc_show+0x34>
>                         84: R_PPC64_REL24       seq_printf
>   88:   00 00 00 60     nop
>   8c:   00 00 60 38     li      r3,0
>   90:   60 00 21 38     addi    r1,r1,96
>   94:   10 00 01 e8     ld      r0,16(r1)
>   98:   a6 03 08 7c     mtlr    r0
>   9c:   20 00 80 4e     blr
>
>
> Please, note that are used either 3 or 4 instructions before the
> mcount location depending on the compiler version.


Thanks Petr

For big endian builds I saw

Dump of assembler code for function alloc_pages_current:
   0xc000000000256f00 <+0>:    mflr    r0
   0xc000000000256f04 <+4>:    std     r0,16(r1)
   0xc000000000256f08 <+8>:    bl      0xc000000000009e5c <.mcount>
   0xc000000000256f0c <+12>:    mflr    r0

The offset is 8 bytes. Your earlier patch handled this by adding 16, I
suspect it needs revisiting

Balbir

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel
  2016-02-05  4:40             ` Balbir Singh
@ 2016-02-05 10:22               ` Petr Mladek
  0 siblings, 0 replies; 44+ messages in thread
From: Petr Mladek @ 2016-02-05 10:22 UTC (permalink / raw)
  To: Balbir Singh
  Cc: AKASHI Takahiro, Jiri Kosina, linux-kernel, Steven Rostedt,
	Torsten Duwe, live-patching, linuxppc-dev

On Fri 2016-02-05 15:40:27, Balbir Singh wrote:
> On Thu, Feb 4, 2016 at 10:02 PM, Petr Mladek <pmladek@suse.com> wrote:
> For big endian builds I saw
> 
> Dump of assembler code for function alloc_pages_current:
>    0xc000000000256f00 <+0>:    mflr    r0
>    0xc000000000256f04 <+4>:    std     r0,16(r1)
>    0xc000000000256f08 <+8>:    bl      0xc000000000009e5c <.mcount>
>    0xc000000000256f0c <+12>:    mflr    r0
> 
> The offset is 8 bytes. Your earlier patch handled this by adding 16, I
> suspect it needs revisiting

It seems to be one of the functions that do not access any global
symbol. gcc does not produce TOC handling in this case when
compiled with -mprofile-kernel. I believe that it is a gcc bug.
More details can be found in the thread starting at
http://thread.gmane.org/gmane.linux.kernel/2134759/focus=2141996

Best Regards,
Petr

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v6 8/9] Implement kernel live patching for ppc64le (ABIv2)
  2016-02-02 13:46   ` [PATCH v6 8/9] " Denis Kirjanov
@ 2016-02-10 18:03     ` Torsten Duwe
  0 siblings, 0 replies; 44+ messages in thread
From: Torsten Duwe @ 2016-02-10 18:03 UTC (permalink / raw)
  To: Denis Kirjanov
  Cc: Steven Rostedt, Michael Ellerman, Jiri Kosina, linuxppc-dev,
	linux-kernel, live-patching

On Tue, Feb 02, 2016 at 04:46:27PM +0300, Denis Kirjanov wrote:
> > +
> > +#ifdef CONFIG_LIVEPATCH
> > +static inline int klp_check_compiler_support(void)
> > +{
> > +#if !defined(_CALL_ELF) || _CALL_ELF != 2 ||
> > !defined(CC_USING_MPROFILE_KERNEL)
> > +	return 1;
> > +#endif
> > +	return 0;
> > +}
> This function can be boolean.

Yes, like on the other archs, too.

	Torsten

^ permalink raw reply	[flat|nested] 44+ messages in thread

end of thread, other threads:[~2016-02-10 18:03 UTC | newest]

Thread overview: 44+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-01-25 15:38 [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2) Torsten Duwe
2016-01-25 15:26 ` [PATCH v6 1/9] ppc64 (le): prepare for -mprofile-kernel Torsten Duwe
2016-01-27 10:19   ` Michael Ellerman
2016-01-27 10:44     ` Torsten Duwe
2016-01-28  4:26       ` Michael Ellerman
2016-01-28  4:26         ` Michael Ellerman
2016-01-28 11:50         ` Torsten Duwe
2016-01-27 12:58     ` Alan Modra
2016-01-27 13:45       ` Torsten Duwe
2016-01-28  3:39       ` Michael Ellerman
2016-01-28  3:39         ` Michael Ellerman
2016-02-03  7:23   ` AKASHI Takahiro
2016-02-03  8:55     ` Jiri Kosina
2016-02-03 11:24       ` Torsten Duwe
2016-02-04  9:31         ` AKASHI Takahiro
2016-02-04 11:02           ` Petr Mladek
2016-02-05  4:40             ` Balbir Singh
2016-02-05 10:22               ` Petr Mladek
2016-02-04 21:47           ` Jiri Kosina
2016-01-25 15:27 ` [PATCH v6 2/9] ppc64le FTRACE_WITH_REGS implementation Torsten Duwe
2016-01-25 15:29 ` [PATCH v6 3/9] ppc use ftrace_modify_all_code default Torsten Duwe
2016-01-25 15:29 ` [PATCH v6 4/9] ppc64 ftrace_with_regs configuration variables Torsten Duwe
2016-01-25 15:30 ` [PATCH v6 5/9] ppc64 ftrace_with_regs: spare early boot and low level Torsten Duwe
2016-01-25 15:31 ` [PATCH v6 6/9] ppc64 ftrace: disable profiling for some functions Torsten Duwe
2016-01-25 15:31 ` [PATCH v6 7/9] ppc64 ftrace: disable profiling for some files Torsten Duwe
2016-01-25 15:33 ` [PATCH v6 8/9] Implement kernel live patching for ppc64le (ABIv2) Torsten Duwe
2016-01-26 10:50   ` Miroslav Benes
2016-01-26 12:48     ` Petr Mladek
2016-01-26 13:56       ` Torsten Duwe
2016-02-02 12:12       ` Petr Mladek
2016-02-02 15:45         ` Torsten Duwe
2016-02-02 16:47           ` Petr Mladek
2016-02-02 20:39             ` Jiri Kosina
2016-01-26 14:00     ` Torsten Duwe
2016-01-26 14:14       ` Miroslav Benes
2016-01-27  1:53         ` Jessica Yu
2016-02-02 13:46   ` [PATCH v6 8/9] " Denis Kirjanov
2016-02-10 18:03     ` Torsten Duwe
2016-01-25 15:33 ` [PATCH v6 9/9] Enable LIVEPATCH to be configured on ppc64le and add livepatch.o if it is selected Torsten Duwe
2016-01-27 10:51 ` [PATCH v6 0/9] ftrace with regs + live patching for ppc64 LE (ABI v2) Balbir Singh
2016-01-27 12:19   ` Torsten Duwe
2016-01-28  2:41     ` Balbir Singh
2016-01-28  3:31       ` Michael Ellerman
2016-01-28 11:19         ` Torsten Duwe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.