* [PATCH v2 0/3] arm64 live patching
@ 2018-08-17 10:25 Torsten Duwe
  2018-08-17 10:27 ` [PATCH v2 1/3] arm64: implement ftrace with regs Torsten Duwe
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Torsten Duwe @ 2018-08-17 10:25 UTC (permalink / raw)
  To: Will Deacon, Catalin Marinas, Julien Thierry, Steven Rostedt,
	Josh Poimboeuf, Ingo Molnar, Ard Biesheuvel, Arnd Bergmann,
	AKASHI Takahiro
  Cc: linux-arm-kernel, linux-kernel, live-patching

Hi all!

Here's v2; I hope I have incorporated all feedback properly.
Julien: the #S_X28 + 8 change is in; ftrace_modify_call(), however, is
referenced from generic code: __ftrace_replace_code() in kernel/trace/ftrace.c.

And I'd *really* like some feedback from ARM/Linaro/... on the stack
unwinder!

[Changes from v1]:

* Missing compiler support is now a Makefile error, instead
  of a warning. This keeps the compile log shorter and thus
  makes the problem easier to spot.

* A separate ftrace_regs_caller. Only that one will write out
  a complete pt_regs, for efficiency.

* Replace the use of X19 with X28 to remember the old PC during
  live patch detection, as only that register is saved and restored
  now for non-regs ftrace.

* CONFIG_DYNAMIC_FTRACE_WITH_REGS and CC_USING_PATCHABLE_FUNCTION_ENTRY
  are currently synonymous on arm64, but the code now differentiates
  between them, for a future in which this is no longer the case.

* Clean up "old"/"new" insn value setting vs. #ifdefs.

* #define an INSN_MOV_X9_X30, using the suggested aarch64_insn_gen call,
  and use that instead of an immediate hex value.

	Torsten

* [PATCH v2 1/3] arm64: implement ftrace with regs
  2018-08-17 10:25 [PATCH v2 0/3] arm64 live patching Torsten Duwe
@ 2018-08-17 10:27 ` Torsten Duwe
  2018-08-17 17:20   ` Julien Thierry
                     ` (2 more replies)
  2018-08-17 10:27 ` [PATCH v2 2/3] arm64: implement live patching Torsten Duwe
  2018-08-17 10:27 ` [PATCH v2 3/3] arm64: reliable stacktraces Torsten Duwe
  2 siblings, 3 replies; 8+ messages in thread
From: Torsten Duwe @ 2018-08-17 10:27 UTC (permalink / raw)
  To: Will Deacon, Catalin Marinas, Julien Thierry, Steven Rostedt,
	Josh Poimboeuf, Ingo Molnar, Ard Biesheuvel, Arnd Bergmann,
	AKASHI Takahiro
  Cc: linux-arm-kernel, linux-kernel, live-patching

Check for compiler support of -fpatchable-function-entry and use it
to intercept functions immediately on entry, saving the LR in x9.
Disable ftracing in efi/libstub, because this triggers cross-section
linker errors now (-pg is disabled already for those files).
Add an ftrace_caller which can handle LR in x9, as well as an
ftrace_regs_caller that additionally writes out a set of pt_regs
for inspection.
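
To illustrate (a sketch, not part of the diff below): with
-fpatchable-function-entry=2, every traceable function begins with two
NOPs, which ftrace_make_call() later patches into the LR saver plus the
actual tracer call:

	func:
		nop			// becomes: mov x9, x30  (save LR)
		nop			// becomes: bl ftrace_caller
		...			// original function body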

Signed-off-by: Torsten Duwe <duwe@suse.de>

--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -110,6 +110,7 @@ config ARM64
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_DYNAMIC_FTRACE
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_TRACER
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -78,6 +78,15 @@ ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
 KBUILD_LDFLAGS_MODULE	+= -T $(srctree)/arch/arm64/kernel/module.lds
 endif
 
+ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+  CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+  KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+  ifeq ($(call cc-option,-fpatchable-function-entry=2),)
+    $(error Cannot use CONFIG_DYNAMIC_FTRACE_WITH_REGS: \
+             -fpatchable-function-entry not supported by compiler)
+  endif
+endif
+
 # Default value
 head-y		:= arch/arm64/kernel/head.o
 
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -16,6 +16,13 @@
 #define MCOUNT_ADDR		((unsigned long)_mcount)
 #define MCOUNT_INSN_SIZE	AARCH64_INSN_SIZE
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+#define REC_IP_BRANCH_OFFSET 4
+#else
+#define REC_IP_BRANCH_OFFSET 0
+#endif
+
 #ifndef __ASSEMBLY__
 #include <linux/compat.h>
 
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -7,9 +7,9 @@ CPPFLAGS_vmlinux.lds	:= -DTEXT_OFFSET=$(
 AFLAGS_head.o		:= -DTEXT_OFFSET=$(TEXT_OFFSET)
 CFLAGS_armv8_deprecated.o := -I$(src)
 
-CFLAGS_REMOVE_ftrace.o = -pg
-CFLAGS_REMOVE_insn.o = -pg
-CFLAGS_REMOVE_return_address.o = -pg
+CFLAGS_REMOVE_ftrace.o = -pg $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_insn.o = -pg $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_return_address.o = -pg $(CC_FLAGS_FTRACE)
 
 # Object file lists.
 arm64-obj-y		:= debug-monitors.o entry.o irq.o fpsimd.o		\
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -11,7 +11,8 @@ cflags-$(CONFIG_X86)		+= -m$(BITS) -D__K
 				   -fPIC -fno-strict-aliasing -mno-red-zone \
 				   -mno-mmx -mno-sse -fshort-wchar
 
-cflags-$(CONFIG_ARM64)		:= $(subst -pg,,$(KBUILD_CFLAGS)) -fpie
+cflags-$(CONFIG_ARM64)		:= $(filter-out -pg $(CC_FLAGS_FTRACE)\
+				  ,$(KBUILD_CFLAGS)) -fpie
 cflags-$(CONFIG_ARM)		:= $(subst -pg,,$(KBUILD_CFLAGS)) \
 				   -fno-builtin -fpic -mno-single-pic-base
 
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -13,6 +13,8 @@
 #include <asm/assembler.h>
 #include <asm/ftrace.h>
 #include <asm/insn.h>
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
 
 /*
  * Gcc with -pg will put the following code in the beginning of each function:
@@ -123,6 +125,7 @@ skip_ftrace_call:			// }
 ENDPROC(_mcount)
 
 #else /* CONFIG_DYNAMIC_FTRACE */
+#ifndef CC_USING_PATCHABLE_FUNCTION_ENTRY
 /*
  * _mcount() is used to build the kernel with -pg option, but all the branch
  * instructions to _mcount() are replaced to NOP initially at kernel start up,
@@ -162,6 +165,88 @@ ftrace_graph_call:			// ftrace_graph_cal
 
 	mcount_exit
 ENDPROC(ftrace_caller)
+#else /* CC_USING_PATCHABLE_FUNCTION_ENTRY */
+ENTRY(_mcount)
+	mov     x10, lr
+	mov     lr, x9
+	ret     x10
+ENDPROC(_mcount)
+
+ENTRY(ftrace_regs_caller)
+	stp	x29, x9, [sp, #-16]!
+	sub	sp, sp, #S_FRAME_SIZE
+
+	stp	x10, x11, [sp, #80]
+	stp	x12, x13, [sp, #96]
+	stp	x14, x15, [sp, #112]
+	stp	x16, x17, [sp, #128]
+	stp	x18, x19, [sp, #144]
+	stp	x20, x21, [sp, #160]
+	stp	x22, x23, [sp, #176]
+	stp	x24, x25, [sp, #192]
+	stp	x26, x27, [sp, #208]
+
+	b	ftrace_common
+ENDPROC(ftrace_regs_caller)
+
+ENTRY(ftrace_caller)
+	stp	x29, x9, [sp, #-16]!
+	sub	sp, sp, #S_FRAME_SIZE
+
+ftrace_common:
+	stp	x28, x29, [sp, #224]
+
+	/* save function arguments */
+	stp	x0, x1, [sp]
+	stp	x2, x3, [sp, #16]
+	stp	x4, x5, [sp, #32]
+	stp	x6, x7, [sp, #48]
+	stp	x8, x9, [sp, #64]
+
+	/* The link register at callee entry */
+	str	x9, [sp, #S_LR]
+	/* The program counter just after the ftrace call site */
+	str	lr, [sp, #S_PC]
+	/* The stack pointer as it was on ftrace_caller entry... */
+	add	x29, sp, #S_FRAME_SIZE+16	/* ...is also our new FP */
+	str	x29, [sp, #S_SP]
+
+	adrp    x0, function_trace_op
+	ldr     x2, [x0, #:lo12:function_trace_op]
+	mov	x1, x9		/* saved LR == parent IP */
+	sub	x0, lr, #8	/* function entry == IP */
+	mov	x3, sp		/* pt_regs are @sp */
+
+	.global ftrace_call
+ftrace_call:
+
+	bl	ftrace_stub
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	.global ftrace_graph_call
+ftrace_graph_call:			// ftrace_graph_caller();
+	nop				// If enabled, this will be replaced
+					// by "b ftrace_graph_caller"
+#endif
+
+ftrace_common_return:
+	ldp	x0, x1, [sp]
+	ldp	x2, x3, [sp, #16]
+	ldp	x4, x5, [sp, #32]
+	ldp	x6, x7, [sp, #48]
+	ldp	x8, x9, [sp, #64]
+
+	ldp	x28, x29, [sp, #224]
+
+	ldr	x9, [sp, #S_PC]
+	ldr	lr, [sp, #S_LR]
+	add	sp, sp, #S_FRAME_SIZE+16
+
+	ret	x9
+
+ENDPROC(ftrace_caller)
+
+#endif /* CC_USING_PATCHABLE_FUNCTION_ENTRY */
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 ENTRY(ftrace_stub)
@@ -197,12 +284,20 @@ ENDPROC(ftrace_stub)
  * and run return_to_handler() later on its exit.
  */
 ENTRY(ftrace_graph_caller)
+#ifndef CC_USING_PATCHABLE_FUNCTION_ENTRY
 	mcount_get_lr_addr	  x0	//     pointer to function's saved lr
 	mcount_get_pc		  x1	//     function's pc
 	mcount_get_parent_fp	  x2	//     parent's fp
 	bl	prepare_ftrace_return	// prepare_ftrace_return(&lr, pc, fp)
 
 	mcount_exit
+#else
+	add	x0, sp, #S_LR	/* address of (LR pointing into caller) */
+	ldr	x1, [sp, #S_PC]
+	ldr	x2, [sp, #S_X28 + 8]	/* caller's frame pointer */
+	bl	prepare_ftrace_return
+	b	ftrace_common_return
+#endif
 ENDPROC(ftrace_graph_caller)
 
 /*
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -70,7 +70,8 @@ int ftrace_update_ftrace_func(ftrace_fun
  */
 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
-	unsigned long pc = rec->ip;
+	unsigned long pc = rec->ip+REC_IP_BRANCH_OFFSET;
+	int ret;
 	u32 old, new;
 	long offset = (long)pc - (long)addr;
 
@@ -128,22 +129,54 @@ int ftrace_make_call(struct dyn_ftrace *
 	}
 
 	old = aarch64_insn_gen_nop();
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#define INSN_MOV_X9_X30 aarch64_insn_gen_add_sub_imm(AARCH64_INSN_REG_9, \
+		AARCH64_INSN_REG_30, 0, \
+		AARCH64_INSN_VARIANT_64BIT, AARCH64_INSN_ADSB_ADD)
+	new = INSN_MOV_X9_X30;
+	ret = ftrace_modify_code(pc-REC_IP_BRANCH_OFFSET, old, new, true);
+	if (ret)
+		return ret;
+	smp_wmb();	/* ensure LR saver is in place before ftrace call */
+#endif
 	new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
 
 	return ftrace_modify_code(pc, old, new, true);
 }
 
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+			unsigned long addr)
+{
+	unsigned long pc = rec->ip+REC_IP_BRANCH_OFFSET;
+	u32 old, new;
+
+	old = aarch64_insn_gen_branch_imm(pc, old_addr, true);
+	new = aarch64_insn_gen_branch_imm(pc, addr, true);
+
+	return ftrace_modify_code(pc, old, new, true);
+}
+
+
 /*
  * Turn off the call to ftrace_caller() in instrumented function
  */
 int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
 		    unsigned long addr)
 {
-	unsigned long pc = rec->ip;
+	unsigned long pc = rec->ip+REC_IP_BRANCH_OFFSET;
 	bool validate = true;
+	int ret;
 	u32 old = 0, new;
 	long offset = (long)pc - (long)addr;
 
+#ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY
+	/* -fpatchable-function-entry= does not generate a profiling call
+	 *  initially; the NOPs are already there.
+	 */
+	if (addr == MCOUNT_ADDR)
+		return 0;
+#endif
+
 	if (offset < -SZ_128M || offset >= SZ_128M) {
 #ifdef CONFIG_ARM64_MODULE_PLTS
 		u32 replaced;
@@ -188,7 +221,15 @@ int ftrace_make_nop(struct module *mod,
 
 	new = aarch64_insn_gen_nop();
 
-	return ftrace_modify_code(pc, old, new, validate);
+	ret = ftrace_modify_code(pc, old, new, validate);
+	if (ret)
+		return ret;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	smp_wmb();	/* ftrace call must not remain without LR saver. */
+	old = INSN_MOV_X9_X30;
+	ret = ftrace_modify_code(pc-REC_IP_BRANCH_OFFSET, old, new, true);
+#endif
+	return ret;
 }
 
 void arch_ftrace_update_code(int command)
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -115,6 +115,7 @@
 #define MCOUNT_REC()	. = ALIGN(8);				\
 			__start_mcount_loc = .;			\
 			KEEP(*(__mcount_loc))			\
+			KEEP(*(__patchable_function_entries))	\
 			__stop_mcount_loc = .;
 #else
 #define MCOUNT_REC()
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -61,8 +61,12 @@ extern void __chk_io_ptr(const volatile
 #if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
 #define notrace __attribute__((hotpatch(0,0)))
 #else
+#ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY
+#define notrace __attribute__((patchable_function_entry(0)))
+#else
 #define notrace __attribute__((no_instrument_function))
 #endif
+#endif
 
 /* Intel compiler defines __GNUC__. So we will overwrite implementations
  * coming from above header files here

* [PATCH v2 2/3] arm64: implement live patching
  2018-08-17 10:25 [PATCH v2 0/3] arm64 live patching Torsten Duwe
  2018-08-17 10:27 ` [PATCH v2 1/3] arm64: implement ftrace with regs Torsten Duwe
@ 2018-08-17 10:27 ` Torsten Duwe
  2018-08-17 10:27 ` [PATCH v2 3/3] arm64: reliable stacktraces Torsten Duwe
  2 siblings, 0 replies; 8+ messages in thread
From: Torsten Duwe @ 2018-08-17 10:27 UTC (permalink / raw)
  To: Will Deacon, Catalin Marinas, Julien Thierry, Steven Rostedt,
	Josh Poimboeuf, Ingo Molnar, Ard Biesheuvel, Arnd Bergmann,
	AKASHI Takahiro
  Cc: linux-arm-kernel, linux-kernel, live-patching

Based on ftrace with regs, do the usual things: select HAVE_LIVEPATCH,
source kernel/livepatch/Kconfig, and provide klp_arch_set_pc(). Also
allocate a task flag for whatever consistency handling will be used.
Watch out for interactions with the graph tracer.
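
The flag is meant to be consumed on the return-to-userspace path; a
sketch of that consumer, modeled on what other architectures do in
their work-pending handling (an assumption, not part of the diff below):

	if (thread_flags & _TIF_PATCH_PENDING)
		klp_update_patch_state(current);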

Signed-off-by: Torsten Duwe <duwe@suse.de>

--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -119,6 +119,7 @@ config ARM64
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS
 	select HAVE_IRQ_TIME_ACCOUNTING
+	select HAVE_LIVEPATCH
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP if NUMA
 	select HAVE_NMI
@@ -1349,4 +1350,6 @@ if CRYPTO
 source "arch/arm64/crypto/Kconfig"
 endif
 
+source "kernel/livepatch/Kconfig"
+
 source "lib/Kconfig"
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -76,6 +76,7 @@ void arch_release_task_struct(struct tas
 #define TIF_FOREIGN_FPSTATE	3	/* CPU's FP state is not current's */
 #define TIF_UPROBE		4	/* uprobe breakpoint or singlestep */
 #define TIF_FSCHECK		5	/* Check FS is USER_DS on return */
+#define TIF_PATCH_PENDING	6
 #define TIF_NOHZ		7
 #define TIF_SYSCALL_TRACE	8
 #define TIF_SYSCALL_AUDIT	9
@@ -94,6 +95,7 @@ void arch_release_task_struct(struct tas
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_FOREIGN_FPSTATE	(1 << TIF_FOREIGN_FPSTATE)
+#define _TIF_PATCH_PENDING	(1 << TIF_PATCH_PENDING)
 #define _TIF_NOHZ		(1 << TIF_NOHZ)
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
@@ -106,7 +108,8 @@ void arch_release_task_struct(struct tas
 
 #define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
 				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
-				 _TIF_UPROBE | _TIF_FSCHECK)
+				 _TIF_UPROBE | _TIF_FSCHECK | \
+				 _TIF_PATCH_PENDING)
 
 #define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
 				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
--- /dev/null
+++ b/arch/arm64/include/asm/livepatch.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * livepatch.h - arm64-specific Kernel Live Patching Core
+ *
+ * Copyright (C) 2016,2018 SUSE
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _ASM_ARM64_LIVEPATCH_H
+#define _ASM_ARM64_LIVEPATCH_H
+
+#include <linux/module.h>
+#include <linux/ftrace.h>
+
+#ifdef CONFIG_LIVEPATCH
+static inline int klp_check_compiler_support(void)
+{
+	return 0;
+}
+
+static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
+{
+	regs->pc = ip;
+}
+#endif /* CONFIG_LIVEPATCH */
+
+#endif /* _ASM_ARM64_LIVEPATCH_H */
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -209,6 +209,9 @@ ftrace_common:
 	str	x9, [sp, #S_LR]
 	/* The program counter just after the ftrace call site */
 	str	lr, [sp, #S_PC]
+#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
+	mov	x28,lr          /* remember old return address */
+#endif
 	/* The stack pointer as it was on ftrace_caller entry... */
 	add	x29, sp, #S_FRAME_SIZE+16	/* ...is also our new FP */
 	str	x29, [sp, #S_SP]
@@ -224,6 +227,16 @@ ftrace_call:
 
 	bl	ftrace_stub
 
+#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
+	/* Is the trace function a live patcher, and has it messed with
+	 * the return address?
+	 */
+	ldr	x9, [sp, #S_PC]
+	cmp	x9, x28		/* compare with the value we remembered */
+	/* to not call graph tracer's "call" mechanism twice! */
+	b.ne	ftrace_common_return
+#endif
+
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	.global ftrace_graph_call
 ftrace_graph_call:			// ftrace_graph_caller();

* [PATCH v2 3/3] arm64: reliable stacktraces
  2018-08-17 10:25 [PATCH v2 0/3] arm64 live patching Torsten Duwe
  2018-08-17 10:27 ` [PATCH v2 1/3] arm64: implement ftrace with regs Torsten Duwe
  2018-08-17 10:27 ` [PATCH v2 2/3] arm64: implement live patching Torsten Duwe
@ 2018-08-17 10:27 ` Torsten Duwe
  2018-08-20  8:28   ` Julien Thierry
  2 siblings, 1 reply; 8+ messages in thread
From: Torsten Duwe @ 2018-08-17 10:27 UTC (permalink / raw)
  To: Will Deacon, Catalin Marinas, Julien Thierry, Steven Rostedt,
	Josh Poimboeuf, Ingo Molnar, Ard Biesheuvel, Arnd Bergmann,
	AKASHI Takahiro
  Cc: linux-arm-kernel, linux-kernel, live-patching

This is more of an RFC in the original sense: is this basically
the correct approach? (I had to tweak the save_stack API a bit.)

In particular the code does not detect interrupts and exception
frames, and does not yet check whether the code address is valid.
The latter check would also have to be omitted for the latest frame
on other tasks' stacks. This would require some more tweaking.

unwind_frame() now reports whether we had to stop normally or due to
an error condition; walk_stackframe() will pass that info.
__save_stack_trace() is used for a start to check the validity of a
frame; maybe save_stack_trace_tsk_reliable() will need its own callback.
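
For context, a sketch of the intended consumer, modeled on the checks in
kernel/livepatch/transition.c (the function name and error handling here
are my approximation):

	static int check_task_stack(struct task_struct *task)
	{
		static unsigned long entries[MAX_STACK_ENTRIES];
		struct stack_trace trace;

		trace.skip = 0;
		trace.nr_entries = 0;
		trace.max_entries = MAX_STACK_ENTRIES;
		trace.entries = entries;

		/* < 0 means the trace is incomplete or unreliable */
		return save_stack_trace_tsk_reliable(task, &trace);
	}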

The question is whether this change, once complete, is sufficient
(as on powerpc) or whether a port of objtool is needed, like x86.

I can dig into this myself and draw conclusions, but I'd prefer
to have some input from ARM people here...

Signed-off-by: Torsten Duwe <duwe@suse.de>

--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -127,8 +127,9 @@ config ARM64
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
-	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RCU_TABLE_FREE
+	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_RELIABLE_STACKTRACE
 	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_KPROBES
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -33,7 +33,7 @@ struct stackframe {
 };
 
 extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
-extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
+extern int walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
 			    int (*fn)(struct stackframe *, void *), void *data);
 extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk);
 
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index d5718a060672..fe0dd4745ff3 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -81,23 +81,27 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 	 * both are NULL.
 	 */
 	if (!frame->fp && !frame->pc)
-		return -EINVAL;
+		return 1;
 
 	return 0;
 }
 
-void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
+int notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
 		     int (*fn)(struct stackframe *, void *), void *data)
 {
 	while (1) {
 		int ret;
 
-		if (fn(frame, data))
-			break;
+		ret = fn(frame, data);
+		if (ret)
+			return ret;
 		ret = unwind_frame(tsk, frame);
 		if (ret < 0)
+			return ret;
+		if (ret > 0)
 			break;
 	}
+	return 0;
 }
 
 #ifdef CONFIG_STACKTRACE
@@ -145,14 +149,15 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 
-static noinline void __save_stack_trace(struct task_struct *tsk,
+static noinline int __save_stack_trace(struct task_struct *tsk,
 	struct stack_trace *trace, unsigned int nosched)
 {
 	struct stack_trace_data data;
 	struct stackframe frame;
+	int ret;
 
 	if (!try_get_task_stack(tsk))
-		return;
+		return -EBUSY;
 
 	data.trace = trace;
 	data.skip = trace->skip;
@@ -171,11 +176,12 @@ static noinline void __save_stack_trace(struct task_struct *tsk,
 	frame.graph = tsk->curr_ret_stack;
 #endif
 
-	walk_stackframe(tsk, &frame, save_trace, &data);
+	ret = walk_stackframe(tsk, &frame, save_trace, &data);
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 
 	put_task_stack(tsk);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
 
@@ -190,4 +196,12 @@ void save_stack_trace(struct stack_trace *trace)
 }
 
 EXPORT_SYMBOL_GPL(save_stack_trace);
+
+int save_stack_trace_tsk_reliable(struct task_struct *tsk,
+				  struct stack_trace *trace)
+{
+	return __save_stack_trace(tsk, trace, 1);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk_reliable);
+
 #endif


* Re: [PATCH v2 1/3] arm64: implement ftrace with regs
  2018-08-17 10:27 ` [PATCH v2 1/3] arm64: implement ftrace with regs Torsten Duwe
@ 2018-08-17 17:20   ` Julien Thierry
  2018-08-20  6:55   ` Ard Biesheuvel
  2018-08-29  7:11   ` AKASHI Takahiro
  2 siblings, 0 replies; 8+ messages in thread
From: Julien Thierry @ 2018-08-17 17:20 UTC (permalink / raw)
  To: Torsten Duwe, Will Deacon, Catalin Marinas, Steven Rostedt,
	Josh Poimboeuf, Ingo Molnar, Ard Biesheuvel, Arnd Bergmann,
	AKASHI Takahiro
  Cc: linux-arm-kernel, linux-kernel, live-patching

Hi Torsten,

On 17/08/18 11:27, Torsten Duwe wrote:
> Check for compiler support of -fpatchable-function-entry and use it
> to intercept functions immediately on entry, saving the LR in x9.
> Disable ftracing in efi/libstub, because this triggers cross-section
> linker errors now (-pg is disabled already for those files).
> Add an ftrace_caller which can handle LR in x9, as well as an
> ftrace_regs_caller that additionally writes out a set of pt_regs
> for inspection.
> 
> Signed-off-by: Torsten Duwe <duwe@suse.de>
> 
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -110,6 +110,7 @@ config ARM64
>   	select HAVE_DEBUG_KMEMLEAK
>   	select HAVE_DMA_CONTIGUOUS
>   	select HAVE_DYNAMIC_FTRACE
> +	select HAVE_DYNAMIC_FTRACE_WITH_REGS

Actually there's an issue here. CONFIG_DYNAMIC_FTRACE_WITH_REGS is 
automatically selected if the arch has HAVE_DYNAMIC_FTRACE_WITH_REGS and 
DYNAMIC_FTRACE, meaning we can no longer enable DYNAMIC_FTRACE without 
CONFIG_DYNAMIC_FTRACE_WITH_REGS.

So we wouldn't be able to build an arm64 kernel with dynamic function 
tracing anymore without a recent enough compiler :(.

Is there a way to have "HAVE_DYNAMIC_FTRACE_WITH_REGS" depending on 
whether the compiler supports the patchable function entry or not?

(Not familiar enough with the build system to suggest something here).
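
Perhaps something along these lines, assuming the Kconfig preprocessor's
$(cc-option ...) can be used to test the flag (untested sketch):

	config CC_HAS_PATCHABLE_FUNCTION_ENTRY
		def_bool $(cc-option,-fpatchable-function-entry=2)

and then, under config ARM64:

	select HAVE_DYNAMIC_FTRACE_WITH_REGS if CC_HAS_PATCHABLE_FUNCTION_ENTRY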

>   	select HAVE_EFFICIENT_UNALIGNED_ACCESS
>   	select HAVE_FTRACE_MCOUNT_RECORD
>   	select HAVE_FUNCTION_TRACER
> --- a/arch/arm64/Makefile
> +++ b/arch/arm64/Makefile
> @@ -78,6 +78,15 @@ ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
>   KBUILD_LDFLAGS_MODULE	+= -T $(srctree)/arch/arm64/kernel/module.lds
>   endif
>   
> +ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS

use "ifeq($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y)", 
CONFIG_DYNAMIC_FTRACE_WITH_REGS is defined even if it is set to 'n' and 
will still try to use the compiler option.

> +  CC_FLAGS_FTRACE := -fpatchable-function-entry=2
> +  KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
> +  ifeq ($(call cc-option,-fpatchable-function-entry=2),)
> +    $(error Cannot use CONFIG_DYNAMIC_FTRACE_WITH_REGS: \
> +             -fpatchable-function-entry not supported by compiler)
> +  endif
> +endif
> +
>   # Default value
>   head-y		:= arch/arm64/kernel/head.o
>   
> --- a/arch/arm64/include/asm/ftrace.h
> +++ b/arch/arm64/include/asm/ftrace.h
> @@ -16,6 +16,13 @@
>   #define MCOUNT_ADDR		((unsigned long)_mcount)
>   #define MCOUNT_INSN_SIZE	AARCH64_INSN_SIZE
>   
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +#define ARCH_SUPPORTS_FTRACE_OPS 1
> +#define REC_IP_BRANCH_OFFSET 4
> +#else
> +#define REC_IP_BRANCH_OFFSET 0
> +#endif
> +
>   #ifndef __ASSEMBLY__
>   #include <linux/compat.h>
>   
> --- a/arch/arm64/kernel/Makefile
> +++ b/arch/arm64/kernel/Makefile
> @@ -7,9 +7,9 @@ CPPFLAGS_vmlinux.lds	:= -DTEXT_OFFSET=$(
>   AFLAGS_head.o		:= -DTEXT_OFFSET=$(TEXT_OFFSET)
>   CFLAGS_armv8_deprecated.o := -I$(src)
>   
> -CFLAGS_REMOVE_ftrace.o = -pg
> -CFLAGS_REMOVE_insn.o = -pg
> -CFLAGS_REMOVE_return_address.o = -pg
> +CFLAGS_REMOVE_ftrace.o = -pg $(CC_FLAGS_FTRACE)
> +CFLAGS_REMOVE_insn.o = -pg $(CC_FLAGS_FTRACE)
> +CFLAGS_REMOVE_return_address.o = -pg $(CC_FLAGS_FTRACE)
>   
>   # Object file lists.
>   arm64-obj-y		:= debug-monitors.o entry.o irq.o fpsimd.o		\
> --- a/drivers/firmware/efi/libstub/Makefile
> +++ b/drivers/firmware/efi/libstub/Makefile
> @@ -11,7 +11,8 @@ cflags-$(CONFIG_X86)		+= -m$(BITS) -D__K
>   				   -fPIC -fno-strict-aliasing -mno-red-zone \
>   				   -mno-mmx -mno-sse -fshort-wchar
>   
> -cflags-$(CONFIG_ARM64)		:= $(subst -pg,,$(KBUILD_CFLAGS)) -fpie
> +cflags-$(CONFIG_ARM64)		:= $(filter-out -pg $(CC_FLAGS_FTRACE)\
> +				  ,$(KBUILD_CFLAGS)) -fpie
>   cflags-$(CONFIG_ARM)		:= $(subst -pg,,$(KBUILD_CFLAGS)) \
>   				   -fno-builtin -fpic -mno-single-pic-base
>   
> --- a/arch/arm64/kernel/entry-ftrace.S
> +++ b/arch/arm64/kernel/entry-ftrace.S
> @@ -13,6 +13,8 @@
>   #include <asm/assembler.h>
>   #include <asm/ftrace.h>
>   #include <asm/insn.h>
> +#include <asm/asm-offsets.h>
> +#include <asm/assembler.h>
>   
>   /*
>    * Gcc with -pg will put the following code in the beginning of each function:
> @@ -123,6 +125,7 @@ skip_ftrace_call:			// }
>   ENDPROC(_mcount)
>   
>   #else /* CONFIG_DYNAMIC_FTRACE */
> +#ifndef CC_USING_PATCHABLE_FUNCTION_ENTRY
>   /*
>    * _mcount() is used to build the kernel with -pg option, but all the branch
>    * instructions to _mcount() are replaced to NOP initially at kernel start up,
> @@ -162,6 +165,88 @@ ftrace_graph_call:			// ftrace_graph_cal
>   
>   	mcount_exit
>   ENDPROC(ftrace_caller)
> +#else /* CC_USING_PATCHABLE_FUNCTION_ENTRY */
> +ENTRY(_mcount)
> +	mov     x10, lr
> +	mov     lr, x9
> +	ret     x10
> +ENDPROC(_mcount)
> +
> +ENTRY(ftrace_regs_caller)
> +	stp	x29, x9, [sp, #-16]!
> +	sub	sp, sp, #S_FRAME_SIZE
> +
> +	stp	x10, x11, [sp, #80]
> +	stp	x12, x13, [sp, #96]
> +	stp	x14, x15, [sp, #112]
> +	stp	x16, x17, [sp, #128]
> +	stp	x18, x19, [sp, #144]
> +	stp	x20, x21, [sp, #160]
> +	stp	x22, x23, [sp, #176]
> +	stp	x24, x25, [sp, #192]
> +	stp	x26, x27, [sp, #208]
> +
> +	b	ftrace_common
> +ENDPROC(ftrace_regs_caller)
> +
> +ENTRY(ftrace_caller)
> +	stp	x29, x9, [sp, #-16]!
> +	sub	sp, sp, #S_FRAME_SIZE
> +
> +ftrace_common:
> +	stp	x28, x29, [sp, #224]
> +
> +	/* save function arguments */
> +	stp	x0, x1, [sp]
> +	stp	x2, x3, [sp, #16]
> +	stp	x4, x5, [sp, #32]
> +	stp	x6, x7, [sp, #48]
> +	stp	x8, x9, [sp, #64]
> +
> +	/* The link Register at callee entry */
> +	str	x9, [sp, #S_LR]
> +	/* The program counter just after the ftrace call site */
> +	str	lr, [sp, #S_PC]
> +	/* The stack pointer as it was on ftrace_caller entry... */
> +	add	x29, sp, #S_FRAME_SIZE+16	/* ...is also our new FP */
> +	str	x29, [sp, #S_SP]
> +
> +	adrp    x0, function_trace_op
> +	ldr     x2, [x0, #:lo12:function_trace_op]
> +	mov	x1, x9		/* saved LR == parent IP */
> +	sub	x0, lr, #8	/* function entry == IP */
> +	mov	x3, sp		/* pt_regs are @sp */
> +
> +	.global ftrace_call
> +ftrace_call:
> +
> +	bl	ftrace_stub

I should've asked this in my initial review: why are we always 
calling ftrace_stub rather than having a patchable "nop" like in the 
!CC_USING_PATCHABLE_FUNCTION_ENTRY version?
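
For reference, the "bl ftrace_stub" at the ftrace_call label is itself
the patch site: arm64's existing ftrace_update_ftrace_func() rewrites
it, roughly like this:

	int ftrace_update_ftrace_func(ftrace_func_t func)
	{
		unsigned long pc = (unsigned long)&ftrace_call;
		u32 new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func,
						      AARCH64_INSN_BRANCH_LINK);

		/* turn "bl ftrace_stub" into "bl <tracer>" */
		return ftrace_modify_code(pc, 0, new, false);
	}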

> +
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +	.global ftrace_graph_call
> +ftrace_graph_call:			// ftrace_graph_caller();
> +	nop				// If enabled, this will be replaced
> +					// "b ftrace_graph_caller"
> +#endif
> +
> +ftrace_common_return:
> +	ldp	x0, x1, [sp]
> +	ldp	x2, x3, [sp, #16]
> +	ldp	x4, x5, [sp, #32]
> +	ldp	x6, x7, [sp, #48]
> +	ldp	x8, x9, [sp, #64]
> +
> +	ldp	x28, x29, [sp, #224]
> +
> +	ldr	x9, [sp, #S_PC]
> +	ldr	lr, [sp, #S_LR]
> +	add	sp, sp, #S_FRAME_SIZE+16
> +
> +	ret	x9
> +
> +ENDPROC(ftrace_caller)
> +
> +#endif /* CC_USING_PATCHABLE_FUNCTION_ENTRY */
>   #endif /* CONFIG_DYNAMIC_FTRACE */
>   
>   ENTRY(ftrace_stub)
> @@ -197,12 +284,20 @@ ENDPROC(ftrace_stub)
>    * and run return_to_handler() later on its exit.
>    */
>   ENTRY(ftrace_graph_caller)
> +#ifndef CC_USING_PATCHABLE_FUNCTION_ENTRY
>   	mcount_get_lr_addr	  x0	//     pointer to function's saved lr
>   	mcount_get_pc		  x1	//     function's pc
>   	mcount_get_parent_fp	  x2	//     parent's fp
>   	bl	prepare_ftrace_return	// prepare_ftrace_return(&lr, pc, fp)
>   
>   	mcount_exit
> +#else
> +	add	x0, sp, #S_LR	/* address of (LR pointing into caller) */
> +	ldr	x1, [sp, #S_PC]
> +	ldr	x2, [sp, #S_X28 + 8]	/* caller's frame pointer */
> +	bl	prepare_ftrace_return
> +	b	ftrace_common_return
> +#endif
>   ENDPROC(ftrace_graph_caller)
>   
>   /*
> --- a/arch/arm64/kernel/ftrace.c
> +++ b/arch/arm64/kernel/ftrace.c
> @@ -70,7 +70,8 @@ int ftrace_update_ftrace_func(ftrace_fun
>    */
>   int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
>   {
> -	unsigned long pc = rec->ip;
> +	unsigned long pc = rec->ip+REC_IP_BRANCH_OFFSET;
> +	int ret;
>   	u32 old, new;
>   	long offset = (long)pc - (long)addr;
>   
> @@ -128,22 +129,54 @@ int ftrace_make_call(struct dyn_ftrace *
>   	}
>   
>   	old = aarch64_insn_gen_nop();
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +#define INSN_MOV_X9_X30 aarch64_insn_gen_add_sub_imm(AARCH64_INSN_REG_9, \
> +		AARCH64_INSN_REG_30, 0, \
> +		AARCH64_INSN_VARIANT_64BIT, AARCH64_INSN_ADSB_ADD)

It might just be me, but since this macro is used in two different 
functions, I'm not overly fond of defining it in the middle of the 
function code.

Thanks,

> +	new = INSN_MOV_X9_X30;
> +	ret = ftrace_modify_code(pc-REC_IP_BRANCH_OFFSET, old, new, true);
> +	if (ret)
> +		return ret;
> +	smp_wmb();	/* ensure LR saver is in place before ftrace call */
> +#endif
>   	new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
>   
>   	return ftrace_modify_code(pc, old, new, true);
>   }
>   
> +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
> +			unsigned long addr)
> +{
> +	unsigned long pc = rec->ip+REC_IP_BRANCH_OFFSET;
> +	u32 old, new;
> +
> +	old = aarch64_insn_gen_branch_imm(pc, old_addr, true);
> +	new = aarch64_insn_gen_branch_imm(pc, addr, true);
> +
> +	return ftrace_modify_code(pc, old, new, true);
> +}
> +
> +
>   /*
>    * Turn off the call to ftrace_caller() in instrumented function
>    */
>   int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
>   		    unsigned long addr)
>   {
> -	unsigned long pc = rec->ip;
> +	unsigned long pc = rec->ip+REC_IP_BRANCH_OFFSET;
>   	bool validate = true;
> +	int ret;
>   	u32 old = 0, new;
>   	long offset = (long)pc - (long)addr;
>   
> +#ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY
> +	/* -fpatchable-function-entry= does not generate a profiling call
> +	 *  initially; the NOPs are already there.
> +	 */
> +	if (addr == MCOUNT_ADDR)
> +		return 0;
> +#endif
> +
>   	if (offset < -SZ_128M || offset >= SZ_128M) {
>   #ifdef CONFIG_ARM64_MODULE_PLTS
>   		u32 replaced;
> @@ -188,7 +221,15 @@ int ftrace_make_nop(struct module *mod,
>   
>   	new = aarch64_insn_gen_nop();
>   
> -	return ftrace_modify_code(pc, old, new, validate);
> +	ret = ftrace_modify_code(pc, old, new, validate);
> +	if (ret)
> +		return ret;
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +	smp_wmb();	/* ftrace call must not remain without LR saver. */
> +	old = INSN_MOV_X9_X30;
> +	ret = ftrace_modify_code(pc-REC_IP_BRANCH_OFFSET, old, new, true);
> +#endif
> +	return ret;
>   }
>   
>   void arch_ftrace_update_code(int command)
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -115,6 +115,7 @@
>   #define MCOUNT_REC()	. = ALIGN(8);				\
>   			__start_mcount_loc = .;			\
>   			KEEP(*(__mcount_loc))			\
> +			KEEP(*(__patchable_function_entries))	\
>   			__stop_mcount_loc = .;
>   #else
>   #define MCOUNT_REC()
> --- a/include/linux/compiler_types.h
> +++ b/include/linux/compiler_types.h
> @@ -61,8 +61,12 @@ extern void __chk_io_ptr(const volatile
>   #if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
>   #define notrace __attribute__((hotpatch(0,0)))
>   #else
> +#ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY
> +#define notrace __attribute__((patchable_function_entry(0)))
> +#else
>   #define notrace __attribute__((no_instrument_function))
>   #endif
> +#endif
>   
>   /* Intel compiler defines __GNUC__. So we will overwrite implementations
>    * coming from above header files here
> 

-- 
Julien Thierry

* Re: [PATCH v2 1/3] arm64: implement ftrace with regs
  2018-08-17 10:27 ` [PATCH v2 1/3] arm64: implement ftrace with regs Torsten Duwe
  2018-08-17 17:20   ` Julien Thierry
@ 2018-08-20  6:55   ` Ard Biesheuvel
  2018-08-29  7:11   ` AKASHI Takahiro
  2 siblings, 0 replies; 8+ messages in thread
From: Ard Biesheuvel @ 2018-08-20  6:55 UTC (permalink / raw)
  To: Torsten Duwe
  Cc: Will Deacon, Catalin Marinas, Julien Thierry, Steven Rostedt,
	Josh Poimboeuf, Ingo Molnar, Arnd Bergmann, AKASHI Takahiro,
	linux-arm-kernel, Linux Kernel Mailing List, live-patching

On 17 August 2018 at 12:27, Torsten Duwe <duwe@lst.de> wrote:
> Check for compiler support of -fpatchable-function-entry and use it
> to intercept functions immediately on entry, saving the LR in x9.

Could you please add a note that this is safe because IPA register
allocation is turned off as well by that option? I had to go and find
the GCC patches to confirm that this is the case.

> Disable ftracing in efi/libstub, because this triggers cross-section
> linker errors now (-pg is disabled already for those files).
> Add an ftrace_caller which can handle LR in x9, as well as an
> ftrace_regs_caller that additionally writes out a set of pt_regs
> for inspection.
>
> Signed-off-by: Torsten Duwe <duwe@suse.de>
>
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -110,6 +110,7 @@ config ARM64
>         select HAVE_DEBUG_KMEMLEAK
>         select HAVE_DMA_CONTIGUOUS
>         select HAVE_DYNAMIC_FTRACE
> +       select HAVE_DYNAMIC_FTRACE_WITH_REGS
>         select HAVE_EFFICIENT_UNALIGNED_ACCESS
>         select HAVE_FTRACE_MCOUNT_RECORD
>         select HAVE_FUNCTION_TRACER
> --- a/arch/arm64/Makefile
> +++ b/arch/arm64/Makefile
> @@ -78,6 +78,15 @@ ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
>  KBUILD_LDFLAGS_MODULE  += -T $(srctree)/arch/arm64/kernel/module.lds
>  endif
>
> +ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +  CC_FLAGS_FTRACE := -fpatchable-function-entry=2
> +  KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
> +  ifeq ($(call cc-option,-fpatchable-function-entry=2),)
> +    $(error Cannot use CONFIG_DYNAMIC_FTRACE_WITH_REGS: \
> +             -fpatchable-function-entry not supported by compiler)
> +  endif
> +endif
> +
>  # Default value
>  head-y         := arch/arm64/kernel/head.o
>
> --- a/arch/arm64/include/asm/ftrace.h
> +++ b/arch/arm64/include/asm/ftrace.h
> @@ -16,6 +16,13 @@
>  #define MCOUNT_ADDR            ((unsigned long)_mcount)
>  #define MCOUNT_INSN_SIZE       AARCH64_INSN_SIZE
>
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +#define ARCH_SUPPORTS_FTRACE_OPS 1
> +#define REC_IP_BRANCH_OFFSET 4
> +#else
> +#define REC_IP_BRANCH_OFFSET 0
> +#endif
> +
>  #ifndef __ASSEMBLY__
>  #include <linux/compat.h>
>
> --- a/arch/arm64/kernel/Makefile
> +++ b/arch/arm64/kernel/Makefile
> @@ -7,9 +7,9 @@ CPPFLAGS_vmlinux.lds    := -DTEXT_OFFSET=$(
>  AFLAGS_head.o          := -DTEXT_OFFSET=$(TEXT_OFFSET)
>  CFLAGS_armv8_deprecated.o := -I$(src)
>
> -CFLAGS_REMOVE_ftrace.o = -pg
> -CFLAGS_REMOVE_insn.o = -pg
> -CFLAGS_REMOVE_return_address.o = -pg
> +CFLAGS_REMOVE_ftrace.o = -pg $(CC_FLAGS_FTRACE)
> +CFLAGS_REMOVE_insn.o = -pg $(CC_FLAGS_FTRACE)
> +CFLAGS_REMOVE_return_address.o = -pg $(CC_FLAGS_FTRACE)
>
>  # Object file lists.
>  arm64-obj-y            := debug-monitors.o entry.o irq.o fpsimd.o              \
> --- a/drivers/firmware/efi/libstub/Makefile
> +++ b/drivers/firmware/efi/libstub/Makefile
> @@ -11,7 +11,8 @@ cflags-$(CONFIG_X86)          += -m$(BITS) -D__K
>                                    -fPIC -fno-strict-aliasing -mno-red-zone \
>                                    -mno-mmx -mno-sse -fshort-wchar
>
> -cflags-$(CONFIG_ARM64)         := $(subst -pg,,$(KBUILD_CFLAGS)) -fpie
> +cflags-$(CONFIG_ARM64)         := $(filter-out -pg $(CC_FLAGS_FTRACE)\
> +                                 ,$(KBUILD_CFLAGS)) -fpie
>  cflags-$(CONFIG_ARM)           := $(subst -pg,,$(KBUILD_CFLAGS)) \
>                                    -fno-builtin -fpic -mno-single-pic-base
>
> --- a/arch/arm64/kernel/entry-ftrace.S
> +++ b/arch/arm64/kernel/entry-ftrace.S
> @@ -13,6 +13,8 @@
>  #include <asm/assembler.h>
>  #include <asm/ftrace.h>
>  #include <asm/insn.h>
> +#include <asm/asm-offsets.h>
> +#include <asm/assembler.h>
>
>  /*
>   * Gcc with -pg will put the following code in the beginning of each function:
> @@ -123,6 +125,7 @@ skip_ftrace_call:                   // }
>  ENDPROC(_mcount)
>
>  #else /* CONFIG_DYNAMIC_FTRACE */
> +#ifndef CC_USING_PATCHABLE_FUNCTION_ENTRY
>  /*
>   * _mcount() is used to build the kernel with -pg option, but all the branch
>   * instructions to _mcount() are replaced to NOP initially at kernel start up,
> @@ -162,6 +165,88 @@ ftrace_graph_call:                 // ftrace_graph_cal
>
>         mcount_exit
>  ENDPROC(ftrace_caller)
> +#else /* CC_USING_PATCHABLE_FUNCTION_ENTRY */
> +ENTRY(_mcount)
> +       mov     x10, lr
> +       mov     lr, x9
> +       ret     x10
> +ENDPROC(_mcount)
> +
> +ENTRY(ftrace_regs_caller)
> +       stp     x29, x9, [sp, #-16]!
> +       sub     sp, sp, #S_FRAME_SIZE
> +
> +       stp     x10, x11, [sp, #80]
> +       stp     x12, x13, [sp, #96]
> +       stp     x14, x15, [sp, #112]
> +       stp     x16, x17, [sp, #128]
> +       stp     x18, x19, [sp, #144]
> +       stp     x20, x21, [sp, #160]
> +       stp     x22, x23, [sp, #176]
> +       stp     x24, x25, [sp, #192]
> +       stp     x26, x27, [sp, #208]
> +

Any reason in particular this stack frame is the wrong way around? We
usually keep the frame pointer at the top.

> +       b       ftrace_common
> +ENDPROC(ftrace_regs_caller)
> +
> +ENTRY(ftrace_caller)
> +       stp     x29, x9, [sp, #-16]!
> +       sub     sp, sp, #S_FRAME_SIZE
> +
> +ftrace_common:
> +       stp     x28, x29, [sp, #224]
> +
> +       /* save function arguments */
> +       stp     x0, x1, [sp]
> +       stp     x2, x3, [sp, #16]
> +       stp     x4, x5, [sp, #32]
> +       stp     x6, x7, [sp, #48]
> +       stp     x8, x9, [sp, #64]
> +
> +       /* The link Register at callee entry */
> +       str     x9, [sp, #S_LR]
> +       /* The program counter just after the ftrace call site */
> +       str     lr, [sp, #S_PC]
> +       /* The stack pointer as it was on ftrace_caller entry... */
> +       add     x29, sp, #S_FRAME_SIZE+16       /* ...is also our new FP */
> +       str     x29, [sp, #S_SP]
> +
> +       adrp    x0, function_trace_op
> +       ldr     x2, [x0, #:lo12:function_trace_op]

Please use the ldr_l macro here
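
That is, assuming the current asm/assembler.h definition of the macro,
the adrp/ldr pair above collapses to:

	ldr_l	x2, function_trace_op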

> +       mov     x1, x9          /* saved LR == parent IP */
> +       sub     x0, lr, #8      /* function entry == IP */
> +       mov     x3, sp          /* pt_regs are @sp */
> +
> +       .global ftrace_call
> +ftrace_call:
> +
> +       bl      ftrace_stub
> +
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +       .global ftrace_graph_call
> +ftrace_graph_call:                     // ftrace_graph_caller();
> +       nop                             // If enabled, this will be replaced
> +                                       // "b ftrace_graph_caller"
> +#endif
> +
> +ftrace_common_return:
> +       ldp     x0, x1, [sp]
> +       ldp     x2, x3, [sp, #16]
> +       ldp     x4, x5, [sp, #32]
> +       ldp     x6, x7, [sp, #48]
> +       ldp     x8, x9, [sp, #64]
> +
> +       ldp     x28, x29, [sp, #224]
> +
> +       ldr     x9, [sp, #S_PC]
> +       ldr     lr, [sp, #S_LR]
> +       add     sp, sp, #S_FRAME_SIZE+16
> +
> +       ret     x9
> +
> +ENDPROC(ftrace_caller)
> +
> +#endif /* CC_USING_PATCHABLE_FUNCTION_ENTRY */
>  #endif /* CONFIG_DYNAMIC_FTRACE */
>
>  ENTRY(ftrace_stub)
> @@ -197,12 +284,20 @@ ENDPROC(ftrace_stub)
>   * and run return_to_handler() later on its exit.
>   */
>  ENTRY(ftrace_graph_caller)
> +#ifndef CC_USING_PATCHABLE_FUNCTION_ENTRY
>         mcount_get_lr_addr        x0    //     pointer to function's saved lr
>         mcount_get_pc             x1    //     function's pc
>         mcount_get_parent_fp      x2    //     parent's fp
>         bl      prepare_ftrace_return   // prepare_ftrace_return(&lr, pc, fp)
>
>         mcount_exit
> +#else
> +       add     x0, sp, #S_LR   /* address of (LR pointing into caller) */
> +       ldr     x1, [sp, #S_PC]
> +       ldr     x2, [sp, #S_X28 + 8]    /* caller's frame pointer */
> +       bl      prepare_ftrace_return
> +       b       ftrace_common_return
> +#endif
>  ENDPROC(ftrace_graph_caller)
>
>  /*
> --- a/arch/arm64/kernel/ftrace.c
> +++ b/arch/arm64/kernel/ftrace.c
> @@ -70,7 +70,8 @@ int ftrace_update_ftrace_func(ftrace_fun
>   */
>  int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
>  {
> -       unsigned long pc = rec->ip;
> +       unsigned long pc = rec->ip+REC_IP_BRANCH_OFFSET;

We usually put spaces around arithmetic operators.

> +       int ret;
>         u32 old, new;
>         long offset = (long)pc - (long)addr;
>
> @@ -128,22 +129,54 @@ int ftrace_make_call(struct dyn_ftrace *
>         }
>
>         old = aarch64_insn_gen_nop();
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS

Please use IS_ENABLED() so that the code below is visible to the
compiler regardless of whether it is actually enabled.
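
A sketch of what that could look like here (same logic, minus the
preprocessor conditional):

	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) {
		/* "mov x9, x30" is encoded as "add x9, x30, #0" */
		new = aarch64_insn_gen_add_sub_imm(AARCH64_INSN_REG_9,
						   AARCH64_INSN_REG_30, 0,
						   AARCH64_INSN_VARIANT_64BIT,
						   AARCH64_INSN_ADSB_ADD);
		ret = ftrace_modify_code(pc - REC_IP_BRANCH_OFFSET,
					 old, new, true);
		if (ret)
			return ret;
		/* ensure the LR saver is in place before the ftrace call */
		smp_wmb();
	}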

> +#define INSN_MOV_X9_X30 aarch64_insn_gen_add_sub_imm(AARCH64_INSN_REG_9, \
> +               AARCH64_INSN_REG_30, 0, \
> +               AARCH64_INSN_VARIANT_64BIT, AARCH64_INSN_ADSB_ADD)

Please don't define the macro inline like that. In fact, I think you
can drop it altogether and add a comment instead to clarify that
"add x9, x30, #0" == "mov x9, x30".

> +       new = INSN_MOV_X9_X30;
> +       ret = ftrace_modify_code(pc-REC_IP_BRANCH_OFFSET, old, new, true);
> +       if (ret)
> +               return ret;
> +       smp_wmb();      /* ensure LR saver is in place before ftrace call */
> +#endif
>         new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
>
>         return ftrace_modify_code(pc, old, new, true);
>  }
>
> +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
> +                       unsigned long addr)
> +{
> +       unsigned long pc = rec->ip+REC_IP_BRANCH_OFFSET;
> +       u32 old, new;
> +
> +       old = aarch64_insn_gen_branch_imm(pc, old_addr, true);
> +       new = aarch64_insn_gen_branch_imm(pc, addr, true);
> +
> +       return ftrace_modify_code(pc, old, new, true);
> +}
> +
> +
>  /*
>   * Turn off the call to ftrace_caller() in instrumented function
>   */
>  int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
>                     unsigned long addr)
>  {
> -       unsigned long pc = rec->ip;
> +       unsigned long pc = rec->ip+REC_IP_BRANCH_OFFSET;
>         bool validate = true;
> +       int ret;
>         u32 old = 0, new;
>         long offset = (long)pc - (long)addr;
>
> +#ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY

Use IS_ENABLED()

> +       /* -fpatchable-function-entry= does not generate a profiling call
> +        *  initially; the NOPs are already there.
> +        */
> +       if (addr == MCOUNT_ADDR)
> +               return 0;
> +#endif
> +
>         if (offset < -SZ_128M || offset >= SZ_128M) {
>  #ifdef CONFIG_ARM64_MODULE_PLTS

No changes needed at all for modules that are more than 128 MB away
from the core kernel? That seems odd to me, given that we currently
have a single PLT entry per module, and so you cannot mix ordinary
ftrace and ftrace with regs in a single module. Or is that not a
problem?

>                 u32 replaced;
> @@ -188,7 +221,15 @@ int ftrace_make_nop(struct module *mod,
>
>         new = aarch64_insn_gen_nop();
>
> -       return ftrace_modify_code(pc, old, new, validate);
> +       ret = ftrace_modify_code(pc, old, new, validate);
> +       if (ret)
> +               return ret;
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS

Use IS_ENABLED()

> +       smp_wmb();      /* ftrace call must not remain without LR saver. */
> +       old = INSN_MOV_X9_X30;

Uhm, ok, you're using this multiple times. You can still keep the
comment and use the add/sub version directly, or introduce a mov
variant in a separate patch.

> +       ret = ftrace_modify_code(pc-REC_IP_BRANCH_OFFSET, old, new, true);
> +#endif
> +       return ret;
>  }
>
>  void arch_ftrace_update_code(int command)
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -115,6 +115,7 @@
>  #define MCOUNT_REC()   . = ALIGN(8);                           \
>                         __start_mcount_loc = .;                 \
>                         KEEP(*(__mcount_loc))                   \
> +                       KEEP(*(__patchable_function_entries))   \
>                         __stop_mcount_loc = .;
>  #else
>  #define MCOUNT_REC()
> --- a/include/linux/compiler_types.h
> +++ b/include/linux/compiler_types.h
> @@ -61,8 +61,12 @@ extern void __chk_io_ptr(const volatile
>  #if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
>  #define notrace __attribute__((hotpatch(0,0)))
>  #else
> +#ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY
> +#define notrace __attribute__((patchable_function_entry(0)))
> +#else
>  #define notrace __attribute__((no_instrument_function))
>  #endif
> +#endif
>
>  /* Intel compiler defines __GNUC__. So we will overwrite implementations
>   * coming from above header files here


* Re: [PATCH v2 3/3] arm64: reliable stacktraces
  2018-08-17 10:27 ` [PATCH v2 3/3] arm64: reliable stacktraces Torsten Duwe
@ 2018-08-20  8:28   ` Julien Thierry
  0 siblings, 0 replies; 8+ messages in thread
From: Julien Thierry @ 2018-08-20  8:28 UTC (permalink / raw)
  To: Torsten Duwe, Will Deacon, Catalin Marinas, Steven Rostedt,
	Josh Poimboeuf, Ingo Molnar, Ard Biesheuvel, Arnd Bergmann,
	AKASHI Takahiro
  Cc: linux-arm-kernel, linux-kernel, live-patching

Hi Torsten,

On 17/08/18 11:27, Torsten Duwe wrote:
> This is more an RFC in the original sense: is this basically
> the correct approach? (as I had to tweak the save_stack API a bit).
> 
> In particular the code does not detect interrupts and exception
> frames, and does not yet check whether the code address is valid.
> The latter check would also have to be omitted for the latest frame
> on other tasks' stacks. This would require some more tweaking.
> 
> unwind_frame() now reports whether we had to stop normally or due to
> an error condition; walk_stackframe() will pass that info.
> __save_stack_trace() is used for a start to check the validity of a
> frame; maybe save_stack_trace_tsk_reliable() will need its own callback.
> 
> The question is whether this change, once complete, is sufficient
> (as on powerpc) or whether a port of objtool is needed, like x86.
> 

I'm not sure I understand the involvement of objtool in this.
IIUC objtool just provides a way to check that the code manages stack 
frames consistently. It gives you more confidence in how the code 
manages the frame.

If the semantics of HAVE_RELIABLE_STACKTRACE is "are you able to tell 
whether that stacktrace is good or corrupted?", the code looks mostly 
fine to me.

> I can dig into this myself and draw conclusions, but I'd prefer
> to have some input from ARM people here...
> 
> Signed-off-by: Torsten Duwe <duwe@suse.de>
> 
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -127,8 +127,9 @@ config ARM64
>   	select HAVE_PERF_EVENTS
>   	select HAVE_PERF_REGS
>   	select HAVE_PERF_USER_STACK_DUMP
> -	select HAVE_REGS_AND_STACK_ACCESS_API
>   	select HAVE_RCU_TABLE_FREE
> +	select HAVE_REGS_AND_STACK_ACCESS_API
> +	select HAVE_RELIABLE_STACKTRACE
>   	select HAVE_STACKPROTECTOR
>   	select HAVE_SYSCALL_TRACEPOINTS
>   	select HAVE_KPROBES
> --- a/arch/arm64/include/asm/stacktrace.h
> +++ b/arch/arm64/include/asm/stacktrace.h
> @@ -33,7 +33,7 @@ struct stackframe {
>   };
>   
>   extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
> -extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
> +extern int walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
>   			    int (*fn)(struct stackframe *, void *), void *data);
>   extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk);
>   
> diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
> index d5718a060672..fe0dd4745ff3 100644
> --- a/arch/arm64/kernel/stacktrace.c
> +++ b/arch/arm64/kernel/stacktrace.c
> @@ -81,23 +81,27 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
>   	 * both are NULL.
>   	 */
>   	if (!frame->fp && !frame->pc)
> -		return -EINVAL;
> +		return 1;

unwind_frame is not static so we should take care of other callers.

I can see one in arch/arm64/kernel/time.c: profile_pc, which checks 
"unwind_frame(...) < 0" to exit a loop, but now you have made 1 a valid 
stop condition.

I think the other callers are just checking "unwind_frame(...) != 0" as 
stop condition so those should be fine.

Perhaps adding a little comment on unwind_frame ("0 -> stack trace goes 
on, 1 -> reached end of stack trace, < 0 -> stack trace looks corrupted") 
would be nice.
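
Something like this, say (a sketch; exact wording is up to you):

	/*
	 * unwind_frame() return values:
	 *   0   - frame unwound, keep walking
	 *   1   - hit the last frame, clean end of the stack trace
	 *   < 0 - error, the stack trace may be unreliable
	 */
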

>   
>   	return 0;
>   }
>   
> -void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
> +int notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
>   		     int (*fn)(struct stackframe *, void *), void *data)
>   {
>   	while (1) {
>   		int ret;
>   
> -		if (fn(frame, data))
> -			break;
> +		ret = fn(frame, data);
> +		if (ret)
> +			return ret;
>   		ret = unwind_frame(tsk, frame);
>   		if (ret < 0)
> +			return ret;
> +		if (ret > 0)
>   			break;
>   	}
> +	return 0;
>   }
>   
>   #ifdef CONFIG_STACKTRACE
> @@ -145,14 +149,15 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
>   		trace->entries[trace->nr_entries++] = ULONG_MAX;
>   }
>   
> -static noinline void __save_stack_trace(struct task_struct *tsk,
> +static noinline int __save_stack_trace(struct task_struct *tsk,
>   	struct stack_trace *trace, unsigned int nosched)
>   {
>   	struct stack_trace_data data;
>   	struct stackframe frame;
> +	int ret;
>   
>   	if (!try_get_task_stack(tsk))
> -		return;
> +		return -EBUSY;
>   
>   	data.trace = trace;
>   	data.skip = trace->skip;
> @@ -171,11 +176,12 @@ static noinline void __save_stack_trace(struct task_struct *tsk,
>   	frame.graph = tsk->curr_ret_stack;
>   #endif
>   
> -	walk_stackframe(tsk, &frame, save_trace, &data);
> +	ret = walk_stackframe(tsk, &frame, save_trace, &data);
>   	if (trace->nr_entries < trace->max_entries)
>   		trace->entries[trace->nr_entries++] = ULONG_MAX;
>   
>   	put_task_stack(tsk);
> +	return ret;
>   }
>   EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
>   
> @@ -190,4 +196,12 @@ void save_stack_trace(struct stack_trace *trace)
>   }
>   
>   EXPORT_SYMBOL_GPL(save_stack_trace);
> +
> +int save_stack_trace_tsk_reliable(struct task_struct *tsk,
> +				  struct stack_trace *trace)
> +{
> +	return __save_stack_trace(tsk, trace, 1);
> +}
> +EXPORT_SYMBOL_GPL(save_stack_trace_tsk_reliable);
> +
>   #endif
> 

-- 
Julien Thierry

* Re: [PATCH v2 1/3] arm64: implement ftrace with regs
  2018-08-17 10:27 ` [PATCH v2 1/3] arm64: implement ftrace with regs Torsten Duwe
  2018-08-17 17:20   ` Julien Thierry
  2018-08-20  6:55   ` Ard Biesheuvel
@ 2018-08-29  7:11   ` AKASHI Takahiro
  2 siblings, 0 replies; 8+ messages in thread
From: AKASHI Takahiro @ 2018-08-29  7:11 UTC (permalink / raw)
  To: Torsten Duwe
  Cc: Will Deacon, Catalin Marinas, Julien Thierry, Steven Rostedt,
	Josh Poimboeuf, Ingo Molnar, Ard Biesheuvel, Arnd Bergmann,
	linux-arm-kernel, linux-kernel, live-patching

On Fri, Aug 17, 2018 at 12:27:24PM +0200, Torsten Duwe wrote:
> Check for compiler support of -fpatchable-function-entry and use it
> to intercept functions immediately on entry, saving the LR in x9.
> Disable ftracing in efi/libstub, because this triggers cross-section
> linker errors now (-pg is disabled already for those files).
> Add an ftrace_caller which can handle LR in x9, as well as an

I think we need a somewhat more detailed description of what a function's
initial prologue looks like and how it will be patched to enable or
disable ftrace on that function, for the sake of better understanding.
(in entry-ftrace.S or ftrace.c?)

> ftrace_regs_caller that additionally writes out a set of pt_regs
> for inspection.
> 
> Signed-off-by: Torsten Duwe <duwe@suse.de>
> 
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -110,6 +110,7 @@ config ARM64
>  	select HAVE_DEBUG_KMEMLEAK
>  	select HAVE_DMA_CONTIGUOUS
>  	select HAVE_DYNAMIC_FTRACE
> +	select HAVE_DYNAMIC_FTRACE_WITH_REGS
>  	select HAVE_EFFICIENT_UNALIGNED_ACCESS
>  	select HAVE_FTRACE_MCOUNT_RECORD
>  	select HAVE_FUNCTION_TRACER
> --- a/arch/arm64/Makefile
> +++ b/arch/arm64/Makefile
> @@ -78,6 +78,15 @@ ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
>  KBUILD_LDFLAGS_MODULE	+= -T $(srctree)/arch/arm64/kernel/module.lds
>  endif
>  
> +ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +  CC_FLAGS_FTRACE := -fpatchable-function-entry=2
> +  KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
> +  ifeq ($(call cc-option,-fpatchable-function-entry=2),)
> +    $(error Cannot use CONFIG_DYNAMIC_FTRACE_WITH_REGS: \
> +             -fpatchable-function-entry not supported by compiler)
> +  endif
> +endif
> +
>  # Default value
>  head-y		:= arch/arm64/kernel/head.o
>  
> --- a/arch/arm64/include/asm/ftrace.h
> +++ b/arch/arm64/include/asm/ftrace.h
> @@ -16,6 +16,13 @@
>  #define MCOUNT_ADDR		((unsigned long)_mcount)
>  #define MCOUNT_INSN_SIZE	AARCH64_INSN_SIZE
>  
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +#define ARCH_SUPPORTS_FTRACE_OPS 1
> +#define REC_IP_BRANCH_OFFSET 4

Some explanation about "4" will be helpful here.
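
E.g. a comment plus a symbolic constant (a sketch; the wording is mine):

	/*
	 * -fpatchable-function-entry=2 makes every function start with
	 * two NOPs.  rec->ip points at the first one, which becomes the
	 * LR saver ("mov x9, x30"); the branch to ftrace_caller is
	 * patched into the second NOP, one instruction (4 bytes) later.
	 */
	#define REC_IP_BRANCH_OFFSET	AARCH64_INSN_SIZE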

> +#else
> +#define REC_IP_BRANCH_OFFSET 0
> +#endif
> +
>  #ifndef __ASSEMBLY__
>  #include <linux/compat.h>
>  
> --- a/arch/arm64/kernel/Makefile
> +++ b/arch/arm64/kernel/Makefile
> @@ -7,9 +7,9 @@ CPPFLAGS_vmlinux.lds	:= -DTEXT_OFFSET=$(
>  AFLAGS_head.o		:= -DTEXT_OFFSET=$(TEXT_OFFSET)
>  CFLAGS_armv8_deprecated.o := -I$(src)
>  
> -CFLAGS_REMOVE_ftrace.o = -pg
> -CFLAGS_REMOVE_insn.o = -pg
> -CFLAGS_REMOVE_return_address.o = -pg
> +CFLAGS_REMOVE_ftrace.o = -pg $(CC_FLAGS_FTRACE)
> +CFLAGS_REMOVE_insn.o = -pg $(CC_FLAGS_FTRACE)
> +CFLAGS_REMOVE_return_address.o = -pg $(CC_FLAGS_FTRACE)

You don't need to remove "-pg" explicitly here because it won't be added to
XX_CFLAGS when you define CC_FLAGS_FTRACE.

>  # Object file lists.
>  arm64-obj-y		:= debug-monitors.o entry.o irq.o fpsimd.o		\
> --- a/drivers/firmware/efi/libstub/Makefile
> +++ b/drivers/firmware/efi/libstub/Makefile
> @@ -11,7 +11,8 @@ cflags-$(CONFIG_X86)		+= -m$(BITS) -D__K
>  				   -fPIC -fno-strict-aliasing -mno-red-zone \
>  				   -mno-mmx -mno-sse -fshort-wchar
>  
> -cflags-$(CONFIG_ARM64)		:= $(subst -pg,,$(KBUILD_CFLAGS)) -fpie
> +cflags-$(CONFIG_ARM64)		:= $(filter-out -pg $(CC_FLAGS_FTRACE)\
> +				  ,$(KBUILD_CFLAGS)) -fpie
>  cflags-$(CONFIG_ARM)		:= $(subst -pg,,$(KBUILD_CFLAGS)) \
>  				   -fno-builtin -fpic -mno-single-pic-base
>  
> --- a/arch/arm64/kernel/entry-ftrace.S
> +++ b/arch/arm64/kernel/entry-ftrace.S
> @@ -13,6 +13,8 @@
>  #include <asm/assembler.h>
>  #include <asm/ftrace.h>
>  #include <asm/insn.h>
> +#include <asm/asm-offsets.h>
> +#include <asm/assembler.h>
>  
>  /*
>   * Gcc with -pg will put the following code in the beginning of each function:
> @@ -123,6 +125,7 @@ skip_ftrace_call:			// }
>  ENDPROC(_mcount)
>  
>  #else /* CONFIG_DYNAMIC_FTRACE */
> +#ifndef CC_USING_PATCHABLE_FUNCTION_ENTRY

I think using CONFIG_DYNAMIC_FTRACE_WITH_REGS here would be more consistent.

>  /*
>   * _mcount() is used to build the kernel with -pg option, but all the branch
>   * instructions to _mcount() are replaced to NOP initially at kernel start up,
> @@ -162,6 +165,88 @@ ftrace_graph_call:			// ftrace_graph_cal
>  
>  	mcount_exit
>  ENDPROC(ftrace_caller)
> +#else /* CC_USING_PATCHABLE_FUNCTION_ENTRY */
> +ENTRY(_mcount)
> +	mov     x10, lr
> +	mov     lr, x9
> +	ret     x10
> +ENDPROC(_mcount)

I don't think we need a definition of _mcount here, because
patchable-function-entry won't generate any call sites for _mcount,
nor does any other code. You can simply define MCOUNT_ADDR as ULONG_MAX.
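
E.g. something along these lines in asm/ftrace.h (a sketch of the
suggestion, untested):

	#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
	/* no _mcount call sites are ever generated, so any impossible
	 * text address will do */
	#define MCOUNT_ADDR		ULONG_MAX
	#else
	#define MCOUNT_ADDR		((unsigned long)_mcount)
	#endif

The addr == MCOUNT_ADDR check in your ftrace_make_nop() below would
keep working unchanged.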

> +ENTRY(ftrace_regs_caller)
> +	stp	x29, x9, [sp, #-16]!
> +	sub	sp, sp, #S_FRAME_SIZE
> +
> +	stp	x10, x11, [sp, #80]
> +	stp	x12, x13, [sp, #96]
> +	stp	x14, x15, [sp, #112]
> +	stp	x16, x17, [sp, #128]
> +	stp	x18, x19, [sp, #144]
> +	stp	x20, x21, [sp, #160]
> +	stp	x22, x23, [sp, #176]
> +	stp	x24, x25, [sp, #192]
> +	stp	x26, x27, [sp, #208]
> +
> +	b	ftrace_common
> +ENDPROC(ftrace_regs_caller)
> +
> +ENTRY(ftrace_caller)
> +	stp	x29, x9, [sp, #-16]!
> +	sub	sp, sp, #S_FRAME_SIZE
> +
> +ftrace_common:
> +	stp	x28, x29, [sp, #224]
> +
> +	/* save function arguments */
> +	stp	x0, x1, [sp]
> +	stp	x2, x3, [sp, #16]
> +	stp	x4, x5, [sp, #32]
> +	stp	x6, x7, [sp, #48]
> +	stp	x8, x9, [sp, #64]
> +
> +	/* The link register at callee entry */
> +	str	x9, [sp, #S_LR]
> +	/* The program counter just after the ftrace call site */
> +	str	lr, [sp, #S_PC]
> +	/* The stack pointer as it was on ftrace_caller entry... */
> +	add	x29, sp, #S_FRAME_SIZE+16	/* ...is also our new FP */
> +	str	x29, [sp, #S_SP]
> +
> +	adrp    x0, function_trace_op
> +	ldr     x2, [x0, #:lo12:function_trace_op]
> +	mov	x1, x9		/* saved LR == parent IP */
> +	sub	x0, lr, #8	/* function entry == IP */
> +	mov	x3, sp		/* pt_regs are @sp */
> +
> +	.global ftrace_call
> +ftrace_call:
> +
> +	bl	ftrace_stub
> +
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +	.global ftrace_graph_call
> +ftrace_graph_call:			// ftrace_graph_caller();
> +	nop				// If enabled, this will be replaced
> +					// with "b ftrace_graph_caller"
> +#endif
> +
> +ftrace_common_return:
> +	ldp	x0, x1, [sp]
> +	ldp	x2, x3, [sp, #16]
> +	ldp	x4, x5, [sp, #32]
> +	ldp	x6, x7, [sp, #48]
> +	ldp	x8, x9, [sp, #64]
> +
> +	ldp	x28, x29, [sp, #224]
> +
> +	ldr	x9, [sp, #S_PC]
> +	ldr	lr, [sp, #S_LR]
> +	add	sp, sp, #S_FRAME_SIZE+16
> +
> +	ret	x9
> +
> +ENDPROC(ftrace_caller)
> +
> +#endif /* CC_USING_PATCHABLE_FUNCTION_ENTRY */
>  #endif /* CONFIG_DYNAMIC_FTRACE */
>  
>  ENTRY(ftrace_stub)
> @@ -197,12 +284,20 @@ ENDPROC(ftrace_stub)
>   * and run return_to_handler() later on its exit.
>   */
>  ENTRY(ftrace_graph_caller)
> +#ifndef CC_USING_PATCHABLE_FUNCTION_ENTRY

ditto.

>  	mcount_get_lr_addr	  x0	//     pointer to function's saved lr
>  	mcount_get_pc		  x1	//     function's pc
>  	mcount_get_parent_fp	  x2	//     parent's fp
>  	bl	prepare_ftrace_return	// prepare_ftrace_return(&lr, pc, fp)
>  
>  	mcount_exit
> +#else
> +	add	x0, sp, #S_LR	/* address of (LR pointing into caller) */
> +	ldr	x1, [sp, #S_PC]
> +	ldr	x2, [sp, #S_X28 + 8]	/* caller's frame pointer */
> +	bl	prepare_ftrace_return
> +	b	ftrace_common_return
> +#endif
>  ENDPROC(ftrace_graph_caller)
>  
>  /*
> --- a/arch/arm64/kernel/ftrace.c
> +++ b/arch/arm64/kernel/ftrace.c
> @@ -70,7 +70,8 @@ int ftrace_update_ftrace_func(ftrace_fun
>   */
>  int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
>  {
> -	unsigned long pc = rec->ip;
> +	unsigned long pc = rec->ip+REC_IP_BRANCH_OFFSET;
> +	int ret;
>  	u32 old, new;
>  	long offset = (long)pc - (long)addr;
>  
> @@ -128,22 +129,54 @@ int ftrace_make_call(struct dyn_ftrace *
>  	}
>  
>  	old = aarch64_insn_gen_nop();
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +#define INSN_MOV_X9_X30 aarch64_insn_gen_add_sub_imm(AARCH64_INSN_REG_9, \
> +		AARCH64_INSN_REG_30, 0, \
> +		AARCH64_INSN_VARIANT_64BIT, AARCH64_INSN_ADSB_ADD)
> +	new = INSN_MOV_X9_X30;
> +	ret = ftrace_modify_code(pc-REC_IP_BRANCH_OFFSET, old, new, true);
> +	if (ret)
> +		return ret;
> +	smp_wmb();	/* ensure LR saver is in place before ftrace call */
> +#endif
>  	new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
>  
>  	return ftrace_modify_code(pc, old, new, true);
>  }
>  
> +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
> +			unsigned long addr)
> +{
> +	unsigned long pc = rec->ip+REC_IP_BRANCH_OFFSET;
> +	u32 old, new;
> +
> +	old = aarch64_insn_gen_branch_imm(pc, old_addr, true);
> +	new = aarch64_insn_gen_branch_imm(pc, addr, true);
> +
> +	return ftrace_modify_code(pc, old, new, true);
> +}
> +
> +
>  /*
>   * Turn off the call to ftrace_caller() in instrumented function
>   */
>  int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
>  		    unsigned long addr)
>  {
> -	unsigned long pc = rec->ip;
> +	unsigned long pc = rec->ip+REC_IP_BRANCH_OFFSET;
>  	bool validate = true;
> +	int ret;
>  	u32 old = 0, new;
>  	long offset = (long)pc - (long)addr;
>  
> +#ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY

ditto.

> +	/* -fpatchable-function-entry= does not generate a profiling call
> +	 *  initially; the NOPs are already there.
> +	 */
> +	if (addr == MCOUNT_ADDR)
> +		return 0;
> +#endif
> +
>  	if (offset < -SZ_128M || offset >= SZ_128M) {
>  #ifdef CONFIG_ARM64_MODULE_PLTS
>  		u32 replaced;
> @@ -188,7 +221,15 @@ int ftrace_make_nop(struct module *mod,
>  
>  	new = aarch64_insn_gen_nop();
>  
> -	return ftrace_modify_code(pc, old, new, validate);
> +	ret = ftrace_modify_code(pc, old, new, validate);
> +	if (ret)
> +		return ret;
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +	smp_wmb();	/* ftrace call must not remain without LR saver. */
> +	old = INSN_MOV_X9_X30;

Given the minimal performance impact of a stray "mov x9, x30", we don't
necessarily have to remove this instruction.
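
That is, ftrace_make_nop() could leave the "mov x9, x30" in place and
only patch out the branch, roughly (a sketch reusing your locals):

	new = aarch64_insn_gen_nop();

	return ftrace_modify_code(pc, old, new, validate);

which would also drop the second smp_wmb() and the extra modify step.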

Thanks,
-Takahiro AKASHI

> +	ret = ftrace_modify_code(pc-REC_IP_BRANCH_OFFSET, old, new, true);
> +#endif
> +	return ret;
>  }
>  
>  void arch_ftrace_update_code(int command)
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -115,6 +115,7 @@
>  #define MCOUNT_REC()	. = ALIGN(8);				\
>  			__start_mcount_loc = .;			\
>  			KEEP(*(__mcount_loc))			\
> +			KEEP(*(__patchable_function_entries))	\
>  			__stop_mcount_loc = .;
>  #else
>  #define MCOUNT_REC()
> --- a/include/linux/compiler_types.h
> +++ b/include/linux/compiler_types.h
> @@ -61,8 +61,12 @@ extern void __chk_io_ptr(const volatile
>  #if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
>  #define notrace __attribute__((hotpatch(0,0)))
>  #else
> +#ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY
> +#define notrace __attribute__((patchable_function_entry(0)))
> +#else
>  #define notrace __attribute__((no_instrument_function))
>  #endif
> +#endif
>  
>  /* Intel compiler defines __GNUC__. So we will overwrite implementations
>   * coming from above header files here
