From: Peter Zijlstra <peterz@infradead.org> To: x86@kernel.org Cc: peterz@infradead.org, linux-kernel@vger.kernel.org, rostedt@goodmis.org, mhiramat@kernel.org, bristot@redhat.com, jbaron@akamai.com, torvalds@linux-foundation.org, tglx@linutronix.de, mingo@kernel.org, namit@vmware.com, hpa@zytor.com, luto@kernel.org, ard.biesheuvel@linaro.org, jpoimboe@redhat.com, jeyu@kernel.org Subject: [PATCH v4 01/16] x86/alternatives: Teach text_poke_bp() to emulate instructions Date: Fri, 18 Oct 2019 09:35:26 +0200 [thread overview] Message-ID: <20191018074633.998168044@infradead.org> (raw) In-Reply-To: <20191018073525.768931536@infradead.org> In preparation for static_call and variable size jump_label support, teach text_poke_bp() to emulate instructions, namely: JMP32, JMP8, CALL, NOP2, NOP_ATOMIC5, INT3 The current text_poke_bp() takes a @handler argument which is used as a jump target when the temporary INT3 is hit by a different CPU. When patching CALL instructions, this doesn't work because we'd miss the PUSH of the return address. Instead, teach poke_int3_handler() to emulate an instruction, typically the instruction we're patching in. This fits almost all text_poke_bp() users, except arch_unoptimize_kprobe() which restores random text, and for that site we have to build an explicit emulate instruction. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org> Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com> Cc: Steven Rostedt <rostedt@goodmis.org> --- arch/x86/include/asm/text-patching.h | 24 ++++-- arch/x86/kernel/alternative.c | 132 ++++++++++++++++++++++++++--------- arch/x86/kernel/jump_label.c | 9 -- arch/x86/kernel/kprobes/opt.c | 11 ++ 4 files changed, 130 insertions(+), 46 deletions(-) --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -26,10 +26,11 @@ static inline void apply_paravirt(struct #define POKE_MAX_OPCODE_SIZE 5 struct text_poke_loc { - void *detour; void *addr; - size_t len; - const char opcode[POKE_MAX_OPCODE_SIZE]; + int len; + s32 rel32; + u8 opcode; + const u8 text[POKE_MAX_OPCODE_SIZE]; }; extern void text_poke_early(void *addr, const void *opcode, size_t len); @@ -51,8 +52,10 @@ extern void text_poke_early(void *addr, extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); extern int poke_int3_handler(struct pt_regs *regs); -extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); +extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate); extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries); +extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr, + const void *opcode, size_t len, const void *emulate); extern int after_bootmem; extern __ro_after_init struct mm_struct *poking_mm; extern __ro_after_init unsigned long poking_addr; @@ -63,8 +66,17 @@ static inline void int3_emulate_jmp(stru regs->ip = ip; } -#define INT3_INSN_SIZE 1 -#define CALL_INSN_SIZE 5 +#define INT3_INSN_SIZE 1 +#define INT3_INSN_OPCODE 0xCC + +#define CALL_INSN_SIZE 5 +#define CALL_INSN_OPCODE 0xE8 + +#define JMP32_INSN_SIZE 5 +#define JMP32_INSN_OPCODE 0xE9 + +#define JMP8_INSN_SIZE 2 +#define JMP8_INSN_OPCODE 0xEB static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) { --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -956,16 +956,15 @@ NOKPROBE_SYMBOL(patch_cmp); int poke_int3_handler(struct pt_regs *regs) { struct text_poke_loc *tp; - unsigned char int3 = 0xcc; void *ip; /* * Having observed our INT3 instruction, we now must observe * bp_patching.nr_entries. * - * nr_entries != 0 INT3 - * WMB RMB - * write INT3 if (nr_entries) + * nr_entries != 0 INT3 + * WMB RMB + * write INT3 if (nr_entries) * * Idem for other elements in bp_patching. */ @@ -978,9 +977,9 @@ int poke_int3_handler(struct pt_regs *re return 0; /* - * Discount the sizeof(int3). See text_poke_bp_batch(). + * Discount the INT3. See text_poke_bp_batch(). */ - ip = (void *) regs->ip - sizeof(int3); + ip = (void *) regs->ip - INT3_INSN_SIZE; /* * Skip the binary search if there is a single member in the vector. @@ -997,8 +996,28 @@ int poke_int3_handler(struct pt_regs *re return 0; } - /* set up the specified breakpoint detour */ - regs->ip = (unsigned long) tp->detour; + ip += tp->len; + + switch (tp->opcode) { + case INT3_INSN_OPCODE: + /* + * Someone poked an explicit INT3, they'll want to handle it, + * do not consume. + */ + return 0; + + case CALL_INSN_OPCODE: + int3_emulate_call(regs, (long)ip + tp->rel32); + break; + + case JMP32_INSN_OPCODE: + case JMP8_INSN_OPCODE: + int3_emulate_jmp(regs, (long)ip + tp->rel32); + break; + + default: + BUG(); + } return 1; } @@ -1014,7 +1033,7 @@ NOKPROBE_SYMBOL(poke_int3_handler); * synchronization using int3 breakpoint. * * The way it is done: - * - For each entry in the vector: + * - For each entry in the vector: * - add a int3 trap to the address that will be patched * - sync cores * - For each entry in the vector: @@ -1027,9 +1046,9 @@ NOKPROBE_SYMBOL(poke_int3_handler); */ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) { - int patched_all_but_first = 0; - unsigned char int3 = 0xcc; + unsigned char int3 = INT3_INSN_OPCODE; unsigned int i; + int do_sync; lockdep_assert_held(&text_mutex); @@ -1053,16 +1072,16 @@ void text_poke_bp_batch(struct text_poke /* * Second step: update all but the first byte of the patched range. */ - for (i = 0; i < nr_entries; i++) { + for (do_sync = 0, i = 0; i < nr_entries; i++) { if (tp[i].len - sizeof(int3) > 0) { text_poke((char *)tp[i].addr + sizeof(int3), - (const char *)tp[i].opcode + sizeof(int3), + (const char *)tp[i].text + sizeof(int3), tp[i].len - sizeof(int3)); - patched_all_but_first++; + do_sync++; } } - if (patched_all_but_first) { + if (do_sync) { /* * According to Intel, this core syncing is very likely * not necessary and we'd be safe even without it. But @@ -1075,10 +1094,17 @@ void text_poke_bp_batch(struct text_poke * Third step: replace the first byte (int3) by the first byte of * replacing opcode. */ - for (i = 0; i < nr_entries; i++) - text_poke(tp[i].addr, tp[i].opcode, sizeof(int3)); + for (do_sync = 0, i = 0; i < nr_entries; i++) { + if (tp[i].text[0] == INT3_INSN_OPCODE) + continue; + + text_poke(tp[i].addr, tp[i].text, sizeof(int3)); + do_sync++; + } + + if (do_sync) + on_each_cpu(do_sync_core, NULL, 1); - on_each_cpu(do_sync_core, NULL, 1); /* * sync_core() implies an smp_mb() and orders this store against * the writing of the new instruction. @@ -1087,6 +1113,60 @@ void text_poke_bp_batch(struct text_poke bp_patching.nr_entries = 0; } +void text_poke_loc_init(struct text_poke_loc *tp, void *addr, + const void *opcode, size_t len, const void *emulate) +{ + struct insn insn; + + if (!opcode) + opcode = (void *)tp->text; + else + memcpy((void *)tp->text, opcode, len); + + if (!emulate) + emulate = opcode; + + kernel_insn_init(&insn, emulate, MAX_INSN_SIZE); + insn_get_length(&insn); + + BUG_ON(!insn_complete(&insn)); + BUG_ON(len != insn.length); + + tp->addr = addr; + tp->len = len; + tp->opcode = insn.opcode.bytes[0]; + + switch (tp->opcode) { + case INT3_INSN_OPCODE: + break; + + case CALL_INSN_OPCODE: + case JMP32_INSN_OPCODE: + case JMP8_INSN_OPCODE: + tp->rel32 = insn.immediate.value; + break; + + default: /* assume NOP */ + switch (len) { + case 2: /* NOP2 -- emulate as JMP8+0 */ + BUG_ON(memcmp(emulate, ideal_nops[len], len)); + tp->opcode = JMP8_INSN_OPCODE; + tp->rel32 = 0; + break; + + case 5: /* NOP5 -- emulate as JMP32+0 */ + BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], len)); + tp->opcode = JMP32_INSN_OPCODE; + tp->rel32 = 0; + break; + + default: /* unknown instruction */ + BUG(); + } + break; + } +} + /** * text_poke_bp() -- update instructions on live kernel on SMP * @addr: address to patch @@ -1098,20 +1178,10 @@ void text_poke_bp_batch(struct text_poke * dynamically allocated memory. This function should be used when it is * not possible to allocate memory. */ -void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler) +void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate) { - struct text_poke_loc tp = { - .detour = handler, - .addr = addr, - .len = len, - }; - - if (len > POKE_MAX_OPCODE_SIZE) { - WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE); - return; - } - - memcpy((void *)tp.opcode, opcode, len); + struct text_poke_loc tp; + text_poke_loc_init(&tp, addr, opcode, len, emulate); text_poke_bp_batch(&tp, 1); } --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -89,8 +89,7 @@ static void __ref __jump_label_transform return; } - text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, - (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); + text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, NULL); } void arch_jump_label_transform(struct jump_entry *entry, @@ -147,11 +146,9 @@ bool arch_jump_label_transform_queue(str } __jump_label_set_jump_code(entry, type, - (union jump_code_union *) &tp->opcode, 0); + (union jump_code_union *)&tp->text, 0); - tp->addr = entry_code; - tp->detour = entry_code + JUMP_LABEL_NOP_SIZE; - tp->len = JUMP_LABEL_NOP_SIZE; + text_poke_loc_init(tp, entry_code, NULL, JUMP_LABEL_NOP_SIZE, NULL); tp_vec_nr++; --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -437,8 +437,7 @@ void arch_optimize_kprobes(struct list_h insn_buff[0] = RELATIVEJUMP_OPCODE; *(s32 *)(&insn_buff[1]) = rel; - text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, - op->optinsn.insn); + text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, NULL); list_del_init(&op->list); } @@ -448,12 +447,18 @@ void arch_optimize_kprobes(struct list_h void arch_unoptimize_kprobe(struct optimized_kprobe *op) { u8 insn_buff[RELATIVEJUMP_SIZE]; + u8 emulate_buff[RELATIVEJUMP_SIZE]; /* Set int3 to first byte for kprobes */ insn_buff[0] = BREAKPOINT_INSTRUCTION; memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); + + emulate_buff[0] = RELATIVEJUMP_OPCODE; + *(s32 *)(&emulate_buff[1]) = (s32)((long)op->optinsn.insn - + ((long)op->kp.addr + RELATIVEJUMP_SIZE)); + text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, - op->optinsn.insn); + emulate_buff); } /*
next prev parent reply other threads:[~2019-10-18 7:51 UTC|newest] Thread overview: 70+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-10-18 7:35 [PATCH v4 00/16] Rewrite x86/ftrace to use text_poke (and more) Peter Zijlstra 2019-10-18 7:35 ` Peter Zijlstra [this message] 2019-10-18 7:35 ` [PATCH v4 02/16] x86/alternatives: Update int3_emulate_push() comment Peter Zijlstra 2019-10-18 7:35 ` [PATCH v4 03/16] x86/alternatives,jump_label: Provide better text_poke() batching interface Peter Zijlstra 2019-10-21 8:48 ` Ingo Molnar 2019-10-21 9:21 ` Peter Zijlstra 2019-10-18 7:35 ` [PATCH v4 04/16] x86/alternatives: Add and use text_gen_insn() helper Peter Zijlstra 2019-10-18 7:35 ` [PATCH v4 05/16] x86/ftrace: Use text_poke() Peter Zijlstra 2019-10-18 7:35 ` [PATCH v4 06/16] x86/mm: Remove set_kernel_text_r[ow]() Peter Zijlstra 2019-10-18 7:35 ` [PATCH v4 07/16] x86/alternative: Add text_opcode_size() Peter Zijlstra 2019-10-18 7:35 ` [PATCH v4 08/16] x86/ftrace: Use text_gen_insn() Peter Zijlstra 2019-10-18 7:35 ` [PATCH v4 09/16] x86/alternative: Remove text_poke_loc::len Peter Zijlstra 2019-10-21 8:58 ` Ingo Molnar 2019-10-21 9:02 ` Ingo Molnar 2019-10-18 7:35 ` [PATCH v4 10/16] x86/alternative: Shrink text_poke_loc Peter Zijlstra 2019-10-21 9:01 ` Ingo Molnar 2019-10-21 9:25 ` Peter Zijlstra 2019-10-21 9:33 ` Ingo Molnar 2019-10-18 7:35 ` [PATCH v4 11/16] x86/kprobes: Convert to text-patching.h Peter Zijlstra 2019-10-21 14:57 ` Masami Hiramatsu 2019-10-18 7:35 ` [PATCH v4 12/16] x86/kprobes: Fix ordering Peter Zijlstra 2019-10-22 1:35 ` Masami Hiramatsu 2019-10-22 10:31 ` Peter Zijlstra 2019-10-18 7:35 ` [PATCH v4 13/16] arm/ftrace: Use __patch_text_real() Peter Zijlstra 2019-10-28 16:25 ` Will Deacon 2019-10-28 16:34 ` Peter Zijlstra 2019-10-28 16:35 ` Peter Zijlstra 2019-10-28 16:47 ` Will Deacon 2019-10-28 16:55 ` Peter Zijlstra 2019-10-18 7:35 ` [PATCH v4 14/16] module: Remove set_all_modules_text_*() Peter Zijlstra 2019-10-18 7:35 ` [PATCH v4 15/16] module: Move where we mark modules RO,X Peter Zijlstra 2019-10-21 13:53 ` Josh Poimboeuf 2019-10-21 14:14 ` Peter Zijlstra 2019-10-21 15:34 ` Peter Zijlstra 2019-10-21 15:44 ` Peter Zijlstra 2019-10-21 16:11 ` Peter Zijlstra 2019-10-22 11:31 ` Heiko Carstens 2019-10-22 12:31 ` Peter Zijlstra 2019-10-23 11:48 ` Peter Zijlstra 2019-10-23 15:16 ` Peter Zijlstra 2019-10-23 17:15 ` Josh Poimboeuf 2019-10-24 10:59 ` Peter Zijlstra 2019-10-24 18:31 ` Josh Poimboeuf 2019-10-24 20:33 ` Peter Zijlstra 2019-10-23 17:00 ` Josh Poimboeuf 2019-10-24 13:16 ` Peter Zijlstra 2019-10-25 6:44 ` Petr Mladek 2019-10-25 8:43 ` Peter Zijlstra 2019-10-25 10:06 ` Peter Zijlstra 2019-10-25 13:50 ` Josh Poimboeuf 2019-10-26 1:17 ` Josh Poimboeuf 2019-10-28 10:07 ` Peter Zijlstra 2019-10-28 10:43 ` Peter Zijlstra 2019-10-25 9:16 ` Peter Zijlstra 2019-10-22 2:21 ` Steven Rostedt 2019-10-22 20:24 ` Peter Zijlstra 2019-10-22 20:40 ` Steven Rostedt 2019-10-23 9:07 ` Peter Zijlstra 2019-10-23 18:52 ` Steven Rostedt 2019-10-24 10:16 ` Peter Zijlstra 2019-10-24 10:18 ` Peter Zijlstra 2019-10-24 15:00 ` Steven Rostedt 2019-10-24 16:43 ` Peter Zijlstra 2019-10-24 18:17 ` Steven Rostedt 2019-10-24 20:24 ` Peter Zijlstra 2019-10-24 20:28 ` Steven Rostedt 2019-10-18 7:35 ` [PATCH v4 16/16] ftrace: Merge ftrace_module_{init,enable}() Peter Zijlstra 2019-10-18 8:20 ` Peter Zijlstra 2019-10-21 9:09 ` [PATCH v4 00/16] Rewrite x86/ftrace to use text_poke (and more) Ingo Molnar 2019-10-21 13:38 ` Steven Rostedt
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20191018074633.998168044@infradead.org \ --to=peterz@infradead.org \ --cc=ard.biesheuvel@linaro.org \ --cc=bristot@redhat.com \ --cc=hpa@zytor.com \ --cc=jbaron@akamai.com \ --cc=jeyu@kernel.org \ --cc=jpoimboe@redhat.com \ --cc=linux-kernel@vger.kernel.org \ --cc=luto@kernel.org \ --cc=mhiramat@kernel.org \ --cc=mingo@kernel.org \ --cc=namit@vmware.com \ --cc=rostedt@goodmis.org \ --cc=tglx@linutronix.de \ --cc=torvalds@linux-foundation.org \ --cc=x86@kernel.org \ --subject='Re: [PATCH v4 01/16] x86/alternatives: Teach text_poke_bp() to emulate instructions' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.