From: Peter Zijlstra <peterz@infradead.org>
To: x86@kernel.org
Cc: peterz@infradead.org, linux-kernel@vger.kernel.org,
rostedt@goodmis.org, mhiramat@kernel.org, bristot@redhat.com,
jbaron@akamai.com, torvalds@linux-foundation.org,
tglx@linutronix.de, mingo@kernel.org, namit@vmware.com,
hpa@zytor.com, luto@kernel.org, ard.biesheuvel@linaro.org,
jpoimboe@redhat.com, jeyu@kernel.org,
alexei.starovoitov@gmail.com
Subject: [PATCH -v5 01/17] x86/alternatives: Teach text_poke_bp() to emulate instructions
Date: Mon, 11 Nov 2019 14:12:53 +0100 [thread overview]
Message-ID: <20191111132457.529086974@infradead.org> (raw)
In-Reply-To: 20191111131252.921588318@infradead.org
In preparation for static_call and variable size jump_label support,
teach text_poke_bp() to emulate instructions, namely:
JMP32, JMP8, CALL, NOP2, NOP_ATOMIC5, INT3
The current text_poke_bp() takes a @handler argument which is used as
a jump target when the temporary INT3 is hit by a different CPU.
When patching CALL instructions, this doesn't work because we'd miss
the PUSH of the return address. Instead, teach poke_int3_handler() to
emulate an instruction, typically the instruction we're patching in.
This fits almost all text_poke_bp() users, except
arch_unoptimize_kprobe() which restores random text, and for that site
we have to build an explicit emulate instruction.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
arch/x86/include/asm/text-patching.h | 24 ++++--
arch/x86/kernel/alternative.c | 132 ++++++++++++++++++++++++++---------
arch/x86/kernel/jump_label.c | 9 --
arch/x86/kernel/kprobes/opt.c | 11 ++
4 files changed, 130 insertions(+), 46 deletions(-)
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -26,10 +26,11 @@ static inline void apply_paravirt(struct
#define POKE_MAX_OPCODE_SIZE 5
struct text_poke_loc {
- void *detour;
void *addr;
- size_t len;
- const char opcode[POKE_MAX_OPCODE_SIZE];
+ int len;
+ s32 rel32;
+ u8 opcode;
+ const u8 text[POKE_MAX_OPCODE_SIZE];
};
extern void text_poke_early(void *addr, const void *opcode, size_t len);
@@ -51,8 +52,10 @@ extern void text_poke_early(void *addr,
extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
-extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
+extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
+ const void *opcode, size_t len, const void *emulate);
extern int after_bootmem;
extern __ro_after_init struct mm_struct *poking_mm;
extern __ro_after_init unsigned long poking_addr;
@@ -63,8 +66,17 @@ static inline void int3_emulate_jmp(stru
regs->ip = ip;
}
-#define INT3_INSN_SIZE 1
-#define CALL_INSN_SIZE 5
+#define INT3_INSN_SIZE 1
+#define INT3_INSN_OPCODE 0xCC
+
+#define CALL_INSN_SIZE 5
+#define CALL_INSN_OPCODE 0xE8
+
+#define JMP32_INSN_SIZE 5
+#define JMP32_INSN_OPCODE 0xE9
+
+#define JMP8_INSN_SIZE 2
+#define JMP8_INSN_OPCODE 0xEB
static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
{
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -956,16 +956,15 @@ NOKPROBE_SYMBOL(patch_cmp);
int poke_int3_handler(struct pt_regs *regs)
{
struct text_poke_loc *tp;
- unsigned char int3 = 0xcc;
void *ip;
/*
* Having observed our INT3 instruction, we now must observe
* bp_patching.nr_entries.
*
- * nr_entries != 0 INT3
- * WMB RMB
- * write INT3 if (nr_entries)
+ * nr_entries != 0 INT3
+ * WMB RMB
+ * write INT3 if (nr_entries)
*
* Idem for other elements in bp_patching.
*/
@@ -978,9 +977,9 @@ int poke_int3_handler(struct pt_regs *re
return 0;
/*
- * Discount the sizeof(int3). See text_poke_bp_batch().
+ * Discount the INT3. See text_poke_bp_batch().
*/
- ip = (void *) regs->ip - sizeof(int3);
+ ip = (void *) regs->ip - INT3_INSN_SIZE;
/*
* Skip the binary search if there is a single member in the vector.
@@ -997,8 +996,28 @@ int poke_int3_handler(struct pt_regs *re
return 0;
}
- /* set up the specified breakpoint detour */
- regs->ip = (unsigned long) tp->detour;
+ ip += tp->len;
+
+ switch (tp->opcode) {
+ case INT3_INSN_OPCODE:
+ /*
+ * Someone poked an explicit INT3, they'll want to handle it,
+ * do not consume.
+ */
+ return 0;
+
+ case CALL_INSN_OPCODE:
+ int3_emulate_call(regs, (long)ip + tp->rel32);
+ break;
+
+ case JMP32_INSN_OPCODE:
+ case JMP8_INSN_OPCODE:
+ int3_emulate_jmp(regs, (long)ip + tp->rel32);
+ break;
+
+ default:
+ BUG();
+ }
return 1;
}
@@ -1014,7 +1033,7 @@ NOKPROBE_SYMBOL(poke_int3_handler);
* synchronization using int3 breakpoint.
*
* The way it is done:
- * - For each entry in the vector:
+ * - For each entry in the vector:
* - add a int3 trap to the address that will be patched
* - sync cores
* - For each entry in the vector:
@@ -1027,9 +1046,9 @@ NOKPROBE_SYMBOL(poke_int3_handler);
*/
void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
{
- int patched_all_but_first = 0;
- unsigned char int3 = 0xcc;
+ unsigned char int3 = INT3_INSN_OPCODE;
unsigned int i;
+ int do_sync;
lockdep_assert_held(&text_mutex);
@@ -1053,16 +1072,16 @@ void text_poke_bp_batch(struct text_poke
/*
* Second step: update all but the first byte of the patched range.
*/
- for (i = 0; i < nr_entries; i++) {
+ for (do_sync = 0, i = 0; i < nr_entries; i++) {
if (tp[i].len - sizeof(int3) > 0) {
text_poke((char *)tp[i].addr + sizeof(int3),
- (const char *)tp[i].opcode + sizeof(int3),
+ (const char *)tp[i].text + sizeof(int3),
tp[i].len - sizeof(int3));
- patched_all_but_first++;
+ do_sync++;
}
}
- if (patched_all_but_first) {
+ if (do_sync) {
/*
* According to Intel, this core syncing is very likely
* not necessary and we'd be safe even without it. But
@@ -1075,10 +1094,17 @@ void text_poke_bp_batch(struct text_poke
* Third step: replace the first byte (int3) by the first byte of
* replacing opcode.
*/
- for (i = 0; i < nr_entries; i++)
- text_poke(tp[i].addr, tp[i].opcode, sizeof(int3));
+ for (do_sync = 0, i = 0; i < nr_entries; i++) {
+ if (tp[i].text[0] == INT3_INSN_OPCODE)
+ continue;
+
+ text_poke(tp[i].addr, tp[i].text, sizeof(int3));
+ do_sync++;
+ }
+
+ if (do_sync)
+ on_each_cpu(do_sync_core, NULL, 1);
- on_each_cpu(do_sync_core, NULL, 1);
/*
* sync_core() implies an smp_mb() and orders this store against
* the writing of the new instruction.
@@ -1087,6 +1113,60 @@ void text_poke_bp_batch(struct text_poke
bp_patching.nr_entries = 0;
}
+void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
+ const void *opcode, size_t len, const void *emulate)
+{
+ struct insn insn;
+
+ if (!opcode)
+ opcode = (void *)tp->text;
+ else
+ memcpy((void *)tp->text, opcode, len);
+
+ if (!emulate)
+ emulate = opcode;
+
+ kernel_insn_init(&insn, emulate, MAX_INSN_SIZE);
+ insn_get_length(&insn);
+
+ BUG_ON(!insn_complete(&insn));
+ BUG_ON(len != insn.length);
+
+ tp->addr = addr;
+ tp->len = len;
+ tp->opcode = insn.opcode.bytes[0];
+
+ switch (tp->opcode) {
+ case INT3_INSN_OPCODE:
+ break;
+
+ case CALL_INSN_OPCODE:
+ case JMP32_INSN_OPCODE:
+ case JMP8_INSN_OPCODE:
+ tp->rel32 = insn.immediate.value;
+ break;
+
+ default: /* assume NOP */
+ switch (len) {
+ case 2: /* NOP2 -- emulate as JMP8+0 */
+ BUG_ON(memcmp(emulate, ideal_nops[len], len));
+ tp->opcode = JMP8_INSN_OPCODE;
+ tp->rel32 = 0;
+ break;
+
+ case 5: /* NOP5 -- emulate as JMP32+0 */
+ BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], len));
+ tp->opcode = JMP32_INSN_OPCODE;
+ tp->rel32 = 0;
+ break;
+
+ default: /* unknown instruction */
+ BUG();
+ }
+ break;
+ }
+}
+
/**
* text_poke_bp() -- update instructions on live kernel on SMP
* @addr: address to patch
@@ -1098,20 +1178,10 @@ void text_poke_bp_batch(struct text_poke
* dynamically allocated memory. This function should be used when it is
* not possible to allocate memory.
*/
-void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
+void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
{
- struct text_poke_loc tp = {
- .detour = handler,
- .addr = addr,
- .len = len,
- };
-
- if (len > POKE_MAX_OPCODE_SIZE) {
- WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE);
- return;
- }
-
- memcpy((void *)tp.opcode, opcode, len);
+ struct text_poke_loc tp;
+ text_poke_loc_init(&tp, addr, opcode, len, emulate);
text_poke_bp_batch(&tp, 1);
}
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -89,8 +89,7 @@ static void __ref __jump_label_transform
return;
}
- text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE,
- (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
+ text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, NULL);
}
void arch_jump_label_transform(struct jump_entry *entry,
@@ -147,11 +146,9 @@ bool arch_jump_label_transform_queue(str
}
__jump_label_set_jump_code(entry, type,
- (union jump_code_union *) &tp->opcode, 0);
+ (union jump_code_union *)&tp->text, 0);
- tp->addr = entry_code;
- tp->detour = entry_code + JUMP_LABEL_NOP_SIZE;
- tp->len = JUMP_LABEL_NOP_SIZE;
+ text_poke_loc_init(tp, entry_code, NULL, JUMP_LABEL_NOP_SIZE, NULL);
tp_vec_nr++;
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -437,8 +437,7 @@ void arch_optimize_kprobes(struct list_h
insn_buff[0] = RELATIVEJUMP_OPCODE;
*(s32 *)(&insn_buff[1]) = rel;
- text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
- op->optinsn.insn);
+ text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, NULL);
list_del_init(&op->list);
}
@@ -448,12 +447,18 @@ void arch_optimize_kprobes(struct list_h
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
u8 insn_buff[RELATIVEJUMP_SIZE];
+ u8 emulate_buff[RELATIVEJUMP_SIZE];
/* Set int3 to first byte for kprobes */
insn_buff[0] = BREAKPOINT_INSTRUCTION;
memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+
+ emulate_buff[0] = RELATIVEJUMP_OPCODE;
+ *(s32 *)(&emulate_buff[1]) = (s32)((long)op->optinsn.insn -
+ ((long)op->kp.addr + RELATIVEJUMP_SIZE));
+
text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
- op->optinsn.insn);
+ emulate_buff);
}
/*
next prev parent reply other threads:[~2019-11-11 14:48 UTC|newest]
Thread overview: 106+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-11-11 13:12 [PATCH -v5 00/17] Rewrite x86/ftrace to use text_poke (and more) Peter Zijlstra
2019-11-11 13:12 ` Peter Zijlstra [this message]
2019-11-15 9:43 ` [tip: core/kprobes] x86/alternatives: Teach text_poke_bp() to emulate instructions tip-bot2 for Peter Zijlstra
2019-11-11 13:12 ` [PATCH -v5 02/17] x86/alternatives: Update int3_emulate_push() comment Peter Zijlstra
2019-11-15 9:43 ` [tip: core/kprobes] " tip-bot2 for Peter Zijlstra
2019-12-04 8:33 ` tip-bot2 for Peter Zijlstra
2019-11-11 13:12 ` [PATCH -v5 03/17] x86/alternatives,jump_label: Provide better text_poke() batching interface Peter Zijlstra
2019-11-15 9:43 ` [tip: core/kprobes] x86/alternatives, jump_label: " tip-bot2 for Peter Zijlstra
2019-12-04 8:33 ` tip-bot2 for Peter Zijlstra
2019-11-11 13:12 ` [PATCH -v5 04/17] x86/alternatives: Add and use text_gen_insn() helper Peter Zijlstra
2019-11-12 17:10 ` Steven Rostedt
2019-11-12 22:25 ` Peter Zijlstra
2019-11-15 9:43 ` [tip: core/kprobes] " tip-bot2 for Peter Zijlstra
2019-12-04 8:33 ` tip-bot2 for Peter Zijlstra
2019-11-11 13:12 ` [PATCH -v5 05/17] x86/ftrace: Use text_poke() Peter Zijlstra
2019-11-12 18:25 ` Steven Rostedt
2019-11-12 22:24 ` Peter Zijlstra
2019-11-12 22:48 ` Steven Rostedt
2019-11-13 9:01 ` Peter Zijlstra
2019-11-13 14:27 ` Steven Rostedt
2019-11-14 13:18 ` Peter Zijlstra
2019-11-14 13:56 ` Steven Rostedt
2019-11-14 14:05 ` Peter Zijlstra
2019-11-13 8:53 ` Peter Zijlstra
2019-11-15 9:43 ` [tip: core/kprobes] " tip-bot2 for Peter Zijlstra
2019-11-16 20:46 ` Borislav Petkov
2019-11-18 17:35 ` [PATCH] x86/ftrace: Mark ftrace_modify_code_direct() __ref Borislav Petkov
2019-11-18 17:52 ` Steven Rostedt
2019-11-19 9:55 ` [tip: core/kprobes] " tip-bot2 for Borislav Petkov
2019-12-04 8:33 ` tip-bot2 for Borislav Petkov
2019-12-04 8:33 ` [tip: core/kprobes] x86/ftrace: Use text_poke() tip-bot2 for Peter Zijlstra
2019-11-11 13:12 ` [PATCH -v5 06/17] x86/mm: Remove set_kernel_text_r[ow]() Peter Zijlstra
2019-11-15 9:43 ` [tip: core/kprobes] " tip-bot2 for Peter Zijlstra
2019-12-04 8:33 ` tip-bot2 for Peter Zijlstra
2019-11-11 13:12 ` [PATCH -v5 07/17] x86/alternative: Add text_opcode_size() Peter Zijlstra
2019-11-15 9:43 ` [tip: core/kprobes] " tip-bot2 for Peter Zijlstra
2019-12-04 8:33 ` tip-bot2 for Peter Zijlstra
2019-11-11 13:13 ` [PATCH -v5 08/17] x86/ftrace: Use text_gen_insn() Peter Zijlstra
2019-11-15 9:43 ` [tip: core/kprobes] " tip-bot2 for Peter Zijlstra
2019-12-04 8:33 ` tip-bot2 for Peter Zijlstra
2019-11-11 13:13 ` [PATCH -v5 09/17] x86/alternative: Remove text_poke_loc::len Peter Zijlstra
2019-11-15 9:43 ` [tip: core/kprobes] " tip-bot2 for Peter Zijlstra
2019-12-04 8:33 ` tip-bot2 for Peter Zijlstra
2019-11-11 13:13 ` [PATCH -v5 10/17] x86/alternative: Shrink text_poke_loc Peter Zijlstra
2019-11-15 9:43 ` [tip: core/kprobes] " tip-bot2 for Peter Zijlstra
2019-12-04 8:33 ` tip-bot2 for Peter Zijlstra
2019-11-11 13:13 ` [PATCH -v5 11/17] x86/kprobes: Convert to text-patching.h Peter Zijlstra
2019-11-19 16:56 ` [tip: core/kprobes] " tip-bot2 for Peter Zijlstra
2019-12-04 8:33 ` tip-bot2 for Peter Zijlstra
2019-11-11 13:13 ` [PATCH -v5 12/17] x86/kprobes: Fix ordering Peter Zijlstra
2019-11-13 14:31 ` Paul E. McKenney
2019-11-13 15:42 ` Mathieu Desnoyers
2019-11-14 13:53 ` Peter Zijlstra
2019-11-14 15:06 ` Mathieu Desnoyers
2019-11-14 15:13 ` Paul E. McKenney
2019-11-14 15:22 ` Mathieu Desnoyers
2019-11-14 15:28 ` Peter Zijlstra
2019-11-14 15:30 ` Mathieu Desnoyers
2019-11-14 15:42 ` Peter Zijlstra
2019-11-14 15:58 ` Peter Zijlstra
2019-11-19 16:56 ` [tip: core/kprobes] x86/kprobes: Fix ordering while text-patching tip-bot2 for Peter Zijlstra
2019-12-04 8:33 ` tip-bot2 for Peter Zijlstra
2019-11-11 13:13 ` [PATCH -v5 13/17] arm/ftrace: Use __patch_text_real() Peter Zijlstra
2019-11-11 16:47 ` Will Deacon
2019-11-11 17:19 ` Peter Zijlstra
2019-11-11 17:25 ` Peter Zijlstra
2019-11-12 11:29 ` Will Deacon
2019-11-13 9:26 ` [PATCH -v5mkII 13/17] arm/ftrace: Use __patch_text() Peter Zijlstra
2019-11-19 16:56 ` [tip: core/kprobes] " tip-bot2 for Peter Zijlstra
2019-12-04 8:33 ` tip-bot2 for Peter Zijlstra
2020-01-22 21:26 ` Dmitry Osipenko
2020-02-07 10:17 ` Peter Zijlstra
2020-02-07 10:26 ` Peter Zijlstra
2020-02-07 11:27 ` Peter Zijlstra
2020-02-07 16:47 ` Dmitry Osipenko
2020-01-08 12:22 ` [PATCH -v5mkII 13/17] " Arnd Bergmann
2020-01-08 14:16 ` Steven Rostedt
2020-01-08 14:22 ` Arnd Bergmann
2019-11-11 13:13 ` [PATCH -v5 14/17] module: Remove set_all_modules_text_*() Peter Zijlstra
2019-11-19 16:56 ` [tip: core/kprobes] " tip-bot2 for Peter Zijlstra
2019-12-04 8:33 ` tip-bot2 for Peter Zijlstra
2019-11-11 13:13 ` [PATCH -v5 15/17] ftrace: Rework event_create_dir() Peter Zijlstra
2019-11-14 14:07 ` Steven Rostedt
2019-11-19 16:56 ` [tip: core/kprobes] " tip-bot2 for Peter Zijlstra
2019-12-04 8:33 ` tip-bot2 for Peter Zijlstra
2019-11-11 13:13 ` [PATCH -v5 16/17] x86/kprobe: Add comments to arch_{,un}optimize_kprobes() Peter Zijlstra
2019-11-19 16:56 ` [tip: core/kprobes] " tip-bot2 for Peter Zijlstra
2019-12-04 8:33 ` tip-bot2 for Peter Zijlstra
2019-11-11 13:13 ` [PATCH -v5 17/17] x86/alternative: Use INT3_INSN_SIZE Peter Zijlstra
2019-11-19 16:56 ` [tip: core/kprobes] x86/alternatives: " tip-bot2 for Peter Zijlstra
2019-12-04 8:33 ` tip-bot2 for Peter Zijlstra
2019-11-11 19:47 ` [PATCH -v5 00/17] Rewrite x86/ftrace to use text_poke (and more) Alexei Starovoitov
2019-11-11 20:39 ` Peter Zijlstra
2019-11-11 20:42 ` Peter Zijlstra
2019-11-11 20:56 ` Alexei Starovoitov
2019-11-12 18:26 ` Steven Rostedt
2019-11-25 3:55 ` Masami Hiramatsu
2019-11-25 6:47 ` Masami Hiramatsu
2019-11-25 17:32 ` Steven Rostedt
2019-11-26 0:11 ` Masami Hiramatsu
2019-11-26 8:58 ` Masami Hiramatsu
2019-11-26 9:58 ` Masami Hiramatsu
2019-11-26 23:48 ` Masami Hiramatsu
[not found] ` <CAADnVQK4twuXzFhD-qLHmCVK0n1h-GDENQLu+4PVV3Hp++R6kQ@mail.gmail.com>
2019-11-27 4:32 ` Alexei Starovoitov
2019-11-27 5:01 ` Alexei Starovoitov
2019-11-27 6:41 ` Masami Hiramatsu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191111132457.529086974@infradead.org \
--to=peterz@infradead.org \
--cc=alexei.starovoitov@gmail.com \
--cc=ard.biesheuvel@linaro.org \
--cc=bristot@redhat.com \
--cc=hpa@zytor.com \
--cc=jbaron@akamai.com \
--cc=jeyu@kernel.org \
--cc=jpoimboe@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=luto@kernel.org \
--cc=mhiramat@kernel.org \
--cc=mingo@kernel.org \
--cc=namit@vmware.com \
--cc=rostedt@goodmis.org \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).