From: Masami Hiramatsu <mhiramat@kernel.org>
To: Peter Zijlstra <peterz@infradead.org>
Cc: linux-kernel@vger.kernel.org, x86@kernel.org,
	Nadav Amit <nadav.amit@gmail.com>,
	Andy Lutomirski <luto@kernel.org>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Song Liu <songliubraving@fb.com>,
	Masami Hiramatsu <mhiramat@kernel.org>,
	Steven Rostedt <rostedt@goodmis.org>,
	Daniel Bristot de Oliveira <bristot@redhat.com>
Subject: Re: [PATCH 1/3] x86/alternatives: Teach text_poke_bp() to emulate instructions
Date: Thu, 3 Oct 2019 14:00:50 +0900	[thread overview]
Message-ID: <20191003140050.1d4cf59d3de8b5396d36c269@kernel.org> (raw)
In-Reply-To: <20190827181147.053490768@infradead.org>

Hi Peter,

On Tue, 27 Aug 2019 20:06:23 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> In preparation for static_call and variable size jump_label support,
> teach text_poke_bp() to emulate instructions, namely:
> 
>   JMP32, JMP8, CALL, NOP2, NOP_ATOMIC5
> 
> The current text_poke_bp() takes a @handler argument which is used as
> a jump target when the temporary INT3 is hit by a different CPU.
> 
> When patching CALL instructions, this doesn't work because we'd miss
> the PUSH of the return address. Instead, teach poke_int3_handler() to
> emulate an instruction, typically the instruction we're patching in.
> 
> This fits almost all text_poke_bp() users, except
> arch_unoptimize_kprobe() which restores random text, and for that site
> we have to build an explicit emulate instruction.

OK, and in that case I would like to change RELATIVEJUMP_OPCODE to
JMP32_INSN_OPCODE for readability (or at least make RELATIVEJUMP_OPCODE
an alias of JMP32_INSN_OPCODE).
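Something like the following minimal sketch (untested), wherever
RELATIVEJUMP_OPCODE is currently defined:

	/* keep the old kprobes name as an alias of the new define */
	#define RELATIVEJUMP_OPCODE	JMP32_INSN_OPCODE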

Except for that, this looks good to me.

Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>

Thank you,

> 
> Cc: Masami Hiramatsu <mhiramat@kernel.org>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  arch/x86/include/asm/text-patching.h |   24 ++++++--
>  arch/x86/kernel/alternative.c        |   98 ++++++++++++++++++++++++++---------
>  arch/x86/kernel/jump_label.c         |    9 +--
>  arch/x86/kernel/kprobes/opt.c        |   11 ++-
>  4 files changed, 103 insertions(+), 39 deletions(-)
> 
> --- a/arch/x86/include/asm/text-patching.h
> +++ b/arch/x86/include/asm/text-patching.h
> @@ -26,10 +26,11 @@ static inline void apply_paravirt(struct
>  #define POKE_MAX_OPCODE_SIZE	5
>  
>  struct text_poke_loc {
> -	void *detour;
>  	void *addr;
> -	size_t len;
> -	const char opcode[POKE_MAX_OPCODE_SIZE];
> +	int len;
> +	s32 rel32;
> +	u8 opcode;
> +	const char text[POKE_MAX_OPCODE_SIZE];
>  };
>  
>  extern void text_poke_early(void *addr, const void *opcode, size_t len);
> @@ -51,8 +52,10 @@ extern void text_poke_early(void *addr,
>  extern void *text_poke(void *addr, const void *opcode, size_t len);
>  extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
>  extern int poke_int3_handler(struct pt_regs *regs);
> -extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
> +extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
>  extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
> +extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
> +			       const void *opcode, size_t len, const void *emulate);
>  extern int after_bootmem;
>  extern __ro_after_init struct mm_struct *poking_mm;
>  extern __ro_after_init unsigned long poking_addr;
> @@ -63,8 +66,17 @@ static inline void int3_emulate_jmp(stru
>  	regs->ip = ip;
>  }
>  
> -#define INT3_INSN_SIZE 1
> -#define CALL_INSN_SIZE 5
> +#define INT3_INSN_SIZE		1
> +#define INT3_INSN_OPCODE	0xCC
> +
> +#define CALL_INSN_SIZE		5
> +#define CALL_INSN_OPCODE	0xE8
> +
> +#define JMP32_INSN_SIZE		5
> +#define JMP32_INSN_OPCODE	0xE9
> +
> +#define JMP8_INSN_SIZE		2
> +#define JMP8_INSN_OPCODE	0xEB
>  
>  static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
>  {
> --- a/arch/x86/kernel/alternative.c
> +++ b/arch/x86/kernel/alternative.c
> @@ -956,16 +956,15 @@ NOKPROBE_SYMBOL(patch_cmp);
>  int poke_int3_handler(struct pt_regs *regs)
>  {
>  	struct text_poke_loc *tp;
> -	unsigned char int3 = 0xcc;
>  	void *ip;
>  
>  	/*
>  	 * Having observed our INT3 instruction, we now must observe
>  	 * bp_patching.nr_entries.
>  	 *
> -	 * 	nr_entries != 0			INT3
> -	 * 	WMB				RMB
> -	 * 	write INT3			if (nr_entries)
> +	 *	nr_entries != 0			INT3
> +	 *	WMB				RMB
> +	 *	write INT3			if (nr_entries)
>  	 *
>  	 * Idem for other elements in bp_patching.
>  	 */
> @@ -978,9 +977,9 @@ int poke_int3_handler(struct pt_regs *re
>  		return 0;
>  
>  	/*
> -	 * Discount the sizeof(int3). See text_poke_bp_batch().
> +	 * Discount the INT3. See text_poke_bp_batch().
>  	 */
> -	ip = (void *) regs->ip - sizeof(int3);
> +	ip = (void *) regs->ip - INT3_INSN_SIZE;
>  
>  	/*
>  	 * Skip the binary search if there is a single member in the vector.
> @@ -997,8 +996,22 @@ int poke_int3_handler(struct pt_regs *re
>  			return 0;
>  	}
>  
> -	/* set up the specified breakpoint detour */
> -	regs->ip = (unsigned long) tp->detour;
> +	ip += tp->len;
> +
> +	switch (tp->opcode) {
> +	case CALL_INSN_OPCODE:
> +		int3_emulate_call(regs, (long)ip + tp->rel32);
> +		break;
> +
> +	case JMP32_INSN_OPCODE:
> +	case JMP8_INSN_OPCODE:
> +		int3_emulate_jmp(regs, (long)ip + tp->rel32);
> +		break;
> +
> +	default: /* nop */
> +		int3_emulate_jmp(regs, (long)ip);
> +		break;
> +	}
>  
>  	return 1;
>  }
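Nice. Just to write down my understanding of the CALL case: I assume
int3_emulate_call() (not visible in this hunk) recovers the PUSH of the
return address mentioned in the changelog, roughly like:

	static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func)
	{
		/* push the address right after the emulated CALL ... */
		int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
		/* ... and transfer control to the call target */
		int3_emulate_jmp(regs, func);
	}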
> @@ -1027,8 +1040,8 @@ NOKPROBE_SYMBOL(poke_int3_handler);
>   */
>  void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
>  {
> +	unsigned char int3 = INT3_INSN_OPCODE;
>  	int patched_all_but_first = 0;
> -	unsigned char int3 = 0xcc;
>  	unsigned int i;
>  
>  	lockdep_assert_held(&text_mutex);
> @@ -1056,7 +1069,7 @@ void text_poke_bp_batch(struct text_poke
>  	for (i = 0; i < nr_entries; i++) {
>  		if (tp[i].len - sizeof(int3) > 0) {
>  			text_poke((char *)tp[i].addr + sizeof(int3),
> -				  (const char *)tp[i].opcode + sizeof(int3),
> +				  (const char *)tp[i].text + sizeof(int3),
>  				  tp[i].len - sizeof(int3));
>  			patched_all_but_first++;
>  		}
> @@ -1076,7 +1089,7 @@ void text_poke_bp_batch(struct text_poke
>  	 * replacing opcode.
>  	 */
>  	for (i = 0; i < nr_entries; i++)
> -		text_poke(tp[i].addr, tp[i].opcode, sizeof(int3));
> +		text_poke(tp[i].addr, tp[i].text, sizeof(int3));
>  
>  	on_each_cpu(do_sync_core, NULL, 1);
>  	/*
> @@ -1087,6 +1100,53 @@ void text_poke_bp_batch(struct text_poke
>  	bp_patching.nr_entries = 0;
>  }
>  
> +void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
> +			const void *opcode, size_t len, const void *emulate)
> +{
> +	struct insn insn;
> +
> +	if (!opcode)
> +		opcode = (void *)tp->text;
> +	else
> +		memcpy((void *)tp->text, opcode, len);
> +
> +	if (!emulate)
> +		emulate = opcode;
> +
> +	kernel_insn_init(&insn, emulate, MAX_INSN_SIZE);
> +	insn_get_length(&insn);
> +
> +	BUG_ON(!insn_complete(&insn));
> +	BUG_ON(len != insn.length);
> +
> +	tp->addr = addr;
> +	tp->len = len;
> +	tp->opcode = insn.opcode.bytes[0];
> +
> +	switch (tp->opcode) {
> +	case CALL_INSN_OPCODE:
> +	case JMP32_INSN_OPCODE:
> +	case JMP8_INSN_OPCODE:
> +		tp->rel32 = insn.immediate.value;
> +		break;
> +
> +	default: /* assume NOP */
> +		switch (len) {
> +		case 2:
> +			BUG_ON(memcmp(emulate, ideal_nops[len], len));
> +			break;
> +
> +		case 5:
> +			BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], len));
> +			break;
> +
> +		default:
> +			BUG();
> +		}
> +		break;
> +	}
> +}
> +
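And a batch user would then look roughly like this (just a sketch; the
array and count names are made up):

	struct text_poke_loc vec[NR_SITES];
	int i;

	for (i = 0; i < NR_SITES; i++)
		text_poke_loc_init(&vec[i], site[i], new_insn[i],
				   insn_len[i], NULL);

	text_poke_bp_batch(vec, NR_SITES);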
>  /**
>   * text_poke_bp() -- update instructions on live kernel on SMP
>   * @addr:	address to patch
> @@ -1098,20 +1158,10 @@ void text_poke_bp_batch(struct text_poke
>   * dynamically allocated memory. This function should be used when it is
>   * not possible to allocate memory.
>   */
> -void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
> +void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
>  {
> -	struct text_poke_loc tp = {
> -		.detour = handler,
> -		.addr = addr,
> -		.len = len,
> -	};
> -
> -	if (len > POKE_MAX_OPCODE_SIZE) {
> -		WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE);
> -		return;
> -	}
> -
> -	memcpy((void *)tp.opcode, opcode, len);
> +	struct text_poke_loc tp;
>  
> +	text_poke_loc_init(&tp, addr, opcode, len, emulate);
>  	text_poke_bp_batch(&tp, 1);
>  }
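With the NULL-@emulate convention above, a caller that pokes in the
final instruction itself can now simply do something like this (a
sketch only; "site" and "target" are made-up names):

	u8 insn[CALL_INSN_SIZE];

	/* build "call target" encoded relative to the patch site */
	insn[0] = CALL_INSN_OPCODE;
	*(s32 *)&insn[1] = (s32)((long)target - ((long)site + CALL_INSN_SIZE));

	/* emulate == NULL: emulate the instruction being written */
	text_poke_bp(site, insn, CALL_INSN_SIZE, NULL);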
> --- a/arch/x86/kernel/jump_label.c
> +++ b/arch/x86/kernel/jump_label.c
> @@ -89,8 +89,7 @@ static void __ref __jump_label_transform
>  		return;
>  	}
>  
> -	text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE,
> -		     (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
> +	text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, NULL);
>  }
>  
>  void arch_jump_label_transform(struct jump_entry *entry,
> @@ -147,11 +146,9 @@ bool arch_jump_label_transform_queue(str
>  	}
>  
>  	__jump_label_set_jump_code(entry, type,
> -				   (union jump_code_union *) &tp->opcode, 0);
> +				   (union jump_code_union *)&tp->text, 0);
>  
> -	tp->addr = entry_code;
> -	tp->detour = entry_code + JUMP_LABEL_NOP_SIZE;
> -	tp->len = JUMP_LABEL_NOP_SIZE;
> +	text_poke_loc_init(tp, entry_code, NULL, JUMP_LABEL_NOP_SIZE, NULL);
>  
>  	tp_vec_nr++;
>  
> --- a/arch/x86/kernel/kprobes/opt.c
> +++ b/arch/x86/kernel/kprobes/opt.c
> @@ -437,8 +437,7 @@ void arch_optimize_kprobes(struct list_h
>  		insn_buff[0] = RELATIVEJUMP_OPCODE;
>  		*(s32 *)(&insn_buff[1]) = rel;
>  
> -		text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
> -			     op->optinsn.insn);
> +		text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, NULL);
>  
>  		list_del_init(&op->list);
>  	}
> @@ -448,12 +447,18 @@ void arch_optimize_kprobes(struct list_h
>  void arch_unoptimize_kprobe(struct optimized_kprobe *op)
>  {
>  	u8 insn_buff[RELATIVEJUMP_SIZE];
> +	u8 emulate_buff[RELATIVEJUMP_SIZE];
>  
>  	/* Set int3 to first byte for kprobes */
>  	insn_buff[0] = BREAKPOINT_INSTRUCTION;
>  	memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
> +
> +	emulate_buff[0] = RELATIVEJUMP_OPCODE;
> +	*(s32 *)(&emulate_buff[1]) = (s32)((long)op->optinsn.insn -
> +			((long)op->kp.addr + RELATIVEJUMP_SIZE));
> +
>  	text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
> -		     op->optinsn.insn);
> +		     emulate_buff);
>  }
>  
>  /*
> 
> 


-- 
Masami Hiramatsu <mhiramat@kernel.org>


Thread overview: 61+ messages
2019-08-27 18:06 [PATCH 0/3] Rewrite x86/ftrace to use text_poke() Peter Zijlstra
2019-08-27 18:06 ` [PATCH 1/3] x86/alternatives: Teach text_poke_bp() to emulate instructions Peter Zijlstra
2019-10-03  5:00   ` Masami Hiramatsu [this message]
2019-10-03  8:27     ` Peter Zijlstra
2019-10-03 11:01       ` Peter Zijlstra
2019-10-03 12:32         ` Peter Zijlstra
2019-10-04 13:45         ` Masami Hiramatsu
2019-10-07  8:05           ` Peter Zijlstra
2019-10-09 13:07           ` x86/kprobes bug? (was: [PATCH 1/3] x86/alternatives: Teach text_poke_bp() to emulate instructions) Peter Zijlstra
2019-10-09 13:26             ` Peter Zijlstra
2019-10-09 13:28               ` Peter Zijlstra
2019-10-09 14:26             ` Mathieu Desnoyers
2019-10-17 19:59               ` Peter Zijlstra
2019-10-03 13:05       ` [PATCH 1/3] x86/alternatives: Teach text_poke_bp() to emulate instructions Peter Zijlstra
2019-08-27 18:06 ` [PATCH 2/3] x86/alternatives,jump_label: Provide better text_poke() batching interface Peter Zijlstra
2019-10-02 16:34   ` Daniel Bristot de Oliveira
2019-10-03  5:50   ` Masami Hiramatsu
2019-08-27 18:06 ` [PATCH 3/3] x86/ftrace: Use text_poke() Peter Zijlstra
2019-10-02 16:35   ` Daniel Bristot de Oliveira
2019-10-02 18:21     ` Peter Zijlstra
2019-10-03 22:10       ` Steven Rostedt
2019-10-04  8:10         ` Daniel Bristot de Oliveira
2019-10-04 13:40           ` Steven Rostedt
2019-10-04 14:44             ` Daniel Bristot de Oliveira
2019-10-04 15:13               ` Steven Rostedt
2019-10-07  8:08           ` Peter Zijlstra
2019-10-11  7:01           ` Peter Zijlstra
2019-10-11  7:37             ` Daniel Bristot de Oliveira
2019-10-11 10:57               ` Peter Zijlstra
2019-10-11 13:11               ` Steven Rostedt
2019-10-04 11:22         ` Peter Zijlstra
2019-10-04 13:42           ` Steven Rostedt
2019-10-22  0:36             ` Alexei Starovoitov
2019-10-22  0:43               ` Steven Rostedt
2019-10-22  3:10                 ` Alexei Starovoitov
2019-10-22  3:16                   ` Steven Rostedt
2019-10-22  3:19                     ` Steven Rostedt
2019-10-22  4:05                       ` Alexei Starovoitov
2019-10-22 11:19                         ` Steven Rostedt
2019-10-22 13:44                           ` Steven Rostedt
2019-10-22 17:50                             ` Alexei Starovoitov
2019-10-22 18:10                               ` Steven Rostedt
2019-10-22 20:46                                 ` Alexei Starovoitov
2019-10-22 21:04                                   ` Steven Rostedt
2019-10-22 21:58                                     ` Alexei Starovoitov
2019-10-22 22:17                                       ` Steven Rostedt
2019-10-23  2:02                                         ` Steven Rostedt
2019-10-22 22:45                                       ` Andy Lutomirski
2019-10-22 23:21                                         ` Steven Rostedt
2019-10-22 23:49                                         ` Alexei Starovoitov
2019-10-23  4:20                                           ` Andy Lutomirski
2019-10-23  9:02                                             ` Peter Zijlstra
2019-10-23 16:23                                       ` Steven Rostedt
2019-10-23 17:42                                         ` Steven Rostedt
2019-10-23 19:34                                         ` Alexei Starovoitov
2019-10-23 20:08                                           ` Steven Rostedt
2019-10-23 22:36                                             ` Alexei Starovoitov
2019-10-22  3:55                     ` Alexei Starovoitov
2019-10-03  5:52     ` Masami Hiramatsu
2019-08-28  7:22 ` [PATCH 0/3] Rewrite x86/ftrace to use text_poke() Song Liu