All of lore.kernel.org
 help / color / mirror / Atom feed
From: Daniel Bristot de Oliveira <bristot@redhat.com>
To: Peter Zijlstra <peterz@infradead.org>,
	linux-kernel@vger.kernel.org, x86@kernel.org
Cc: Nadav Amit <nadav.amit@gmail.com>,
	Andy Lutomirski <luto@kernel.org>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Song Liu <songliubraving@fb.com>,
	Masami Hiramatsu <mhiramat@kernel.org>,
	Steven Rostedt <rostedt@goodmis.org>
Subject: Re: [PATCH 3/3] x86/ftrace: Use text_poke()
Date: Wed, 2 Oct 2019 18:35:26 +0200	[thread overview]
Message-ID: <aaffb32f-6ca9-f9e3-9b1a-627125c563ed@redhat.com> (raw)
In-Reply-To: <20190827181147.166658077@infradead.org>

On 27/08/2019 20:06, Peter Zijlstra wrote:
> Move ftrace over to using the generic x86 text_poke functions; this
> avoids having a second/different copy of that code around.
> 
> This also avoids ftrace violating the (new) W^X rule and avoids
> fragmenting the kernel text page-tables, due to no longer having to
> toggle them RW.

I tested this patch, and it works... but it generates more IPIs than the
previous one.

> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  arch/x86/include/asm/ftrace.h |    2 
>  arch/x86/kernel/alternative.c |    4 
>  arch/x86/kernel/ftrace.c      |  630 ++++++------------------------------------
>  arch/x86/kernel/traps.c       |    9 
>  4 files changed, 93 insertions(+), 552 deletions(-)
> 
> --- a/arch/x86/include/asm/ftrace.h
> +++ b/arch/x86/include/asm/ftrace.h

[ ... jumping to the point ...]

> -
>  void ftrace_replace_code(int enable)
>  {
>  	struct ftrace_rec_iter *iter;
>  	struct dyn_ftrace *rec;
> -	const char *report = "adding breakpoints";
> -	int count = 0;
> +	const char *new, *old;
>  	int ret;
>  
>  	for_ftrace_rec_iter(iter) {
>  		rec = ftrace_rec_iter_record(iter);
>  
> -		ret = add_breakpoints(rec, enable);
> -		if (ret)
> -			goto remove_breakpoints;
> -		count++;
> -	}
> -
> -	run_sync();

ftrace was already batching the updates, for instance, causing 3 IPIs to enable
all functions. The text_poke() batching also works. But because of the limited
buffer [ see the reply to the patch 2/3 ], it is flushing the buffer during the
operation, causing more IPIs than the previous code. Using the 5.4-rc1 in a VM,
when enabling the function tracer, I see 250+ intermediate text_poke_finish()
because of a full buffer...

Would this be the case of trying to use a dynamically allocated buffer?

Thoughts?

-- Daniel

> -
> -	report = "updating code";
> -	count = 0;
> -
> -	for_ftrace_rec_iter(iter) {
> -		rec = ftrace_rec_iter_record(iter);
> -
> -		ret = add_update(rec, enable);
> -		if (ret)
> -			goto remove_breakpoints;
> -		count++;
> +		switch (ftrace_test_record(rec, enable)) {
> +		case FTRACE_UPDATE_IGNORE:
> +		default:
> +			continue;
> +
> +		case FTRACE_UPDATE_MAKE_CALL:
> +			old = ftrace_nop_replace();
> +			break;
> +
> +		case FTRACE_UPDATE_MODIFY_CALL:
> +		case FTRACE_UPDATE_MAKE_NOP:
> +			old = ftrace_call_replace(rec->ip, ftrace_get_addr_curr(rec));
> +			break;
> +		};
> +
> +		ret = ftrace_verify_code(rec->ip, old);
> +		if (ret) {
> +			ftrace_bug(ret, rec);
> +			return;
> +		}
>  	}
>  
> -	run_sync();
> -
> -	report = "removing breakpoints";
> -	count = 0;
> -
>  	for_ftrace_rec_iter(iter) {
>  		rec = ftrace_rec_iter_record(iter);
>  
> -		ret = finish_update(rec, enable);
> -		if (ret)
> -			goto remove_breakpoints;
> -		count++;
> -	}
> -
> -	run_sync();
> -
> -	return;
> +		switch (ftrace_test_record(rec, enable)) {
> +		case FTRACE_UPDATE_IGNORE:
> +		default:
> +			continue;
> +
> +		case FTRACE_UPDATE_MAKE_CALL:
> +		case FTRACE_UPDATE_MODIFY_CALL:
> +			new = ftrace_call_replace(rec->ip, ftrace_get_addr_new(rec));
> +			break;
> +
> +		case FTRACE_UPDATE_MAKE_NOP:
> +			new = ftrace_nop_replace();
> +			break;
> +		};
>  
> - remove_breakpoints:
> -	pr_warn("Failed on %s (%d):\n", report, count);
> -	ftrace_bug(ret, rec);
> -	for_ftrace_rec_iter(iter) {
> -		rec = ftrace_rec_iter_record(iter);
> -		/*
> -		 * Breakpoints are handled only when this function is in
> -		 * progress. The system could not work with them.
> -		 */
> -		if (remove_breakpoint(rec))
> -			BUG();
> +		text_poke_queue((void *)rec->ip, new, MCOUNT_INSN_SIZE, NULL);
> +		ftrace_update_record(rec, enable);
>  	}
> -	run_sync();
> -}
> -
> -static int
> -ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
> -		   unsigned const char *new_code)
> -{
> -	int ret;
> -
> -	ret = add_break(ip, old_code);
> -	if (ret)
> -		goto out;
> -
> -	run_sync();
> -
> -	ret = add_update_code(ip, new_code);
> -	if (ret)
> -		goto fail_update;
> -
> -	run_sync();
> -
> -	ret = ftrace_write(ip, new_code, 1);
> -	/*
> -	 * The breakpoint is handled only when this function is in progress.
> -	 * The system could not work if we could not remove it.
> -	 */
> -	BUG_ON(ret);
> - out:
> -	run_sync();
> -	return ret;
> -
> - fail_update:
> -	/* Also here the system could not work with the breakpoint */
> -	if (ftrace_write(ip, old_code, 1))
> -		BUG();
> -	goto out;
> +	text_poke_finish();
>  }
>  
>  void arch_ftrace_update_code(int command)
>  {
> -	/* See comment above by declaration of modifying_ftrace_code */
> -	atomic_inc(&modifying_ftrace_code);
> -
>  	ftrace_modify_all_code(command);
> -
> -	atomic_dec(&modifying_ftrace_code);
>  }
>  
>  int __init ftrace_dyn_arch_init(void)
> @@ -828,11 +394,7 @@ create_trampoline(struct ftrace_ops *ops
>  
>  	set_vm_flush_reset_perms(trampoline);
>  
> -	/*
> -	 * Module allocation needs to be completed by making the page
> -	 * executable. The page is still writable, which is a security hazard,
> -	 * but anyhow ftrace breaks W^X completely.
> -	 */
> +	set_memory_ro((unsigned long)trampoline, npages);
>  	set_memory_x((unsigned long)trampoline, npages);
>  	return (unsigned long)trampoline;
>  fail:
> @@ -859,11 +421,10 @@ static unsigned long calc_trampoline_cal
>  void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
>  {
>  	ftrace_func_t func;
> -	unsigned char *new;
>  	unsigned long offset;
>  	unsigned long ip;
>  	unsigned int size;
> -	int ret, npages;
> +	const char *new;
>  
>  	if (ops->trampoline) {
>  		/*
> @@ -872,14 +433,11 @@ void arch_ftrace_update_trampoline(struc
>  		 */
>  		if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
>  			return;
> -		npages = PAGE_ALIGN(ops->trampoline_size) >> PAGE_SHIFT;
> -		set_memory_rw(ops->trampoline, npages);
>  	} else {
>  		ops->trampoline = create_trampoline(ops, &size);
>  		if (!ops->trampoline)
>  			return;
>  		ops->trampoline_size = size;
> -		npages = PAGE_ALIGN(size) >> PAGE_SHIFT;
>  	}
>  
>  	offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
> @@ -887,15 +445,11 @@ void arch_ftrace_update_trampoline(struc
>  
>  	func = ftrace_ops_get_func(ops);
>  
> -	ftrace_update_func_call = (unsigned long)func;
> -
> +	mutex_lock(&text_mutex);
>  	/* Do a safe modify in case the trampoline is executing */
>  	new = ftrace_call_replace(ip, (unsigned long)func);
> -	ret = update_ftrace_func(ip, new);
> -	set_memory_ro(ops->trampoline, npages);
> -
> -	/* The update should never fail */
> -	WARN_ON(ret);
> +	text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
> +	mutex_unlock(&text_mutex);
>  }
>  
>  /* Return the address of the function the trampoline calls */
> @@ -981,19 +535,18 @@ void arch_ftrace_trampoline_free(struct
>  #ifdef CONFIG_DYNAMIC_FTRACE
>  extern void ftrace_graph_call(void);
>  
> -static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
> +static const char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
>  {
> -	return ftrace_text_replace(0xe9, ip, addr);
> +	return ftrace_text_replace(JMP32_INSN_OPCODE, ip, addr);
>  }
>  
>  static int ftrace_mod_jmp(unsigned long ip, void *func)
>  {
> -	unsigned char *new;
> +	const char *new;
>  
> -	ftrace_update_func_call = 0UL;
>  	new = ftrace_jmp_replace(ip, (unsigned long)func);
> -
> -	return update_ftrace_func(ip, new);
> +	text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL); // BATCH
> +	return 0;
>  }
>  
>  int ftrace_enable_ftrace_graph_caller(void)
> @@ -1019,10 +572,9 @@ int ftrace_disable_ftrace_graph_caller(v
>  void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
>  			   unsigned long frame_pointer)
>  {
> +	unsigned long return_hooker = (unsigned long)&return_to_handler;
>  	unsigned long old;
>  	int faulted;
> -	unsigned long return_hooker = (unsigned long)
> -				&return_to_handler;
>  
>  	/*
>  	 * When resuming from suspend-to-ram, this function can be indirectly
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -568,15 +568,6 @@ NOKPROBE_SYMBOL(do_general_protection);
>  
>  dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
>  {
> -#ifdef CONFIG_DYNAMIC_FTRACE
> -	/*
> -	 * ftrace must be first, everything else may cause a recursive crash.
> -	 * See note by declaration of modifying_ftrace_code in ftrace.c
> -	 */
> -	if (unlikely(atomic_read(&modifying_ftrace_code)) &&
> -	    ftrace_int3_handler(regs))
> -		return;
> -#endif
>  	if (poke_int3_handler(regs))
>  		return;
>  
> 
> 


  reply	other threads:[~2019-10-02 16:35 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-27 18:06 [PATCH 0/3] Rewrite x86/ftrace to use text_poke() Peter Zijlstra
2019-08-27 18:06 ` [PATCH 1/3] x86/alternatives: Teach text_poke_bp() to emulate instructions Peter Zijlstra
2019-10-03  5:00   ` Masami Hiramatsu
2019-10-03  8:27     ` Peter Zijlstra
2019-10-03 11:01       ` Peter Zijlstra
2019-10-03 12:32         ` Peter Zijlstra
2019-10-04 13:45         ` Masami Hiramatsu
2019-10-07  8:05           ` Peter Zijlstra
2019-10-09 13:07           ` x86/kprobes bug? (was: [PATCH 1/3] x86/alternatives: Teach text_poke_bp() to emulate instructions) Peter Zijlstra
2019-10-09 13:26             ` Peter Zijlstra
2019-10-09 13:28               ` Peter Zijlstra
2019-10-09 14:26             ` Mathieu Desnoyers
2019-10-17 19:59               ` Peter Zijlstra
2019-10-03 13:05       ` [PATCH 1/3] x86/alternatives: Teach text_poke_bp() to emulate instructions Peter Zijlstra
2019-08-27 18:06 ` [PATCH 2/3] x86/alternatives,jump_label: Provide better text_poke() batching interface Peter Zijlstra
2019-10-02 16:34   ` Daniel Bristot de Oliveira
2019-10-03  5:50   ` Masami Hiramatsu
2019-08-27 18:06 ` [PATCH 3/3] x86/ftrace: Use text_poke() Peter Zijlstra
2019-10-02 16:35   ` Daniel Bristot de Oliveira [this message]
2019-10-02 18:21     ` Peter Zijlstra
2019-10-03 22:10       ` Steven Rostedt
2019-10-04  8:10         ` Daniel Bristot de Oliveira
2019-10-04 13:40           ` Steven Rostedt
2019-10-04 14:44             ` Daniel Bristot de Oliveira
2019-10-04 15:13               ` Steven Rostedt
2019-10-07  8:08           ` Peter Zijlstra
2019-10-11  7:01           ` Peter Zijlstra
2019-10-11  7:37             ` Daniel Bristot de Oliveira
2019-10-11 10:57               ` Peter Zijlstra
2019-10-11 13:11               ` Steven Rostedt
2019-10-04 11:22         ` Peter Zijlstra
2019-10-04 13:42           ` Steven Rostedt
2019-10-22  0:36             ` Alexei Starovoitov
2019-10-22  0:43               ` Steven Rostedt
2019-10-22  3:10                 ` Alexei Starovoitov
2019-10-22  3:16                   ` Steven Rostedt
2019-10-22  3:19                     ` Steven Rostedt
2019-10-22  4:05                       ` Alexei Starovoitov
2019-10-22 11:19                         ` Steven Rostedt
2019-10-22 13:44                           ` Steven Rostedt
2019-10-22 17:50                             ` Alexei Starovoitov
2019-10-22 18:10                               ` Steven Rostedt
2019-10-22 20:46                                 ` Alexei Starovoitov
2019-10-22 21:04                                   ` Steven Rostedt
2019-10-22 21:58                                     ` Alexei Starovoitov
2019-10-22 22:17                                       ` Steven Rostedt
2019-10-23  2:02                                         ` Steven Rostedt
2019-10-22 22:45                                       ` Andy Lutomirski
2019-10-22 23:21                                         ` Steven Rostedt
2019-10-22 23:49                                         ` Alexei Starovoitov
2019-10-23  4:20                                           ` Andy Lutomirski
2019-10-23  9:02                                             ` Peter Zijlstra
2019-10-23 16:23                                       ` Steven Rostedt
2019-10-23 17:42                                         ` Steven Rostedt
2019-10-23 19:34                                         ` Alexei Starovoitov
2019-10-23 20:08                                           ` Steven Rostedt
2019-10-23 22:36                                             ` Alexei Starovoitov
2019-10-22  3:55                     ` Alexei Starovoitov
2019-10-03  5:52     ` Masami Hiramatsu
2019-08-28  7:22 ` [PATCH 0/3] Rewrite x86/ftrace to use text_poke() Song Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aaffb32f-6ca9-f9e3-9b1a-627125c563ed@redhat.com \
    --to=bristot@redhat.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mhiramat@kernel.org \
    --cc=nadav.amit@gmail.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=songliubraving@fb.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.