All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexandre Chartre <alexandre.chartre@oracle.com>
To: Thomas Gleixner <tglx@linutronix.de>,
	LKML <linux-kernel@vger.kernel.org>
Cc: x86@kernel.org, "Paul E. McKenney" <paulmck@kernel.org>,
	Andy Lutomirski <luto@kernel.org>,
	Frederic Weisbecker <frederic@kernel.org>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Sean Christopherson <sean.j.christopherson@intel.com>,
	Masami Hiramatsu <mhiramat@kernel.org>,
	Petr Mladek <pmladek@suse.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Joel Fernandes <joel@joelfernandes.org>,
	Boris Ostrovsky <boris.ostrovsky@oracle.com>,
	Juergen Gross <jgross@suse.com>, Brian Gerst <brgerst@gmail.com>,
	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
	Josh Poimboeuf <jpoimboe@redhat.com>,
	Will Deacon <will@kernel.org>
Subject: Re: [patch V4 part 2 04/18] x86/entry/common: Protect against instrumentation
Date: Thu, 7 May 2020 15:39:20 +0200	[thread overview]
Message-ID: <87976f9b-2c36-b4f7-5382-4eba569cc687@oracle.com> (raw)
In-Reply-To: <20200505134340.520277507@linutronix.de>


On 5/5/20 3:41 PM, Thomas Gleixner wrote:
> Mark the various syscall entries with noinstr to protect them against
> instrumentation and add the noinstr_begin()/end() annotations to mark the
> parts of the functions which are safe to call out into instrumentable code.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>   arch/x86/entry/common.c |  135 ++++++++++++++++++++++++++++++++----------------
>   1 file changed, 90 insertions(+), 45 deletions(-)
> 
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -41,15 +41,26 @@
>   
>   #ifdef CONFIG_CONTEXT_TRACKING
>   /* Called on entry from user mode with IRQs off. */
> -__visible inline noinstr void enter_from_user_mode(void)
> +__visible noinstr void enter_from_user_mode(void)
>   {
> -	CT_WARN_ON(ct_state() != CONTEXT_USER);
> +	enum ctx_state state = ct_state();
> +
>   	user_exit_irqoff();
> +
> +	instr_begin();
> +	CT_WARN_ON(state != CONTEXT_USER);
> +	instr_end();
>   }
>   #else
>   static inline void enter_from_user_mode(void) {}
>   #endif
>   
> +static noinstr void exit_to_user_mode(void)
> +{
> +	user_enter_irqoff();
> +	mds_user_clear_cpu_buffers();
> +}
> +
>   static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
>   {
>   #ifdef CONFIG_X86_64
> @@ -179,8 +190,7 @@ static void exit_to_usermode_loop(struct
>   	}
>   }
>   
> -/* Called with IRQs disabled. */
> -__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
> +static void __prepare_exit_to_usermode(struct pt_regs *regs)
>   {
>   	struct thread_info *ti = current_thread_info();
>   	u32 cached_flags;
> @@ -219,10 +229,14 @@ static void exit_to_usermode_loop(struct
>   	 */
>   	ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
>   #endif
> +}
>   
> -	user_enter_irqoff();
> -
> -	mds_user_clear_cpu_buffers();
> +__visible noinstr void prepare_exit_to_usermode(struct pt_regs *regs)
> +{
> +	instr_begin();
> +	__prepare_exit_to_usermode(regs);
> +	instr_end();
> +	exit_to_user_mode();
>   }
>   
>   #define SYSCALL_EXIT_WORK_FLAGS				\
> @@ -251,11 +265,7 @@ static void syscall_slow_exit_work(struc
>   		tracehook_report_syscall_exit(regs, step);
>   }
>   
> -/*
> - * Called with IRQs on and fully valid regs.  Returns with IRQs off in a
> - * state such that we can immediately switch to user mode.
> - */
> -__visible inline void syscall_return_slowpath(struct pt_regs *regs)
> +static void __syscall_return_slowpath(struct pt_regs *regs)
>   {
>   	struct thread_info *ti = current_thread_info();
>   	u32 cached_flags = READ_ONCE(ti->flags);
> @@ -276,15 +286,29 @@ static void syscall_slow_exit_work(struc
>   		syscall_slow_exit_work(regs, cached_flags);
>   
>   	local_irq_disable();
> -	prepare_exit_to_usermode(regs);
> +	__prepare_exit_to_usermode(regs);
> +}
> +
> +/*
> + * Called with IRQs on and fully valid regs.  Returns with IRQs off in a
> + * state such that we can immediately switch to user mode.
> + */
> +__visible noinstr void syscall_return_slowpath(struct pt_regs *regs)
> +{
> +	instr_begin();
> +	__syscall_return_slowpath(regs);
> +	instr_end();
> +	exit_to_user_mode();
>   }
>   
>   #ifdef CONFIG_X86_64
> -__visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
> +__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
>   {
>   	struct thread_info *ti;
>   
>   	enter_from_user_mode();
> +	instr_begin();
> +
>   	local_irq_enable();
>   	ti = current_thread_info();
>   	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
> @@ -301,8 +325,10 @@ static void syscall_slow_exit_work(struc
>   		regs->ax = x32_sys_call_table[nr](regs);
>   #endif
>   	}
> +	__syscall_return_slowpath(regs);
>   
> -	syscall_return_slowpath(regs);
> +	instr_end();
> +	exit_to_user_mode();
>   }
>   #endif
>   
> @@ -310,10 +336,10 @@ static void syscall_slow_exit_work(struc
>   /*
>    * Does a 32-bit syscall.  Called with IRQs on in CONTEXT_KERNEL.  Does
>    * all entry and exit work and returns with IRQs off.  This function is
> - * extremely hot in workloads that use it, and it's usually called from
> + * ex2tremely hot in workloads that use it, and it's usually called from

typo: "ex2tremely"

alex.


>    * do_fast_syscall_32, so forcibly inline it to improve performance.
>    */
> -static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
> +static void do_syscall_32_irqs_on(struct pt_regs *regs)
>   {
>   	struct thread_info *ti = current_thread_info();
>   	unsigned int nr = (unsigned int)regs->orig_ax;
> @@ -337,27 +363,62 @@ static __always_inline void do_syscall_3
>   		regs->ax = ia32_sys_call_table[nr](regs);
>   	}
>   
> -	syscall_return_slowpath(regs);
> +	__syscall_return_slowpath(regs);
>   }
>   
>   /* Handles int $0x80 */
> -__visible void do_int80_syscall_32(struct pt_regs *regs)
> +__visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
>   {
>   	enter_from_user_mode();
> +	instr_begin();
> +
>   	local_irq_enable();
>   	do_syscall_32_irqs_on(regs);
> +
> +	instr_end();
> +	exit_to_user_mode();
> +}
> +
> +static bool __do_fast_syscall_32(struct pt_regs *regs)
> +{
> +	int res;
> +
> +	/* Fetch EBP from where the vDSO stashed it. */
> +	if (IS_ENABLED(CONFIG_X86_64)) {
> +		/*
> +		 * Micro-optimization: the pointer we're following is
> +		 * explicitly 32 bits, so it can't be out of range.
> +		 */
> +		res = __get_user(*(u32 *)&regs->bp,
> +			 (u32 __user __force *)(unsigned long)(u32)regs->sp);
> +	} else {
> +		res = get_user(*(u32 *)&regs->bp,
> +		       (u32 __user __force *)(unsigned long)(u32)regs->sp);
> +	}
> +
> +	if (res) {
> +		/* User code screwed up. */
> +		regs->ax = -EFAULT;
> +		local_irq_disable();
> +		__prepare_exit_to_usermode(regs);
> +		return false;
> +	}
> +
> +	/* Now this is just like a normal syscall. */
> +	do_syscall_32_irqs_on(regs);
> +	return true;
>   }
>   
>   /* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
> -__visible long do_fast_syscall_32(struct pt_regs *regs)
> +__visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
>   {
>   	/*
>   	 * Called using the internal vDSO SYSENTER/SYSCALL32 calling
>   	 * convention.  Adjust regs so it looks like we entered using int80.
>   	 */
> -
>   	unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
> -		vdso_image_32.sym_int80_landing_pad;
> +					vdso_image_32.sym_int80_landing_pad;
> +	bool success;
>   
>   	/*
>   	 * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
> @@ -367,33 +428,17 @@ static __always_inline void do_syscall_3
>   	regs->ip = landing_pad;
>   
>   	enter_from_user_mode();
> +	instr_begin();
>   
>   	local_irq_enable();
> +	success = __do_fast_syscall_32(regs);
>   
> -	/* Fetch EBP from where the vDSO stashed it. */
> -	if (
> -#ifdef CONFIG_X86_64
> -		/*
> -		 * Micro-optimization: the pointer we're following is explicitly
> -		 * 32 bits, so it can't be out of range.
> -		 */
> -		__get_user(*(u32 *)&regs->bp,
> -			    (u32 __user __force *)(unsigned long)(u32)regs->sp)
> -#else
> -		get_user(*(u32 *)&regs->bp,
> -			 (u32 __user __force *)(unsigned long)(u32)regs->sp)
> -#endif
> -		) {
> -
> -		/* User code screwed up. */
> -		local_irq_disable();
> -		regs->ax = -EFAULT;
> -		prepare_exit_to_usermode(regs);
> -		return 0;	/* Keep it simple: use IRET. */
> -	}
> +	instr_end();
> +	exit_to_user_mode();
>   
> -	/* Now this is just like a normal syscall. */
> -	do_syscall_32_irqs_on(regs);
> +	/* If it failed, keep it simple: use IRET. */
> +	if (!success)
> +		return 0;
>   
>   #ifdef CONFIG_X86_64
>   	/*
> 

  reply	other threads:[~2020-05-07 13:41 UTC|newest]

Thread overview: 69+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-05 13:41 [patch V4 part 2 00/18] x86/entry: Entry/exception code rework, syscall and KVM changes Thomas Gleixner
2020-05-05 13:41 ` [patch V4 part 2 01/18] x86/entry/64: Move non entry code into .text section Thomas Gleixner
2020-05-06 15:51   ` Peter Zijlstra
2020-05-08  1:31   ` Steven Rostedt
2020-05-08 23:53   ` Andy Lutomirski
2020-05-10 13:39     ` Thomas Gleixner
2020-05-19 19:58   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2020-05-05 13:41 ` [patch V4 part 2 02/18] x86/entry/32: " Thomas Gleixner
2020-05-07 13:15   ` Alexandre Chartre
2020-05-07 14:14     ` Thomas Gleixner
2020-05-19 19:58   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2020-05-05 13:41 ` [patch V4 part 2 03/18] x86/entry: Mark enter_from_user_mode() noinstr Thomas Gleixner
2020-05-08  8:21   ` Masami Hiramatsu
2020-05-19 19:58   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2020-05-05 13:41 ` [patch V4 part 2 04/18] x86/entry/common: Protect against instrumentation Thomas Gleixner
2020-05-07 13:39   ` Alexandre Chartre [this message]
2020-05-07 14:13     ` Thomas Gleixner
2020-05-19 19:58   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2020-05-05 13:41 ` [patch V4 part 2 05/18] x86/entry: Move irq tracing on syscall entry to C-code Thomas Gleixner
2020-05-07 13:55   ` Alexandre Chartre
2020-05-07 14:10     ` Thomas Gleixner
2020-05-07 15:03       ` Thomas Gleixner
2020-05-07 17:06         ` Thomas Gleixner
2020-05-19 19:58   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2020-05-05 13:41 ` [patch V4 part 2 06/18] x86/entry: Move irq flags tracing to prepare_exit_to_usermode() Thomas Gleixner
2020-05-08 23:57   ` Andy Lutomirski
2020-05-09 10:16     ` Thomas Gleixner
2020-05-19 19:58   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2020-05-05 13:41 ` [patch V4 part 2 07/18] context_tracking: Ensure that the critical path cannot be instrumented Thomas Gleixner
2020-05-08  8:23   ` Masami Hiramatsu
2020-05-19 19:58   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2020-05-05 13:41 ` [patch V4 part 2 08/18] lib/smp_processor_id: Move it into noinstr section Thomas Gleixner
2020-05-19 19:58   ` [tip: x86/entry] x86/speculation/mds: Mark mds_user_clear_cpu_buffers() __always_inline tip-bot2 for Thomas Gleixner
2020-05-19 19:58   ` [tip: x86/entry] lib/smp_processor_id: Move it into noinstr section tip-bot2 for Thomas Gleixner
2020-05-05 13:41 ` [patch V4 part 2 09/18] x86/speculation/mds: Mark mds_user_clear_cpu_buffers() __always_inline Thomas Gleixner
2020-05-05 13:41 ` [patch V4 part 2 10/18] x86/entry/64: Check IF in __preempt_enable_notrace() thunk Thomas Gleixner
2020-05-07 14:15   ` Alexandre Chartre
2020-05-09  0:10   ` Andy Lutomirski
2020-05-09 10:25     ` Thomas Gleixner
2020-05-10 18:47       ` Thomas Gleixner
2020-05-11 18:27         ` Thomas Gleixner
2020-05-12  1:48     ` Steven Rostedt
2020-05-12  1:51   ` Steven Rostedt
2020-05-12  8:14     ` Thomas Gleixner
2020-05-05 13:41 ` [patch V4 part 2 11/18] x86/entry/64: Mark ___preempt_schedule_notrace() thunk noinstr Thomas Gleixner
2020-05-05 13:41 ` [patch V4 part 2 12/18] x86,objtool: Make entry_64_compat.S objtool clean Thomas Gleixner
2020-05-09  0:11   ` Andy Lutomirski
2020-05-09 10:06     ` Thomas Gleixner
2020-05-19 19:58   ` [tip: x86/entry] x86/entry: " tip-bot2 for Peter Zijlstra
2020-05-05 13:41 ` [patch V4 part 2 13/18] x86/kvm: Move context tracking where it belongs Thomas Gleixner
2020-05-06  7:42   ` Paolo Bonzini
2020-05-09  0:14   ` Andy Lutomirski
2020-05-09 10:12     ` Thomas Gleixner
2020-05-05 13:41 ` [patch V4 part 2 14/18] x86/kvm/vmx: Add hardirq tracing to guest enter/exit Thomas Gleixner
2020-05-06  7:55   ` Paolo Bonzini
2020-05-05 13:41 ` [patch V4 part 2 15/18] x86/kvm/svm: Handle hardirqs proper on " Thomas Gleixner
2020-05-06  8:15   ` Paolo Bonzini
2020-05-06  8:48     ` Thomas Gleixner
2020-05-06  9:21       ` Paolo Bonzini
2020-05-07 14:44         ` [patch V5 " Thomas Gleixner
2020-05-08 13:45           ` Paolo Bonzini
2020-05-08 14:01             ` Thomas Gleixner
2020-05-05 13:41 ` [patch V4 part 2 16/18] context_tracking: Make guest_enter/exit() .noinstr ready Thomas Gleixner
2020-05-19 19:58   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2020-05-05 13:41 ` [patch V4 part 2 17/18] x86/kvm/vmx: Move guest enter/exit into .noinstr.text Thomas Gleixner
2020-05-06  8:17   ` Paolo Bonzini
2020-05-05 13:41 ` [patch V4 part 2 18/18] x86/kvm/svm: " Thomas Gleixner
2020-05-06  8:17   ` Paolo Bonzini
2020-05-07 14:47   ` Alexandre Chartre

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87976f9b-2c36-b4f7-5382-4eba569cc687@oracle.com \
    --to=alexandre.chartre@oracle.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=brgerst@gmail.com \
    --cc=frederic@kernel.org \
    --cc=jgross@suse.com \
    --cc=joel@joelfernandes.org \
    --cc=jpoimboe@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mhiramat@kernel.org \
    --cc=paulmck@kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=pmladek@suse.com \
    --cc=rostedt@goodmis.org \
    --cc=sean.j.christopherson@intel.com \
    --cc=tglx@linutronix.de \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.