linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: x86@kernel.org, "Paul E. McKenney" <paulmck@kernel.org>,
	Andy Lutomirski <luto@kernel.org>,
	Alexandre Chartre <alexandre.chartre@oracle.com>,
	Frederic Weisbecker <frederic@kernel.org>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Sean Christopherson <sean.j.christopherson@intel.com>,
	Masami Hiramatsu <mhiramat@kernel.org>,
	Petr Mladek <pmladek@suse.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Joel Fernandes <joel@joelfernandes.org>,
	Boris Ostrovsky <boris.ostrovsky@oracle.com>,
	Juergen Gross <jgross@suse.com>, Brian Gerst <brgerst@gmail.com>,
	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
	Josh Poimboeuf <jpoimboe@redhat.com>,
	Will Deacon <will@kernel.org>,
	Tom Lendacky <thomas.lendacky@amd.com>,
	Wei Liu <wei.liu@kernel.org>,
	Michael Kelley <mikelley@microsoft.com>,
	Jason Chen CJ <jason.cj.chen@intel.com>,
	Zhao Yakui <yakui.zhao@intel.com>,
	"Peter Zijlstra (Intel)" <peterz@infradead.org>
Subject: [patch V5 13/38] x86/entry: Switch page fault exception to IDTENTRY_RAW
Date: Tue, 12 May 2020 23:01:12 +0200	[thread overview]
Message-ID: <20200512213810.713078890@linutronix.de> (raw)
In-Reply-To: 20200512210059.056244513@linutronix.de

Convert page fault exceptions to IDTENTRY_RAW:
  - Implement the C entry point with DEFINE_IDTENTRY_RAW_ERRORCODE
  - Add the CR2 read into the exception handler
  - Add the idtentry_enter/exit_cond_rcu() invocations in the
    regular page fault handler and use the regular
    idtentry_enter/exit() for the async PF part.
  - Emit the ASM stub with DECLARE_IDTENTRY_RAW_ERRORCODE
  - Remove the ASM idtentry in 64bit
  - Remove the CR2 read from 64bit
  - Remove the open coded ASM entry code in 32bit
  - Fixup the XEN/PV code
  - Remove the old prototypes

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/entry/entry_32.S       |   30 -----------------
 arch/x86/entry/entry_64.S       |   19 -----------
 arch/x86/include/asm/idtentry.h |    3 +
 arch/x86/include/asm/traps.h    |   11 ------
 arch/x86/kernel/idt.c           |    4 +-
 arch/x86/kernel/kvm.c           |   14 ++++----
 arch/x86/mm/fault.c             |   69 +++++++++++++++++++++++++++-------------
 arch/x86/xen/enlighten_pv.c     |    2 -
 arch/x86/xen/xen-asm_64.S       |    2 -
 9 files changed, 62 insertions(+), 92 deletions(-)

--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1395,36 +1395,6 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vec
 
 #endif /* CONFIG_HYPERV */
 
-SYM_CODE_START(page_fault)
-	ASM_CLAC
-	pushl	$do_page_fault
-	jmp	common_exception_read_cr2
-SYM_CODE_END(page_fault)
-
-SYM_CODE_START_LOCAL_NOALIGN(common_exception_read_cr2)
-	/* the function address is in %gs's slot on the stack */
-	SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
-
-	ENCODE_FRAME_POINTER
-
-	/* fixup %gs */
-	GS_TO_REG %ecx
-	movl	PT_GS(%esp), %edi
-	REG_TO_PTGS %ecx
-	SET_KERNEL_GS %ecx
-
-	GET_CR2_INTO(%ecx)			# might clobber %eax
-
-	/* fixup orig %eax */
-	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
-	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
-
-	TRACE_IRQS_OFF
-	movl	%esp, %eax			# pt_regs pointer
-	CALL_NOSPEC edi
-	jmp	ret_from_exception
-SYM_CODE_END(common_exception_read_cr2)
-
 SYM_CODE_START_LOCAL_NOALIGN(common_exception)
 	/* the function address is in %gs's slot on the stack */
 	SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -507,15 +507,6 @@ SYM_CODE_END(spurious_entries_start)
 	call	error_entry
 	UNWIND_HINT_REGS
 
-	.if \vector == X86_TRAP_PF
-		/*
-		 * Store CR2 early so subsequent faults cannot clobber it. Use R12 as
-		 * intermediate storage as RDX can be clobbered in enter_from_user_mode().
-		 * GET_CR2_INTO can clobber RAX.
-		 */
-		GET_CR2_INTO(%r12);
-	.endif
-
 	.if \sane == 0
 	TRACE_IRQS_OFF
 
@@ -534,10 +525,6 @@ SYM_CODE_END(spurious_entries_start)
 		movq	$-1, ORIG_RAX(%rsp)	/* no syscall to restart */
 	.endif
 
-	.if \vector == X86_TRAP_PF
-		movq	%r12, %rdx		/* Move CR2 into 3rd argument */
-	.endif
-
 	call	\cfunc
 
 	.if \sane == 0
@@ -1060,12 +1047,6 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work
 #endif
 
 /*
- * Exception entry points.
- */
-
-idtentry	X86_TRAP_PF		page_fault		do_page_fault			has_error_code=1
-
-/*
  * Reload gs selector with exception handling
  * edi:  new selector
  *
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -364,7 +364,8 @@ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP,
 DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC,	exc_alignment_check);
 
 /* Raw exception entries which need extra work */
-DECLARE_IDTENTRY_RAW(X86_TRAP_BP,	exc_int3);
+DECLARE_IDTENTRY_RAW(X86_TRAP_BP,		exc_int3);
+DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF,	exc_page_fault);
 
 #ifdef CONFIG_X86_MCE
 DECLARE_IDTENTRY_MCE(X86_TRAP_MC,	exc_machine_check);
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -9,17 +9,6 @@
 #include <asm/idtentry.h>
 #include <asm/siginfo.h>			/* TRAP_TRACE, ... */
 
-#define dotraplinkage __visible
-
-asmlinkage void page_fault(void);
-asmlinkage void async_page_fault(void);
-
-#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
-asmlinkage void xen_page_fault(void);
-#endif
-
-dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
-
 #ifdef CONFIG_X86_64
 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
 asmlinkage __visible notrace
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -59,7 +59,7 @@ static const __initconst struct idt_data
 	INTG(X86_TRAP_DB,		asm_exc_debug),
 	SYSG(X86_TRAP_BP,		asm_exc_int3),
 #ifdef CONFIG_X86_32
-	INTG(X86_TRAP_PF,		page_fault),
+	INTG(X86_TRAP_PF,		asm_exc_page_fault),
 #endif
 };
 
@@ -153,7 +153,7 @@ static const __initconst struct idt_data
  * stacks work only after cpu_init().
  */
 static const __initconst struct idt_data early_pf_idts[] = {
-	INTG(X86_TRAP_PF,		page_fault),
+	INTG(X86_TRAP_PF,		asm_exc_page_fault),
 };
 
 /*
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -218,7 +218,7 @@ void kvm_async_pf_task_wake(u32 token)
 }
 EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
 
-u32 kvm_read_and_reset_pf_reason(void)
+u32 noinstr kvm_read_and_reset_pf_reason(void)
 {
 	u32 reason = 0;
 
@@ -230,9 +230,8 @@ u32 kvm_read_and_reset_pf_reason(void)
 	return reason;
 }
 EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
-NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
 
-bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
+noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
 {
 	u32 reason = kvm_read_and_reset_pf_reason();
 
@@ -244,6 +243,9 @@ bool __kvm_handle_async_pf(struct pt_reg
 		return false;
 	}
 
+	idtentry_enter(regs);
+	instrumentation_begin();
+
 	/*
 	 * If the host managed to inject an async #PF into an interrupt
 	 * disabled region, then die hard as this is not going to end well
@@ -258,13 +260,13 @@ bool __kvm_handle_async_pf(struct pt_reg
 		/* Page is swapped out by the host. */
 		kvm_async_pf_task_wait_schedule(token);
 	} else {
-		rcu_irq_enter();
 		kvm_async_pf_task_wake(token);
-		rcu_irq_exit();
 	}
+
+	instrumentation_end();
+	idtentry_exit(regs);
 	return true;
 }
-NOKPROBE_SYMBOL(__kvm_handle_async_pf);
 
 static void __init paravirt_ops_setup(void)
 {
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1521,11 +1521,38 @@ trace_page_fault_entries(struct pt_regs
 		trace_page_fault_kernel(address, regs, error_code);
 }
 
-dotraplinkage void
-do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
-		unsigned long address)
+static __always_inline void
+handle_page_fault(struct pt_regs *regs, unsigned long error_code,
+			      unsigned long address)
 {
+	trace_page_fault_entries(regs, error_code, address);
+
+	if (unlikely(kmmio_fault(regs, address)))
+		return;
+
+	/* Was the fault on kernel-controlled part of the address space? */
+	if (unlikely(fault_in_kernel_space(address))) {
+		do_kern_addr_fault(regs, error_code, address);
+	} else {
+		do_user_addr_fault(regs, error_code, address);
+		/*
+		 * User address page fault handling might have reenabled
+		 * interrupts. Fixing up all potential exit points of
+		 * do_user_addr_fault() and its leaf functions is just not
+		 * doable w/o creating an unholy mess or turning the code
+		 * upside down.
+		 */
+		local_irq_disable();
+	}
+}
+
+DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
+{
+	unsigned long address = read_cr2();
+	bool rcu_exit;
+
 	prefetchw(&current->mm->mmap_sem);
+
 	/*
 	 * KVM has two types of events that are, logically, interrupts, but
 	 * are unfortunately delivered using the #PF vector.  These events are
@@ -1540,28 +1567,28 @@ do_page_fault(struct pt_regs *regs, unsi
 	 * getting values from real and async page faults mixed up.
 	 *
 	 * Fingers crossed.
+	 *
+	 * The async #PF handling code takes care of idtentry handling
+	 * itself.
 	 */
 	if (kvm_handle_async_pf(regs, (u32)address))
 		return;
 
-	trace_page_fault_entries(regs, hw_error_code, address);
+	/*
+	 * Entry handling for valid #PF from kernel mode is slightly
+	 * different: RCU is already watching and rcu_irq_enter() must not
+	 * be invoked because a kernel fault on a user space address might
+	 * sleep.
+	 *
+	 * In case the fault hit an RCU idle region the conditional entry
+	 * code reenabled RCU to avoid subsequent wreckage which helps
+	 * debuggability.
+	 */
+	rcu_exit = idtentry_enter_cond_rcu(regs);
 
-	if (unlikely(kmmio_fault(regs, address)))
-		return;
+	instrumentation_begin();
+	handle_page_fault(regs, error_code, address);
+	instrumentation_end();
 
-	/* Was the fault on kernel-controlled part of the address space? */
-	if (unlikely(fault_in_kernel_space(address))) {
-		do_kern_addr_fault(regs, hw_error_code, address);
-	} else {
-		do_user_addr_fault(regs, hw_error_code, address);
-		/*
-		 * User address page fault handling might have reenabled
-		 * interrupts. Fixing up all potential exit points of
-		 * do_user_addr_fault() and its leaf functions is just not
-		 * doable w/o creating an unholy mess or turning the code
-		 * upside down.
-		 */
-		local_irq_disable();
-	}
+	idtentry_exit_cond_rcu(regs, rcu_exit);
 }
-NOKPROBE_SYMBOL(do_page_fault);
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -627,7 +627,7 @@ static struct trap_array_entry trap_arra
 #ifdef CONFIG_IA32_EMULATION
 	{ entry_INT80_compat,          xen_entry_INT80_compat,          false },
 #endif
-	{ page_fault,                  xen_page_fault,                  false },
+	TRAP_ENTRY(exc_page_fault,			false ),
 	TRAP_ENTRY(exc_divide_error,			false ),
 	TRAP_ENTRY(exc_bounds,				false ),
 	TRAP_ENTRY(exc_invalid_op,			false ),
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -43,7 +43,7 @@ xen_pv_trap asm_exc_invalid_tss
 xen_pv_trap asm_exc_segment_not_present
 xen_pv_trap asm_exc_stack_segment
 xen_pv_trap asm_exc_general_protection
-xen_pv_trap page_fault
+xen_pv_trap asm_exc_page_fault
 xen_pv_trap asm_exc_spurious_interrupt_bug
 xen_pv_trap asm_exc_coprocessor_error
 xen_pv_trap asm_exc_alignment_check


  parent reply	other threads:[~2020-05-12 22:23 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-12 21:00 [patch V5 00/38] x86/entry: Entry/exception code rework - the leftovers Thomas Gleixner
2020-05-12 21:01 ` [patch V5 01/38] x86/kvm/svm: Use uninstrumented wrmsrl() to restore GS Thomas Gleixner
2020-05-13  7:11   ` Jürgen Groß
2020-05-12 21:01 ` [patch V5 02/38] x86/entry/64: Use native swapgs in asm_native_load_gs_index() Thomas Gleixner
2020-05-13  2:02   ` Steven Rostedt
2020-05-13  6:34     ` Thomas Gleixner
2020-05-13  7:12   ` Jürgen Groß
2020-05-19 19:58   ` [tip: x86/entry] x86/entry/64: Use native swapgs in asm_load_gs_index() tip-bot2 for Thomas Gleixner
2020-05-12 21:01 ` [patch V5 03/38] nmi, tracing: Provide nmi_enter/exit_notrace() Thomas Gleixner
2020-05-15  1:32   ` Steven Rostedt
2020-05-15  1:35     ` Steven Rostedt
2020-05-15  1:37       ` Steven Rostedt
2020-05-12 21:01 ` [patch V5 04/38] x86: Make hardware latency tracing explicit Thomas Gleixner
2020-05-15  1:43   ` Steven Rostedt
2020-05-15 15:08     ` Thomas Gleixner
2020-05-12 21:01 ` [patch V5 05/38] genirq: Provide irq_enter/exit_rcu() Thomas Gleixner
2020-05-12 21:01 ` [patch V5 06/38] x86/entry: Provide helpers for execute on irqstack Thomas Gleixner
2020-05-13 21:43   ` Josh Poimboeuf
2020-05-12 21:01 ` [patch V5 07/38] x86/entry/64: Move do_softirq_own_stack() to C Thomas Gleixner
2020-05-12 21:01 ` [patch V5 08/38] x86/entry: Split idtentry_enter/exit() Thomas Gleixner
2020-05-12 21:01 ` [patch V5 09/38] x86/entry: Switch XEN/PV hypercall entry to IDTENTRY Thomas Gleixner
2020-05-14 16:24   ` Boris Ostrovsky
2020-05-12 21:01 ` [patch V5 10/38] x86/entry/64: Simplify idtentry_body Thomas Gleixner
2020-05-12 21:01 ` [patch V5 11/38] rcu: Provide __rcu_is_watching() Thomas Gleixner
2020-05-19 19:52   ` [tip: core/rcu] " tip-bot2 for Thomas Gleixner
2020-05-12 21:01 ` [patch V5 12/38] x86/entry: Provide idtentry_entry/exit_cond_rcu() Thomas Gleixner
2020-05-12 21:01 ` Thomas Gleixner [this message]
2020-05-12 21:01 ` [patch V5 14/38] x86/entry: Remove the transition leftovers Thomas Gleixner
2020-05-12 21:01 ` [patch V5 15/38] x86/entry: Change exit path of xen_failsafe_callback Thomas Gleixner
2020-05-12 21:01 ` [patch V5 16/38] x86/entry/64: Remove error_exit Thomas Gleixner
2020-05-12 21:01 ` [patch V5 17/38] x86/entry/32: Remove common_exception Thomas Gleixner
2020-05-12 21:01 ` [patch V5 18/38] x86/irq: Use generic irq_regs implementation Thomas Gleixner
2020-05-12 21:01 ` [patch V5 19/38] x86/irq: Convey vector as argument and not in ptregs Thomas Gleixner
2020-05-12 21:01 ` [patch V5 20/38] x86/irq/64: Provide handle_irq() Thomas Gleixner
2020-05-12 21:01 ` [patch V5 21/38] x86/entry: Add IRQENTRY_IRQ macro Thomas Gleixner
2020-05-12 21:01 ` [patch V5 22/38] x86/entry: Use idtentry for interrupts Thomas Gleixner
2020-05-12 21:01 ` [patch V5 23/38] genirq: Provde __irq_enter/exit_raw() Thomas Gleixner
2020-05-12 21:01 ` [patch V5 24/38] x86/entry: Provide IDTENTRY_SYSVEC Thomas Gleixner
2020-05-15  0:16   ` Boris Ostrovsky
2020-05-15  8:52     ` Thomas Gleixner
2020-05-12 21:01 ` [patch V5 25/38] x86/entry: Convert APIC interrupts to IDTENTRY_SYSVEC Thomas Gleixner
2020-05-12 21:01 ` [patch V5 26/38] x86/entry: Convert SMP system vectors " Thomas Gleixner
2020-05-12 21:01 ` [patch V5 27/38] x86/entry: Convert various system vectors Thomas Gleixner
2020-05-12 21:01 ` [patch V5 28/38] x86/entry: Convert KVM vectors to IDTENTRY_SYSVEC Thomas Gleixner
2020-05-12 21:01 ` [patch V5 29/38] x86/entry: Convert various hypervisor " Thomas Gleixner
2020-05-12 21:01 ` [patch V5 30/38] x86/entry: Convert XEN hypercall vector " Thomas Gleixner
2020-05-12 21:01 ` [patch V5 31/38] x86/entry: Convert reschedule interrupt to IDTENTRY_SYSVEC_SIMPLE Thomas Gleixner
2020-05-12 21:01 ` [patch V5 32/38] x86/entry: Remove the apic/BUILD interrupt leftovers Thomas Gleixner
2020-05-12 21:01 ` [patch V5 33/38] x86/entry/64: Remove IRQ stack switching ASM Thomas Gleixner
2020-05-12 21:01 ` [patch V5 34/38] x86/entry: Make enter_from_user_mode() static Thomas Gleixner
2020-05-12 21:01 ` [patch V5 35/38] x86/entry/32: Remove redundant irq disable code Thomas Gleixner
2020-05-12 21:01 ` [patch V5 36/38] x86/entry/64: Remove TRACE_IRQS_*_DEBUG Thomas Gleixner
2020-05-12 21:01 ` [patch V5 37/38] x86/entry: Move paranoid irq tracing out of ASM code Thomas Gleixner
2020-05-12 21:01 ` [patch V5 38/38] x86/entry: Remove the TRACE_IRQS cruft Thomas Gleixner
2020-05-14 16:35 ` [patch V5 00/38] x86/entry: Entry/exception code rework - the leftovers Paul E. McKenney

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200512213810.713078890@linutronix.de \
    --to=tglx@linutronix.de \
    --cc=alexandre.chartre@oracle.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=brgerst@gmail.com \
    --cc=frederic@kernel.org \
    --cc=jason.cj.chen@intel.com \
    --cc=jgross@suse.com \
    --cc=joel@joelfernandes.org \
    --cc=jpoimboe@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mhiramat@kernel.org \
    --cc=mikelley@microsoft.com \
    --cc=paulmck@kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pmladek@suse.com \
    --cc=rostedt@goodmis.org \
    --cc=sean.j.christopherson@intel.com \
    --cc=thomas.lendacky@amd.com \
    --cc=wei.liu@kernel.org \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    --cc=yakui.zhao@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).