All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: x86@kernel.org, "Paul E. McKenney" <paulmck@kernel.org>,
	Andy Lutomirski <luto@kernel.org>,
	Alexandre Chartre <alexandre.chartre@oracle.com>,
	Frederic Weisbecker <frederic@kernel.org>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Sean Christopherson <sean.j.christopherson@intel.com>,
	Masami Hiramatsu <mhiramat@kernel.org>,
	Petr Mladek <pmladek@suse.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Joel Fernandes <joel@joelfernandes.org>,
	Boris Ostrovsky <boris.ostrovsky@oracle.com>,
	Juergen Gross <jgross@suse.com>, Brian Gerst <brgerst@gmail.com>,
	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
	Josh Poimboeuf <jpoimboe@redhat.com>,
	Will Deacon <will@kernel.org>,
	Tom Lendacky <thomas.lendacky@amd.com>,
	Wei Liu <wei.liu@kernel.org>,
	Michael Kelley <mikelley@microsoft.com>,
	Jason Chen CJ <jason.cj.chen@intel.com>,
	Zhao Yakui <yakui.zhao@intel.com>,
	"Peter Zijlstra (Intel)" <peterz@infradead.org>
Subject: [patch V5 13/38] x86/entry: Switch page fault exception to IDTENTRY_RAW
Date: Tue, 12 May 2020 23:01:12 +0200	[thread overview]
Message-ID: <20200512213810.713078890@linutronix.de> (raw)
In-Reply-To: 20200512210059.056244513@linutronix.de

Convert page fault exceptions to IDTENTRY_RAW:
  - Implement the C entry point with DEFINE_IDTENTRY_RAW
  - Add the CR2 read into the exception handler
  - Add the idtentry_enter/exit_cond_rcu() invocations in
    the regular page fault handler and use the regular
    idtentry_enter/exit() for the async PF part.
  - Emit the ASM stub with DECLARE_IDTENTRY_RAW
  - Remove the ASM idtentry in 64bit
  - Remove the CR2 read from 64bit
  - Remove the open coded ASM entry code in 32bit
  - Fixup the XEN/PV code
  - Remove the old prototypes

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/entry/entry_32.S       |   30 -----------------
 arch/x86/entry/entry_64.S       |   19 -----------
 arch/x86/include/asm/idtentry.h |    3 +
 arch/x86/include/asm/traps.h    |   11 ------
 arch/x86/kernel/idt.c           |    4 +-
 arch/x86/kernel/kvm.c           |   14 ++++----
 arch/x86/mm/fault.c             |   69 +++++++++++++++++++++++++++-------------
 arch/x86/xen/enlighten_pv.c     |    2 -
 arch/x86/xen/xen-asm_64.S       |    2 -
 9 files changed, 62 insertions(+), 92 deletions(-)

--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1395,36 +1395,6 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vec
 
 #endif /* CONFIG_HYPERV */
 
-SYM_CODE_START(page_fault)
-	ASM_CLAC
-	pushl	$do_page_fault
-	jmp	common_exception_read_cr2
-SYM_CODE_END(page_fault)
-
-SYM_CODE_START_LOCAL_NOALIGN(common_exception_read_cr2)
-	/* the function address is in %gs's slot on the stack */
-	SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
-
-	ENCODE_FRAME_POINTER
-
-	/* fixup %gs */
-	GS_TO_REG %ecx
-	movl	PT_GS(%esp), %edi
-	REG_TO_PTGS %ecx
-	SET_KERNEL_GS %ecx
-
-	GET_CR2_INTO(%ecx)			# might clobber %eax
-
-	/* fixup orig %eax */
-	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
-	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
-
-	TRACE_IRQS_OFF
-	movl	%esp, %eax			# pt_regs pointer
-	CALL_NOSPEC edi
-	jmp	ret_from_exception
-SYM_CODE_END(common_exception_read_cr2)
-
 SYM_CODE_START_LOCAL_NOALIGN(common_exception)
 	/* the function address is in %gs's slot on the stack */
 	SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -507,15 +507,6 @@ SYM_CODE_END(spurious_entries_start)
 	call	error_entry
 	UNWIND_HINT_REGS
 
-	.if \vector == X86_TRAP_PF
-		/*
-		 * Store CR2 early so subsequent faults cannot clobber it. Use R12 as
-		 * intermediate storage as RDX can be clobbered in enter_from_user_mode().
-		 * GET_CR2_INTO can clobber RAX.
-		 */
-		GET_CR2_INTO(%r12);
-	.endif
-
 	.if \sane == 0
 	TRACE_IRQS_OFF
 
@@ -534,10 +525,6 @@ SYM_CODE_END(spurious_entries_start)
 		movq	$-1, ORIG_RAX(%rsp)	/* no syscall to restart */
 	.endif
 
-	.if \vector == X86_TRAP_PF
-		movq	%r12, %rdx		/* Move CR2 into 3rd argument */
-	.endif
-
 	call	\cfunc
 
 	.if \sane == 0
@@ -1060,12 +1047,6 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work
 #endif
 
 /*
- * Exception entry points.
- */
-
-idtentry	X86_TRAP_PF		page_fault		do_page_fault			has_error_code=1
-
-/*
  * Reload gs selector with exception handling
  * edi:  new selector
  *
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -364,7 +364,8 @@ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP,
 DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC,	exc_alignment_check);
 
 /* Raw exception entries which need extra work */
-DECLARE_IDTENTRY_RAW(X86_TRAP_BP,	exc_int3);
+DECLARE_IDTENTRY_RAW(X86_TRAP_BP,		exc_int3);
+DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF,	exc_page_fault);
 
 #ifdef CONFIG_X86_MCE
 DECLARE_IDTENTRY_MCE(X86_TRAP_MC,	exc_machine_check);
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -9,17 +9,6 @@
 #include <asm/idtentry.h>
 #include <asm/siginfo.h>			/* TRAP_TRACE, ... */
 
-#define dotraplinkage __visible
-
-asmlinkage void page_fault(void);
-asmlinkage void async_page_fault(void);
-
-#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
-asmlinkage void xen_page_fault(void);
-#endif
-
-dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
-
 #ifdef CONFIG_X86_64
 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
 asmlinkage __visible notrace
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -59,7 +59,7 @@ static const __initconst struct idt_data
 	INTG(X86_TRAP_DB,		asm_exc_debug),
 	SYSG(X86_TRAP_BP,		asm_exc_int3),
 #ifdef CONFIG_X86_32
-	INTG(X86_TRAP_PF,		page_fault),
+	INTG(X86_TRAP_PF,		asm_exc_page_fault),
 #endif
 };
 
@@ -153,7 +153,7 @@ static const __initconst struct idt_data
  * stacks work only after cpu_init().
  */
 static const __initconst struct idt_data early_pf_idts[] = {
-	INTG(X86_TRAP_PF,		page_fault),
+	INTG(X86_TRAP_PF,		asm_exc_page_fault),
 };
 
 /*
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -218,7 +218,7 @@ void kvm_async_pf_task_wake(u32 token)
 }
 EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
 
-u32 kvm_read_and_reset_pf_reason(void)
+u32 noinstr kvm_read_and_reset_pf_reason(void)
 {
 	u32 reason = 0;
 
@@ -230,9 +230,8 @@ u32 kvm_read_and_reset_pf_reason(void)
 	return reason;
 }
 EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
-NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
 
-bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
+noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
 {
 	u32 reason = kvm_read_and_reset_pf_reason();
 
@@ -244,6 +243,9 @@ bool __kvm_handle_async_pf(struct pt_reg
 		return false;
 	}
 
+	idtentry_enter(regs);
+	instrumentation_begin();
+
 	/*
 	 * If the host managed to inject an async #PF into an interrupt
 	 * disabled region, then die hard as this is not going to end well
@@ -258,13 +260,13 @@ bool __kvm_handle_async_pf(struct pt_reg
 		/* Page is swapped out by the host. */
 		kvm_async_pf_task_wait_schedule(token);
 	} else {
-		rcu_irq_enter();
 		kvm_async_pf_task_wake(token);
-		rcu_irq_exit();
 	}
+
+	instrumentation_end();
+	idtentry_exit(regs);
 	return true;
 }
-NOKPROBE_SYMBOL(__kvm_handle_async_pf);
 
 static void __init paravirt_ops_setup(void)
 {
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1521,11 +1521,38 @@ trace_page_fault_entries(struct pt_regs
 		trace_page_fault_kernel(address, regs, error_code);
 }
 
-dotraplinkage void
-do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
-		unsigned long address)
+static __always_inline void
+handle_page_fault(struct pt_regs *regs, unsigned long error_code,
+			      unsigned long address)
 {
+	trace_page_fault_entries(regs, error_code, address);
+
+	if (unlikely(kmmio_fault(regs, address)))
+		return;
+
+	/* Was the fault on kernel-controlled part of the address space? */
+	if (unlikely(fault_in_kernel_space(address))) {
+		do_kern_addr_fault(regs, error_code, address);
+	} else {
+		do_user_addr_fault(regs, error_code, address);
+		/*
+		 * User address page fault handling might have reenabled
+		 * interrupts. Fixing up all potential exit points of
+		 * do_user_addr_fault() and its leaf functions is just not
+		 * doable w/o creating an unholy mess or turning the code
+		 * upside down.
+		 */
+		local_irq_disable();
+	}
+}
+
+DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
+{
+	unsigned long address = read_cr2();
+	bool rcu_exit;
+
 	prefetchw(&current->mm->mmap_sem);
+
 	/*
 	 * KVM has two types of events that are, logically, interrupts, but
 	 * are unfortunately delivered using the #PF vector.  These events are
@@ -1540,28 +1567,28 @@ do_page_fault(struct pt_regs *regs, unsi
 	 * getting values from real and async page faults mixed up.
 	 *
 	 * Fingers crossed.
+	 *
+	 * The async #PF handling code takes care of idtentry handling
+	 * itself.
 	 */
 	if (kvm_handle_async_pf(regs, (u32)address))
 		return;
 
-	trace_page_fault_entries(regs, hw_error_code, address);
+	/*
+	 * Entry handling for valid #PF from kernel mode is slightly
+	 * different: RCU is already watching and rcu_irq_enter() must not
+	 * be invoked because a kernel fault on a user space address might
+	 * sleep.
+	 *
+	 * In case the fault hit a RCU idle region the conditional entry
+	 * code reenabled RCU to avoid subsequent wreckage which helps
+	 * debugability.
+	 */
+	rcu_exit = idtentry_enter_cond_rcu(regs);
 
-	if (unlikely(kmmio_fault(regs, address)))
-		return;
+	instrumentation_begin();
+	handle_page_fault(regs, error_code, address);
+	instrumentation_end();
 
-	/* Was the fault on kernel-controlled part of the address space? */
-	if (unlikely(fault_in_kernel_space(address))) {
-		do_kern_addr_fault(regs, hw_error_code, address);
-	} else {
-		do_user_addr_fault(regs, hw_error_code, address);
-		/*
-		 * User address page fault handling might have reenabled
-		 * interrupts. Fixing up all potential exit points of
-		 * do_user_addr_fault() and its leaf functions is just not
-		 * doable w/o creating an unholy mess or turning the code
-		 * upside down.
-		 */
-		local_irq_disable();
-	}
+	idtentry_exit_cond_rcu(regs, rcu_exit);
 }
-NOKPROBE_SYMBOL(do_page_fault);
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -627,7 +627,7 @@ static struct trap_array_entry trap_arra
 #ifdef CONFIG_IA32_EMULATION
 	{ entry_INT80_compat,          xen_entry_INT80_compat,          false },
 #endif
-	{ page_fault,                  xen_page_fault,                  false },
+	TRAP_ENTRY(exc_page_fault,			false ),
 	TRAP_ENTRY(exc_divide_error,			false ),
 	TRAP_ENTRY(exc_bounds,				false ),
 	TRAP_ENTRY(exc_invalid_op,			false ),
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -43,7 +43,7 @@ xen_pv_trap asm_exc_invalid_tss
 xen_pv_trap asm_exc_segment_not_present
 xen_pv_trap asm_exc_stack_segment
 xen_pv_trap asm_exc_general_protection
-xen_pv_trap page_fault
+xen_pv_trap asm_exc_page_fault
 xen_pv_trap asm_exc_spurious_interrupt_bug
 xen_pv_trap asm_exc_coprocessor_error
 xen_pv_trap asm_exc_alignment_check


  parent reply	other threads:[~2020-05-12 22:23 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-12 21:00 [patch V5 00/38] x86/entry: Entry/exception code rework - the leftovers Thomas Gleixner
2020-05-12 21:01 ` [patch V5 01/38] x86/kvm/svm: Use uninstrumented wrmsrl() to restore GS Thomas Gleixner
2020-05-13  7:11   ` Jürgen Groß
2020-05-12 21:01 ` [patch V5 02/38] x86/entry/64: Use native swapgs in asm_native_load_gs_index() Thomas Gleixner
2020-05-13  2:02   ` Steven Rostedt
2020-05-13  6:34     ` Thomas Gleixner
2020-05-13  7:12   ` Jürgen Groß
2020-05-19 19:58   ` [tip: x86/entry] x86/entry/64: Use native swapgs in asm_load_gs_index() tip-bot2 for Thomas Gleixner
2020-05-12 21:01 ` [patch V5 03/38] nmi, tracing: Provide nmi_enter/exit_notrace() Thomas Gleixner
2020-05-15  1:32   ` Steven Rostedt
2020-05-15  1:35     ` Steven Rostedt
2020-05-15  1:37       ` Steven Rostedt
2020-05-12 21:01 ` [patch V5 04/38] x86: Make hardware latency tracing explicit Thomas Gleixner
2020-05-15  1:43   ` Steven Rostedt
2020-05-15 15:08     ` Thomas Gleixner
2020-05-12 21:01 ` [patch V5 05/38] genirq: Provide irq_enter/exit_rcu() Thomas Gleixner
2020-05-12 21:01 ` [patch V5 06/38] x86/entry: Provide helpers for execute on irqstack Thomas Gleixner
2020-05-13 21:43   ` Josh Poimboeuf
2020-05-12 21:01 ` [patch V5 07/38] x86/entry/64: Move do_softirq_own_stack() to C Thomas Gleixner
2020-05-12 21:01 ` [patch V5 08/38] x86/entry: Split idtentry_enter/exit() Thomas Gleixner
2020-05-12 21:01 ` [patch V5 09/38] x86/entry: Switch XEN/PV hypercall entry to IDTENTRY Thomas Gleixner
2020-05-14 16:24   ` Boris Ostrovsky
2020-05-12 21:01 ` [patch V5 10/38] x86/entry/64: Simplify idtentry_body Thomas Gleixner
2020-05-12 21:01 ` [patch V5 11/38] rcu: Provide __rcu_is_watching() Thomas Gleixner
2020-05-19 19:52   ` [tip: core/rcu] " tip-bot2 for Thomas Gleixner
2020-05-12 21:01 ` [patch V5 12/38] x86/entry: Provide idtentry_entry/exit_cond_rcu() Thomas Gleixner
2020-05-12 21:01 ` Thomas Gleixner [this message]
2020-05-12 21:01 ` [patch V5 14/38] x86/entry: Remove the transition leftovers Thomas Gleixner
2020-05-12 21:01 ` [patch V5 15/38] x86/entry: Change exit path of xen_failsafe_callback Thomas Gleixner
2020-05-12 21:01 ` [patch V5 16/38] x86/entry/64: Remove error_exit Thomas Gleixner
2020-05-12 21:01 ` [patch V5 17/38] x86/entry/32: Remove common_exception Thomas Gleixner
2020-05-12 21:01 ` [patch V5 18/38] x86/irq: Use generic irq_regs implementation Thomas Gleixner
2020-05-12 21:01 ` [patch V5 19/38] x86/irq: Convey vector as argument and not in ptregs Thomas Gleixner
2020-05-12 21:01 ` [patch V5 20/38] x86/irq/64: Provide handle_irq() Thomas Gleixner
2020-05-12 21:01 ` [patch V5 21/38] x86/entry: Add IRQENTRY_IRQ macro Thomas Gleixner
2020-05-12 21:01 ` [patch V5 22/38] x86/entry: Use idtentry for interrupts Thomas Gleixner
2020-05-12 21:01 ` [patch V5 23/38] genirq: Provde __irq_enter/exit_raw() Thomas Gleixner
2020-05-12 21:01 ` [patch V5 24/38] x86/entry: Provide IDTENTRY_SYSVEC Thomas Gleixner
2020-05-15  0:16   ` Boris Ostrovsky
2020-05-15  8:52     ` Thomas Gleixner
2020-05-12 21:01 ` [patch V5 25/38] x86/entry: Convert APIC interrupts to IDTENTRY_SYSVEC Thomas Gleixner
2020-05-12 21:01 ` [patch V5 26/38] x86/entry: Convert SMP system vectors " Thomas Gleixner
2020-05-12 21:01 ` [patch V5 27/38] x86/entry: Convert various system vectors Thomas Gleixner
2020-05-12 21:01 ` [patch V5 28/38] x86/entry: Convert KVM vectors to IDTENTRY_SYSVEC Thomas Gleixner
2020-05-12 21:01 ` [patch V5 29/38] x86/entry: Convert various hypervisor " Thomas Gleixner
2020-05-12 21:01 ` [patch V5 30/38] x86/entry: Convert XEN hypercall vector " Thomas Gleixner
2020-05-12 21:01 ` [patch V5 31/38] x86/entry: Convert reschedule interrupt to IDTENTRY_SYSVEC_SIMPLE Thomas Gleixner
2020-05-12 21:01 ` [patch V5 32/38] x86/entry: Remove the apic/BUILD interrupt leftovers Thomas Gleixner
2020-05-12 21:01 ` [patch V5 33/38] x86/entry/64: Remove IRQ stack switching ASM Thomas Gleixner
2020-05-12 21:01 ` [patch V5 34/38] x86/entry: Make enter_from_user_mode() static Thomas Gleixner
2020-05-12 21:01 ` [patch V5 35/38] x86/entry/32: Remove redundant irq disable code Thomas Gleixner
2020-05-12 21:01 ` [patch V5 36/38] x86/entry/64: Remove TRACE_IRQS_*_DEBUG Thomas Gleixner
2020-05-12 21:01 ` [patch V5 37/38] x86/entry: Move paranoid irq tracing out of ASM code Thomas Gleixner
2020-05-12 21:01 ` [patch V5 38/38] x86/entry: Remove the TRACE_IRQS cruft Thomas Gleixner
2020-05-14 16:35 ` [patch V5 00/38] x86/entry: Entry/exception code rework - the leftovers Paul E. McKenney

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200512213810.713078890@linutronix.de \
    --to=tglx@linutronix.de \
    --cc=alexandre.chartre@oracle.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=brgerst@gmail.com \
    --cc=frederic@kernel.org \
    --cc=jason.cj.chen@intel.com \
    --cc=jgross@suse.com \
    --cc=joel@joelfernandes.org \
    --cc=jpoimboe@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mhiramat@kernel.org \
    --cc=mikelley@microsoft.com \
    --cc=paulmck@kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pmladek@suse.com \
    --cc=rostedt@goodmis.org \
    --cc=sean.j.christopherson@intel.com \
    --cc=thomas.lendacky@amd.com \
    --cc=wei.liu@kernel.org \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    --cc=yakui.zhao@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.