From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: x86@kernel.org, "Paul E. McKenney" <paulmck@kernel.org>,
Andy Lutomirski <luto@kernel.org>,
Alexandre Chartre <alexandre.chartre@oracle.com>,
Frederic Weisbecker <frederic@kernel.org>,
Paolo Bonzini <pbonzini@redhat.com>,
Sean Christopherson <sean.j.christopherson@intel.com>,
Masami Hiramatsu <mhiramat@kernel.org>,
Petr Mladek <pmladek@suse.com>,
Steven Rostedt <rostedt@goodmis.org>,
Joel Fernandes <joel@joelfernandes.org>,
Boris Ostrovsky <boris.ostrovsky@oracle.com>,
Juergen Gross <jgross@suse.com>, Brian Gerst <brgerst@gmail.com>,
Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
Josh Poimboeuf <jpoimboe@redhat.com>,
Will Deacon <will@kernel.org>
Subject: [patch V4 part 5 08/31] x86/entry: Switch page fault exception to IDTENTRY_RAW
Date: Tue, 05 May 2020 15:53:49 +0200 [thread overview]
Message-ID: <20200505135828.911280411@linutronix.de> (raw)
In-Reply-To: 20200505135341.730586321@linutronix.de
Convert page fault exceptions to IDTENTRY_RAW:
- Implement the C entry point with DEFINE_IDTENTRY_RAW
- Add the CR2 read into the exception handler
- Add the idtentry_enter/exit_cond_rcu() invocations in
in the regular page fault handler and use the regular
idtentry_enter/exit() for the async PF part.
- Emit the ASM stub with DECLARE_IDTENTRY_RAW
- Remove the ASM idtentry in 64bit
- Remove the CR2 read from 64bit
- Remove the open coded ASM entry code in 32bit
- Fixup the XEN/PV code
- Remove the old prototypes
No functional change.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/entry_32.S | 30 -----------------
arch/x86/entry/entry_64.S | 19 -----------
arch/x86/include/asm/idtentry.h | 3 +
arch/x86/include/asm/traps.h | 11 ------
arch/x86/kernel/idt.c | 4 +-
arch/x86/kernel/kvm.c | 14 ++++----
arch/x86/mm/fault.c | 69 +++++++++++++++++++++++++++-------------
arch/x86/xen/enlighten_pv.c | 2 -
arch/x86/xen/xen-asm_64.S | 2 -
9 files changed, 62 insertions(+), 92 deletions(-)
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1395,36 +1395,6 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vec
#endif /* CONFIG_HYPERV */
-SYM_CODE_START(page_fault)
- ASM_CLAC
- pushl $do_page_fault
- jmp common_exception_read_cr2
-SYM_CODE_END(page_fault)
-
-SYM_CODE_START_LOCAL_NOALIGN(common_exception_read_cr2)
- /* the function address is in %gs's slot on the stack */
- SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
-
- ENCODE_FRAME_POINTER
-
- /* fixup %gs */
- GS_TO_REG %ecx
- movl PT_GS(%esp), %edi
- REG_TO_PTGS %ecx
- SET_KERNEL_GS %ecx
-
- GET_CR2_INTO(%ecx) # might clobber %eax
-
- /* fixup orig %eax */
- movl PT_ORIG_EAX(%esp), %edx # get the error code
- movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
-
- TRACE_IRQS_OFF
- movl %esp, %eax # pt_regs pointer
- CALL_NOSPEC edi
- jmp ret_from_exception
-SYM_CODE_END(common_exception_read_cr2)
-
SYM_CODE_START_LOCAL_NOALIGN(common_exception)
/* the function address is in %gs's slot on the stack */
SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -507,15 +507,6 @@ SYM_CODE_END(spurious_entries_start)
call error_entry
UNWIND_HINT_REGS
- .if \vector == X86_TRAP_PF
- /*
- * Store CR2 early so subsequent faults cannot clobber it. Use R12 as
- * intermediate storage as RDX can be clobbered in enter_from_user_mode().
- * GET_CR2_INTO can clobber RAX.
- */
- GET_CR2_INTO(%r12);
- .endif
-
.if \sane == 0
TRACE_IRQS_OFF
@@ -534,10 +525,6 @@ SYM_CODE_END(spurious_entries_start)
movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
.endif
- .if \vector == X86_TRAP_PF
- movq %r12, %rdx /* Move CR2 into 3rd argument */
- .endif
-
call \cfunc
.if \sane == 0
@@ -1061,12 +1048,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work
#endif
/*
- * Exception entry points.
- */
-
-idtentry X86_TRAP_PF page_fault do_page_fault has_error_code=1
-
-/*
* Reload gs selector with exception handling
* edi: new selector
*/
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -376,7 +376,8 @@ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP,
DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC, exc_alignment_check);
/* Raw exception entries which need extra work */
-DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3);
+DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3);
+DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault);
#ifdef CONFIG_X86_MCE
DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check);
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -9,17 +9,6 @@
#include <asm/idtentry.h>
#include <asm/siginfo.h> /* TRAP_TRACE, ... */
-#define dotraplinkage __visible
-
-asmlinkage void page_fault(void);
-asmlinkage void async_page_fault(void);
-
-#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
-asmlinkage void xen_page_fault(void);
-#endif
-
-dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
-
#ifdef CONFIG_X86_64
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
asmlinkage __visible notrace
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -59,7 +59,7 @@ static const __initconst struct idt_data
INTG(X86_TRAP_DB, asm_exc_debug),
SYSG(X86_TRAP_BP, asm_exc_int3),
#ifdef CONFIG_X86_32
- INTG(X86_TRAP_PF, page_fault),
+ INTG(X86_TRAP_PF, asm_exc_page_fault),
#endif
};
@@ -153,7 +153,7 @@ static const __initconst struct idt_data
* stacks work only after cpu_init().
*/
static const __initconst struct idt_data early_pf_idts[] = {
- INTG(X86_TRAP_PF, page_fault),
+ INTG(X86_TRAP_PF, asm_exc_page_fault),
};
/*
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -218,7 +218,7 @@ void kvm_async_pf_task_wake(u32 token)
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
-u32 kvm_read_and_reset_pf_reason(void)
+u32 noinstr kvm_read_and_reset_pf_reason(void)
{
u32 reason = 0;
@@ -230,9 +230,8 @@ u32 kvm_read_and_reset_pf_reason(void)
return reason;
}
EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
-NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
-bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
+noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
u32 reason = kvm_read_and_reset_pf_reason();
@@ -244,6 +243,9 @@ bool __kvm_handle_async_pf(struct pt_reg
return false;
}
+ idtentry_enter(regs);
+ instr_begin();
+
/*
* If the host managed to inject an async #PF into an interrupt
* disabled region, then die hard as this is not going to end well
@@ -258,13 +260,13 @@ bool __kvm_handle_async_pf(struct pt_reg
/* Page is swapped out by the host. */
kvm_async_pf_task_wait_schedule(token);
} else {
- rcu_irq_enter();
kvm_async_pf_task_wake(token);
- rcu_irq_exit();
}
+
+ instr_end();
+ idtentry_exit(regs);
return true;
}
-NOKPROBE_SYMBOL(__kvm_handle_async_pf);
static void __init paravirt_ops_setup(void)
{
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1519,11 +1519,38 @@ trace_page_fault_entries(struct pt_regs
trace_page_fault_kernel(address, regs, error_code);
}
-dotraplinkage void
-do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
- unsigned long address)
+static __always_inline void
+handle_page_fault(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
{
+ trace_page_fault_entries(regs, error_code, address);
+
+ if (unlikely(kmmio_fault(regs, address)))
+ return;
+
+ /* Was the fault on kernel-controlled part of the address space? */
+ if (unlikely(fault_in_kernel_space(address))) {
+ do_kern_addr_fault(regs, error_code, address);
+ } else {
+ do_user_addr_fault(regs, error_code, address);
+ /*
+ * User address page fault handling might have reenabled
+ * interrupts. Fixing up all potential exit points of
+ * do_user_addr_fault() and its leaf functions is just not
+ * doable w/o creating an unholy mess or turning the code
+ * upside down.
+ */
+ local_irq_disable();
+ }
+}
+
+DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
+{
+ unsigned long address = read_cr2();
+ bool rcu_exit;
+
prefetchw(¤t->mm->mmap_sem);
+
/*
* KVM has two types of events that are, logically, interrupts, but
* are unfortunately delivered using the #PF vector. These events are
@@ -1538,28 +1565,28 @@ do_page_fault(struct pt_regs *regs, unsi
* getting values from real and async page faults mixed up.
*
* Fingers crossed.
+ *
+ * The async #PF handling code takes care of idtentry handling
+ * itself.
*/
if (kvm_handle_async_pf(regs, (u32)address))
return;
- trace_page_fault_entries(regs, hw_error_code, address);
+ /*
+ * Entry handling for valid #PF from kernel mode is slightly
+ * different: RCU is already watching and rcu_irq_enter() must not
+ * be invoked because a kernel fault on a user space address might
+ * sleep.
+ *
+ * In case the fault hit a RCU idle region the conditional entry
+ * code reenabled RCU to avoid subsequent wreckage which helps
+ * debugability.
+ */
+ rcu_exit = idtentry_enter_cond_rcu(regs);
- if (unlikely(kmmio_fault(regs, address)))
- return;
+ instr_begin();
+ handle_page_fault(regs, error_code, address);
+ instr_end();
- /* Was the fault on kernel-controlled part of the address space? */
- if (unlikely(fault_in_kernel_space(address))) {
- do_kern_addr_fault(regs, hw_error_code, address);
- } else {
- do_user_addr_fault(regs, hw_error_code, address);
- /*
- * User address page fault handling might have reenabled
- * interrupts. Fixing up all potential exit points of
- * do_user_addr_fault() and its leaf functions is just not
- * doable w/o creating an unholy mess or turning the code
- * upside down.
- */
- local_irq_disable();
- }
+ idtentry_exit_cond_rcu(regs, rcu_exit);
}
-NOKPROBE_SYMBOL(do_page_fault);
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -627,7 +627,7 @@ static struct trap_array_entry trap_arra
#ifdef CONFIG_IA32_EMULATION
{ entry_INT80_compat, xen_entry_INT80_compat, false },
#endif
- { page_fault, xen_page_fault, false },
+ TRAP_ENTRY(exc_page_fault, false ),
TRAP_ENTRY(exc_divide_error, false ),
TRAP_ENTRY(exc_bounds, false ),
TRAP_ENTRY(exc_invalid_op, false ),
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -43,7 +43,7 @@ xen_pv_trap asm_exc_invalid_tss
xen_pv_trap asm_exc_segment_not_present
xen_pv_trap asm_exc_stack_segment
xen_pv_trap asm_exc_general_protection
-xen_pv_trap page_fault
+xen_pv_trap asm_exc_page_fault
xen_pv_trap asm_exc_spurious_interrupt_bug
xen_pv_trap asm_exc_coprocessor_error
xen_pv_trap asm_exc_alignment_check
next prev parent reply other threads:[~2020-05-05 14:18 UTC|newest]
Thread overview: 49+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 01/31] genirq: Provide irq_enter/exit_rcu() Thomas Gleixner
2020-05-15 5:53 ` Andy Lutomirski
2020-05-05 13:53 ` [patch V4 part 5 02/31] x86/entry: Provide helpers for execute on irqstack Thomas Gleixner
2020-05-06 8:20 ` Thomas Gleixner
2020-05-10 4:33 ` Lai Jiangshan
2020-05-11 9:07 ` Alexandre Chartre
2020-05-11 11:54 ` Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 03/31] x86/entry/64: Move softirq stack switch to C Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 04/31] x86/entry: Split idtentry_enter/exit() Thomas Gleixner
2020-05-11 12:42 ` Alexandre Chartre
2020-05-05 13:53 ` [patch V4 part 5 05/31] x86/entry: Switch XEN/PV hypercall entry to IDTENTRY Thomas Gleixner
2020-05-07 2:11 ` Boris Ostrovsky
2020-05-07 8:30 ` Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 06/31] x86/entry/64: Simplify idtentry_body Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 07/31] x86/entry: Provide idtentry_entry/exit_cond_rcu() Thomas Gleixner
2020-05-11 13:53 ` Alexandre Chartre
2020-05-11 14:13 ` Peter Zijlstra
2020-05-12 16:30 ` Thomas Gleixner
2020-05-05 13:53 ` Thomas Gleixner [this message]
2020-05-05 13:53 ` [patch V4 part 5 09/31] x86/entry: Remove the transition leftovers Thomas Gleixner
2020-05-11 14:11 ` Alexandre Chartre
2020-05-05 13:53 ` [patch V4 part 5 10/31] x86/entry: Change exit path of xen_failsafe_callback Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 11/31] x86/entry/64: Remove error_exit Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 12/31] x86/entry/32: Remove common_exception Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 13/31] x86/irq: Convey vector as argument and not in ptregs Thomas Gleixner
2020-05-10 2:44 ` Lai Jiangshan
2020-05-11 14:35 ` Thomas Gleixner
2020-05-11 15:11 ` Lai Jiangshan
2020-05-05 13:53 ` [patch V4 part 5 14/31] x86/irq/64: Provide handle_irq() Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 15/31] x86/entry: Add IRQENTRY_IRQ macro Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 16/31] x86/entry: Use idtentry for interrupts Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 17/31] x86/entry: Provide IDTENTRY_SYSVEC Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 18/31] x86/entry: Convert APIC interrupts to IDTENTRY_SYSVEC Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 19/31] x86/entry: Convert SMP system vectors " Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 20/31] x86/entry: Convert various system vectors Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 21/31] x86/entry: Convert KVM vectors to IDTENTRY_SYSVEC Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 22/31] x86/entry: Convert various hypervisor " Thomas Gleixner
2020-05-06 16:56 ` Wei Liu
2020-05-06 17:11 ` Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 23/31] x86/entry: Convert XEN hypercall vector " Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 24/31] x86/entry: Convert reschedule interrupt to IDTENTRY_RAW Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 25/31] x86/entry: Remove the apic/BUILD interrupt leftovers Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 26/31] x86/entry/64: Remove IRQ stack switching ASM Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 27/31] x86/entry: Make enter_from_user_mode() static Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 28/31] x86/entry/32: Remove redundant irq disable code Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 29/31] x86/entry/64: Remove TRACE_IRQS_*_DEBUG Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 30/31] x86/entry: Move paranoid irq tracing out of ASM code Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 31/31] x86/entry: Remove the TRACE_IRQS cruft Thomas Gleixner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200505135828.911280411@linutronix.de \
--to=tglx@linutronix.de \
--cc=alexandre.chartre@oracle.com \
--cc=boris.ostrovsky@oracle.com \
--cc=brgerst@gmail.com \
--cc=frederic@kernel.org \
--cc=jgross@suse.com \
--cc=joel@joelfernandes.org \
--cc=jpoimboe@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=luto@kernel.org \
--cc=mathieu.desnoyers@efficios.com \
--cc=mhiramat@kernel.org \
--cc=paulmck@kernel.org \
--cc=pbonzini@redhat.com \
--cc=pmladek@suse.com \
--cc=rostedt@goodmis.org \
--cc=sean.j.christopherson@intel.com \
--cc=will@kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).