* [patch V4 part 5 01/31] genirq: Provide irq_enter/exit_rcu()
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
@ 2020-05-05 13:53 ` Thomas Gleixner
2020-05-15 5:53 ` Andy Lutomirski
2020-05-05 13:53 ` [patch V4 part 5 02/31] x86/entry: Provide helpers for execute on irqstack Thomas Gleixner
` (29 subsequent siblings)
30 siblings, 1 reply; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:53 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
irq_enter()/exit() include the RCU handling. To properly separate the RCU
handling provide variants which contain only the non-RCU related
functionality.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/hardirq.h | 13 +++++++++++--
kernel/softirq.c | 35 +++++++++++++++++++++++++++--------
2 files changed, 38 insertions(+), 10 deletions(-)
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -43,7 +43,11 @@ extern void rcu_nmi_exit(void);
/*
* Enter irq context (on NO_HZ, update jiffies):
*/
-extern void irq_enter(void);
+void irq_enter(void);
+/*
+ * Like irq_enter(), but RCU is already watching.
+ */
+void irq_enter_rcu(void);
/*
* Exit irq context without processing softirqs:
@@ -58,7 +62,12 @@ extern void irq_enter(void);
/*
* Exit irq context and process softirqs if needed:
*/
-extern void irq_exit(void);
+void irq_exit(void);
+
+/*
+ * Like irq_exit(), but return with RCU watching.
+ */
+void irq_exit_rcu(void);
#ifndef arch_nmi_enter
#define arch_nmi_enter() do { } while (0)
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -339,12 +339,11 @@ asmlinkage __visible void do_softirq(voi
local_irq_restore(flags);
}
-/*
- * Enter an interrupt context.
+/**
+ * irq_enter_rcu - Enter an interrupt context with RCU watching
*/
-void irq_enter(void)
+void irq_enter_rcu(void)
{
- rcu_irq_enter();
if (is_idle_task(current) && !in_interrupt()) {
/*
* Prevent raise_softirq from needlessly waking up ksoftirqd
@@ -354,10 +353,18 @@ void irq_enter(void)
tick_irq_enter();
_local_bh_enable();
}
-
__irq_enter();
}
+/**
+ * irq_enter - Enter an interrupt context including RCU update
+ */
+void irq_enter(void)
+{
+ rcu_irq_enter();
+ irq_enter_rcu();
+}
+
static inline void invoke_softirq(void)
{
if (ksoftirqd_running(local_softirq_pending()))
@@ -397,10 +404,12 @@ static inline void tick_irq_exit(void)
#endif
}
-/*
- * Exit an interrupt context. Process softirqs if needed and possible:
+/**
+ * irq_exit_rcu() - Exit an interrupt context without updating RCU
+ *
+ * Also processes softirqs if needed and possible.
*/
-void irq_exit(void)
+void irq_exit_rcu(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
local_irq_disable();
@@ -413,6 +422,16 @@ void irq_exit(void)
invoke_softirq();
tick_irq_exit();
+}
+
+/**
+ * irq_exit - Exit an interrupt context, update RCU and lockdep
+ *
+ * Also processes softirqs if needed and possible.
+ */
+void irq_exit(void)
+{
+ irq_exit_rcu();
rcu_irq_exit();
/* must be last! */
lockdep_hardirq_exit();
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch V4 part 5 01/31] genirq: Provide irq_enter/exit_rcu()
2020-05-05 13:53 ` [patch V4 part 5 01/31] genirq: Provide irq_enter/exit_rcu() Thomas Gleixner
@ 2020-05-15 5:53 ` Andy Lutomirski
0 siblings, 0 replies; 49+ messages in thread
From: Andy Lutomirski @ 2020-05-15 5:53 UTC (permalink / raw)
To: Thomas Gleixner
Cc: LKML, X86 ML, Paul E. McKenney, Andy Lutomirski,
Alexandre Chartre, Frederic Weisbecker, Paolo Bonzini,
Sean Christopherson, Masami Hiramatsu, Petr Mladek,
Steven Rostedt, Joel Fernandes, Boris Ostrovsky, Juergen Gross,
Brian Gerst, Mathieu Desnoyers, Josh Poimboeuf, Will Deacon
On Tue, May 5, 2020 at 7:16 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> irq_enter()/exit() include the RCU handling. To properly separate the RCU
> handling provide variants which contain only the non-RCU related
> functionality.
Acked-by: Andy Lutomirski <luto@kernel.org>
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 02/31] x86/entry: Provide helpers for execute on irqstack
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 01/31] genirq: Provide irq_enter/exit_rcu() Thomas Gleixner
@ 2020-05-05 13:53 ` Thomas Gleixner
2020-05-06 8:20 ` Thomas Gleixner
` (2 more replies)
2020-05-05 13:53 ` [patch V4 part 5 03/31] x86/entry/64: Move softirq stack switch to C Thomas Gleixner
` (28 subsequent siblings)
30 siblings, 3 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:53 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
Device interrupt handlers and system vector handlers are executed on the
interrupt stack. The stack switch happens in the low level assembly entry
code. This conflicts with the efforts to consolidate the exit code in C to
ensure correctness vs. RCU and tracing.
As there is no way to move #DB away from IST due to the MOV SS issue, the
requirements vs. #DB and NMI for switching to the interrupt stack do not
exist anymore. The only requirement is that interrupts are disabled.
That allows to move the stack switching to C code which simplifies the
entry/exit handling further because it allows to switch stacks after
handling the entry and on exit before handling RCU, return to usermode and
kernel preemption in the same way as for regular exceptions.
That also allows to move the xen hypercall extra magic code and the softirq
stack switching into C.
The mechanism is straightforward:
1) Store the current stack pointer on top of the interrupt stack. That's
required for the unwinder.
2) Switch the stack pointer
3) Call the function
4) Restore the stack pointer
The full code sequence to make the unwinder happy is:
pushq %rbp
movq %rsp, %rbp
movq $(top_of_hardirq_stack - 8), %reg
movq %rsp, (%reg)
movq %reg , %rsp
call function
popq %rsp
leaveq
While the following sequence would spare the 'popq %rsp':
pushq %rbp
movq $(top_of_hardirq_stack - 8), %rbp
movq %rsp, (%rbp)
xchgq %rbp, %rsp
call function
movq %rbp, %rsp
leaveq
but that requires further changes to objtool so that the unwinder works
correctly. Can be done on top and is not critical for now.
Provide helper functions to check whether the interrupt stack is already
active and whether stack switching is required.
64 bit only for now. 32 bit has a variant of that already. Once this is
cleaned up the two implementations might be consolidated as a cleanup on
top.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/include/asm/irq_stack.h | 61 +++++++++++++++++++++++++++++++++++++++
1 file changed, 61 insertions(+)
--- /dev/null
+++ b/arch/x86/include/asm/irq_stack.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_IRQ_STACK_H
+#define _ASM_X86_IRQ_STACK_H
+
+#include <linux/ptrace.h>
+
+#include <asm/processor.h>
+
+#ifdef CONFIG_X86_64
+static __always_inline bool irqstack_active(void)
+{
+ return __this_cpu_read(irq_count) != -1;
+}
+
+/*
+ * Macro to emit code for running @func on the irq stack.
+ */
+#define RUN_ON_IRQSTACK(func) { \
+ unsigned long tos; \
+ \
+ lockdep_assert_irqs_disabled(); \
+ \
+ tos = ((unsigned long)__this_cpu_read(hardirq_stack_ptr)) - 8; \
+ \
+ __this_cpu_add(irq_count, 1); \
+ asm volatile( \
+ "pushq %%rbp \n" \
+ "movq %%rsp, %%rbp \n" \
+ "movq %%rsp, (%[ts]) \n" \
+ "movq %[ts], %%rsp \n" \
+ "1: \n" \
+ " .pushsection .discard.instr_begin \n" \
+ " .long 1b - . \n" \
+ " .popsection \n" \
+ "call " __ASM_FORM(func) " \n" \
+ "2: \n" \
+ " .pushsection .discard.instr_end \n" \
+ " .long 2b - . \n" \
+ " .popsection \n" \
+ "popq %%rsp \n" \
+ "leaveq \n" \
+ : \
+ : [ts] "r" (tos) \
+ : "memory" \
+ ); \
+ __this_cpu_sub(irq_count, 1); \
+}
+
+#else /* CONFIG_X86_64 */
+static __always_inline bool irqstack_active(void) { return false; }
+#define RUN_ON_IRQSTACK(func) do { } while (0)
+#endif /* !CONFIG_X86_64 */
+
+static __always_inline bool irq_needs_irq_stack(struct pt_regs *regs)
+{
+ if (IS_ENABLED(CONFIG_X86_32))
+ return false;
+ return !user_mode(regs) && !irqstack_active();
+}
+
+#endif
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch V4 part 5 02/31] x86/entry: Provide helpers for execute on irqstack
2020-05-05 13:53 ` [patch V4 part 5 02/31] x86/entry: Provide helpers for execute on irqstack Thomas Gleixner
@ 2020-05-06 8:20 ` Thomas Gleixner
2020-05-10 4:33 ` Lai Jiangshan
2020-05-11 9:07 ` Alexandre Chartre
2 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-06 8:20 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
Thomas Gleixner <tglx@linutronix.de> writes:
> That also allows to move the xen hypercall extra magic code and the softirq
> stack switching into C.
>
> The mechanism is straight forward:
>
> 1) Store the current stack pointer on top of the interrupt stack. That's
> required for the unwinder.
>
> 2) Switch the stack pointer
>
> 3) Call the function
>
> 4) Restore the stackpointer
>
> The full code sequence to make the unwinder happy is:
>
> pushq %rbp
> movq %rsp, %rbp
> movq $(top_of_hardirq_stack - 8), %reg
> movq %rsp, (%reg)
> movq %reg , %rsp
> call function
> popq %rsp
> leaveq
>
> While the following sequence would spare the 'popq %rsp':
>
> pushq %rbp
> movq $(top_of_hardirq_stack - 8), %rbp
> movq %rsp, (%rrbp)
> xchgq %rbp, %rsp
> call function
> movq %rbp, %rsp
> leaveq
So I stared some more into that.
The push rbp is wrong for the frame unwinder case. That one is happy
(except for objtool) with the most minimalistic variant:
movq %%rsp, (%[tos])
movq %[tos], %%rsp
call function
popq %%rsp
which is not surprising because for the frame unwinder this is similar
to the 'gcc aligns stack in the middle of the function' handling. BP
still has to point to the previous frame. Adjustment of BP must only
happen on function entry.
The stack border convention of having the pointer to the previous stack
in the top word is sufficient for this.
objtool complains though:
warning: objtool: do_softirq_own_stack()+0x67: return with modified stack frame
That obviously also makes the ORC unwinder unhappy as objtool fails to
provide the right hint. But also for ORC this construct should be
completely sufficient.
I'm exploring another idea right now, but wanted to share the info.
Thanks,
tglx
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch V4 part 5 02/31] x86/entry: Provide helpers for execute on irqstack
2020-05-05 13:53 ` [patch V4 part 5 02/31] x86/entry: Provide helpers for execute on irqstack Thomas Gleixner
2020-05-06 8:20 ` Thomas Gleixner
@ 2020-05-10 4:33 ` Lai Jiangshan
2020-05-11 9:07 ` Alexandre Chartre
2 siblings, 0 replies; 49+ messages in thread
From: Lai Jiangshan @ 2020-05-10 4:33 UTC (permalink / raw)
To: Thomas Gleixner
Cc: LKML, x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
On Tue, May 5, 2020 at 10:19 PM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> Device interrupt handlers and system vector handlers are executed on the
> interrupt stack. The stack switch happens in the low level assembly entry
> code. This conflicts with the efforts to consolidate the exit code in C to
> ensure correctness vs. RCU and tracing.
>
> As there is no way to move #DB away from IST due to the MOV SS issue, the
> requirements vs. #DB and NMI for switching to the interrupt stack do not
> exist anymore. The only requirement is that interrupts are disabled.
Hi, tglx and Andy Lutomirski,
Is there any information about "no way to move #DB away from IST
due to the MOV SS issue"? IST-based #DB results in ist_shift (for
nested #DB) and debug_idt (for #NMI vs. #DB), which are somewhat
ugly. If IST-less #DB should work, the debug stack should be switched
in software, like the interrupt stack.
There was a "POP/MOV SS" CVE/issue about #BP which led to
making #BP IST-less in d8ba61ba58c8
(x86/entry/64: Don't use IST entry for #BP stack)
#DB and #BP are treated like #NMI due to their super-interrupt
ability. But the kernel has much more control over #DB and #BP,
which can be disabled by putting the code snippet into non-instrumentable
sections like __entry, noinstr, etc.
Is it possible to implement IST-less #DB?
Thanks,
Lai
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch V4 part 5 02/31] x86/entry: Provide helpers for execute on irqstack
2020-05-05 13:53 ` [patch V4 part 5 02/31] x86/entry: Provide helpers for execute on irqstack Thomas Gleixner
2020-05-06 8:20 ` Thomas Gleixner
2020-05-10 4:33 ` Lai Jiangshan
@ 2020-05-11 9:07 ` Alexandre Chartre
2020-05-11 11:54 ` Thomas Gleixner
2 siblings, 1 reply; 49+ messages in thread
From: Alexandre Chartre @ 2020-05-11 9:07 UTC (permalink / raw)
To: Thomas Gleixner, LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Frederic Weisbecker,
Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
Juergen Gross, Brian Gerst, Mathieu Desnoyers, Josh Poimboeuf,
Will Deacon
On 5/5/20 3:53 PM, Thomas Gleixner wrote:
> Device interrupt handlers and system vector handlers are executed on the
> interrupt stack. The stack switch happens in the low level assembly entry
> code. This conflicts with the efforts to consolidate the exit code in C to
> ensure correctness vs. RCU and tracing.
>
> As there is no way to move #DB away from IST due to the MOV SS issue, the
> requirements vs. #DB and NMI for switching to the interrupt stack do not
> exist anymore. The only requirement is that interrupts are disabled.
>
> That allows to move the stack switching to C code which simplifies the
> entry/exit handling further because it allows to switch stacks after
> handling the entry and on exit before handling RCU, return to usermode and
> kernel preemption in the same way as for regular exceptions.
>
> That also allows to move the xen hypercall extra magic code and the softirq
> stack switching into C.
>
> The mechanism is straight forward:
>
> 1) Store the current stack pointer on top of the interrupt stack. That's
> required for the unwinder.
>
> 2) Switch the stack pointer
>
> 3) Call the function
>
> 4) Restore the stackpointer
>
> The full code sequence to make the unwinder happy is:
>
> pushq %rbp
> movq %rsp, %rbp
> movq $(top_of_hardirq_stack - 8), %reg
> movq %rsp, (%reg)
> movq %reg , %rsp
> call function
> popq %rsp
> leaveq
>
> While the following sequence would spare the 'popq %rsp':
>
> pushq %rbp
> movq $(top_of_hardirq_stack - 8), %rbp
> movq %rsp, (%rrbp)
Should be (%rbp) instead of (%rrbp).
> xchgq %rbp, %rsp
> call function
> movq %rbp, %rsp
> leaveq
>
> but that requires further changes to objtool so that the unwinder works
> correctly. Can be done on top and is not critical for now.
>
> Provide helper functions to check whether the interrupt stack is already
> active and whether stack switching is required.
>
> 64 bit only for now. 32 bit has a variant of that already. Once this is
> cleaned up the two implementations might be consolidated as a cleanup on
> top.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
> arch/x86/include/asm/irq_stack.h | 61 +++++++++++++++++++++++++++++++++++++++
> 1 file changed, 61 insertions(+)
>
> --- /dev/null
> +++ b/arch/x86/include/asm/irq_stack.h
...
> +/*
> + * Macro to emit code for running @func on the irq stack.
> + */
> +#define RUN_ON_IRQSTACK(func) { \
> + unsigned long tos; \
> + \
> + lockdep_assert_irqs_disabled(); \
> + \
> + tos = ((unsigned long)__this_cpu_read(hardirq_stack_ptr)) - 8; \
> + \
> + __this_cpu_add(irq_count, 1); \
> + asm volatile( \
> + "pushq %%rbp \n" \
> + "movq %%rsp, %%rbp \n" \
> + "movq %%rsp, (%[ts]) \n" \
> + "movq %[ts], %%rsp \n" \
> + "1: \n" \
> + " .pushsection .discard.instr_begin \n" \
> + " .long 1b - . \n" \
> + " .popsection \n" \
> + "call " __ASM_FORM(func) " \n" \
> + "2: \n" \
> + " .pushsection .discard.instr_end \n" \
> + " .long 2b - . \n" \
> + " .popsection \n" \
> + "popq %%rsp \n" \
> + "leaveq \n" \
> + : \
> + : [ts] "r" (tos) \
> + : "memory" \
> + ); \
> + __this_cpu_sub(irq_count, 1); \
> +}
The pushsection/popsection discard.instr_begin/end sequences are used several
times in asm() statement at different places, so I wonder if it might be worth
having a macro.
In part 1, patch 20/36 adds instr_begin()/end(): they provide the sequence
but already encapsulated into an asm() statement, then we could do something
like this:
/* Begin/end of an instrumentation safe region */
#define instr_begin_insn(label) \
__stringify(label) ":\n\t" \
".pushsection .discard.instr_begin\n\t" \
".long " __stringify(label) "b - .\n\t" \
".popsection\n\t"
#define instr_end_insn(label) \
__stringify(label) ":\n\t" \
".pushsection .discard.instr_end\n\t" \
".long " __stringify(label) "b - .\n\t" \
".popsection\n\t"
#define instr_begin() ({asm volatile(instr_begin_insn(__COUNTER__));})
#define instr_end() ({asm volatile(instr_end_insn(__COUNTER__));})
> +#else /* CONFIG_X86_64 */
And the RUN_ON_IRQSTACK macro would become:
#define RUN_ON_IRQSTACK(func) { \
unsigned long tos; \
\
lockdep_assert_irqs_disabled(); \
\
tos = ((unsigned long)__this_cpu_read(hardirq_stack_ptr)) - 8; \
\
__this_cpu_add(irq_count, 1); \
asm volatile( \
"pushq %%rbp \n" \
"movq %%rsp, %%rbp \n" \
"movq %%rsp, (%[ts]) \n" \
"movq %[ts], %%rsp \n" \
instr_begin_insn(1) \
"call " __ASM_FORM(func) " \n" \
instr_end_insn(2) \
"popq %%rsp \n" \
"leaveq \n" \
: \
: [ts] "r" (tos) \
: "memory" \
); \
__this_cpu_sub(irq_count, 1); \
}
alex.
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch V4 part 5 02/31] x86/entry: Provide helpers for execute on irqstack
2020-05-11 9:07 ` Alexandre Chartre
@ 2020-05-11 11:54 ` Thomas Gleixner
0 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-11 11:54 UTC (permalink / raw)
To: Alexandre Chartre, LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Frederic Weisbecker,
Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
Juergen Gross, Brian Gerst, Mathieu Desnoyers, Josh Poimboeuf,
Will Deacon
Alexandre Chartre <alexandre.chartre@oracle.com> writes:
> On 5/5/20 3:53 PM, Thomas Gleixner wrote:
>> + __this_cpu_add(irq_count, 1); \
>> + asm volatile( \
>> + "pushq %%rbp \n" \
>> + "movq %%rsp, %%rbp \n" \
>> + "movq %%rsp, (%[ts]) \n" \
>> + "movq %[ts], %%rsp \n" \
>> + "1: \n" \
>> + " .pushsection .discard.instr_begin \n" \
>> + " .long 1b - . \n" \
>> + " .popsection \n" \
>> + "call " __ASM_FORM(func) " \n" \
>> + "2: \n" \
>> + " .pushsection .discard.instr_end \n" \
>> + " .long 2b - . \n" \
>> + " .popsection \n" \
>> + "popq %%rsp \n" \
>> + "leaveq \n" \
>> + : \
>> + : [ts] "r" (tos) \
>> + : "memory" \
>> + ); \
>> + __this_cpu_sub(irq_count, 1); \
>> +}
>
> The pushsection/popsection discard.instr_begin/end sequences are used several
> times in asm() statement at different places, so I wonder if it might be worth
> having a macro.
As discussed elsewhere this is going to move to ASM partially and the
various variants are no longer necessary.
Thanks,
tglx
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 03/31] x86/entry/64: Move softirq stack switch to C
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 01/31] genirq: Provide irq_enter/exit_rcu() Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 02/31] x86/entry: Provide helpers for execute on irqstack Thomas Gleixner
@ 2020-05-05 13:53 ` Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 04/31] x86/entry: Split idtentry_enter/exit() Thomas Gleixner
` (27 subsequent siblings)
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:53 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
Use the new C code helpers to move do_softirq_own_stack() out of ASM
code. Mark the switching function noinstr as this is really not a good
place for being instrumented.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/entry_64.S | 13 -------------
arch/x86/kernel/irq_64.c | 12 ++++++++++++
2 files changed, 12 insertions(+), 13 deletions(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1107,19 +1107,6 @@ SYM_CODE_START_LOCAL_NOALIGN(.Lbad_gs)
SYM_CODE_END(.Lbad_gs)
.previous
-/* Call softirq on interrupt stack. Interrupts are off. */
-.pushsection .text, "ax"
-SYM_FUNC_START(do_softirq_own_stack)
- pushq %rbp
- mov %rsp, %rbp
- ENTER_IRQ_STACK regs=0 old_rsp=%r11
- call __do_softirq
- LEAVE_IRQ_STACK regs=0
- leaveq
- ret
-SYM_FUNC_END(do_softirq_own_stack)
-.popsection
-
#ifdef CONFIG_XEN_PV
/*
* A note on the "critical region" in our callback handler.
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -20,6 +20,7 @@
#include <linux/sched/task_stack.h>
#include <asm/cpu_entry_area.h>
+#include <asm/irq_stack.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
@@ -70,3 +71,14 @@ int irq_init_percpu_irqstack(unsigned in
return 0;
return map_irq_stack(cpu);
}
+
+noinstr void do_softirq_own_stack(void)
+{
+ if (irqstack_active()) {
+ instr_begin();
+ __do_softirq();
+ instr_end();
+ } else {
+ RUN_ON_IRQSTACK(__do_softirq);
+ }
+}
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 04/31] x86/entry: Split idtentry_enter/exit()
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (2 preceding siblings ...)
2020-05-05 13:53 ` [patch V4 part 5 03/31] x86/entry/64: Move softirq stack switch to C Thomas Gleixner
@ 2020-05-05 13:53 ` Thomas Gleixner
2020-05-11 12:42 ` Alexandre Chartre
2020-05-05 13:53 ` [patch V4 part 5 05/31] x86/entry: Switch XEN/PV hypercall entry to IDTENTRY Thomas Gleixner
` (26 subsequent siblings)
30 siblings, 1 reply; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:53 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
Split the implementation of idtentry_enter/exit() out into inline functions
so that variants of idtentry_enter/exit() can be implemented without
duplicating code.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/common.c | 37 +++++++++++++++++++++----------------
1 file changed, 21 insertions(+), 16 deletions(-)
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -539,22 +539,7 @@ void noinstr idtentry_enter(struct pt_re
}
}
-/**
- * idtentry_exit - Common code to handle return from exceptions
- * @regs: Pointer to pt_regs (exception entry regs)
- *
- * Depending on the return target (kernel/user) this runs the necessary
- * preemption and work checks if possible and reguired and returns to
- * the caller with interrupts disabled and no further work pending.
- *
- * This is the last action before returning to the low level ASM code which
- * just needs to return to the appropriate context.
- *
- * Invoked by all exception/interrupt IDTENTRY handlers which are not
- * returning through the paranoid exit path (all except NMI, #DF and the IST
- * variants of #MC and #DB).
- */
-void noinstr idtentry_exit(struct pt_regs *regs)
+static __always_inline void __idtentry_exit(struct pt_regs *regs)
{
lockdep_assert_irqs_disabled();
@@ -599,3 +584,23 @@ void noinstr idtentry_exit(struct pt_reg
rcu_irq_exit();
}
}
+
+/**
+ * idtentry_exit - Common code to handle return from exceptions
+ * @regs: Pointer to pt_regs (exception entry regs)
+ *
+ * Depending on the return target (kernel/user) this runs the necessary
+ * preemption and work checks if possible and required and returns to
+ * the caller with interrupts disabled and no further work pending.
+ *
+ * This is the last action before returning to the low level ASM code which
+ * just needs to return to the appropriate context.
+ *
+ * Invoked by all exception/interrupt IDTENTRY handlers which are not
+ * returning through the paranoid exit path (all except NMI, #DF and the IST
+ * variants of #MC and #DB).
+ */
+void noinstr idtentry_exit(struct pt_regs *regs)
+{
+ __idtentry_exit(regs);
+}
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch V4 part 5 04/31] x86/entry: Split idtentry_enter/exit()
2020-05-05 13:53 ` [patch V4 part 5 04/31] x86/entry: Split idtentry_enter/exit() Thomas Gleixner
@ 2020-05-11 12:42 ` Alexandre Chartre
0 siblings, 0 replies; 49+ messages in thread
From: Alexandre Chartre @ 2020-05-11 12:42 UTC (permalink / raw)
To: Thomas Gleixner, LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Frederic Weisbecker,
Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
Juergen Gross, Brian Gerst, Mathieu Desnoyers, Josh Poimboeuf,
Will Deacon
On 5/5/20 3:53 PM, Thomas Gleixner wrote:
> Split the implementation of idtentry_enter/exit() out into inline functions
> so that variaants of idtentry_enter/exit() can be implemented without
typo: "variaants"
alex.
> duplicating code.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
> arch/x86/entry/common.c | 37 +++++++++++++++++++++----------------
> 1 file changed, 21 insertions(+), 16 deletions(-)
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 05/31] x86/entry: Switch XEN/PV hypercall entry to IDTENTRY
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (3 preceding siblings ...)
2020-05-05 13:53 ` [patch V4 part 5 04/31] x86/entry: Split idtentry_enter/exit() Thomas Gleixner
@ 2020-05-05 13:53 ` Thomas Gleixner
2020-05-07 2:11 ` Boris Ostrovsky
2020-05-05 13:53 ` [patch V4 part 5 06/31] x86/entry/64: Simplify idtentry_body Thomas Gleixner
` (25 subsequent siblings)
30 siblings, 1 reply; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:53 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
Convert the XEN/PV hypercall to IDTENTRY:
- Emit the ASM stub with DECLARE_IDTENTRY
- Remove the ASM idtentry in 64bit
- Remove the open coded ASM entry code in 32bit
- Remove the old prototypes
The handler stubs need to stay in ASM code as it needs corner case handling
and adjustment of the stack pointer.
Provide a new C function which invokes the entry/exit handling and calls
into the XEN handler on the interrupt stack.
The exit code is slightly different from the regular idtentry_exit() on
non-preemptible kernels. If the hypercall is preemptible and need_resched()
is set then XEN provides a preempt hypercall scheduling function. Add it as
conditional path to __idtentry_exit() so the function can be reused.
__idtentry_exit() is forced inlined so on the regular idtentry_exit() path
the extra condition is optimized out by the compiler.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Juergen Gross <jgross@suse.com>
---
arch/x86/entry/common.c | 66 ++++++++++++++++++++++++++++++++++++++--
arch/x86/entry/entry_32.S | 17 +++++-----
arch/x86/entry/entry_64.S | 22 ++++---------
arch/x86/include/asm/idtentry.h | 13 +++++++
arch/x86/xen/setup.c | 4 +-
arch/x86/xen/smp_pv.c | 3 +
arch/x86/xen/xen-asm_32.S | 12 ++++---
arch/x86/xen/xen-asm_64.S | 2 -
arch/x86/xen/xen-ops.h | 1
drivers/xen/preempt.c | 2 -
include/xen/xen-ops.h | 4 ++
11 files changed, 111 insertions(+), 35 deletions(-)
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -27,6 +27,9 @@
#include <linux/syscalls.h>
#include <linux/uaccess.h>
+#include <xen/xen-ops.h>
+#include <xen/events.h>
+
#include <asm/desc.h>
#include <asm/traps.h>
#include <asm/vdso.h>
@@ -35,6 +38,7 @@
#include <asm/nospec-branch.h>
#include <asm/io_bitmap.h>
#include <asm/syscall.h>
+#include <asm/irq_stack.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -539,7 +543,8 @@ void noinstr idtentry_enter(struct pt_re
}
}
-static __always_inline void __idtentry_exit(struct pt_regs *regs)
+static __always_inline void __idtentry_exit(struct pt_regs *regs,
+ bool preempt_hcall)
{
lockdep_assert_irqs_disabled();
@@ -571,6 +576,16 @@ static __always_inline void __idtentry_e
instr_end();
return;
}
+ } else if (IS_ENABLED(CONFIG_XEN_PV)) {
+ if (preempt_hcall) {
+ /* See CONFIG_PREEMPTION above */
+ instr_begin();
+ rcu_irq_exit_preempt();
+ xen_maybe_preempt_hcall();
+ trace_hardirqs_on();
+ instr_end();
+ return;
+ }
}
instr_begin();
/* Tell the tracer that IRET will enable interrupts */
@@ -602,5 +617,52 @@ static __always_inline void __idtentry_e
*/
void noinstr idtentry_exit(struct pt_regs *regs)
{
- __idtentry_exit(regs);
+ __idtentry_exit(regs, false);
+}
+
+#ifdef CONFIG_XEN_PV
+__visible void __xen_pv_evtchn_do_upcall(void)
+{
+ irq_enter_rcu();
+ inc_irq_stat(irq_hv_callback_count);
+
+ xen_hvm_evtchn_do_upcall();
+
+ irq_exit_rcu();
+}
+
+/*
+ * Separate function as objtool is unhappy about having
+ * the macro at the call site.
+ */
+static noinstr void run_on_irqstack(void)
+{
+ RUN_ON_IRQSTACK(__xen_pv_evtchn_do_upcall);
+}
+
+__visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs;
+
+ idtentry_enter(regs);
+ old_regs = set_irq_regs(regs);
+
+ if (!irq_needs_irq_stack(regs)) {
+ instr_begin();
+ __xen_pv_evtchn_do_upcall();
+ instr_end();
+ } else {
+ run_on_irqstack();
+ }
+
+ set_irq_regs(old_regs);
+
+ if (IS_ENABLED(CONFIG_PREEMPTION)) {
+ __idtentry_exit(regs, false);
+ } else {
+ bool inhcall = __this_cpu_read(xen_in_preemptible_hcall);
+
+ __idtentry_exit(regs, inhcall && need_resched());
+ }
}
+#endif /* CONFIG_XEN_PV */
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1298,7 +1298,10 @@ SYM_CODE_END(native_iret)
#endif
#ifdef CONFIG_XEN_PV
-SYM_FUNC_START(xen_hypervisor_callback)
+/*
+ * See comment in entry_64.S for further explanation
+ */
+SYM_FUNC_START(exc_xen_hypervisor_callback)
/*
* Check to see if we got the event in the critical
* region in xen_iret_direct, after we've reenabled
@@ -1315,14 +1318,11 @@ SYM_FUNC_START(xen_hypervisor_callback)
pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
ENCODE_FRAME_POINTER
- TRACE_IRQS_OFF
+
mov %esp, %eax
- call xen_evtchn_do_upcall
-#ifndef CONFIG_PREEMPTION
- call xen_maybe_preempt_hcall
-#endif
- jmp ret_from_intr
-SYM_FUNC_END(xen_hypervisor_callback)
+ call xen_pv_evtchn_do_upcall
+ jmp handle_exception_return
+SYM_FUNC_END(exc_xen_hypervisor_callback)
/*
* Hypervisor uses this for application faults while it executes.
@@ -1464,6 +1464,7 @@ SYM_CODE_START_LOCAL_NOALIGN(handle_exce
movl %esp, %eax # pt_regs pointer
CALL_NOSPEC edi
+handle_exception_return:
#ifdef CONFIG_VM86
movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
movb PT_CS(%esp), %al
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1068,10 +1068,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work
idtentry X86_TRAP_PF page_fault do_page_fault has_error_code=1
-#ifdef CONFIG_XEN_PV
-idtentry 512 /* dummy */ hypervisor_callback xen_do_hypervisor_callback has_error_code=0
-#endif
-
/*
* Reload gs selector with exception handling
* edi: new selector
@@ -1120,9 +1116,10 @@ SYM_CODE_END(.Lbad_gs)
* So, on entry to the handler we detect whether we interrupted an
* existing activation in its critical region -- if so, we pop the current
* activation and restart the handler using the previous one.
+ *
+ * C calling convention: exc_xen_hypervisor_callback(struct pt_regs *)
*/
-/* do_hypervisor_callback(struct *pt_regs) */
-SYM_CODE_START_LOCAL(xen_do_hypervisor_callback)
+SYM_CODE_START_LOCAL(exc_xen_hypervisor_callback)
/*
* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
@@ -1132,15 +1129,10 @@ SYM_CODE_START_LOCAL(xen_do_hypervisor_c
movq %rdi, %rsp /* we don't return, adjust the stack frame */
UNWIND_HINT_REGS
- ENTER_IRQ_STACK old_rsp=%r10
- call xen_evtchn_do_upcall
- LEAVE_IRQ_STACK
-
-#ifndef CONFIG_PREEMPTION
- call xen_maybe_preempt_hcall
-#endif
- jmp error_exit
-SYM_CODE_END(xen_do_hypervisor_callback)
+ call xen_pv_evtchn_do_upcall
+
+ jmp error_return
+SYM_CODE_END(exc_xen_hypervisor_callback)
/*
* Hypervisor uses this for application faults while it executes.
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -344,6 +344,13 @@ static __always_inline void __##func(str
* This avoids duplicate defines and ensures that everything is consistent.
*/
+/*
+ * Dummy trap number so the low level ASM macro vector number checks do not
+ * match, which results in emitting plain IDTENTRY stubs without bells and
+ * whistles.
+ */
+#define X86_TRAP_OTHER 0xFFFF
+
/* Simple exception entry points. No hardware error code */
DECLARE_IDTENTRY(X86_TRAP_DE, exc_divide_error);
DECLARE_IDTENTRY(X86_TRAP_OF, exc_overflow);
@@ -385,4 +392,10 @@ DECLARE_IDTENTRY_XEN(X86_TRAP_DB, debug)
DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault);
#endif
+#ifdef CONFIG_XEN_PV
+DECLARE_IDTENTRY(X86_TRAP_OTHER, exc_xen_hypervisor_callback);
+#endif
+
+#undef X86_TRAP_OTHER
+
#endif
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -20,6 +20,7 @@
#include <asm/setup.h>
#include <asm/acpi.h>
#include <asm/numa.h>
+#include <asm/idtentry.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
@@ -993,7 +994,8 @@ static void __init xen_pvmmu_arch_setup(
HYPERVISOR_vm_assist(VMASST_CMD_enable,
VMASST_TYPE_pae_extended_cr3);
- if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
+ if (register_callback(CALLBACKTYPE_event,
+ asm_exc_xen_hypervisor_callback) ||
register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
BUG();
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -27,6 +27,7 @@
#include <asm/paravirt.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
+#include <asm/idtentry.h>
#include <asm/cpu.h>
#include <xen/interface/xen.h>
@@ -347,7 +348,7 @@ cpu_initialize_context(unsigned int cpu,
ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
ctxt->event_callback_eip =
- (unsigned long)xen_hypervisor_callback;
+ (unsigned long)asm_exc_xen_hypervisor_callback;
ctxt->failsafe_callback_eip =
(unsigned long)xen_failsafe_callback;
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -93,7 +93,7 @@ SYM_CODE_START(xen_iret)
/*
* If there's something pending, mask events again so we can
- * jump back into xen_hypervisor_callback. Otherwise do not
+ * jump back into exc_xen_hypervisor_callback. Otherwise do not
* touch XEN_vcpu_info_mask.
*/
jne 1f
@@ -113,7 +113,7 @@ SYM_CODE_START(xen_iret)
* Events are masked, so jumping out of the critical region is
* OK.
*/
- je xen_hypervisor_callback
+ je asm_exc_xen_hypervisor_callback
1: iret
xen_iret_end_crit:
@@ -127,7 +127,7 @@ SYM_CODE_END(xen_iret)
.globl xen_iret_start_crit, xen_iret_end_crit
/*
- * This is called by xen_hypervisor_callback in entry_32.S when it sees
+ * This is called by exc_xen_hypervisor_callback in entry_32.S when it sees
* that the EIP at the time of interrupt was between
* xen_iret_start_crit and xen_iret_end_crit.
*
@@ -144,7 +144,7 @@ SYM_CODE_END(xen_iret)
* eflags }
* cs } nested exception info
* eip }
- * return address : (into xen_hypervisor_callback)
+ * return address : (into asm_exc_xen_hypervisor_callback)
*
* In order to deliver the nested exception properly, we need to discard the
* nested exception frame such that when we handle the exception, we do it
@@ -152,7 +152,8 @@ SYM_CODE_END(xen_iret)
*
* The only caveat is that if the outer eax hasn't been restored yet (i.e.
* it's still on stack), we need to restore its value here.
- */
+*/
+.pushsection .noinstr.text, "ax"
SYM_CODE_START(xen_iret_crit_fixup)
/*
* Paranoia: Make sure we're really coming from kernel space.
@@ -181,3 +182,4 @@ SYM_CODE_START(xen_iret_crit_fixup)
2:
ret
SYM_CODE_END(xen_iret_crit_fixup)
+.popsection
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -54,7 +54,7 @@ xen_pv_trap asm_exc_simd_coprocessor_err
#ifdef CONFIG_IA32_EMULATION
xen_pv_trap entry_INT80_compat
#endif
-xen_pv_trap hypervisor_callback
+xen_pv_trap asm_exc_xen_hypervisor_callback
__INIT
SYM_CODE_START(xen_early_idt_handler_array)
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -8,7 +8,6 @@
#include <xen/xen-ops.h>
/* These are code, but not functions. Defined in entry.S */
-extern const char xen_hypervisor_callback[];
extern const char xen_failsafe_callback[];
void xen_sysenter_target(void);
--- a/drivers/xen/preempt.c
+++ b/drivers/xen/preempt.c
@@ -24,7 +24,7 @@
DEFINE_PER_CPU(bool, xen_in_preemptible_hcall);
EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
-asmlinkage __visible void xen_maybe_preempt_hcall(void)
+void xen_maybe_preempt_hcall(void)
{
if (unlikely(__this_cpu_read(xen_in_preemptible_hcall)
&& need_resched())) {
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -225,6 +225,8 @@ static inline void xen_preemptible_hcall
{
}
+static inline void xen_maybe_preempt_hcall(void) { }
+
#else
DECLARE_PER_CPU(bool, xen_in_preemptible_hcall);
@@ -239,6 +241,8 @@ static inline void xen_preemptible_hcall
__this_cpu_write(xen_in_preemptible_hcall, false);
}
+void xen_maybe_preempt_hcall(void);
+
#endif /* CONFIG_PREEMPTION */
#endif /* INCLUDE_XEN_OPS_H */
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch V4 part 5 05/31] x86/entry: Switch XEN/PV hypercall entry to IDTENTRY
2020-05-05 13:53 ` [patch V4 part 5 05/31] x86/entry: Switch XEN/PV hypercall entry to IDTENTRY Thomas Gleixner
@ 2020-05-07 2:11 ` Boris Ostrovsky
2020-05-07 8:30 ` Thomas Gleixner
0 siblings, 1 reply; 49+ messages in thread
From: Boris Ostrovsky @ 2020-05-07 2:11 UTC (permalink / raw)
To: Thomas Gleixner, LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Juergen Gross, Brian Gerst, Mathieu Desnoyers, Josh Poimboeuf,
Will Deacon
On 5/5/20 9:53 AM, Thomas Gleixner wrote:
> --- a/arch/x86/xen/setup.c
> +++ b/arch/x86/xen/setup.c
> @@ -20,6 +20,7 @@
> #include <asm/setup.h>
> #include <asm/acpi.h>
> #include <asm/numa.h>
> +#include <asm/idtentry.h>
> #include <asm/xen/hypervisor.h>
> #include <asm/xen/hypercall.h>
>
> @@ -993,7 +994,8 @@ static void __init xen_pvmmu_arch_setup(
> HYPERVISOR_vm_assist(VMASST_CMD_enable,
> VMASST_TYPE_pae_extended_cr3);
>
> - if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
> + if (register_callback(CALLBACKTYPE_event,
> + asm_exc_xen_hypervisor_callback) ||
This ...
> register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
> BUG();
>
> --- a/arch/x86/xen/smp_pv.c
> +++ b/arch/x86/xen/smp_pv.c
> @@ -27,6 +27,7 @@
> #include <asm/paravirt.h>
> #include <asm/desc.h>
> #include <asm/pgtable.h>
> +#include <asm/idtentry.h>
> #include <asm/cpu.h>
>
> #include <xen/interface/xen.h>
> @@ -347,7 +348,7 @@ cpu_initialize_context(unsigned int cpu,
> ctxt->gs_base_kernel = per_cpu_offset(cpu);
> #endif
> ctxt->event_callback_eip =
> - (unsigned long)xen_hypervisor_callback;
> + (unsigned long)asm_exc_xen_hypervisor_callback;
... and this should be xen_asm_exc_xen_hypervisor_callback (we need to
pop %rcx and %r11).
And then Xen PV guest boots.
-boris
> ctxt->failsafe_callback_eip =
> (unsigned long)xen_failsafe_callback;
> per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch V4 part 5 05/31] x86/entry: Switch XEN/PV hypercall entry to IDTENTRY
2020-05-07 2:11 ` Boris Ostrovsky
@ 2020-05-07 8:30 ` Thomas Gleixner
0 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-07 8:30 UTC (permalink / raw)
To: Boris Ostrovsky, LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Juergen Gross, Brian Gerst, Mathieu Desnoyers, Josh Poimboeuf,
Will Deacon
Boris Ostrovsky <boris.ostrovsky@oracle.com> writes:
>> - if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
>> + if (register_callback(CALLBACKTYPE_event,
>> + asm_exc_xen_hypervisor_callback) ||
>
>
> This ...
>
>
>> register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
>> BUG();
...
>> @@ -347,7 +348,7 @@ cpu_initialize_context(unsigned int cpu,
>> ctxt->gs_base_kernel = per_cpu_offset(cpu);
>> #endif
>> ctxt->event_callback_eip =
>> - (unsigned long)xen_hypervisor_callback;
>> + (unsigned long)asm_exc_xen_hypervisor_callback;
>
>
> ... and this should be xen_asm_exc_xen_hypervisor_callback (we need to
> pop %rcx and %r11).
>
>
> And then Xen PV guest boots.
Ooops. Thanks for tracking this down!
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 06/31] x86/entry/64: Simplify idtentry_body
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (4 preceding siblings ...)
2020-05-05 13:53 ` [patch V4 part 5 05/31] x86/entry: Switch XEN/PV hypercall entry to IDTENTRY Thomas Gleixner
@ 2020-05-05 13:53 ` Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 07/31] x86/entry: Provide idtentry_entry/exit_cond_rcu() Thomas Gleixner
` (24 subsequent siblings)
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:53 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
All C functions which do not have an error code have been converted to the
new IDTENTRY interface which does not expect an error code in the
arguments. Spare the XORL.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/entry_64.S | 2 --
1 file changed, 2 deletions(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -532,8 +532,6 @@ SYM_CODE_END(spurious_entries_start)
.if \has_error_code == 1
movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/
movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
- .else
- xorl %esi, %esi /* Clear the error code */
.endif
.if \vector == X86_TRAP_PF
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 07/31] x86/entry: Provide idtentry_entry/exit_cond_rcu()
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (5 preceding siblings ...)
2020-05-05 13:53 ` [patch V4 part 5 06/31] x86/entry/64: Simplify idtentry_body Thomas Gleixner
@ 2020-05-05 13:53 ` Thomas Gleixner
2020-05-11 13:53 ` Alexandre Chartre
2020-05-05 13:53 ` [patch V4 part 5 08/31] x86/entry: Switch page fault exception to IDTENTRY_RAW Thomas Gleixner
` (23 subsequent siblings)
30 siblings, 1 reply; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:53 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
The pagefault handler cannot use the regular idtentry_enter() because that
invokes rcu_irq_enter() if the pagefault was caused in the kernel. Not a
problem per se, but kernel side page faults can schedule which is not
possible without invoking rcu_irq_exit().
Adding rcu_irq_exit() and a matching rcu_irq_enter() into the actual
pagefault handling code is possible, but not pretty either.
Provide idtentry_enter/exit_cond_rcu() which calls rcu_irq_enter() only
when RCU is not watching. While a #PF taken with RCU not watching is not a
legit kernel #PF, establishing RCU before handling it avoids RCU side
effects which might affect debuggability.
The function is also useful for implementing lightweight scheduler IPI
entry handling later.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/common.c | 119 ++++++++++++++++++++++++++++++++++------
arch/x86/include/asm/idtentry.h | 3 +
2 files changed, 106 insertions(+), 16 deletions(-)
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -515,6 +515,28 @@ SYSCALL_DEFINE0(ni_syscall)
return -ENOSYS;
}
+static __always_inline bool __idtentry_enter(struct pt_regs *regs,
+ bool cond_rcu)
+{
+ if (user_mode(regs)) {
+ enter_from_user_mode();
+ } else {
+ if (!cond_rcu || !rcu_is_watching()) {
+ lockdep_hardirqs_off(CALLER_ADDR0);
+ rcu_irq_enter();
+ instr_begin();
+ trace_hardirqs_off_prepare();
+ instr_end();
+ return true;
+ } else {
+ instr_begin();
+ trace_hardirqs_off();
+ instr_end();
+ }
+ }
+ return false;
+}
+
/**
* idtentry_enter - Handle state tracking on idtentry
* @regs: Pointer to pt_regs of interrupted context
@@ -532,19 +554,60 @@ SYSCALL_DEFINE0(ni_syscall)
*/
void noinstr idtentry_enter(struct pt_regs *regs)
{
- if (user_mode(regs)) {
- enter_from_user_mode();
- } else {
- lockdep_hardirqs_off(CALLER_ADDR0);
- rcu_irq_enter();
- instr_begin();
- trace_hardirqs_off_prepare();
- instr_end();
- }
+ __idtentry_enter(regs, false);
+}
+
+/**
+ * idtentry_enter_cond_rcu - Handle state tracking on idtentry with conditional
+ * RCU handling
+ * @regs: Pointer to pt_regs of interrupted context
+ *
+ * Invokes:
+ * - lockdep irqflag state tracking as low level ASM entry disabled
+ * interrupts.
+ *
+ * - Context tracking if the exception hit user mode.
+ *
+ * - The hardirq tracer to keep the state consistent as low level ASM
+ * entry disabled interrupts.
+ *
+ * For kernel mode entries the conditional RCU handling is useful for two
+ * purposes
+ *
+ * 1) Pagefaults: Kernel code can fault and sleep, e.g. on exec. This code
+ * is not in an RCU idle section. If rcu_irq_enter() would be invoked
+ * then nothing would invoke rcu_irq_exit() before scheduling.
+ *
+ * If the kernel faults in a RCU idle section then all bets are off
+ * anyway but at least avoiding a subsequent issue vs. RCU is helpful for
+ * debugging.
+ *
+ * 2) Scheduler IPI: To avoid the overhead of a regular idtentry vs. RCU
+ * and irq_enter() the IPI can be made lightweight if the tracepoints
+ * are not enabled. While the IPI functionality itself does not require
+ * RCU (folding preempt count) it still calls out into instrumentable
+ * functions, e.g. ack_APIC_irq(). The scheduler IPI can hit RCU idle
+ * sections, so RCU needs to be adjusted. For the fast path case, e.g.
+ * KVM kicking a vCPU out of guest mode this can be avoided because the
+ * IPI is handled after KVM reestablished kernel context including RCU.
+ *
+ * For user mode entries enter_from_user_mode() must be invoked to
+ * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
+ * would not be possible.
+ *
+ * Returns: True if RCU has been adjusted on a kernel entry
+ * False otherwise
+ *
+ * The return value must be fed into the rcu_exit argument of
+ * idtentry_exit_cond_rcu().
+ */
+bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs)
+{
+ return __idtentry_enter(regs, true);
}
static __always_inline void __idtentry_exit(struct pt_regs *regs,
- bool preempt_hcall)
+ bool preempt_hcall, bool rcu_exit)
{
lockdep_assert_irqs_disabled();
@@ -568,7 +631,8 @@ static __always_inline void __idtentry_e
*/
if (!preempt_count()) {
instr_begin();
- rcu_irq_exit_preempt();
+ if (rcu_exit)
+ rcu_irq_exit_preempt();
if (need_resched())
preempt_schedule_irq();
/* Covers both tracing and lockdep */
@@ -592,11 +656,13 @@ static __always_inline void __idtentry_e
trace_hardirqs_on_prepare();
lockdep_hardirqs_on_prepare(CALLER_ADDR0);
instr_end();
- rcu_irq_exit();
+ if (rcu_exit)
+ rcu_irq_exit();
lockdep_hardirqs_on(CALLER_ADDR0);
} else {
/* IRQ flags state is correct already. Just tell RCU */
- rcu_irq_exit();
+ if (rcu_exit)
+ rcu_irq_exit();
}
}
@@ -617,7 +683,28 @@ static __always_inline void __idtentry_e
*/
void noinstr idtentry_exit(struct pt_regs *regs)
{
- __idtentry_exit(regs, false);
+ __idtentry_exit(regs, false, true);
+}
+
+/**
+ * idtentry_exit_cond_rcu - Handle return from exception with conditional RCU
+ * handling
+ * @regs: Pointer to pt_regs (exception entry regs)
+ * @rcu_exit: Invoke rcu_irq_exit() if true
+ *
+ * Depending on the return target (kernel/user) this runs the necessary
+ * preemption and work checks if possible and required and returns to
+ * the caller with interrupts disabled and no further work pending.
+ *
+ * This is the last action before returning to the low level ASM code which
+ * just needs to return to the appropriate context.
+ *
+ * Counterpart to idtentry_enter_cond_rcu(). The return value of the entry
+ * function must be fed into the @rcu_exit argument.
+ */
+void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit)
+{
+ __idtentry_exit(regs, false, rcu_exit);
}
#ifdef CONFIG_XEN_PV
@@ -658,11 +745,11 @@ static noinstr void run_on_irqstack(void
set_irq_regs(old_regs);
if (IS_ENABLED(CONFIG_PREEMPTION)) {
- __idtentry_exit(regs, false);
+ __idtentry_exit(regs, false, true);
} else {
bool inhcall = __this_cpu_read(xen_in_preemptible_hcall);
- __idtentry_exit(regs, inhcall && need_resched());
+ __idtentry_exit(regs, inhcall && need_resched(), true);
}
}
#endif /* CONFIG_XEN_PV */
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -10,6 +10,9 @@
void idtentry_enter(struct pt_regs *regs);
void idtentry_exit(struct pt_regs *regs);
+bool idtentry_enter_cond_rcu(struct pt_regs *regs);
+void idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit);
+
/**
* DECLARE_IDTENTRY - Declare functions for simple IDT entry points
* No error code pushed by hardware
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch V4 part 5 07/31] x86/entry: Provide idtentry_entry/exit_cond_rcu()
2020-05-05 13:53 ` [patch V4 part 5 07/31] x86/entry: Provide idtentry_entry/exit_cond_rcu() Thomas Gleixner
@ 2020-05-11 13:53 ` Alexandre Chartre
2020-05-11 14:13 ` Peter Zijlstra
2020-05-12 16:30 ` Thomas Gleixner
0 siblings, 2 replies; 49+ messages in thread
From: Alexandre Chartre @ 2020-05-11 13:53 UTC (permalink / raw)
To: Thomas Gleixner, LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Frederic Weisbecker,
Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
Juergen Gross, Brian Gerst, Mathieu Desnoyers, Josh Poimboeuf,
Will Deacon
On 5/5/20 3:53 PM, Thomas Gleixner wrote:
> The pagefault handler cannot use the regular idtentry_enter() because on
> that invokes rcu_irq_enter() the pagefault was caused in the kernel.
I am struggling to understand this part of the sentence: "because on
that invokes rcu_irq_enter() the pagefault was caused in the kernel."
Do you mean: "because that invokes rcu_irq_enter() if the pagefault was
caused in the kernel." ?
alex.
> Not a
> problem per se, but kernel side page faults can schedule which is not
> possible without invoking rcu_irq_exit().
>
> Adding rcu_irq_exit() and a matching rcu_irq_enter() into the actual
> pagefault handling code is possible, but not pretty either.
>
> Provide idtentry_entry/exit_cond_rcu() which calls rcu_irq_enter() only
> when RCU is not watching. While this is not a legit kernel #PF establishing
> RCU before handling it avoids RCU side effects which might affect
> debugability.
>
> The function is also useful for implementing lightweight scheduler IPI
> entry handling later.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
> arch/x86/entry/common.c | 119 ++++++++++++++++++++++++++++++++++------
> arch/x86/include/asm/idtentry.h | 3 +
> 2 files changed, 106 insertions(+), 16 deletions(-)
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch V4 part 5 07/31] x86/entry: Provide idtentry_entry/exit_cond_rcu()
2020-05-11 13:53 ` Alexandre Chartre
@ 2020-05-11 14:13 ` Peter Zijlstra
2020-05-12 16:30 ` Thomas Gleixner
1 sibling, 0 replies; 49+ messages in thread
From: Peter Zijlstra @ 2020-05-11 14:13 UTC (permalink / raw)
To: Alexandre Chartre
Cc: Thomas Gleixner, LKML, x86, Paul E. McKenney, Andy Lutomirski,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
On Mon, May 11, 2020 at 03:53:17PM +0200, Alexandre Chartre wrote:
>
> On 5/5/20 3:53 PM, Thomas Gleixner wrote:
> > The pagefault handler cannot use the regular idtentry_enter() because on
> > that invokes rcu_irq_enter() the pagefault was caused in the kernel.
>
> I am struggling to understand this part of the sentence: "because on
> that invokes rcu_irq_enter() the pagefault was caused in the kernel."
>
> Do you mean: "because that invokes rcu_irq_enter() if the pagefault was
> caused in the kernel." ?
The problem is that rcu_irq_enter() uses per-cpu state and expects
rcu_irq_exit() to be called on the very same cpu we did enter on.
However, #PF likes to schedule and breaks that expectation.
While there are more exceptions that schedule when from userspace, #PF
is the only one that does so when from kernel space, which makes it
'special'.
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch V4 part 5 07/31] x86/entry: Provide idtentry_entry/exit_cond_rcu()
2020-05-11 13:53 ` Alexandre Chartre
2020-05-11 14:13 ` Peter Zijlstra
@ 2020-05-12 16:30 ` Thomas Gleixner
1 sibling, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-12 16:30 UTC (permalink / raw)
To: Alexandre Chartre, LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Frederic Weisbecker,
Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
Juergen Gross, Brian Gerst, Mathieu Desnoyers, Josh Poimboeuf,
Will Deacon
Alexandre Chartre <alexandre.chartre@oracle.com> writes:
> On 5/5/20 3:53 PM, Thomas Gleixner wrote:
>> The pagefault handler cannot use the regular idtentry_enter() because on
>> that invokes rcu_irq_enter() the pagefault was caused in the kernel.
>
> I am struggling to understand this part of the sentence: "because on
> that invokes rcu_irq_enter() the pagefault was caused in the kernel."
>
> Do you mean: "because that invokes rcu_irq_enter() if the pagefault was
> caused in the kernel." ?
Yeah. Wordsalad ...
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 08/31] x86/entry: Switch page fault exception to IDTENTRY_RAW
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (6 preceding siblings ...)
2020-05-05 13:53 ` [patch V4 part 5 07/31] x86/entry: Provide idtentry_entry/exit_cond_rcu() Thomas Gleixner
@ 2020-05-05 13:53 ` Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 09/31] x86/entry: Remove the transition leftovers Thomas Gleixner
` (22 subsequent siblings)
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:53 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
Convert page fault exceptions to IDTENTRY_RAW:
- Implement the C entry point with DEFINE_IDTENTRY_RAW
- Add the CR2 read into the exception handler
- Add the idtentry_enter/exit_cond_rcu() invocations in the regular
page fault handler and use the regular idtentry_enter/exit() for the
async PF part.
- Emit the ASM stub with DECLARE_IDTENTRY_RAW
- Remove the ASM idtentry in 64bit
- Remove the CR2 read from 64bit
- Remove the open coded ASM entry code in 32bit
- Fixup the XEN/PV code
- Remove the old prototypes
No functional change.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/entry_32.S | 30 -----------------
arch/x86/entry/entry_64.S | 19 -----------
arch/x86/include/asm/idtentry.h | 3 +
arch/x86/include/asm/traps.h | 11 ------
arch/x86/kernel/idt.c | 4 +-
arch/x86/kernel/kvm.c | 14 ++++----
arch/x86/mm/fault.c | 69 +++++++++++++++++++++++++++-------------
arch/x86/xen/enlighten_pv.c | 2 -
arch/x86/xen/xen-asm_64.S | 2 -
9 files changed, 62 insertions(+), 92 deletions(-)
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1395,36 +1395,6 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vec
#endif /* CONFIG_HYPERV */
-SYM_CODE_START(page_fault)
- ASM_CLAC
- pushl $do_page_fault
- jmp common_exception_read_cr2
-SYM_CODE_END(page_fault)
-
-SYM_CODE_START_LOCAL_NOALIGN(common_exception_read_cr2)
- /* the function address is in %gs's slot on the stack */
- SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
-
- ENCODE_FRAME_POINTER
-
- /* fixup %gs */
- GS_TO_REG %ecx
- movl PT_GS(%esp), %edi
- REG_TO_PTGS %ecx
- SET_KERNEL_GS %ecx
-
- GET_CR2_INTO(%ecx) # might clobber %eax
-
- /* fixup orig %eax */
- movl PT_ORIG_EAX(%esp), %edx # get the error code
- movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
-
- TRACE_IRQS_OFF
- movl %esp, %eax # pt_regs pointer
- CALL_NOSPEC edi
- jmp ret_from_exception
-SYM_CODE_END(common_exception_read_cr2)
-
SYM_CODE_START_LOCAL_NOALIGN(common_exception)
/* the function address is in %gs's slot on the stack */
SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -507,15 +507,6 @@ SYM_CODE_END(spurious_entries_start)
call error_entry
UNWIND_HINT_REGS
- .if \vector == X86_TRAP_PF
- /*
- * Store CR2 early so subsequent faults cannot clobber it. Use R12 as
- * intermediate storage as RDX can be clobbered in enter_from_user_mode().
- * GET_CR2_INTO can clobber RAX.
- */
- GET_CR2_INTO(%r12);
- .endif
-
.if \sane == 0
TRACE_IRQS_OFF
@@ -534,10 +525,6 @@ SYM_CODE_END(spurious_entries_start)
movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
.endif
- .if \vector == X86_TRAP_PF
- movq %r12, %rdx /* Move CR2 into 3rd argument */
- .endif
-
call \cfunc
.if \sane == 0
@@ -1061,12 +1048,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work
#endif
/*
- * Exception entry points.
- */
-
-idtentry X86_TRAP_PF page_fault do_page_fault has_error_code=1
-
-/*
* Reload gs selector with exception handling
* edi: new selector
*/
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -376,7 +376,8 @@ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP,
DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC, exc_alignment_check);
/* Raw exception entries which need extra work */
-DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3);
+DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3);
+DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault);
#ifdef CONFIG_X86_MCE
DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check);
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -9,17 +9,6 @@
#include <asm/idtentry.h>
#include <asm/siginfo.h> /* TRAP_TRACE, ... */
-#define dotraplinkage __visible
-
-asmlinkage void page_fault(void);
-asmlinkage void async_page_fault(void);
-
-#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
-asmlinkage void xen_page_fault(void);
-#endif
-
-dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
-
#ifdef CONFIG_X86_64
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
asmlinkage __visible notrace
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -59,7 +59,7 @@ static const __initconst struct idt_data
INTG(X86_TRAP_DB, asm_exc_debug),
SYSG(X86_TRAP_BP, asm_exc_int3),
#ifdef CONFIG_X86_32
- INTG(X86_TRAP_PF, page_fault),
+ INTG(X86_TRAP_PF, asm_exc_page_fault),
#endif
};
@@ -153,7 +153,7 @@ static const __initconst struct idt_data
* stacks work only after cpu_init().
*/
static const __initconst struct idt_data early_pf_idts[] = {
- INTG(X86_TRAP_PF, page_fault),
+ INTG(X86_TRAP_PF, asm_exc_page_fault),
};
/*
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -218,7 +218,7 @@ void kvm_async_pf_task_wake(u32 token)
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
-u32 kvm_read_and_reset_pf_reason(void)
+u32 noinstr kvm_read_and_reset_pf_reason(void)
{
u32 reason = 0;
@@ -230,9 +230,8 @@ u32 kvm_read_and_reset_pf_reason(void)
return reason;
}
EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
-NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
-bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
+noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
u32 reason = kvm_read_and_reset_pf_reason();
@@ -244,6 +243,9 @@ bool __kvm_handle_async_pf(struct pt_reg
return false;
}
+ idtentry_enter(regs);
+ instr_begin();
+
/*
* If the host managed to inject an async #PF into an interrupt
* disabled region, then die hard as this is not going to end well
@@ -258,13 +260,13 @@ bool __kvm_handle_async_pf(struct pt_reg
/* Page is swapped out by the host. */
kvm_async_pf_task_wait_schedule(token);
} else {
- rcu_irq_enter();
kvm_async_pf_task_wake(token);
- rcu_irq_exit();
}
+
+ instr_end();
+ idtentry_exit(regs);
return true;
}
-NOKPROBE_SYMBOL(__kvm_handle_async_pf);
static void __init paravirt_ops_setup(void)
{
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1519,11 +1519,38 @@ trace_page_fault_entries(struct pt_regs
trace_page_fault_kernel(address, regs, error_code);
}
-dotraplinkage void
-do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
- unsigned long address)
+static __always_inline void
+handle_page_fault(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
{
+ trace_page_fault_entries(regs, error_code, address);
+
+ if (unlikely(kmmio_fault(regs, address)))
+ return;
+
+ /* Was the fault on kernel-controlled part of the address space? */
+ if (unlikely(fault_in_kernel_space(address))) {
+ do_kern_addr_fault(regs, error_code, address);
+ } else {
+ do_user_addr_fault(regs, error_code, address);
+ /*
+ * User address page fault handling might have reenabled
+ * interrupts. Fixing up all potential exit points of
+ * do_user_addr_fault() and its leaf functions is just not
+ * doable w/o creating an unholy mess or turning the code
+ * upside down.
+ */
+ local_irq_disable();
+ }
+}
+
+DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
+{
+ unsigned long address = read_cr2();
+ bool rcu_exit;
+
prefetchw(&current->mm->mmap_sem);
+
/*
* KVM has two types of events that are, logically, interrupts, but
* are unfortunately delivered using the #PF vector. These events are
@@ -1538,28 +1565,28 @@ do_page_fault(struct pt_regs *regs, unsi
* getting values from real and async page faults mixed up.
*
* Fingers crossed.
+ *
+ * The async #PF handling code takes care of idtentry handling
+ * itself.
*/
if (kvm_handle_async_pf(regs, (u32)address))
return;
- trace_page_fault_entries(regs, hw_error_code, address);
+ /*
+ * Entry handling for valid #PF from kernel mode is slightly
+ * different: RCU is already watching and rcu_irq_enter() must not
+ * be invoked because a kernel fault on a user space address might
+ * sleep.
+ *
+ * In case the fault hit a RCU idle region the conditional entry
+ * code reenabled RCU to avoid subsequent wreckage which helps
+ * debugability.
+ */
+ rcu_exit = idtentry_enter_cond_rcu(regs);
- if (unlikely(kmmio_fault(regs, address)))
- return;
+ instr_begin();
+ handle_page_fault(regs, error_code, address);
+ instr_end();
- /* Was the fault on kernel-controlled part of the address space? */
- if (unlikely(fault_in_kernel_space(address))) {
- do_kern_addr_fault(regs, hw_error_code, address);
- } else {
- do_user_addr_fault(regs, hw_error_code, address);
- /*
- * User address page fault handling might have reenabled
- * interrupts. Fixing up all potential exit points of
- * do_user_addr_fault() and its leaf functions is just not
- * doable w/o creating an unholy mess or turning the code
- * upside down.
- */
- local_irq_disable();
- }
+ idtentry_exit_cond_rcu(regs, rcu_exit);
}
-NOKPROBE_SYMBOL(do_page_fault);
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -627,7 +627,7 @@ static struct trap_array_entry trap_arra
#ifdef CONFIG_IA32_EMULATION
{ entry_INT80_compat, xen_entry_INT80_compat, false },
#endif
- { page_fault, xen_page_fault, false },
+ TRAP_ENTRY(exc_page_fault, false ),
TRAP_ENTRY(exc_divide_error, false ),
TRAP_ENTRY(exc_bounds, false ),
TRAP_ENTRY(exc_invalid_op, false ),
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -43,7 +43,7 @@ xen_pv_trap asm_exc_invalid_tss
xen_pv_trap asm_exc_segment_not_present
xen_pv_trap asm_exc_stack_segment
xen_pv_trap asm_exc_general_protection
-xen_pv_trap page_fault
+xen_pv_trap asm_exc_page_fault
xen_pv_trap asm_exc_spurious_interrupt_bug
xen_pv_trap asm_exc_coprocessor_error
xen_pv_trap asm_exc_alignment_check
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 09/31] x86/entry: Remove the transition leftovers
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (7 preceding siblings ...)
2020-05-05 13:53 ` [patch V4 part 5 08/31] x86/entry: Switch page fault exception to IDTENTRY_RAW Thomas Gleixner
@ 2020-05-05 13:53 ` Thomas Gleixner
2020-05-11 14:11 ` Alexandre Chartre
2020-05-05 13:53 ` [patch V4 part 5 10/31] x86/entry: Change exit path of xen_failsafe_callback Thomas Gleixner
` (21 subsequent siblings)
30 siblings, 1 reply; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:53 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
Now that all exceptions are converted over the sane flag is not longer
needed. Also the vector argument of idtentry_body on 64 bit is pointless
now.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/entry_32.S | 3 +--
arch/x86/entry/entry_64.S | 26 ++++----------------------
arch/x86/include/asm/idtentry.h | 6 +++---
3 files changed, 8 insertions(+), 27 deletions(-)
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -734,9 +734,8 @@
* @asmsym: ASM symbol for the entry point
* @cfunc: C function to be called
* @has_error_code: Hardware pushed error code on stack
- * @sane: Compatibility flag with 64bit
*/
-.macro idtentry vector asmsym cfunc has_error_code:req sane=0
+.macro idtentry vector asmsym cfunc has_error_code:req
SYM_CODE_START(\asmsym)
ASM_CLAC
cld
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -497,27 +497,14 @@ SYM_CODE_END(spurious_entries_start)
/**
* idtentry_body - Macro to emit code calling the C function
- * @vector: Vector number
* @cfunc: C function to be called
* @has_error_code: Hardware pushed error code on stack
- * @sane: Sane variant which handles irq tracing, context tracking in C
*/
-.macro idtentry_body vector cfunc has_error_code:req sane=0
+.macro idtentry_body cfunc has_error_code:req
call error_entry
UNWIND_HINT_REGS
- .if \sane == 0
- TRACE_IRQS_OFF
-
-#ifdef CONFIG_CONTEXT_TRACKING
- testb $3, CS(%rsp)
- jz .Lfrom_kernel_no_ctxt_tracking_\@
- CALL_enter_from_user_mode
-.Lfrom_kernel_no_ctxt_tracking_\@:
-#endif
- .endif
-
movq %rsp, %rdi /* pt_regs pointer into 1st argument*/
.if \has_error_code == 1
@@ -527,11 +514,7 @@ SYM_CODE_END(spurious_entries_start)
call \cfunc
- .if \sane == 0
- jmp error_exit
- .else
jmp error_return
- .endif
.endm
/**
@@ -540,12 +523,11 @@ SYM_CODE_END(spurious_entries_start)
* @asmsym: ASM symbol for the entry point
* @cfunc: C function to be called
* @has_error_code: Hardware pushed error code on stack
- * @sane: Sane variant which handles irq tracing, context tracking in C
*
* The macro emits code to set up the kernel context for straight forward
* and simple IDT entries. No IST stack, no paranoid entry checks.
*/
-.macro idtentry vector asmsym cfunc has_error_code:req sane=0
+.macro idtentry vector asmsym cfunc has_error_code:req
SYM_CODE_START(\asmsym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
ASM_CLAC
@@ -568,7 +550,7 @@ SYM_CODE_START(\asmsym)
.Lfrom_usermode_no_gap_\@:
.endif
- idtentry_body \vector \cfunc \has_error_code \sane
+ idtentry_body \cfunc \has_error_code
_ASM_NOKPROBE(\asmsym)
SYM_CODE_END(\asmsym)
@@ -643,7 +625,7 @@ SYM_CODE_START(\asmsym)
/* Switch to the regular task stack and use the noist entry point */
.Lfrom_usermode_switch_stack_\@:
- idtentry_body vector noist_\cfunc, has_error_code=0 sane=1
+ idtentry_body noist_\cfunc, has_error_code=0
_ASM_NOKPROBE(\asmsym)
SYM_CODE_END(\asmsym)
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -293,10 +293,10 @@ static __always_inline void __##func(str
* The ASM variants for DECLARE_IDTENTRY*() which emit the ASM entry stubs.
*/
#define DECLARE_IDTENTRY(vector, func) \
- idtentry vector asm_##func func has_error_code=0 sane=1
+ idtentry vector asm_##func func has_error_code=0
#define DECLARE_IDTENTRY_ERRORCODE(vector, func) \
- idtentry vector asm_##func func has_error_code=1 sane=1
+ idtentry vector asm_##func func has_error_code=1
/* Special case for 32bit IRET 'trap'. Do not emit ASM code */
#define DECLARE_IDTENTRY_SW(vector, func)
@@ -334,7 +334,7 @@ static __always_inline void __##func(str
/* XEN NMI and DB wrapper */
#define DECLARE_IDTENTRY_XEN(vector, func) \
- idtentry vector asm_exc_xen##func exc_##func has_error_code=0 sane=1
+ idtentry vector asm_exc_xen##func exc_##func has_error_code=0
#endif /* __ASSEMBLY__ */
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch V4 part 5 09/31] x86/entry: Remove the transition leftovers
2020-05-05 13:53 ` [patch V4 part 5 09/31] x86/entry: Remove the transition leftovers Thomas Gleixner
@ 2020-05-11 14:11 ` Alexandre Chartre
0 siblings, 0 replies; 49+ messages in thread
From: Alexandre Chartre @ 2020-05-11 14:11 UTC (permalink / raw)
To: Thomas Gleixner, LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Frederic Weisbecker,
Paolo Bonzini, Sean Christopherson, Masami Hiramatsu,
Petr Mladek, Steven Rostedt, Joel Fernandes, Boris Ostrovsky,
Juergen Gross, Brian Gerst, Mathieu Desnoyers, Josh Poimboeuf,
Will Deacon
On 5/5/20 3:53 PM, Thomas Gleixner wrote:
> Now that all exceptions are converted over the sane flag is not longer
typo: "not" instead of "no" (the sane flag is no longer needed).
> needed. Also the vector argument of idtentry_body on 64 bit is pointless
> now.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
> arch/x86/entry/entry_32.S | 3 +--
> arch/x86/entry/entry_64.S | 26 ++++----------------------
> arch/x86/include/asm/idtentry.h | 6 +++---
> 3 files changed, 8 insertions(+), 27 deletions(-)
>
alex.
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 10/31] x86/entry: Change exit path of xen_failsafe_callback
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (8 preceding siblings ...)
2020-05-05 13:53 ` [patch V4 part 5 09/31] x86/entry: Remove the transition leftovers Thomas Gleixner
@ 2020-05-05 13:53 ` Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 11/31] x86/entry/64: Remove error_exit Thomas Gleixner
` (20 subsequent siblings)
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:53 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
xen_failsafe_callback is invoked from XEN for two cases:
1. Fault while reloading DS, ES, FS or GS
2. Fault while executing IRET
#1 retries the IRET after XEN has fixed up the segments.
#2 injects a #GP which kills the task
For #1 there is no reason to go through the full exception return path
because the task's TIF state is still the same. So just going straight to
the IRET path is good enough.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Juergen Gross <jgross@suse.com>
---
arch/x86/entry/entry_32.S | 2 +-
arch/x86/entry/entry_64.S | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1352,7 +1352,7 @@ SYM_FUNC_START(xen_failsafe_callback)
5: pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
ENCODE_FRAME_POINTER
- jmp ret_from_exception
+ jmp handle_exception_return
.section .fixup, "ax"
6: xorl %eax, %eax
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1137,7 +1137,7 @@ SYM_CODE_START(xen_failsafe_callback)
pushq $-1 /* orig_ax = -1 => not a system call */
PUSH_AND_CLEAR_REGS
ENCODE_FRAME_POINTER
- jmp error_exit
+ jmp error_return
SYM_CODE_END(xen_failsafe_callback)
#endif /* CONFIG_XEN_PV */
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 11/31] x86/entry/64: Remove error_exit
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (9 preceding siblings ...)
2020-05-05 13:53 ` [patch V4 part 5 10/31] x86/entry: Change exit path of xen_failsafe_callback Thomas Gleixner
@ 2020-05-05 13:53 ` Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 12/31] x86/entry/32: Remove common_exception Thomas Gleixner
` (19 subsequent siblings)
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:53 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
No more users.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/entry_64.S | 9 ---------
1 file changed, 9 deletions(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1319,15 +1319,6 @@ SYM_CODE_START_LOCAL(error_entry)
jmp .Lerror_entry_from_usermode_after_swapgs
SYM_CODE_END(error_entry)
-SYM_CODE_START_LOCAL(error_exit)
- UNWIND_HINT_REGS
- DISABLE_INTERRUPTS(CLBR_ANY)
- TRACE_IRQS_OFF
- testb $3, CS(%rsp)
- jz retint_kernel
- jmp .Lretint_user
-SYM_CODE_END(error_exit)
-
SYM_CODE_START_LOCAL(error_return)
UNWIND_HINT_REGS
DEBUG_ENTRY_ASSERT_IRQS_OFF
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 12/31] x86/entry/32: Remove common_exception
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (10 preceding siblings ...)
2020-05-05 13:53 ` [patch V4 part 5 11/31] x86/entry/64: Remove error_exit Thomas Gleixner
@ 2020-05-05 13:53 ` Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 13/31] x86/irq: Convey vector as argument and not in ptregs Thomas Gleixner
` (18 subsequent siblings)
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:53 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
No more users.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/entry_32.S | 21 ---------------------
1 file changed, 21 deletions(-)
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1394,27 +1394,6 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vec
#endif /* CONFIG_HYPERV */
-SYM_CODE_START_LOCAL_NOALIGN(common_exception)
- /* the function address is in %gs's slot on the stack */
- SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
- ENCODE_FRAME_POINTER
-
- /* fixup %gs */
- GS_TO_REG %ecx
- movl PT_GS(%esp), %edi # get the function address
- REG_TO_PTGS %ecx
- SET_KERNEL_GS %ecx
-
- /* fixup orig %eax */
- movl PT_ORIG_EAX(%esp), %edx # get the error code
- movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
-
- TRACE_IRQS_OFF
- movl %esp, %eax # pt_regs pointer
- CALL_NOSPEC edi
- jmp ret_from_exception
-SYM_CODE_END(common_exception)
-
SYM_CODE_START_LOCAL_NOALIGN(handle_exception)
/* the function address is in %gs's slot on the stack */
SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 13/31] x86/irq: Convey vector as argument and not in ptregs
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (11 preceding siblings ...)
2020-05-05 13:53 ` [patch V4 part 5 12/31] x86/entry/32: Remove common_exception Thomas Gleixner
@ 2020-05-05 13:53 ` Thomas Gleixner
2020-05-10 2:44 ` Lai Jiangshan
2020-05-05 13:53 ` [patch V4 part 5 14/31] x86/irq/64: Provide handle_irq() Thomas Gleixner
` (17 subsequent siblings)
30 siblings, 1 reply; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:53 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
Device interrupts which go through do_IRQ() or the spurious interrupt
handler have their separate entry code on 64 bit for no good reason.
Both 32 and 64 bit transport the vector number through ORIG_[RE]AX in
pt_regs. Further the vector number is forced to fit into an u8 and is
complemented and offset by 0x80 so it's in the signed character
range. Otherwise GAS would expand the pushq to a 5 byte instruction for any
vector > 0x7F.
Treat the vector number like an error code and hand it to the C function as
argument. This allows getting rid of the extra entry code in a later step.
Simplify the error code push magic by implementing the pushq imm8 via a
'.byte 0x6a, vector' sequence so GAS is not able to screw it up. As the
pushq imm8 is sign extending the resulting error code needs to be truncated
to 8 bits in C code.
Originally-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Brian Gerst <brgerst@gmail.com>
---
V2: Fix the pushq thinko (Brian)
---
arch/x86/entry/calling.h | 5 +++-
arch/x86/entry/entry_32.S | 33 +++----------------------------
arch/x86/entry/entry_64.S | 40 ++++++--------------------------------
arch/x86/include/asm/entry_arch.h | 2 -
arch/x86/include/asm/hw_irq.h | 1
arch/x86/include/asm/idtentry.h | 40 ++++++++++++++++++++++++++++++++++++++
arch/x86/include/asm/irq.h | 2 -
arch/x86/include/asm/traps.h | 3 +-
arch/x86/kernel/apic/apic.c | 31 +++++++++++++++++++++++------
arch/x86/kernel/idt.c | 2 -
arch/x86/kernel/irq.c | 14 ++++++++-----
11 files changed, 95 insertions(+), 78 deletions(-)
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -339,7 +339,10 @@ For 32-bit we have the following convent
#endif
.endm
-#endif /* CONFIG_X86_64 */
+#else /* CONFIG_X86_64 */
+# undef UNWIND_HINT_IRET_REGS
+# define UNWIND_HINT_IRET_REGS
+#endif /* !CONFIG_X86_64 */
.macro STACKLEAK_ERASE
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1215,40 +1215,15 @@ SYM_FUNC_END(entry_INT80_32)
#endif
.endm
-/*
- * Build the entry stubs with some assembler magic.
- * We pack 1 stub into every 8-byte block.
- */
- .align 8
-SYM_CODE_START(irq_entries_start)
- vector=FIRST_EXTERNAL_VECTOR
- .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
- pushl $(~vector+0x80) /* Note: always in signed byte range */
- vector=vector+1
- jmp common_interrupt
- .align 8
- .endr
-SYM_CODE_END(irq_entries_start)
-
#ifdef CONFIG_X86_LOCAL_APIC
- .align 8
-SYM_CODE_START(spurious_entries_start)
- vector=FIRST_SYSTEM_VECTOR
- .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
- pushl $(~vector+0x80) /* Note: always in signed byte range */
- vector=vector+1
- jmp common_spurious
- .align 8
- .endr
-SYM_CODE_END(spurious_entries_start)
-
SYM_CODE_START_LOCAL(common_spurious)
ASM_CLAC
- addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
SAVE_ALL switch_stacks=1
ENCODE_FRAME_POINTER
TRACE_IRQS_OFF
movl %esp, %eax
+ movl PT_ORIG_EAX(%esp), %edx /* get the vector from stack */
+ movl $-1, PT_ORIG_EAX(%esp) /* no syscall to restart */
call smp_spurious_interrupt
jmp ret_from_intr
SYM_CODE_END(common_spurious)
@@ -1261,12 +1236,12 @@ SYM_CODE_END(common_spurious)
.p2align CONFIG_X86_L1_CACHE_SHIFT
SYM_CODE_START_LOCAL(common_interrupt)
ASM_CLAC
- addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
-
SAVE_ALL switch_stacks=1
ENCODE_FRAME_POINTER
TRACE_IRQS_OFF
movl %esp, %eax
+ movl PT_ORIG_EAX(%esp), %edx /* get the vector from stack */
+ movl $-1, PT_ORIG_EAX(%esp) /* no syscall to restart */
call do_IRQ
jmp ret_from_intr
SYM_CODE_END(common_interrupt)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -359,34 +359,6 @@ SYM_CODE_START(ret_from_fork)
SYM_CODE_END(ret_from_fork)
.popsection
-/*
- * Build the entry stubs with some assembler magic.
- * We pack 1 stub into every 8-byte block.
- */
- .align 8
-SYM_CODE_START(irq_entries_start)
- vector=FIRST_EXTERNAL_VECTOR
- .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
- UNWIND_HINT_IRET_REGS
- pushq $(~vector+0x80) /* Note: always in signed byte range */
- jmp common_interrupt
- .align 8
- vector=vector+1
- .endr
-SYM_CODE_END(irq_entries_start)
-
- .align 8
-SYM_CODE_START(spurious_entries_start)
- vector=FIRST_SYSTEM_VECTOR
- .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
- UNWIND_HINT_IRET_REGS
- pushq $(~vector+0x80) /* Note: always in signed byte range */
- jmp common_spurious
- .align 8
- vector=vector+1
- .endr
-SYM_CODE_END(spurious_entries_start)
-
.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
#ifdef CONFIG_DEBUG_ENTRY
pushq %rax
@@ -756,13 +728,14 @@ SYM_CODE_END(interrupt_entry)
/* Interrupt entry/exit. */
/*
- * The interrupt stubs push (~vector+0x80) onto the stack and
+ * The interrupt stubs push vector onto the stack and
* then jump to common_spurious/interrupt.
*/
SYM_CODE_START_LOCAL(common_spurious)
- addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
call interrupt_entry
UNWIND_HINT_REGS indirect=1
+ movq ORIG_RAX(%rdi), %rsi /* get vector from stack */
+ movq $-1, ORIG_RAX(%rdi) /* no syscall to restart */
call smp_spurious_interrupt /* rdi points to pt_regs */
jmp ret_from_intr
SYM_CODE_END(common_spurious)
@@ -771,10 +744,11 @@ SYM_CODE_END(common_spurious)
/* common_interrupt is a hotpath. Align it */
.p2align CONFIG_X86_L1_CACHE_SHIFT
SYM_CODE_START_LOCAL(common_interrupt)
- addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
call interrupt_entry
UNWIND_HINT_REGS indirect=1
- call do_IRQ /* rdi points to pt_regs */
+ movq ORIG_RAX(%rdi), %rsi /* get vector from stack */
+ movq $-1, ORIG_RAX(%rdi) /* no syscall to restart */
+ call do_IRQ /* rdi points to pt_regs */
/* 0(%rsp): old RSP */
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_ANY)
@@ -1023,7 +997,7 @@ apicinterrupt RESCHEDULE_VECTOR resche
#endif
apicinterrupt ERROR_APIC_VECTOR error_interrupt smp_error_interrupt
-apicinterrupt SPURIOUS_APIC_VECTOR spurious_interrupt smp_spurious_interrupt
+apicinterrupt SPURIOUS_APIC_VECTOR spurious_apic_interrupt smp_spurious_apic_interrupt
#ifdef CONFIG_IRQ_WORK
apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -35,7 +35,7 @@ BUILD_INTERRUPT(kvm_posted_intr_nested_i
BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
-BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
+BUILD_INTERRUPT(spurious_apic_interrupt,SPURIOUS_APIC_VECTOR)
BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
#ifdef CONFIG_IRQ_WORK
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -39,6 +39,7 @@ extern asmlinkage void irq_work_interrup
extern asmlinkage void uv_bau_message_intr1(void);
extern asmlinkage void spurious_interrupt(void);
+extern asmlinkage void spurious_apic_interrupt(void);
extern asmlinkage void thermal_interrupt(void);
extern asmlinkage void reschedule_interrupt(void);
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -336,6 +336,46 @@ static __always_inline void __##func(str
#define DECLARE_IDTENTRY_XEN(vector, func) \
idtentry vector asm_exc_xen##func exc_##func has_error_code=0
+/*
+ * ASM code to emit the common vector entry stubs where each stub is
+ * packed into 8 bytes.
+ *
+ * Note, that the 'pushq imm8' is emitted via '.byte 0x6a, vector' because
+ * GAS treats the local vector variable as unsigned int and would expand
+ * all vectors above 0x7F to a 5 byte push. The original code did an
+ * adjustment of the vector number to be in the signed byte range to avoid
+ * this. While clever it's mindboggling counterintuitive and requires the
+ * odd conversion back to a real vector number in the C entry points. Using
+ * .byte achieves the same thing and the only fixup needed in the C entry
+ * point is to mask off the bits above bit 7 because the push is sign
+ * extending.
+ */
+ .align 8
+SYM_CODE_START(irq_entries_start)
+ vector=FIRST_EXTERNAL_VECTOR
+ .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+ UNWIND_HINT_IRET_REGS
+ .byte 0x6a, vector
+ jmp common_interrupt
+ .align 8
+ vector=vector+1
+ .endr
+SYM_CODE_END(irq_entries_start)
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ .align 8
+SYM_CODE_START(spurious_entries_start)
+ vector=FIRST_SYSTEM_VECTOR
+ .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
+ UNWIND_HINT_IRET_REGS
+ .byte 0x6a, vector
+ jmp common_spurious
+ .align 8
+ vector=vector+1
+ .endr
+SYM_CODE_END(spurious_entries_start)
+#endif
+
#endif /* __ASSEMBLY__ */
/*
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -36,7 +36,7 @@ extern void native_init_IRQ(void);
extern void handle_irq(struct irq_desc *desc, struct pt_regs *regs);
-extern __visible void do_IRQ(struct pt_regs *regs);
+extern __visible void do_IRQ(struct pt_regs *regs, unsigned long vector);
extern void init_ISA_irqs(void);
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -41,8 +41,9 @@ asmlinkage void smp_deferred_error_inter
#endif
void smp_apic_timer_interrupt(struct pt_regs *regs);
-void smp_spurious_interrupt(struct pt_regs *regs);
void smp_error_interrupt(struct pt_regs *regs);
+void smp_spurious_apic_interrupt(struct pt_regs *regs);
+void smp_spurious_interrupt(struct pt_regs *regs, unsigned long vector);
asmlinkage void smp_irq_move_cleanup_interrupt(void);
#ifdef CONFIG_VMAP_STACK
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2152,15 +2152,29 @@ void __init register_lapic_address(unsig
* Local APIC interrupts
*/
-/*
- * This interrupt should _never_ happen with our APIC/SMP architecture
+/**
+ * smp_spurious_interrupt - Catch all for interrupts raised on unused vectors
+ * @regs: Pointer to pt_regs on stack
+ * @vector: The vector number is in the lower 8 bits
+ *
+ * This is invoked from ASM entry code to catch all interrupts which
+ * trigger on an entry which is routed to the common_spurious idtentry
+ * point.
+ *
+ * Also called from smp_spurious_apic_interrupt().
*/
-__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs,
+ unsigned long vector)
{
- u8 vector = ~regs->orig_ax;
u32 v;
entering_irq();
+ /*
+ * The push in the entry ASM code which stores the vector number on
+ * the stack in the error code slot is sign expanding. Just use the
+ * lower 8 bits.
+ */
+ vector &= 0xFF;
trace_spurious_apic_entry(vector);
inc_irq_stat(irq_spurious_count);
@@ -2181,11 +2195,11 @@ void __init register_lapic_address(unsig
*/
v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
if (v & (1 << (vector & 0x1f))) {
- pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
+ pr_info("Spurious interrupt (vector 0x%02lx) on CPU#%d. Acked\n",
vector, smp_processor_id());
ack_APIC_irq();
} else {
- pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
+ pr_info("Spurious interrupt (vector 0x%02lx) on CPU#%d. Not pending!\n",
vector, smp_processor_id());
}
out:
@@ -2193,6 +2207,11 @@ void __init register_lapic_address(unsig
exiting_irq();
}
+__visible void smp_spurious_apic_interrupt(struct pt_regs *regs)
+{
+ smp_spurious_interrupt(regs, SPURIOUS_APIC_VECTOR);
+}
+
/*
* This interrupt should never happen with our APIC/SMP architecture
*/
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -142,7 +142,7 @@ static const __initconst struct idt_data
#ifdef CONFIG_X86_UV
INTG(UV_BAU_MESSAGE, uv_bau_message_intr1),
#endif
- INTG(SPURIOUS_APIC_VECTOR, spurious_interrupt),
+ INTG(SPURIOUS_APIC_VECTOR, spurious_apic_interrupt),
INTG(ERROR_APIC_VECTOR, error_interrupt),
#endif
};
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -230,14 +230,18 @@ u64 arch_irq_stat(void)
* SMP cross-CPU interrupts have their own specific
* handlers).
*/
-__visible void __irq_entry do_IRQ(struct pt_regs *regs)
+__visible void __irq_entry do_IRQ(struct pt_regs *regs, unsigned long vector)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- struct irq_desc * desc;
- /* high bit used in ret_from_ code */
- unsigned vector = ~regs->orig_ax;
+ struct irq_desc *desc;
entering_irq();
+ /*
+ * The push in the entry ASM code which stores the vector number on
+ * the stack in the error code slot is sign expanding. Just use the
+ * lower 8 bits.
+ */
+ vector &= 0xFF;
/* entering_irq() tells RCU that we're not quiescent. Check it. */
RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU");
@@ -252,7 +256,7 @@ u64 arch_irq_stat(void)
ack_APIC_irq();
if (desc == VECTOR_UNUSED) {
- pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n",
+ pr_emerg_ratelimited("%s: %d.%lu No irq handler for vector\n",
__func__, smp_processor_id(),
vector);
} else {
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch V4 part 5 13/31] x86/irq: Convey vector as argument and not in ptregs
2020-05-05 13:53 ` [patch V4 part 5 13/31] x86/irq: Convey vector as argument and not in ptregs Thomas Gleixner
@ 2020-05-10 2:44 ` Lai Jiangshan
2020-05-11 14:35 ` Thomas Gleixner
0 siblings, 1 reply; 49+ messages in thread
From: Lai Jiangshan @ 2020-05-10 2:44 UTC (permalink / raw)
To: Thomas Gleixner
Cc: LKML, x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
On Tue, May 5, 2020 at 10:23 PM Thomas Gleixner <tglx@linutronix.de> wrote:
> +/*
> + * ASM code to emit the common vector entry stubs where each stub is
> + * packed into 8 bytes.
> + *
> + * Note, that the 'pushq imm8' is emitted via '.byte 0x6a, vector' because
> + * GCC treats the local vector variable as unsigned int and would expand
> + * all vectors above 0x7F to a 5 byte push. The original code did an
> + * adjustment of the vector number to be in the signed byte range to avoid
> + * this. While clever it's mindboggling counterintuitive and requires the
> + * odd conversion back to a real vector number in the C entry points. Using
> + * .byte achieves the same thing and the only fixup needed in the C entry
> + * point is to mask off the bits above bit 7 because the push is sign
> + * extending.
> + */
> + .align 8
> +SYM_CODE_START(irq_entries_start)
> + vector=FIRST_EXTERNAL_VECTOR
> + .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
> + UNWIND_HINT_IRET_REGS
> + .byte 0x6a, vector
> + jmp common_interrupt
> + .align 8
> + vector=vector+1
> + .endr
> +SYM_CODE_END(irq_entries_start)
Hello, tglx
Using ".byte 0x6a, vector" is somewhat ugly.
I hope it should be " pushq $(s8_to_s64(vector))", which can also
help to reduce bunches of comments about ".byte 0x6a, vector".
However, I don't know how to implement s8_to_s64() here. But at
least the following code works (generates the same two-byte machine
code as ".byte 0x6a, vector" does):
.if vector < 128
pushq $(vector)
.else
pushq $(0xffffffffffffff00+vector)
.endif
Thanks,
Lai
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch V4 part 5 13/31] x86/irq: Convey vector as argument and not in ptregs
2020-05-10 2:44 ` Lai Jiangshan
@ 2020-05-11 14:35 ` Thomas Gleixner
2020-05-11 15:11 ` Lai Jiangshan
0 siblings, 1 reply; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-11 14:35 UTC (permalink / raw)
To: Lai Jiangshan
Cc: LKML, x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
Lai,
Lai Jiangshan <jiangshanlai+lkml@gmail.com> writes:
> On Tue, May 5, 2020 at 10:23 PM Thomas Gleixner <tglx@linutronix.de> wrote:
>> +SYM_CODE_START(irq_entries_start)
>> + vector=FIRST_EXTERNAL_VECTOR
>> + .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
>> + UNWIND_HINT_IRET_REGS
>> + .byte 0x6a, vector
>> + jmp common_interrupt
>> + .align 8
>> + vector=vector+1
>> + .endr
>> +SYM_CODE_END(irq_entries_start)
>
> Using ".byte 0x6a, vector" is somewhat ugly.
>
> I hope it should be " pushq $(s8_to_s64(vector))", which can also
> help to reduce bunches of comments about ".byte 0x6a, vector".
>
> However, I don't know how to implement s8_to_s64() here.
Neither do I.
> But at least the following code works (generates the same two-byte
> machine code as ".byte 0x6a, vector" does):
>
> .if vector < 128
> pushq $(vector)
> .else
> pushq $(0xffffffffffffff00+vector)
> .endif
Only slightly less ugly and needs as much commentary as the above.
Thanks,
tglx
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch V4 part 5 13/31] x86/irq: Convey vector as argument and not in ptregs
2020-05-11 14:35 ` Thomas Gleixner
@ 2020-05-11 15:11 ` Lai Jiangshan
0 siblings, 0 replies; 49+ messages in thread
From: Lai Jiangshan @ 2020-05-11 15:11 UTC (permalink / raw)
To: Thomas Gleixner
Cc: LKML, x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
Hello
On Mon, May 11, 2020 at 10:35 PM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> Lai,
>
> Lai Jiangshan <jiangshanlai+lkml@gmail.com> writes:
> > On Tue, May 5, 2020 at 10:23 PM Thomas Gleixner <tglx@linutronix.de> wrote:
> >> +SYM_CODE_START(irq_entries_start)
> >> + vector=FIRST_EXTERNAL_VECTOR
> >> + .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
> >> + UNWIND_HINT_IRET_REGS
> >> + .byte 0x6a, vector
> >> + jmp common_interrupt
> >> + .align 8
> >> + vector=vector+1
> >> + .endr
> >> +SYM_CODE_END(irq_entries_start)
> >
> > Using ".byte 0x6a, vector" is somewhat ugly.
> >
> > I hope it should be " pushq $(s8_to_s64(vector))", which can also
> > help to reduce bunches of comments about ".byte 0x6a, vector".
> >
> > However, I don't know how to implement s8_to_s64() here.
>
> Neither do I.
>
> > But at least the following code works (generates the same two-byte
> > machine code as ".byte 0x6a, vector" does):
> >
> > .if vector < 128
> > pushq $(vector)
> > .else
> > pushq $(0xffffffffffffff00+vector)
> > .endif
>
> Only slightly less ugly and needs as much commentary as the above.
Agree.
Just FYI, I tried this later, it can work.
#define S8_TO_S64(vector) ((vector>>7)*0xffffffffffffff00+vector)
Thanks
Lai
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 14/31] x86/irq/64: Provide handle_irq()
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (12 preceding siblings ...)
2020-05-05 13:53 ` [patch V4 part 5 13/31] x86/irq: Convey vector as argument and not in ptregs Thomas Gleixner
@ 2020-05-05 13:53 ` Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 15/31] x86/entry: Add IRQENTRY_IRQ macro Thomas Gleixner
` (16 subsequent siblings)
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:53 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
To consolidate the interrupt entry/exit code vs. the other exceptions
provide handle_irq() (similar to 32bit) to move the interrupt stack
switching to C code. That allows to consolidate the entry exit handling by
reusing the idtentry machinery both in ASM and C.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/kernel/irq_64.c | 42 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 42 insertions(+)
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -72,6 +72,48 @@ int irq_init_percpu_irqstack(unsigned in
return map_irq_stack(cpu);
}
+static noinstr void handle_irq_on_irqstack(struct irq_desc *desc)
+{
+ unsigned long tos;
+
+ tos = (unsigned long) __this_cpu_read(hardirq_stack_ptr);
+ tos -= 8;
+ /*
+ * The unwinder requires that the top of the IRQ stack links back
+ * to the previous stack and RBP is set up.
+ */
+ asm volatile(
+ "pushq %%rbp \n"
+ "movq %%rsp, %%rbp \n"
+ "movq %%rsp, (%[ts]) \n"
+ "movq %[ts], %%rsp \n"
+ "1: \n"
+ " .pushsection .discard.instr_begin \n"
+ " .long 1b - . \n"
+ " .popsection \n"
+ CALL_NOSPEC
+ "2: \n"
+ " .pushsection .discard.instr_end \n"
+ " .long 2b - . \n"
+ " .popsection \n"
+ "popq %%rsp \n"
+ "leaveq \n"
+ :
+ : [ts] "r" (tos),
+ [thunk_target] "r" (desc->handle_irq),
+ "D" (desc)
+ : "memory"
+ );
+}
+
+void handle_irq(struct irq_desc *desc, struct pt_regs *regs)
+{
+ if (!irq_needs_irq_stack(regs))
+ generic_handle_irq_desc(desc);
+ else
+ handle_irq_on_irqstack(desc);
+}
+
noinstr void do_softirq_own_stack(void)
{
if (irqstack_active()) {
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 15/31] x86/entry: Add IRQENTRY_IRQ macro
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (13 preceding siblings ...)
2020-05-05 13:53 ` [patch V4 part 5 14/31] x86/irq/64: Provide handle_irq() Thomas Gleixner
@ 2020-05-05 13:53 ` Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 16/31] x86/entry: Use idtentry for interrupts Thomas Gleixner
` (15 subsequent siblings)
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:53 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
Provide a separate IDTENTRY macro for device interrupts. Similar to
IDTENTRY_ERRORCODE with the addition of invoking irq_enter/exit_rcu() and
providing the errorcode as a 'u8' argument to the C function, which
truncates the sign extended vector number.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/entry_32.S | 14 +++++++++++
arch/x86/entry/entry_64.S | 14 +++++++++++
arch/x86/include/asm/idtentry.h | 47 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 75 insertions(+)
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -751,6 +751,20 @@ SYM_CODE_START(\asmsym)
SYM_CODE_END(\asmsym)
.endm
+.macro idtentry_irq vector cfunc
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
+SYM_CODE_START_LOCAL(asm_\cfunc)
+ ASM_CLAC
+ SAVE_ALL switch_stacks=1
+ ENCODE_FRAME_POINTER
+ movl %esp, %eax
+ movl PT_ORIG_EAX(%esp), %edx /* get the vector from stack */
+ movl $-1, PT_ORIG_EAX(%esp) /* no syscall to restart */
+ call \cfunc
+ jmp handle_exception_return
+SYM_CODE_END(asm_\cfunc)
+.endm
+
/*
* Include the defines which emit the idt entries which are shared
* shared between 32 and 64 bit.
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -529,6 +529,20 @@ SYM_CODE_END(\asmsym)
.endm
/*
+ * Interrupt entry/exit.
+ *
+ * The interrupt stubs push (vector) onto the stack, which is the error_code
+ * position of idtentry exceptions, and jump to one of the two idtentry points
+ * (common/spurious).
+ *
+ * common_interrupt is a hotpath, align it to a cache line
+ */
+.macro idtentry_irq vector cfunc
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
+ idtentry \vector asm_\cfunc \cfunc has_error_code=1
+.endm
+
+/*
* MCE and DB exceptions
*/
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -163,6 +163,49 @@ static __always_inline void __##func(str
#define DEFINE_IDTENTRY_RAW_ERRORCODE(func) \
__visible noinstr void func(struct pt_regs *regs, unsigned long error_code)
+/**
+ * DECLARE_IDTENTRY_IRQ - Declare functions for device interrupt IDT entry
+ * points (common/spurious)
+ * @vector: Vector number (ignored for C)
+ * @func: Function name of the entry point
+ *
+ * Maps to DECLARE_IDTENTRY_ERRORCODE()
+ */
+#define DECLARE_IDTENTRY_IRQ(vector, func) \
+ DECLARE_IDTENTRY_ERRORCODE(vector, func)
+
+/**
+ * DEFINE_IDTENTRY_IRQ - Emit code for device interrupt IDT entry points
+ * @func: Function name of the entry point
+ *
+ * The vector number is pushed by the low level entry stub and handed
+ * to the function as error_code argument which needs to be truncated
+ * to an u8 because the push is sign extending.
+ *
+ * On 64bit idtentry_enter/exit() are invoked in the ASM entry code before
+ * and after switching to the interrupt stack. On 32bit this happens in C.
+ *
+ * irq_enter/exit_rcu() are invoked before the function body and the
+ * KVM L1D flush request is set.
+ */
+#define DEFINE_IDTENTRY_IRQ(func) \
+static __always_inline void __##func(struct pt_regs *regs, u8 vector); \
+ \
+__visible noinstr void func(struct pt_regs *regs, \
+ unsigned long error_code) \
+{ \
+ idtentry_enter(regs); \
+ instr_begin(); \
+ irq_enter_rcu(); \
+ kvm_set_cpu_l1tf_flush_l1d(); \
+ __##func (regs, (u8)error_code); \
+ irq_exit_rcu(); \
+ lockdep_hardirq_exit(); \
+ instr_end(); \
+ idtentry_exit(regs); \
+} \
+ \
+static __always_inline void __##func(struct pt_regs *regs, u8 vector)
#ifdef CONFIG_X86_64
/**
@@ -307,6 +350,10 @@ static __always_inline void __##func(str
#define DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func) \
DECLARE_IDTENTRY_ERRORCODE(vector, func)
+/* Entries for common/spurious (device) interrupts */
+#define DECLARE_IDTENTRY_IRQ(vector, func) \
+ idtentry_irq vector func
+
#ifdef CONFIG_X86_64
# define DECLARE_IDTENTRY_MCE(vector, func) \
idtentry_mce_db vector asm_##func func
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 16/31] x86/entry: Use idtentry for interrupts
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (14 preceding siblings ...)
2020-05-05 13:53 ` [patch V4 part 5 15/31] x86/entry: Add IRQENTRY_IRQ macro Thomas Gleixner
@ 2020-05-05 13:53 ` Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 17/31] x86/entry: Provide IDTENTRY_SYSVEC Thomas Gleixner
` (14 subsequent siblings)
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:53 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
Replace the extra interrupt handling code and reuse the existing idtentry
machinery. This moves the irq stack switching on 64 bit from ASM to C code;
32bit already does the stack switching in C.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/entry_32.S | 31 -------------------------------
arch/x86/entry/entry_64.S | 31 +++----------------------------
arch/x86/include/asm/hw_irq.h | 1 -
arch/x86/include/asm/idtentry.h | 10 ++++++++--
arch/x86/include/asm/irq.h | 2 --
arch/x86/include/asm/traps.h | 1 -
arch/x86/kernel/apic/apic.c | 23 ++++++++---------------
arch/x86/kernel/apic/msi.c | 3 ++-
arch/x86/kernel/irq.c | 27 +++++++--------------------
9 files changed, 28 insertions(+), 101 deletions(-)
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1229,37 +1229,6 @@ SYM_FUNC_END(entry_INT80_32)
#endif
.endm
-#ifdef CONFIG_X86_LOCAL_APIC
-SYM_CODE_START_LOCAL(common_spurious)
- ASM_CLAC
- SAVE_ALL switch_stacks=1
- ENCODE_FRAME_POINTER
- TRACE_IRQS_OFF
- movl %esp, %eax
- movl PT_ORIG_EAX(%esp), %edx /* get the vector from stack */
- movl $-1, PT_ORIG_EAX(%esp) /* no syscall to restart */
- call smp_spurious_interrupt
- jmp ret_from_intr
-SYM_CODE_END(common_spurious)
-#endif
-
-/*
- * the CPU automatically disables interrupts when executing an IRQ vector,
- * so IRQ-flags tracing has to follow that:
- */
- .p2align CONFIG_X86_L1_CACHE_SHIFT
-SYM_CODE_START_LOCAL(common_interrupt)
- ASM_CLAC
- SAVE_ALL switch_stacks=1
- ENCODE_FRAME_POINTER
- TRACE_IRQS_OFF
- movl %esp, %eax
- movl PT_ORIG_EAX(%esp), %edx /* get the vector from stack */
- movl $-1, PT_ORIG_EAX(%esp) /* no syscall to restart */
- call do_IRQ
- jmp ret_from_intr
-SYM_CODE_END(common_interrupt)
-
#define BUILD_INTERRUPT3(name, nr, fn) \
SYM_FUNC_START(name) \
ASM_CLAC; \
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -738,32 +738,7 @@ SYM_CODE_START(interrupt_entry)
SYM_CODE_END(interrupt_entry)
_ASM_NOKPROBE(interrupt_entry)
-
-/* Interrupt entry/exit. */
-
-/*
- * The interrupt stubs push vector onto the stack and
- * then jump to common_spurious/interrupt.
- */
-SYM_CODE_START_LOCAL(common_spurious)
- call interrupt_entry
- UNWIND_HINT_REGS indirect=1
- movq ORIG_RAX(%rdi), %rsi /* get vector from stack */
- movq $-1, ORIG_RAX(%rdi) /* no syscall to restart */
- call smp_spurious_interrupt /* rdi points to pt_regs */
- jmp ret_from_intr
-SYM_CODE_END(common_spurious)
-_ASM_NOKPROBE(common_spurious)
-
-/* common_interrupt is a hotpath. Align it */
- .p2align CONFIG_X86_L1_CACHE_SHIFT
-SYM_CODE_START_LOCAL(common_interrupt)
- call interrupt_entry
- UNWIND_HINT_REGS indirect=1
- movq ORIG_RAX(%rdi), %rsi /* get vector from stack */
- movq $-1, ORIG_RAX(%rdi) /* no syscall to restart */
- call do_IRQ /* rdi points to pt_regs */
- /* 0(%rsp): old RSP */
+SYM_CODE_START_LOCAL(common_interrupt_return)
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
@@ -945,8 +920,8 @@ SYM_INNER_LABEL(native_irq_return_iret,
*/
jmp native_irq_return_iret
#endif
-SYM_CODE_END(common_interrupt)
-_ASM_NOKPROBE(common_interrupt)
+SYM_CODE_END(common_interrupt_return)
+_ASM_NOKPROBE(common_interrupt_return)
/*
* APIC interrupts.
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -38,7 +38,6 @@ extern asmlinkage void error_interrupt(v
extern asmlinkage void irq_work_interrupt(void);
extern asmlinkage void uv_bau_message_intr1(void);
-extern asmlinkage void spurious_interrupt(void);
extern asmlinkage void spurious_apic_interrupt(void);
extern asmlinkage void thermal_interrupt(void);
extern asmlinkage void reschedule_interrupt(void);
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -403,7 +403,7 @@ SYM_CODE_START(irq_entries_start)
.rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
UNWIND_HINT_IRET_REGS
.byte 0x6a, vector
- jmp common_interrupt
+ jmp asm_common_interrupt
.align 8
vector=vector+1
.endr
@@ -416,7 +416,7 @@ SYM_CODE_START(spurious_entries_start)
.rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
UNWIND_HINT_IRET_REGS
.byte 0x6a, vector
- jmp common_spurious
+ jmp asm_spurious_interrupt
.align 8
vector=vector+1
.endr
@@ -487,6 +487,12 @@ DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_dou
DECLARE_IDTENTRY(X86_TRAP_OTHER, exc_xen_hypervisor_callback);
#endif
+/* Device interrupts common/spurious */
+DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, common_interrupt);
+#ifdef CONFIG_X86_LOCAL_APIC
+DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, spurious_interrupt);
+#endif
+
#undef X86_TRAP_OTHER
#endif
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -36,8 +36,6 @@ extern void native_init_IRQ(void);
extern void handle_irq(struct irq_desc *desc, struct pt_regs *regs);
-extern __visible void do_IRQ(struct pt_regs *regs, unsigned long vector);
-
extern void init_ISA_irqs(void);
extern void __init init_IRQ(void);
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -43,7 +43,6 @@ asmlinkage void smp_deferred_error_inter
void smp_apic_timer_interrupt(struct pt_regs *regs);
void smp_error_interrupt(struct pt_regs *regs);
void smp_spurious_apic_interrupt(struct pt_regs *regs);
-void smp_spurious_interrupt(struct pt_regs *regs, unsigned long vector);
asmlinkage void smp_irq_move_cleanup_interrupt(void);
#ifdef CONFIG_VMAP_STACK
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2153,9 +2153,9 @@ void __init register_lapic_address(unsig
*/
/**
- * smp_spurious_interrupt - Catch all for interrupts raised on unused vectors
+ * spurious_interrupt - Catch all for interrupts raised on unused vectors
* @regs: Pointer to pt_regs on stack
- * @error_code: The vector number is in the lower 8 bits
+ * @vector: The vector number
*
* This is invoked from ASM entry code to catch all interrupts which
* trigger on an entry which is routed to the common_spurious idtentry
@@ -2163,18 +2163,10 @@ void __init register_lapic_address(unsig
*
* Also called from smp_spurious_apic_interrupt().
*/
-__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs,
- unsigned long vector)
+DEFINE_IDTENTRY_IRQ(spurious_interrupt)
{
u32 v;
- entering_irq();
- /*
- * The push in the entry ASM code which stores the vector number on
- * the stack in the error code slot is sign expanding. Just use the
- * lower 8 bits.
- */
- vector &= 0xFF;
trace_spurious_apic_entry(vector);
inc_irq_stat(irq_spurious_count);
@@ -2195,21 +2187,22 @@ void __init register_lapic_address(unsig
*/
v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
if (v & (1 << (vector & 0x1f))) {
- pr_info("Spurious interrupt (vector 0x%02lx) on CPU#%d. Acked\n",
+ pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
vector, smp_processor_id());
ack_APIC_irq();
} else {
- pr_info("Spurious interrupt (vector 0x%02lx) on CPU#%d. Not pending!\n",
+ pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
vector, smp_processor_id());
}
out:
trace_spurious_apic_exit(vector);
- exiting_irq();
}
__visible void smp_spurious_apic_interrupt(struct pt_regs *regs)
{
- smp_spurious_interrupt(regs, SPURIOUS_APIC_VECTOR);
+ entering_irq();
+ __spurious_interrupt(regs, SPURIOUS_APIC_VECTOR);
+ exiting_irq();
}
/*
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -115,7 +115,8 @@ msi_set_affinity(struct irq_data *irqd,
* denote it as spurious which is no harm as this is a rare event
* and interrupt handlers have to cope with spurious interrupts
* anyway. If the vector is unused, then it is marked so it won't
- * trigger the 'No irq handler for vector' warning in do_IRQ().
+ * trigger the 'No irq handler for vector' warning in
+ * common_interrupt().
*
* This requires to hold vector lock to prevent concurrent updates to
* the affected vector.
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -19,6 +19,7 @@
#include <asm/mce.h>
#include <asm/hw_irq.h>
#include <asm/desc.h>
+#include <asm/traps.h>
#define CREATE_TRACE_POINTS
#include <asm/trace/irq_vectors.h>
@@ -226,37 +227,25 @@ u64 arch_irq_stat(void)
/*
- * do_IRQ handles all normal device IRQ's (the special
- * SMP cross-CPU interrupts have their own specific
- * handlers).
+ * common_interrupt() handles all normal device IRQ's (the special SMP
+ * cross-CPU interrupts have their own entry points).
*/
-__visible void __irq_entry do_IRQ(struct pt_regs *regs, unsigned long vector)
+DEFINE_IDTENTRY_IRQ(common_interrupt)
{
struct pt_regs *old_regs = set_irq_regs(regs);
struct irq_desc *desc;
- entering_irq();
- /*
- * The push in the entry ASM code which stores the vector number on
- * the stack in the error code slot is sign expanding. Just use the
- * lower 8 bits.
- */
- vector &= 0xFF;
-
- /* entering_irq() tells RCU that we're not quiescent. Check it. */
+ /* entry code tells RCU that we're not quiescent. Check it. */
RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU");
desc = __this_cpu_read(vector_irq[vector]);
if (likely(!IS_ERR_OR_NULL(desc))) {
- if (IS_ENABLED(CONFIG_X86_32))
- handle_irq(desc, regs);
- else
- generic_handle_irq_desc(desc);
+ handle_irq(desc, regs);
} else {
ack_APIC_irq();
if (desc == VECTOR_UNUSED) {
- pr_emerg_ratelimited("%s: %d.%lu No irq handler for vector\n",
+ pr_emerg_ratelimited("%s: %d.%u No irq handler for vector\n",
__func__, smp_processor_id(),
vector);
} else {
@@ -264,8 +253,6 @@ u64 arch_irq_stat(void)
}
}
- exiting_irq();
-
set_irq_regs(old_regs);
}
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 17/31] x86/entry: Provide IDTENTRY_SYSVEC
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (15 preceding siblings ...)
2020-05-05 13:53 ` [patch V4 part 5 16/31] x86/entry: Use idtentry for interrupts Thomas Gleixner
@ 2020-05-05 13:53 ` Thomas Gleixner
2020-05-05 13:53 ` [patch V4 part 5 18/31] x86/entry: Convert APIC interrupts to IDTENTRY_SYSVEC Thomas Gleixner
` (13 subsequent siblings)
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:53 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
Provide an IDTENTRY variant for system vectors to consolidate the different
mechanisms to emit the ASM stubs for 32 and 64 bit.
On 64bit this also moves the stack switching from ASM to C code. 32bit will
execute the system vectors w/o stack switching as before. As some of the
system vector handlers require access to pt_regs this requires a new stack
switching macro which can handle an argument.
The alternative solution would be to implement the set_irq_regs() dance
right in the entry macro, but most system vector handlers do not require
it, so avoid the overhead.
Provide the entry/exit handling as inline functions so the scheduler IPI
can use it to implement lightweight entry handling depending on trace point
enablement. This ensures that the code is consistent.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/entry_32.S | 4 ++
arch/x86/entry/entry_64.S | 8 ++++
arch/x86/include/asm/idtentry.h | 72 +++++++++++++++++++++++++++++++++++++++
arch/x86/include/asm/irq_stack.h | 33 +++++++++++++++++
4 files changed, 116 insertions(+), 1 deletion(-)
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -765,6 +765,10 @@ SYM_CODE_START_LOCAL(asm_\cfunc)
SYM_CODE_END(asm_\cfunc)
.endm
+.macro idtentry_sysvec vector cfunc
+ idtentry \vector asm_\cfunc \cfunc has_error_code=0
+.endm
+
/*
* Include the defines which emit the idt entries which are shared
* shared between 32 and 64 bit.
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -543,6 +543,14 @@ SYM_CODE_END(\asmsym)
.endm
/*
+ * System vectors which invoke their handlers directly and are not
+ * going through the regular common device interrupt handling code.
+ */
+.macro idtentry_sysvec vector cfunc
+ idtentry \vector asm_\cfunc \cfunc has_error_code=0
+.endm
+
+/*
* MCE and DB exceptions
*/
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -6,6 +6,9 @@
#include <asm/trapnr.h>
#ifndef __ASSEMBLY__
+#include <linux/hardirq.h>
+
+#include <asm/irq_stack.h>
void idtentry_enter(struct pt_regs *regs);
void idtentry_exit(struct pt_regs *regs);
@@ -207,6 +210,71 @@ static __always_inline void __##func(str
\
static __always_inline void __##func(struct pt_regs *regs, u8 vector)
+/**
+ * DECLARE_IDTENTRY_SYSVEC - Declare functions for system vector entry points
+ * @vector: Vector number (ignored for C)
+ * @func: Function name of the entry point
+ *
+ * Declares three functions:
+ * - The ASM entry point: asm_##func
+ * - The XEN PV trap entry point: xen_##func (maybe unused)
+ * - The C handler called from the ASM entry point
+ *
+ * Maps to DECLARE_IDTENTRY().
+ */
+#define DECLARE_IDTENTRY_SYSVEC(vector, func) \
+ DECLARE_IDTENTRY(vector, func)
+
+
+static __always_inline void idtentry_sysvec_enter(struct pt_regs *regs)
+{
+ idtentry_enter(regs);
+ instr_begin();
+ irq_enter_rcu();
+ kvm_set_cpu_l1tf_flush_l1d();
+ instr_end();
+}
+
+static __always_inline void idtentry_sysvec_exit(struct pt_regs *regs)
+{
+ instr_begin();
+ irq_exit_rcu();
+ lockdep_hardirq_exit();
+ instr_end();
+ idtentry_exit(regs);
+}
+
+/**
+ * DEFINE_IDTENTRY_SYSVEC - Emit code for system vector IDT entry points
+ * @func: Function name of the entry point
+ *
+ * idtentry_enter/exit() and irq_enter/exit_rcu() are invoked before the
+ * function body. KVM L1D flush request is set.
+ *
+ * Runs the function on the interrupt stack if the entry hit kernel mode
+ */
+#define DEFINE_IDTENTRY_SYSVEC(func) \
+__visible void __##func(struct pt_regs *regs); \
+ \
+static noinstr void irqst_##func(struct pt_regs *regs) \
+{ \
+ RUN_ON_IRQSTACK_ARG1(__##func, regs); \
+} \
+ \
+__visible noinstr void func(struct pt_regs *regs) \
+{ \
+ idtentry_sysvec_enter(regs); \
+ instr_begin(); \
+ if (!irq_needs_irq_stack(regs)) \
+ __##func (regs); \
+ else \
+ irqst_##func(regs); \
+ instr_end(); \
+ idtentry_sysvec_exit(regs); \
+} \
+ \
+__visible void __##func(struct pt_regs *regs)
+
#ifdef CONFIG_X86_64
/**
* DECLARE_IDTENTRY_IST - Declare functions for IST handling IDT entry points
@@ -354,6 +422,10 @@ static __always_inline void __##func(str
#define DECLARE_IDTENTRY_IRQ(vector, func) \
idtentry_irq vector func
+/* System vector entries */
+#define DECLARE_IDTENTRY_SYSVEC(vector, func) \
+ idtentry_sysvec vector func
+
#ifdef CONFIG_X86_64
# define DECLARE_IDTENTRY_MCE(vector, func) \
idtentry_mce_db vector asm_##func func
--- a/arch/x86/include/asm/irq_stack.h
+++ b/arch/x86/include/asm/irq_stack.h
@@ -46,9 +46,40 @@ static __always_inline bool irqstack_act
__this_cpu_sub(irq_count, 1); \
}
+#define RUN_ON_IRQSTACK_ARG1(func, arg) { \
+ unsigned long tos; \
+ \
+ tos = ((unsigned long)__this_cpu_read(hardirq_stack_ptr)) - 8; \
+ \
+ __this_cpu_add(irq_count, 1); \
+ asm volatile( \
+ "pushq %%rbp \n" \
+ "movq %%rsp, %%rbp \n" \
+ "movq %%rsp, (%[ts]) \n" \
+ "movq %[ts], %%rsp \n" \
+ "1: \n" \
+ " .pushsection .discard.instr_begin \n" \
+ " .long 1b - . \n" \
+ " .popsection \n" \
+ "call " __ASM_FORM(func) " \n" \
+ "2: \n" \
+ " .pushsection .discard.instr_end \n" \
+ " .long 2b - . \n" \
+ " .popsection \n" \
+ "popq %%rsp \n" \
+ "leaveq \n" \
+ : \
+ : [ts] "r" (tos), \
+ "D" (arg) \
+ : "memory" \
+ ); \
+ __this_cpu_sub(irq_count, 1); \
+}
+
#else /* CONFIG_X86_64 */
static __always_inline bool irqstack_active(void) { return false; }
-#define RUN_ON_IRQSTACK(func) do { } while (0)
+#define RUN_ON_IRQSTACK(func) do { } while (0)
+#define RUN_ON_IRQSTACK_ARG1(func, arg) do { } while (0)
#endif /* !CONFIG_X86_64 */
static __always_inline bool irq_needs_irq_stack(struct pt_regs *regs)
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 18/31] x86/entry: Convert APIC interrupts to IDTENTRY_SYSVEC
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (16 preceding siblings ...)
2020-05-05 13:53 ` [patch V4 part 5 17/31] x86/entry: Provide IDTENTRY_SYSVEC Thomas Gleixner
@ 2020-05-05 13:53 ` Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 19/31] x86/entry: Convert SMP system vectors " Thomas Gleixner
` (12 subsequent siblings)
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:53 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
Convert APIC interrupts to IDTENTRY_SYSVEC
- Implement the C entry point with DEFINE_IDTENTRY_SYSVEC
- Emit the ASM stub with DECLARE_IDTENTRY_SYSVEC
- Remove the ASM idtentries in 64bit
- Remove the BUILD_INTERRUPT entries in 32bit
- Remove the old prototypes
No functional change.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/entry_64.S | 6 ------
arch/x86/include/asm/entry_arch.h | 5 -----
arch/x86/include/asm/hw_irq.h | 4 ----
arch/x86/include/asm/idtentry.h | 8 ++++++++
arch/x86/include/asm/irq.h | 1 -
arch/x86/include/asm/traps.h | 3 ---
arch/x86/kernel/apic/apic.c | 23 +++++------------------
arch/x86/kernel/idt.c | 8 ++++----
arch/x86/kernel/irq.c | 6 +++---
9 files changed, 20 insertions(+), 44 deletions(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -966,9 +966,6 @@ apicinterrupt3 REBOOT_VECTOR reboot_in
apicinterrupt3 UV_BAU_MESSAGE uv_bau_message_intr1 uv_bau_message_interrupt
#endif
-apicinterrupt LOCAL_TIMER_VECTOR apic_timer_interrupt smp_apic_timer_interrupt
-apicinterrupt X86_PLATFORM_IPI_VECTOR x86_platform_ipi smp_x86_platform_ipi
-
#ifdef CONFIG_HAVE_KVM
apicinterrupt3 POSTED_INTR_VECTOR kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi
@@ -993,9 +990,6 @@ apicinterrupt CALL_FUNCTION_VECTOR call
apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt
#endif
-apicinterrupt ERROR_APIC_VECTOR error_interrupt smp_error_interrupt
-apicinterrupt SPURIOUS_APIC_VECTOR spurious_apic_interrupt smp_spurious_apic_interrupt
-
#ifdef CONFIG_IRQ_WORK
apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
#endif
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -33,11 +33,6 @@ BUILD_INTERRUPT(kvm_posted_intr_nested_i
*/
#ifdef CONFIG_X86_LOCAL_APIC
-BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
-BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
-BUILD_INTERRUPT(spurious_apic_interrupt,SPURIOUS_APIC_VECTOR)
-BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
-
#ifdef CONFIG_IRQ_WORK
BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR)
#endif
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -29,16 +29,12 @@
#include <asm/sections.h>
/* Interrupt handlers registered during init_IRQ */
-extern asmlinkage void apic_timer_interrupt(void);
-extern asmlinkage void x86_platform_ipi(void);
extern asmlinkage void kvm_posted_intr_ipi(void);
extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
extern asmlinkage void kvm_posted_intr_nested_ipi(void);
-extern asmlinkage void error_interrupt(void);
extern asmlinkage void irq_work_interrupt(void);
extern asmlinkage void uv_bau_message_intr1(void);
-extern asmlinkage void spurious_apic_interrupt(void);
extern asmlinkage void thermal_interrupt(void);
extern asmlinkage void reschedule_interrupt(void);
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -565,6 +565,14 @@ DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, com
DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, spurious_interrupt);
#endif
+/* System vector entry points */
+#ifdef CONFIG_X86_LOCAL_APIC
+DECLARE_IDTENTRY_SYSVEC(ERROR_APIC_VECTOR, sysvec_error_interrupt);
+DECLARE_IDTENTRY_SYSVEC(SPURIOUS_APIC_VECTOR, sysvec_spurious_apic_interrupt);
+DECLARE_IDTENTRY_SYSVEC(LOCAL_TIMER_VECTOR, sysvec_apic_timer_interrupt);
+DECLARE_IDTENTRY_SYSVEC(X86_PLATFORM_IPI_VECTOR, sysvec_x86_platform_ipi);
+#endif
+
#undef X86_TRAP_OTHER
#endif
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -44,7 +44,6 @@ extern void __init init_IRQ(void);
void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
bool exclude_self);
-extern __visible void smp_x86_platform_ipi(struct pt_regs *regs);
#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
#endif
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -40,9 +40,6 @@ asmlinkage void smp_threshold_interrupt(
asmlinkage void smp_deferred_error_interrupt(struct pt_regs *regs);
#endif
-void smp_apic_timer_interrupt(struct pt_regs *regs);
-void smp_error_interrupt(struct pt_regs *regs);
-void smp_spurious_apic_interrupt(struct pt_regs *regs);
asmlinkage void smp_irq_move_cleanup_interrupt(void);
#ifdef CONFIG_VMAP_STACK
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1121,23 +1121,14 @@ static void local_apic_timer_interrupt(v
* [ if a single-CPU system runs an SMP kernel then we call the local
* interrupt as well. Thus we cannot inline the local irq ... ]
*/
-__visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- /*
- * NOTE! We'd better ACK the irq immediately,
- * because timer handling can be slow.
- *
- * update_process_times() expects us to have done irq_enter().
- * Besides, if we don't timer interrupts ignore the global
- * interrupt lock, which is the WrongThing (tm) to do.
- */
- entering_ack_irq();
+ ack_APIC_irq();
trace_local_timer_entry(LOCAL_TIMER_VECTOR);
local_apic_timer_interrupt();
trace_local_timer_exit(LOCAL_TIMER_VECTOR);
- exiting_irq();
set_irq_regs(old_regs);
}
@@ -2161,7 +2152,7 @@ void __init register_lapic_address(unsig
* trigger on an entry which is routed to the common_spurious idtentry
* point.
*
- * Also called from smp_spurious_apic_interrupt().
+ * Also called from sysvec_spurious_apic_interrupt().
*/
DEFINE_IDTENTRY_IRQ(spurious_interrupt)
{
@@ -2198,17 +2189,15 @@ DEFINE_IDTENTRY_IRQ(spurious_interrupt)
trace_spurious_apic_exit(vector);
}
-__visible void smp_spurious_apic_interrupt(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_spurious_apic_interrupt)
{
- entering_irq();
__spurious_interrupt(regs, SPURIOUS_APIC_VECTOR);
- exiting_irq();
}
/*
* This interrupt should never happen with our APIC/SMP architecture
*/
-__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_error_interrupt)
{
static const char * const error_interrupt_reason[] = {
"Send CS error", /* APIC Error Bit 0 */
@@ -2222,7 +2211,6 @@ DEFINE_IDTENTRY_IRQ(spurious_interrupt)
};
u32 v, i = 0;
- entering_irq();
trace_error_apic_entry(ERROR_APIC_VECTOR);
/* First tickle the hardware, only then report what went on. -- REW */
@@ -2246,7 +2234,6 @@ DEFINE_IDTENTRY_IRQ(spurious_interrupt)
apic_printk(APIC_DEBUG, KERN_CONT "\n");
trace_error_apic_exit(ERROR_APIC_VECTOR);
- exiting_irq();
}
/**
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -129,8 +129,8 @@ static const __initconst struct idt_data
#endif
#ifdef CONFIG_X86_LOCAL_APIC
- INTG(LOCAL_TIMER_VECTOR, apic_timer_interrupt),
- INTG(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi),
+ INTG(LOCAL_TIMER_VECTOR, asm_sysvec_apic_timer_interrupt),
+ INTG(X86_PLATFORM_IPI_VECTOR, asm_sysvec_x86_platform_ipi),
# ifdef CONFIG_HAVE_KVM
INTG(POSTED_INTR_VECTOR, kvm_posted_intr_ipi),
INTG(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi),
@@ -142,8 +142,8 @@ static const __initconst struct idt_data
#ifdef CONFIG_X86_UV
INTG(UV_BAU_MESSAGE, uv_bau_message_intr1),
#endif
- INTG(SPURIOUS_APIC_VECTOR, spurious_apic_interrupt),
- INTG(ERROR_APIC_VECTOR, error_interrupt),
+ INTG(SPURIOUS_APIC_VECTOR, asm_sysvec_spurious_apic_interrupt),
+ INTG(ERROR_APIC_VECTOR, asm_sysvec_error_interrupt),
#endif
};
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -14,6 +14,7 @@
#include <linux/irq.h>
#include <asm/apic.h>
+#include <asm/traps.h>
#include <asm/io_apic.h>
#include <asm/irq.h>
#include <asm/mce.h>
@@ -262,17 +263,16 @@ void (*x86_platform_ipi_callback)(void)
/*
* Handler for X86_PLATFORM_IPI_VECTOR.
*/
-__visible void __irq_entry smp_x86_platform_ipi(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- entering_ack_irq();
+ ack_APIC_irq();
trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
inc_irq_stat(x86_platform_ipis);
if (x86_platform_ipi_callback)
x86_platform_ipi_callback();
trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
- exiting_irq();
set_irq_regs(old_regs);
}
#endif
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 19/31] x86/entry: Convert SMP system vectors to IDTENTRY_SYSVEC
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (17 preceding siblings ...)
2020-05-05 13:53 ` [patch V4 part 5 18/31] x86/entry: Convert APIC interrupts to IDTENTRY_SYSVEC Thomas Gleixner
@ 2020-05-05 13:54 ` Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 20/31] x86/entry: Convert various system vectors Thomas Gleixner
` (11 subsequent siblings)
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:54 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
From: Thomas Gleixner <tglx@linutronix.de>
Convert SMP system vectors to IDTENTRY_SYSVEC
- Implement the C entry point with DEFINE_IDTENTRY_SYSVEC
- Emit the ASM stub with DECLARE_IDTENTRY_SYSVEC
- Remove the ASM idtentries in 64bit
- Remove the BUILD_INTERRUPT entries in 32bit
- Remove the old prototypes
No functional change.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/entry_64.S | 7 -------
arch/x86/include/asm/entry_arch.h | 4 ----
arch/x86/include/asm/hw_irq.h | 5 -----
arch/x86/include/asm/idtentry.h | 7 +++++++
arch/x86/include/asm/traps.h | 2 --
arch/x86/kernel/apic/vector.c | 5 ++---
arch/x86/kernel/idt.c | 10 +++++-----
arch/x86/kernel/smp.c | 18 +++++++-----------
8 files changed, 21 insertions(+), 37 deletions(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -957,11 +957,6 @@ apicinterrupt3 \num \sym \do_sym
POP_SECTION_IRQENTRY
.endm
-#ifdef CONFIG_SMP
-apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
-apicinterrupt3 REBOOT_VECTOR reboot_interrupt smp_reboot_interrupt
-#endif
-
#ifdef CONFIG_X86_UV
apicinterrupt3 UV_BAU_MESSAGE uv_bau_message_intr1 uv_bau_message_interrupt
#endif
@@ -985,8 +980,6 @@ apicinterrupt THERMAL_APIC_VECTOR therm
#endif
#ifdef CONFIG_SMP
-apicinterrupt CALL_FUNCTION_SINGLE_VECTOR call_function_single_interrupt smp_call_function_single_interrupt
-apicinterrupt CALL_FUNCTION_VECTOR call_function_interrupt smp_call_function_interrupt
apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt
#endif
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -12,10 +12,6 @@
*/
#ifdef CONFIG_SMP
BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
-BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
-BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
-BUILD_INTERRUPT(irq_move_cleanup_interrupt, IRQ_MOVE_CLEANUP_VECTOR)
-BUILD_INTERRUPT(reboot_interrupt, REBOOT_VECTOR)
#endif
#ifdef CONFIG_HAVE_KVM
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -38,14 +38,9 @@ extern asmlinkage void uv_bau_message_in
extern asmlinkage void thermal_interrupt(void);
extern asmlinkage void reschedule_interrupt(void);
-extern asmlinkage void irq_move_cleanup_interrupt(void);
-extern asmlinkage void reboot_interrupt(void);
extern asmlinkage void threshold_interrupt(void);
extern asmlinkage void deferred_error_interrupt(void);
-extern asmlinkage void call_function_interrupt(void);
-extern asmlinkage void call_function_single_interrupt(void);
-
#ifdef CONFIG_X86_LOCAL_APIC
struct irq_data;
struct pci_dev;
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -573,6 +573,13 @@ DECLARE_IDTENTRY_SYSVEC(LOCAL_TIMER_VECT
DECLARE_IDTENTRY_SYSVEC(X86_PLATFORM_IPI_VECTOR, sysvec_x86_platform_ipi);
#endif
+#ifdef CONFIG_SMP
+DECLARE_IDTENTRY_SYSVEC(IRQ_MOVE_CLEANUP_VECTOR, sysvec_irq_move_cleanup);
+DECLARE_IDTENTRY_SYSVEC(REBOOT_VECTOR, sysvec_reboot);
+DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_SINGLE_VECTOR, sysvec_call_function_single);
+DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_VECTOR, sysvec_call_function);
+#endif
+
#undef X86_TRAP_OTHER
#endif
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -40,8 +40,6 @@ asmlinkage void smp_threshold_interrupt(
asmlinkage void smp_deferred_error_interrupt(struct pt_regs *regs);
#endif
-asmlinkage void smp_irq_move_cleanup_interrupt(void);
-
#ifdef CONFIG_VMAP_STACK
void __noreturn handle_stack_overflow(const char *message,
struct pt_regs *regs,
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -861,13 +861,13 @@ static void free_moved_vector(struct api
apicd->move_in_progress = 0;
}
-asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void)
+DEFINE_IDTENTRY_SYSVEC(sysvec_irq_move_cleanup)
{
struct hlist_head *clhead = this_cpu_ptr(&cleanup_list);
struct apic_chip_data *apicd;
struct hlist_node *tmp;
- entering_ack_irq();
+ ack_APIC_irq();
/* Prevent vectors vanishing under us */
raw_spin_lock(&vector_lock);
@@ -892,7 +892,6 @@ asmlinkage __visible void __irq_entry sm
}
raw_spin_unlock(&vector_lock);
- exiting_irq();
}
static void __send_cleanup_vector(struct apic_chip_data *apicd)
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -109,11 +109,11 @@ static const __initconst struct idt_data
*/
static const __initconst struct idt_data apic_idts[] = {
#ifdef CONFIG_SMP
- INTG(RESCHEDULE_VECTOR, reschedule_interrupt),
- INTG(CALL_FUNCTION_VECTOR, call_function_interrupt),
- INTG(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt),
- INTG(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt),
- INTG(REBOOT_VECTOR, reboot_interrupt),
+ INTG(RESCHEDULE_VECTOR, reschedule_interrupt),
+ INTG(CALL_FUNCTION_VECTOR, asm_sysvec_call_function),
+ INTG(CALL_FUNCTION_SINGLE_VECTOR, asm_sysvec_call_function_single),
+ INTG(IRQ_MOVE_CLEANUP_VECTOR, asm_sysvec_irq_move_cleanup),
+ INTG(REBOOT_VECTOR, asm_sysvec_reboot),
#endif
#ifdef CONFIG_X86_THERMAL_VECTOR
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -27,6 +27,7 @@
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/apic.h>
+#include <asm/idtentry.h>
#include <asm/nmi.h>
#include <asm/mce.h>
#include <asm/trace/irq_vectors.h>
@@ -130,13 +131,11 @@ static int smp_stop_nmi_callback(unsigne
/*
* this function calls the 'stop' function on all other CPUs in the system.
*/
-
-asmlinkage __visible void smp_reboot_interrupt(void)
+DEFINE_IDTENTRY_SYSVEC(sysvec_reboot)
{
- ipi_entering_ack_irq();
+ ack_APIC_irq();
cpu_emergency_vmxoff();
stop_this_cpu(NULL);
- irq_exit();
}
static int register_stop_handler(void)
@@ -227,7 +226,6 @@ static void native_stop_other_cpus(int w
{
ack_APIC_irq();
inc_irq_stat(irq_resched_count);
- kvm_set_cpu_l1tf_flush_l1d();
if (trace_resched_ipi_enabled()) {
/*
@@ -244,24 +242,22 @@ static void native_stop_other_cpus(int w
scheduler_ipi();
}
-__visible void __irq_entry smp_call_function_interrupt(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_call_function)
{
- ipi_entering_ack_irq();
+ ack_APIC_irq();
trace_call_function_entry(CALL_FUNCTION_VECTOR);
inc_irq_stat(irq_call_count);
generic_smp_call_function_interrupt();
trace_call_function_exit(CALL_FUNCTION_VECTOR);
- exiting_irq();
}
-__visible void __irq_entry smp_call_function_single_interrupt(struct pt_regs *r)
+DEFINE_IDTENTRY_SYSVEC(sysvec_call_function_single)
{
- ipi_entering_ack_irq();
+ ack_APIC_irq();
trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
inc_irq_stat(irq_call_count);
generic_smp_call_function_single_interrupt();
trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
- exiting_irq();
}
static int __init nonmi_ipi_setup(char *str)
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 20/31] x86/entry: Convert various system vectors
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (18 preceding siblings ...)
2020-05-05 13:54 ` [patch V4 part 5 19/31] x86/entry: Convert SMP system vectors " Thomas Gleixner
@ 2020-05-05 13:54 ` Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 21/31] x86/entry: Convert KVM vectors to IDTENTRY_SYSVEC Thomas Gleixner
` (10 subsequent siblings)
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:54 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
From: Thomas Gleixner <tglx@linutronix.de>
Convert various system vectors to IDTENTRY_SYSVEC
- Implement the C entry point with DEFINE_IDTENTRY_SYSVEC
- Emit the ASM stub with DECLARE_IDTENTRY_SYSVEC
- Remove the ASM idtentries in 64bit
- Remove the BUILD_INTERRUPT entries in 32bit
- Remove the old prototypes
No functional change.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/entry_64.S | 19 -------------------
arch/x86/include/asm/apic.h | 13 -------------
arch/x86/include/asm/entry_arch.h | 25 -------------------------
arch/x86/include/asm/hw_irq.h | 6 ------
arch/x86/include/asm/idtentry.h | 22 ++++++++++++++++++++++
arch/x86/include/asm/irq_work.h | 1 -
arch/x86/include/asm/traps.h | 5 -----
arch/x86/include/asm/uv/uv_bau.h | 8 ++------
arch/x86/kernel/cpu/mce/amd.c | 5 ++---
arch/x86/kernel/cpu/mce/therm_throt.c | 5 ++---
arch/x86/kernel/cpu/mce/threshold.c | 5 ++---
arch/x86/kernel/idt.c | 28 ++++++++++++++--------------
arch/x86/kernel/irq_work.c | 6 +++---
arch/x86/platform/uv/tlb_uv.c | 2 +-
14 files changed, 48 insertions(+), 102 deletions(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -957,9 +957,6 @@ apicinterrupt3 \num \sym \do_sym
POP_SECTION_IRQENTRY
.endm
-#ifdef CONFIG_X86_UV
-apicinterrupt3 UV_BAU_MESSAGE uv_bau_message_intr1 uv_bau_message_interrupt
-#endif
#ifdef CONFIG_HAVE_KVM
apicinterrupt3 POSTED_INTR_VECTOR kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
@@ -967,26 +964,10 @@ apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR
apicinterrupt3 POSTED_INTR_NESTED_VECTOR kvm_posted_intr_nested_ipi smp_kvm_posted_intr_nested_ipi
#endif
-#ifdef CONFIG_X86_MCE_THRESHOLD
-apicinterrupt THRESHOLD_APIC_VECTOR threshold_interrupt smp_threshold_interrupt
-#endif
-
-#ifdef CONFIG_X86_MCE_AMD
-apicinterrupt DEFERRED_ERROR_VECTOR deferred_error_interrupt smp_deferred_error_interrupt
-#endif
-
-#ifdef CONFIG_X86_THERMAL_VECTOR
-apicinterrupt THERMAL_APIC_VECTOR thermal_interrupt smp_thermal_interrupt
-#endif
-
#ifdef CONFIG_SMP
apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt
#endif
-#ifdef CONFIG_IRQ_WORK
-apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
-#endif
-
/*
* Reload gs selector with exception handling
* edi: new selector
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -534,24 +534,11 @@ static inline void entering_ack_irq(void
ack_APIC_irq();
}
-static inline void ipi_entering_ack_irq(void)
-{
- irq_enter();
- ack_APIC_irq();
- kvm_set_cpu_l1tf_flush_l1d();
-}
-
static inline void exiting_irq(void)
{
irq_exit();
}
-static inline void exiting_ack_irq(void)
-{
- ack_APIC_irq();
- irq_exit();
-}
-
extern void ioapic_zap_locks(void);
#endif /* _ASM_X86_APIC_H */
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -20,28 +20,3 @@ BUILD_INTERRUPT(kvm_posted_intr_wakeup_i
BUILD_INTERRUPT(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR)
#endif
-/*
- * every pentium local APIC has two 'local interrupts', with a
- * soft-definable vector attached to both interrupts, one of
- * which is a timer interrupt, the other one is error counter
- * overflow. Linux uses the local APIC timer interrupt to get
- * a much simpler SMP time architecture:
- */
-#ifdef CONFIG_X86_LOCAL_APIC
-
-#ifdef CONFIG_IRQ_WORK
-BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR)
-#endif
-
-#ifdef CONFIG_X86_THERMAL_VECTOR
-BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
-#endif
-
-#ifdef CONFIG_X86_MCE_THRESHOLD
-BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
-#endif
-
-#ifdef CONFIG_X86_MCE_AMD
-BUILD_INTERRUPT(deferred_error_interrupt, DEFERRED_ERROR_VECTOR)
-#endif
-#endif
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -32,15 +32,9 @@
extern asmlinkage void kvm_posted_intr_ipi(void);
extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
extern asmlinkage void kvm_posted_intr_nested_ipi(void);
-extern asmlinkage void irq_work_interrupt(void);
-extern asmlinkage void uv_bau_message_intr1(void);
-extern asmlinkage void thermal_interrupt(void);
extern asmlinkage void reschedule_interrupt(void);
-extern asmlinkage void threshold_interrupt(void);
-extern asmlinkage void deferred_error_interrupt(void);
-
#ifdef CONFIG_X86_LOCAL_APIC
struct irq_data;
struct pci_dev;
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -580,6 +580,28 @@ DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_SI
DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_VECTOR, sysvec_call_function);
#endif
+#ifdef CONFIG_X86_LOCAL_APIC
+# ifdef CONFIG_X86_UV
+DECLARE_IDTENTRY_SYSVEC(UV_BAU_MESSAGE, sysvec_uv_bau_message);
+# endif
+
+# ifdef CONFIG_X86_MCE_THRESHOLD
+DECLARE_IDTENTRY_SYSVEC(THRESHOLD_APIC_VECTOR, sysvec_threshold);
+# endif
+
+# ifdef CONFIG_X86_MCE_AMD
+DECLARE_IDTENTRY_SYSVEC(DEFERRED_ERROR_VECTOR, sysvec_deferred_error);
+# endif
+
+# ifdef CONFIG_X86_THERMAL_VECTOR
+DECLARE_IDTENTRY_SYSVEC(THERMAL_APIC_VECTOR, sysvec_thermal);
+# endif
+
+# ifdef CONFIG_IRQ_WORK
+DECLARE_IDTENTRY_SYSVEC(IRQ_WORK_VECTOR, sysvec_irq_work);
+# endif
+#endif
+
#undef X86_TRAP_OTHER
#endif
--- a/arch/x86/include/asm/irq_work.h
+++ b/arch/x86/include/asm/irq_work.h
@@ -10,7 +10,6 @@ static inline bool arch_irq_work_has_int
return boot_cpu_has(X86_FEATURE_APIC);
}
extern void arch_irq_work_raise(void);
-extern __visible void smp_irq_work_interrupt(struct pt_regs *regs);
#else
static inline bool arch_irq_work_has_interrupt(void)
{
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -34,11 +34,6 @@ static inline int get_si_code(unsigned l
extern int panic_on_unrecovered_nmi;
void math_emulate(struct math_emu_info *);
-#ifndef CONFIG_X86_32
-asmlinkage void smp_thermal_interrupt(struct pt_regs *regs);
-asmlinkage void smp_threshold_interrupt(struct pt_regs *regs);
-asmlinkage void smp_deferred_error_interrupt(struct pt_regs *regs);
-#endif
#ifdef CONFIG_VMAP_STACK
void __noreturn handle_stack_overflow(const char *message,
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -12,6 +12,8 @@
#define _ASM_X86_UV_UV_BAU_H
#include <linux/bitmap.h>
+#include <asm/idtentry.h>
+
#define BITSPERBYTE 8
/*
@@ -799,12 +801,6 @@ static inline void bau_cpubits_clear(str
bitmap_zero(&dstp->bits, nbits);
}
-extern void uv_bau_message_intr1(void);
-#ifdef CONFIG_TRACING
-#define trace_uv_bau_message_intr1 uv_bau_message_intr1
-#endif
-extern void uv_bau_timeout_intr1(void);
-
struct atomic_short {
short counter;
};
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -907,14 +907,13 @@ static void __log_error(unsigned int ban
mce_log(&m);
}
-asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
{
- entering_irq();
trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
inc_irq_stat(irq_deferred_error_count);
deferred_error_int_vector();
trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
- exiting_ack_irq();
+ ack_APIC_irq();
}
/*
--- a/arch/x86/kernel/cpu/mce/therm_throt.c
+++ b/arch/x86/kernel/cpu/mce/therm_throt.c
@@ -614,14 +614,13 @@ static void unexpected_thermal_interrupt
static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
-asmlinkage __visible void __irq_entry smp_thermal_interrupt(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_thermal)
{
- entering_irq();
trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
inc_irq_stat(irq_thermal_count);
smp_thermal_vector();
trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
- exiting_ack_irq();
+ ack_APIC_irq();
}
/* Thermal monitoring depends on APIC, ACPI and clock modulation */
--- a/arch/x86/kernel/cpu/mce/threshold.c
+++ b/arch/x86/kernel/cpu/mce/threshold.c
@@ -21,12 +21,11 @@ static void default_threshold_interrupt(
void (*mce_threshold_vector)(void) = default_threshold_interrupt;
-asmlinkage __visible void __irq_entry smp_threshold_interrupt(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_threshold)
{
- entering_irq();
trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
inc_irq_stat(irq_threshold_count);
mce_threshold_vector();
trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
- exiting_ack_irq();
+ ack_APIC_irq();
}
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -117,33 +117,33 @@ static const __initconst struct idt_data
#endif
#ifdef CONFIG_X86_THERMAL_VECTOR
- INTG(THERMAL_APIC_VECTOR, thermal_interrupt),
+ INTG(THERMAL_APIC_VECTOR, asm_sysvec_thermal),
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
- INTG(THRESHOLD_APIC_VECTOR, threshold_interrupt),
+ INTG(THRESHOLD_APIC_VECTOR, asm_sysvec_threshold),
#endif
#ifdef CONFIG_X86_MCE_AMD
- INTG(DEFERRED_ERROR_VECTOR, deferred_error_interrupt),
+ INTG(DEFERRED_ERROR_VECTOR, asm_sysvec_deferred_error),
#endif
#ifdef CONFIG_X86_LOCAL_APIC
- INTG(LOCAL_TIMER_VECTOR, asm_sysvec_apic_timer_interrupt),
- INTG(X86_PLATFORM_IPI_VECTOR, asm_sysvec_x86_platform_ipi),
+ INTG(LOCAL_TIMER_VECTOR, asm_sysvec_apic_timer_interrupt),
+ INTG(X86_PLATFORM_IPI_VECTOR, asm_sysvec_x86_platform_ipi),
# ifdef CONFIG_HAVE_KVM
- INTG(POSTED_INTR_VECTOR, kvm_posted_intr_ipi),
- INTG(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi),
- INTG(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi),
+ INTG(POSTED_INTR_VECTOR, kvm_posted_intr_ipi),
+ INTG(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi),
+ INTG(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi),
# endif
# ifdef CONFIG_IRQ_WORK
- INTG(IRQ_WORK_VECTOR, irq_work_interrupt),
+ INTG(IRQ_WORK_VECTOR, asm_sysvec_irq_work),
# endif
-#ifdef CONFIG_X86_UV
- INTG(UV_BAU_MESSAGE, uv_bau_message_intr1),
-#endif
- INTG(SPURIOUS_APIC_VECTOR, asm_sysvec_spurious_apic_interrupt),
- INTG(ERROR_APIC_VECTOR, asm_sysvec_error_interrupt),
+# ifdef CONFIG_X86_UV
+ INTG(UV_BAU_MESSAGE, asm_sysvec_uv_bau_message),
+# endif
+ INTG(SPURIOUS_APIC_VECTOR, asm_sysvec_spurious_apic_interrupt),
+ INTG(ERROR_APIC_VECTOR, asm_sysvec_error_interrupt),
#endif
};
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -9,18 +9,18 @@
#include <linux/irq_work.h>
#include <linux/hardirq.h>
#include <asm/apic.h>
+#include <asm/idtentry.h>
#include <asm/trace/irq_vectors.h>
#include <linux/interrupt.h>
#ifdef CONFIG_X86_LOCAL_APIC
-__visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_irq_work)
{
- ipi_entering_ack_irq();
+ ack_APIC_irq();
trace_irq_work_entry(IRQ_WORK_VECTOR);
inc_irq_stat(apic_irq_work_irqs);
irq_work_run();
trace_irq_work_exit(IRQ_WORK_VECTOR);
- exiting_irq();
}
void arch_irq_work_raise(void)
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1272,7 +1272,7 @@ static void process_uv2_message(struct m
* (the resource will not be freed until noninterruptable cpus see this
* interrupt; hardware may timeout the s/w ack and reply ERROR)
*/
-void uv_bau_message_interrupt(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_uv_bau_message)
{
int count = 0;
cycles_t time_start;
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 21/31] x86/entry: Convert KVM vectors to IDTENTRY_SYSVEC
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (19 preceding siblings ...)
2020-05-05 13:54 ` [patch V4 part 5 20/31] x86/entry: Convert various system vectors Thomas Gleixner
@ 2020-05-05 13:54 ` Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 22/31] x86/entry: Convert various hypervisor " Thomas Gleixner
` (9 subsequent siblings)
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:54 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
From: Thomas Gleixner <tglx@linutronix.de>
Convert KVM specific system vectors to IDTENTRY_SYSVEC
- Implement the C entry point with DEFINE_IDTENTRY_SYSVEC
- Emit the ASM stub with DECLARE_IDTENTRY_SYSVEC
- Remove the ASM idtentries in 64bit
- Remove the BUILD_INTERRUPT entries in 32bit
- Remove the old prototypes
No functional change.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/entry/entry_64.S | 7 -------
arch/x86/include/asm/entry_arch.h | 7 -------
arch/x86/include/asm/hw_irq.h | 4 ----
arch/x86/include/asm/idtentry.h | 6 ++++++
arch/x86/include/asm/irq.h | 3 ---
arch/x86/kernel/idt.c | 6 +++---
arch/x86/kernel/irq.c | 15 ++++++---------
7 files changed, 15 insertions(+), 33 deletions(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -957,13 +957,6 @@ apicinterrupt3 \num \sym \do_sym
POP_SECTION_IRQENTRY
.endm
-
-#ifdef CONFIG_HAVE_KVM
-apicinterrupt3 POSTED_INTR_VECTOR kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
-apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi
-apicinterrupt3 POSTED_INTR_NESTED_VECTOR kvm_posted_intr_nested_ipi smp_kvm_posted_intr_nested_ipi
-#endif
-
#ifdef CONFIG_SMP
apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt
#endif
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -13,10 +13,3 @@
#ifdef CONFIG_SMP
BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
#endif
-
-#ifdef CONFIG_HAVE_KVM
-BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR)
-BUILD_INTERRUPT(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR)
-BUILD_INTERRUPT(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR)
-#endif
-
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -29,10 +29,6 @@
#include <asm/sections.h>
/* Interrupt handlers registered during init_IRQ */
-extern asmlinkage void kvm_posted_intr_ipi(void);
-extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
-extern asmlinkage void kvm_posted_intr_nested_ipi(void);
-
extern asmlinkage void reschedule_interrupt(void);
#ifdef CONFIG_X86_LOCAL_APIC
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -602,6 +602,12 @@ DECLARE_IDTENTRY_SYSVEC(IRQ_WORK_VECTOR,
# endif
#endif
+#ifdef CONFIG_HAVE_KVM
+DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_VECTOR, sysvec_kvm_posted_intr_ipi);
+DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_WAKEUP_VECTOR, sysvec_kvm_posted_intr_wakeup_ipi);
+DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested_ipi);
+#endif
+
#undef X86_TRAP_OTHER
#endif
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -26,9 +26,6 @@ extern void fixup_irqs(void);
#ifdef CONFIG_HAVE_KVM
extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void));
-extern __visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs);
-extern __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs);
-extern __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs);
#endif
extern void (*x86_platform_ipi_callback)(void);
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -132,9 +132,9 @@ static const __initconst struct idt_data
INTG(LOCAL_TIMER_VECTOR, asm_sysvec_apic_timer_interrupt),
INTG(X86_PLATFORM_IPI_VECTOR, asm_sysvec_x86_platform_ipi),
# ifdef CONFIG_HAVE_KVM
- INTG(POSTED_INTR_VECTOR, kvm_posted_intr_ipi),
- INTG(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi),
- INTG(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi),
+ INTG(POSTED_INTR_VECTOR, asm_sysvec_kvm_posted_intr_ipi),
+ INTG(POSTED_INTR_WAKEUP_VECTOR, asm_sysvec_kvm_posted_intr_wakeup_ipi),
+ INTG(POSTED_INTR_NESTED_VECTOR, asm_sysvec_kvm_posted_intr_nested_ipi),
# endif
# ifdef CONFIG_IRQ_WORK
INTG(IRQ_WORK_VECTOR, asm_sysvec_irq_work),
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -293,40 +293,37 @@ EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wa
/*
* Handler for POSTED_INTERRUPT_VECTOR.
*/
-__visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_ipi)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- entering_ack_irq();
+ ack_APIC_irq();
inc_irq_stat(kvm_posted_intr_ipis);
- exiting_irq();
set_irq_regs(old_regs);
}
/*
* Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
*/
-__visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_wakeup_ipi)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- entering_ack_irq();
+ ack_APIC_irq();
inc_irq_stat(kvm_posted_intr_wakeup_ipis);
kvm_posted_intr_wakeup_handler();
- exiting_irq();
set_irq_regs(old_regs);
}
/*
* Handler for POSTED_INTERRUPT_NESTED_VECTOR.
*/
-__visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_nested_ipi)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- entering_ack_irq();
+ ack_APIC_irq();
inc_irq_stat(kvm_posted_intr_nested_ipis);
- exiting_irq();
set_irq_regs(old_regs);
}
#endif
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 22/31] x86/entry: Convert various hypervisor vectors to IDTENTRY_SYSVEC
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (20 preceding siblings ...)
2020-05-05 13:54 ` [patch V4 part 5 21/31] x86/entry: Convert KVM vectors to IDTENTRY_SYSVEC Thomas Gleixner
@ 2020-05-05 13:54 ` Thomas Gleixner
2020-05-06 16:56 ` Wei Liu
2020-05-05 13:54 ` [patch V4 part 5 23/31] x86/entry: Convert XEN hypercall vector " Thomas Gleixner
` (8 subsequent siblings)
30 siblings, 1 reply; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:54 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon, Wei Liu, Michael Kelley,
Jason Chen CJ, Zhao Yakui
From: Thomas Gleixner <tglx@linutronix.de>
Convert various hypervisor vectors to IDTENTRY_SYSVEC
- Implement the C entry point with DEFINE_IDTENTRY_SYSVEC
- Emit the ASM stub with DECLARE_IDTENTRY_SYSVEC
- Remove the ASM idtentries in 64bit
- Remove the BUILD_INTERRUPT entries in 32bit
- Remove the old prototypes
No functional change.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Wei Liu <wei.liu@kernel.org>
Cc: Michael Kelley <mikelley@microsoft.com>
Cc: Jason Chen CJ <jason.cj.chen@intel.com>
Cc: Zhao Yakui <yakui.zhao@intel.com>
---
arch/x86/entry/entry_32.S | 14 --------------
arch/x86/entry/entry_64.S | 17 -----------------
arch/x86/hyperv/hv_init.c | 9 +++------
arch/x86/include/asm/acrn.h | 11 -----------
arch/x86/include/asm/apic.h | 20 --------------------
arch/x86/include/asm/idtentry.h | 10 ++++++++++
arch/x86/include/asm/mshyperv.h | 13 -------------
arch/x86/kernel/cpu/acrn.c | 9 ++++-----
arch/x86/kernel/cpu/mshyperv.c | 22 ++++++++++------------
9 files changed, 27 insertions(+), 98 deletions(-)
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1342,20 +1342,6 @@ BUILD_INTERRUPT3(xen_hvm_callback_vector
xen_evtchn_do_upcall)
#endif
-
-#if IS_ENABLED(CONFIG_HYPERV)
-
-BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
- hyperv_vector_handler)
-
-BUILD_INTERRUPT3(hyperv_reenlightenment_vector, HYPERV_REENLIGHTENMENT_VECTOR,
- hyperv_reenlightenment_intr)
-
-BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
- hv_stimer0_vector_handler)
-
-#endif /* CONFIG_HYPERV */
-
SYM_CODE_START_LOCAL_NOALIGN(handle_exception)
/* the function address is in %gs's slot on the stack */
SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1078,23 +1078,6 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTO
xen_hvm_callback_vector xen_evtchn_do_upcall
#endif
-
-#if IS_ENABLED(CONFIG_HYPERV)
-apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
- hyperv_callback_vector hyperv_vector_handler
-
-apicinterrupt3 HYPERV_REENLIGHTENMENT_VECTOR \
- hyperv_reenlightenment_vector hyperv_reenlightenment_intr
-
-apicinterrupt3 HYPERV_STIMER0_VECTOR \
- hv_stimer0_callback_vector hv_stimer0_vector_handler
-#endif /* CONFIG_HYPERV */
-
-#if IS_ENABLED(CONFIG_ACRN_GUEST)
-apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
- acrn_hv_callback_vector acrn_hv_vector_handler
-#endif
-
/*
* Save all registers in pt_regs, and switch gs if needed.
* Use slow, but surefire "are we in kernel?" check.
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -15,6 +15,7 @@
#include <asm/hypervisor.h>
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
+#include <asm/idtentry.h>
#include <linux/version.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
@@ -152,15 +153,11 @@ static inline bool hv_reenlightenment_av
ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT;
}
-__visible void __irq_entry hyperv_reenlightenment_intr(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_reenlightenment)
{
- entering_ack_irq();
-
+ ack_APIC_irq();
inc_irq_stat(irq_hv_reenlightenment_count);
-
schedule_delayed_work(&hv_reenlightenment_work, HZ/10);
-
- exiting_irq();
}
void set_hv_tscchange_cb(void (*cb)(void))
--- a/arch/x86/include/asm/acrn.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_ACRN_H
-#define _ASM_X86_ACRN_H
-
-extern void acrn_hv_callback_vector(void);
-#ifdef CONFIG_TRACING
-#define trace_acrn_hv_callback_vector acrn_hv_callback_vector
-#endif
-
-extern void acrn_hv_vector_handler(struct pt_regs *regs);
-#endif /* _ASM_X86_ACRN_H */
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -519,26 +519,6 @@ static inline bool apic_id_is_primary_th
static inline void apic_smt_update(void) { }
#endif
-extern void irq_enter(void);
-extern void irq_exit(void);
-
-static inline void entering_irq(void)
-{
- irq_enter();
- kvm_set_cpu_l1tf_flush_l1d();
-}
-
-static inline void entering_ack_irq(void)
-{
- entering_irq();
- ack_APIC_irq();
-}
-
-static inline void exiting_irq(void)
-{
- irq_exit();
-}
-
extern void ioapic_zap_locks(void);
#endif /* _ASM_X86_APIC_H */
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -608,6 +608,16 @@ DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_WAKE
DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested_ipi);
#endif
+#if IS_ENABLED(CONFIG_HYPERV)
+DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback);
+DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment);
+DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_STIMER0_VECTOR, sysvec_hyperv_stimer0);
+#endif
+
+#if IS_ENABLED(CONFIG_ACRN_GUEST)
+DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback);
+#endif
+
#undef X86_TRAP_OTHER
#endif
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -52,20 +52,8 @@ typedef int (*hyperv_fill_flush_list_fun
vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK);
#define hv_get_raw_timer() rdtsc_ordered()
-void hyperv_callback_vector(void);
-void hyperv_reenlightenment_vector(void);
-#ifdef CONFIG_TRACING
-#define trace_hyperv_callback_vector hyperv_callback_vector
-#endif
void hyperv_vector_handler(struct pt_regs *regs);
-/*
- * Routines for stimer0 Direct Mode handling.
- * On x86/x64, there are no percpu actions to take.
- */
-void hv_stimer0_vector_handler(struct pt_regs *regs);
-void hv_stimer0_callback_vector(void);
-
static inline void hv_enable_stimer0_percpu_irq(int irq) {}
static inline void hv_disable_stimer0_percpu_irq(int irq) {}
@@ -224,7 +212,6 @@ void hyperv_setup_mmu_ops(void);
void *hv_alloc_hyperv_page(void);
void *hv_alloc_hyperv_zeroed_page(void);
void hv_free_hyperv_page(unsigned long addr);
-void hyperv_reenlightenment_intr(struct pt_regs *regs);
void set_hv_tscchange_cb(void (*cb)(void));
void clear_hv_tscchange_cb(void);
void hyperv_stop_tsc_emulation(void);
--- a/arch/x86/kernel/cpu/acrn.c
+++ b/arch/x86/kernel/cpu/acrn.c
@@ -10,10 +10,10 @@
*/
#include <linux/interrupt.h>
-#include <asm/acrn.h>
#include <asm/apic.h>
#include <asm/desc.h>
#include <asm/hypervisor.h>
+#include <asm/idtentry.h>
#include <asm/irq_regs.h>
static uint32_t __init acrn_detect(void)
@@ -24,7 +24,7 @@ static uint32_t __init acrn_detect(void)
static void __init acrn_init_platform(void)
{
/* Setup the IDT for ACRN hypervisor callback */
- alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, acrn_hv_callback_vector);
+ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_acrn_hv_callback);
}
static bool acrn_x2apic_available(void)
@@ -39,7 +39,7 @@ static bool acrn_x2apic_available(void)
static void (*acrn_intr_handler)(void);
-__visible void __irq_entry acrn_hv_vector_handler(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_acrn_hv_callback)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -50,13 +50,12 @@ static void (*acrn_intr_handler)(void);
* will block the interrupt whose vector is lower than
* HYPERVISOR_CALLBACK_VECTOR.
*/
- entering_ack_irq();
+ ack_APIC_irq();
inc_irq_stat(irq_hv_callback_count);
if (acrn_intr_handler)
acrn_intr_handler();
- exiting_irq();
set_irq_regs(old_regs);
}
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -23,6 +23,7 @@
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
#include <asm/desc.h>
+#include <asm/idtentry.h>
#include <asm/irq_regs.h>
#include <asm/i8259.h>
#include <asm/apic.h>
@@ -40,11 +41,10 @@ static void (*hv_stimer0_handler)(void);
static void (*hv_kexec_handler)(void);
static void (*hv_crash_handler)(struct pt_regs *regs);
-__visible void __irq_entry hyperv_vector_handler(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- entering_irq();
inc_irq_stat(irq_hv_callback_count);
if (vmbus_handler)
vmbus_handler();
@@ -52,7 +52,6 @@ static void (*hv_crash_handler)(struct p
if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)
ack_APIC_irq();
- exiting_irq();
set_irq_regs(old_regs);
}
@@ -73,19 +72,16 @@ EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq);
* Routines to do per-architecture handling of stimer0
* interrupts when in Direct Mode
*/
-
-__visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- entering_irq();
inc_irq_stat(hyperv_stimer0_count);
if (hv_stimer0_handler)
hv_stimer0_handler();
add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0);
ack_APIC_irq();
- exiting_irq();
set_irq_regs(old_regs);
}
@@ -331,17 +327,19 @@ static void __init ms_hyperv_init_platfo
x86_platform.apic_post_init = hyperv_init;
hyperv_setup_mmu_ops();
/* Setup the IDT for hypervisor callback */
- alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
+ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback);
/* Setup the IDT for reenlightenment notifications */
- if (ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT)
+ if (ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT) {
alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR,
- hyperv_reenlightenment_vector);
+ asm_sysvec_hyperv_reenlightenment);
+ }
/* Setup the IDT for stimer0 */
- if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE)
+ if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE) {
alloc_intr_gate(HYPERV_STIMER0_VECTOR,
- hv_stimer0_callback_vector);
+ asm_sysvec_hyperv_stimer0);
+ }
# ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch V4 part 5 22/31] x86/entry: Convert various hypervisor vectors to IDTENTRY_SYSVEC
2020-05-05 13:54 ` [patch V4 part 5 22/31] x86/entry: Convert various hypervisor " Thomas Gleixner
@ 2020-05-06 16:56 ` Wei Liu
2020-05-06 17:11 ` Thomas Gleixner
0 siblings, 1 reply; 49+ messages in thread
From: Wei Liu @ 2020-05-06 16:56 UTC (permalink / raw)
To: Thomas Gleixner
Cc: LKML, x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon, Wei Liu, Michael Kelley,
Jason Chen CJ, Zhao Yakui
On Tue, May 05, 2020 at 03:54:03PM +0200, Thomas Gleixner wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
>
> Convert various hypervisor vectors to IDTENTRY_SYSVEC
> - Implement the C entry point with DEFINE_IDTENTRY_SYSVEC
> - Emit the ASM stub with DECLARE_IDTENTRY_SYSVEC
> - Remove the ASM idtentries in 64bit
> - Remove the BUILD_INTERRUPT entries in 32bit
> - Remove the old prototypes
>
> No functional change.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: Wei Liu <wei.liu@kernel.org>
> Cc: Michael Kelley <mikelley@microsoft.com>
> Cc: Jason Chen CJ <jason.cj.chen@intel.com>
> Cc: Zhao Yakui <yakui.zhao@intel.com>
>
> ---
> arch/x86/entry/entry_32.S | 14 --------------
> arch/x86/entry/entry_64.S | 17 -----------------
> arch/x86/hyperv/hv_init.c | 9 +++------
> arch/x86/include/asm/acrn.h | 11 -----------
> arch/x86/include/asm/apic.h | 20 --------------------
> arch/x86/include/asm/idtentry.h | 10 ++++++++++
> arch/x86/include/asm/mshyperv.h | 13 -------------
> arch/x86/kernel/cpu/acrn.c | 9 ++++-----
> arch/x86/kernel/cpu/mshyperv.c | 22 ++++++++++------------
> 9 files changed, 27 insertions(+), 98 deletions(-)
>
> --- a/arch/x86/entry/entry_32.S
> +++ b/arch/x86/entry/entry_32.S
> @@ -1342,20 +1342,6 @@ BUILD_INTERRUPT3(xen_hvm_callback_vector
> xen_evtchn_do_upcall)
> #endif
>
You seem to have missed the Xen entry.
> -
> -#if IS_ENABLED(CONFIG_HYPERV)
> -
> -BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
> - hyperv_vector_handler)
> -
> -BUILD_INTERRUPT3(hyperv_reenlightenment_vector, HYPERV_REENLIGHTENMENT_VECTOR,
> - hyperv_reenlightenment_intr)
> -
> -BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
> - hv_stimer0_vector_handler)
> -
> -#endif /* CONFIG_HYPERV */
> -
> SYM_CODE_START_LOCAL_NOALIGN(handle_exception)
> /* the function address is in %gs's slot on the stack */
> SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -1078,23 +1078,6 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTO
> xen_hvm_callback_vector xen_evtchn_do_upcall
> #endif
>
Ditto.
Wei.
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch V4 part 5 22/31] x86/entry: Convert various hypervisor vectors to IDTENTRY_SYSVEC
2020-05-06 16:56 ` Wei Liu
@ 2020-05-06 17:11 ` Thomas Gleixner
0 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-06 17:11 UTC (permalink / raw)
To: Wei Liu
Cc: LKML, x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon, Wei Liu, Michael Kelley,
Jason Chen CJ, Zhao Yakui
Wei Liu <wei.liu@kernel.org> writes:
>> --- a/arch/x86/entry/entry_32.S
>> +++ b/arch/x86/entry/entry_32.S
>> @@ -1342,20 +1342,6 @@ BUILD_INTERRUPT3(xen_hvm_callback_vector
>> xen_evtchn_do_upcall)
>> #endif
>>
>
> You seem to have missed the Xen entry.
See the next patch ...
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 23/31] x86/entry: Convert XEN hypercall vector to IDTENTRY_SYSVEC
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (21 preceding siblings ...)
2020-05-05 13:54 ` [patch V4 part 5 22/31] x86/entry: Convert various hypervisor " Thomas Gleixner
@ 2020-05-05 13:54 ` Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 24/31] x86/entry: Convert reschedule interrupt to IDTENTRY_RAW Thomas Gleixner
` (7 subsequent siblings)
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:54 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
From: Thomas Gleixner <tglx@linutronix.de>
Convert the last oldstyle defined vector to IDTENTRY_SYSVEC
- Implement the C entry point with DEFINE_IDTENTRY_SYSVEC
- Emit the ASM stub with DECLARE_IDTENTRY_SYSVEC
- Remove the ASM idtentries in 64bit
- Remove the BUILD_INTERRUPT entries in 32bit
- Remove the old prototypes
Fixup the related XEN code by providing the primary C entry point in x86 to
avoid cluttering the generic code with X86'isms.
No functional change.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Juergen Gross <jgross@suse.com>
---
arch/x86/entry/entry_32.S | 5 -----
arch/x86/entry/entry_64.S | 5 -----
arch/x86/include/asm/idtentry.h | 4 ++++
arch/x86/xen/enlighten_hvm.c | 12 ++++++++++++
drivers/xen/events/events_base.c | 6 ++----
include/xen/events.h | 7 -------
6 files changed, 18 insertions(+), 21 deletions(-)
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1337,11 +1337,6 @@ SYM_FUNC_START(xen_failsafe_callback)
SYM_FUNC_END(xen_failsafe_callback)
#endif /* CONFIG_XEN_PV */
-#ifdef CONFIG_XEN_PVHVM
-BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
- xen_evtchn_do_upcall)
-#endif
-
SYM_CODE_START_LOCAL_NOALIGN(handle_exception)
/* the function address is in %gs's slot on the stack */
SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1073,11 +1073,6 @@ SYM_CODE_START(xen_failsafe_callback)
SYM_CODE_END(xen_failsafe_callback)
#endif /* CONFIG_XEN_PV */
-#ifdef CONFIG_XEN_PVHVM
-apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
- xen_hvm_callback_vector xen_evtchn_do_upcall
-#endif
-
/*
* Save all registers in pt_regs, and switch gs if needed.
* Use slow, but surefire "are we in kernel?" check.
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -618,6 +618,10 @@ DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_STIME
DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback);
#endif
+#ifdef CONFIG_XEN_PVHVM
+DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_xen_hvm_callback);
+#endif
+
#undef X86_TRAP_OTHER
#endif
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -13,6 +13,7 @@
#include <asm/smp.h>
#include <asm/reboot.h>
#include <asm/setup.h>
+#include <asm/idtentry.h>
#include <asm/hypervisor.h>
#include <asm/e820/api.h>
#include <asm/early_ioremap.h>
@@ -118,6 +119,17 @@ static void __init init_hvm_pv_info(void
this_cpu_write(xen_vcpu_id, smp_processor_id());
}
+DEFINE_IDTENTRY_SYSVEC(sysvec_xen_hvm_callback)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ inc_irq_stat(irq_hv_callback_count);
+
+ xen_hvm_evtchn_do_upcall();
+
+ set_irq_regs(old_regs);
+}
+
#ifdef CONFIG_KEXEC_CORE
static void xen_hvm_shutdown(void)
{
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -37,6 +37,7 @@
#ifdef CONFIG_X86
#include <asm/desc.h>
#include <asm/ptrace.h>
+#include <asm/idtentry.h>
#include <asm/irq.h>
#include <asm/io_apic.h>
#include <asm/i8259.h>
@@ -1236,9 +1237,6 @@ void xen_evtchn_do_upcall(struct pt_regs
struct pt_regs *old_regs = set_irq_regs(regs);
irq_enter();
-#ifdef CONFIG_X86
- inc_irq_stat(irq_hv_callback_count);
-#endif
__xen_evtchn_do_upcall();
@@ -1654,7 +1652,7 @@ void xen_callback_vector(void)
}
pr_info_once("Xen HVM callback vector for event delivery is enabled\n");
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR,
- xen_hvm_callback_vector);
+ asm_sysvec_xen_hvm_callback);
}
}
#else
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -90,13 +90,6 @@ unsigned int irq_from_evtchn(evtchn_port
int irq_from_virq(unsigned int cpu, unsigned int virq);
evtchn_port_t evtchn_from_irq(unsigned irq);
-#ifdef CONFIG_XEN_PVHVM
-/* Xen HVM evtchn vector callback */
-void xen_hvm_callback_vector(void);
-#ifdef CONFIG_TRACING
-#define trace_xen_hvm_callback_vector xen_hvm_callback_vector
-#endif
-#endif
int xen_set_callback_via(uint64_t via);
void xen_evtchn_do_upcall(struct pt_regs *regs);
void xen_hvm_evtchn_do_upcall(void);
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 24/31] x86/entry: Convert reschedule interrupt to IDTENTRY_RAW
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (22 preceding siblings ...)
2020-05-05 13:54 ` [patch V4 part 5 23/31] x86/entry: Convert XEN hypercall vector " Thomas Gleixner
@ 2020-05-05 13:54 ` Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 25/31] x86/entry: Remove the apic/BUILD interrupt leftovers Thomas Gleixner
` (6 subsequent siblings)
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:54 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
The scheduler IPI does not need the full interrupt entry handling logic
when the entry is from kernel mode.
Even if tracing is enabled the only requirement is that RCU is watching and
preempt_count has the hardirq bit on.
The NOHZ tick state does not have to be adjusted. If the tick is not
running then the CPU is in idle and the idle exit will restore the
tick. Softinterrupts are not raised here, so handling them on return is not
required either.
User mode entry must go through the regular entry path as it will invoke
the scheduler on return so context tracking needs to be in the correct
state.
Use IDTENTRY_RAW and the RCU conditional variants of idtentry_enter/exit()
to guarantee that RCU is watching even if the IPI hits a RCU idle section.
Remove the tracepoint static key conditional which is incomplete
vs. tracing anyway because e.g. ack_APIC_irq() calls out into
instrumentable code.
Avoid the overhead of irq time accounting and introduce variants of
__irq_enter/exit() so instrumentation observes the correct preempt count
state.
Spare the switch to the interrupt stack as the IPI is going to use only
a minimal amount of stack space.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/entry_64.S | 4 ---
arch/x86/include/asm/entry_arch.h | 3 --
arch/x86/include/asm/hw_irq.h | 3 --
arch/x86/include/asm/idtentry.h | 1
arch/x86/include/asm/trace/common.h | 4 ---
arch/x86/include/asm/trace/irq_vectors.h | 17 ------------
arch/x86/kernel/idt.c | 2 -
arch/x86/kernel/smp.c | 41 +++++++++++++++++++++----------
arch/x86/kernel/tracepoint.c | 17 ------------
include/linux/hardirq.h | 18 +++++++++++++
10 files changed, 49 insertions(+), 61 deletions(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -957,10 +957,6 @@ apicinterrupt3 \num \sym \do_sym
POP_SECTION_IRQENTRY
.endm
-#ifdef CONFIG_SMP
-apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt
-#endif
-
/*
* Reload gs selector with exception handling
* edi: new selector
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -10,6 +10,3 @@
* is no hardware IRQ pin equivalent for them, they are triggered
* through the ICC by us (IPIs)
*/
-#ifdef CONFIG_SMP
-BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
-#endif
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -28,9 +28,6 @@
#include <asm/irq.h>
#include <asm/sections.h>
-/* Interrupt handlers registered during init_IRQ */
-extern asmlinkage void reschedule_interrupt(void);
-
#ifdef CONFIG_X86_LOCAL_APIC
struct irq_data;
struct pci_dev;
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -574,6 +574,7 @@ DECLARE_IDTENTRY_SYSVEC(X86_PLATFORM_IPI
#endif
#ifdef CONFIG_SMP
+DECLARE_IDTENTRY(RESCHEDULE_VECTOR, sysvec_reschedule_ipi);
DECLARE_IDTENTRY_SYSVEC(IRQ_MOVE_CLEANUP_VECTOR, sysvec_irq_move_cleanup);
DECLARE_IDTENTRY_SYSVEC(REBOOT_VECTOR, sysvec_reboot);
DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_SINGLE_VECTOR, sysvec_call_function_single);
--- a/arch/x86/include/asm/trace/common.h
+++ b/arch/x86/include/asm/trace/common.h
@@ -5,12 +5,8 @@
DECLARE_STATIC_KEY_FALSE(trace_pagefault_key);
#define trace_pagefault_enabled() \
static_branch_unlikely(&trace_pagefault_key)
-DECLARE_STATIC_KEY_FALSE(trace_resched_ipi_key);
-#define trace_resched_ipi_enabled() \
- static_branch_unlikely(&trace_resched_ipi_key)
#else
static inline bool trace_pagefault_enabled(void) { return false; }
-static inline bool trace_resched_ipi_enabled(void) { return false; }
#endif
#endif
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -10,9 +10,6 @@
#ifdef CONFIG_X86_LOCAL_APIC
-extern int trace_resched_ipi_reg(void);
-extern void trace_resched_ipi_unreg(void);
-
DECLARE_EVENT_CLASS(x86_irq_vector,
TP_PROTO(int vector),
@@ -37,18 +34,6 @@ DEFINE_EVENT_FN(x86_irq_vector, name##_e
TP_PROTO(int vector), \
TP_ARGS(vector), NULL, NULL);
-#define DEFINE_RESCHED_IPI_EVENT(name) \
-DEFINE_EVENT_FN(x86_irq_vector, name##_entry, \
- TP_PROTO(int vector), \
- TP_ARGS(vector), \
- trace_resched_ipi_reg, \
- trace_resched_ipi_unreg); \
-DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \
- TP_PROTO(int vector), \
- TP_ARGS(vector), \
- trace_resched_ipi_reg, \
- trace_resched_ipi_unreg);
-
/*
* local_timer - called when entering/exiting a local timer interrupt
* vector handler
@@ -99,7 +84,7 @@ TRACE_EVENT_PERF_PERM(irq_work_exit, is_
/*
* reschedule - called when entering/exiting a reschedule vector handler
*/
-DEFINE_RESCHED_IPI_EVENT(reschedule);
+DEFINE_IRQ_VECTOR_EVENT(reschedule);
/*
* call_function - called when entering/exiting a call function interrupt
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -109,7 +109,7 @@ static const __initconst struct idt_data
*/
static const __initconst struct idt_data apic_idts[] = {
#ifdef CONFIG_SMP
- INTG(RESCHEDULE_VECTOR, reschedule_interrupt),
+ INTG(RESCHEDULE_VECTOR, asm_sysvec_reschedule_ipi),
INTG(CALL_FUNCTION_VECTOR, asm_sysvec_call_function),
INTG(CALL_FUNCTION_SINGLE_VECTOR, asm_sysvec_call_function_single),
INTG(IRQ_MOVE_CLEANUP_VECTOR, asm_sysvec_irq_move_cleanup),
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -222,24 +222,39 @@ static void native_stop_other_cpus(int w
* Reschedule call back. KVM uses this interrupt to force a cpu out of
* guest mode
*/
-__visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs)
+DEFINE_IDTENTRY_RAW(sysvec_reschedule_ipi)
{
+ /*
+ * User mode entry goes through the regular entry_from_user_mode()
+ * path in both cases otherwise scheduling on return could be
+ * invoked with the wrong NOHZ_FULL state.
+ *
+ * Kernel entry does not require the full sysvec treatment just for
+ * folding the preempt count.
+ *
+ * Even if tracing is enabled the only requirement is that RCU is
+ * watching and preempt_count has the hardirq bit on.
+ *
+ * The NOHZ tick state does not have to be adjusted. If the tick is
+ * not running then the CPU is in idle and the idle exit will
+ * restore the tick. Softinterrupts are not raised here, so handling
+ * them on return is not required either.
+ */
+ bool rcu_exit = idtentry_enter_cond_rcu(regs);
+
+ instr_begin();
+ __irq_enter_raw();
+ trace_reschedule_entry(RESCHEDULE_VECTOR);
ack_APIC_irq();
inc_irq_stat(irq_resched_count);
- if (trace_resched_ipi_enabled()) {
- /*
- * scheduler_ipi() might call irq_enter() as well, but
- * nested calls are fine.
- */
- irq_enter();
- trace_reschedule_entry(RESCHEDULE_VECTOR);
- scheduler_ipi();
- trace_reschedule_exit(RESCHEDULE_VECTOR);
- irq_exit();
- return;
- }
scheduler_ipi();
+
+ trace_reschedule_entry(RESCHEDULE_VECTOR);
+ __irq_exit_raw();
+ instr_end();
+
+ idtentry_exit_cond_rcu(regs, rcu_exit);
}
DEFINE_IDTENTRY_SYSVEC(sysvec_call_function)
--- a/arch/x86/kernel/tracepoint.c
+++ b/arch/x86/kernel/tracepoint.c
@@ -25,20 +25,3 @@ void trace_pagefault_unreg(void)
{
static_branch_dec(&trace_pagefault_key);
}
-
-#ifdef CONFIG_SMP
-
-DEFINE_STATIC_KEY_FALSE(trace_resched_ipi_key);
-
-int trace_resched_ipi_reg(void)
-{
- static_branch_inc(&trace_resched_ipi_key);
- return 0;
-}
-
-void trace_resched_ipi_unreg(void)
-{
- static_branch_dec(&trace_resched_ipi_key);
-}
-
-#endif
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -41,6 +41,15 @@ extern void rcu_nmi_exit(void);
} while (0)
/*
+ * Like __irq_enter() without time accounting
+ */
+#define __irq_enter_raw() \
+ do { \
+ preempt_count_add(HARDIRQ_OFFSET); \
+ lockdep_hardirq_enter(); \
+ } while (0)
+
+/*
* Enter irq context (on NO_HZ, update jiffies):
*/
void irq_enter(void);
@@ -59,6 +68,15 @@ void irq_enter_rcu(void);
preempt_count_sub(HARDIRQ_OFFSET); \
} while (0)
+/*
+ * Like __irq_exit() without time accounting
+ */
+#define __irq_exit_raw() \
+ do { \
+ lockdep_hardirq_exit(); \
+ preempt_count_sub(HARDIRQ_OFFSET); \
+ } while (0)
+
/*
* Exit irq context and process softirqs if needed:
*/
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 25/31] x86/entry: Remove the apic/BUILD interrupt leftovers
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (23 preceding siblings ...)
2020-05-05 13:54 ` [patch V4 part 5 24/31] x86/entry: Convert reschedule interrupt to IDTENTRY_RAW Thomas Gleixner
@ 2020-05-05 13:54 ` Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 26/31] x86/entry/64: Remove IRQ stack switching ASM Thomas Gleixner
` (5 subsequent siblings)
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:54 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
Remove all the code which was there to emit the system vector stubs. All
users are gone.
Move the GET_CR2_INTO macro muck, which is now unused in the entry code, to
head_64.S where the last user is. Fixup the eye hurting comment there while at it.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/calling.h | 20 -----
arch/x86/entry/entry_32.S | 18 ----
arch/x86/entry/entry_64.S | 144 --------------------------------------
arch/x86/include/asm/entry_arch.h | 12 ---
arch/x86/kernel/head_64.S | 7 +
5 files changed, 4 insertions(+), 197 deletions(-)
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -349,23 +349,3 @@ For 32-bit we have the following convent
call stackleak_erase
#endif
.endm
-
-/*
- * This does 'call enter_from_user_mode' unless we can avoid it based on
- * kernel config or using the static jump infrastructure.
- */
-.macro CALL_enter_from_user_mode
-#ifdef CONFIG_CONTEXT_TRACKING
-#ifdef CONFIG_JUMP_LABEL
- STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_key, def=0
-#endif
- call enter_from_user_mode
-.Lafter_call_\@:
-#endif
-.endm
-
-#ifdef CONFIG_PARAVIRT_XXL
-#define GET_CR2_INTO(reg) GET_CR2_INTO_AX ; _ASM_MOV %_ASM_AX, reg
-#else
-#define GET_CR2_INTO(reg) _ASM_MOV %cr2, reg
-#endif
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1233,24 +1233,6 @@ SYM_FUNC_END(entry_INT80_32)
#endif
.endm
-#define BUILD_INTERRUPT3(name, nr, fn) \
-SYM_FUNC_START(name) \
- ASM_CLAC; \
- pushl $~(nr); \
- SAVE_ALL switch_stacks=1; \
- ENCODE_FRAME_POINTER; \
- TRACE_IRQS_OFF \
- movl %esp, %eax; \
- call fn; \
- jmp ret_from_intr; \
-SYM_FUNC_END(name)
-
-#define BUILD_INTERRUPT(name, nr) \
- BUILD_INTERRUPT3(name, nr, smp_##name); \
-
-/* The include is where all of the SMP etc. interrupts come from */
-#include <asm/entry_arch.h>
-
#ifdef CONFIG_PARAVIRT
SYM_CODE_START(native_iret)
iret
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -659,108 +659,7 @@ SYM_CODE_END(\asmsym)
*/
#include <asm/idtentry.h>
-/*
- * Interrupt entry helper function.
- *
- * Entry runs with interrupts off. Stack layout at entry:
- * +----------------------------------------------------+
- * | regs->ss |
- * | regs->rsp |
- * | regs->eflags |
- * | regs->cs |
- * | regs->ip |
- * +----------------------------------------------------+
- * | regs->orig_ax = ~(interrupt number) |
- * +----------------------------------------------------+
- * | return address |
- * +----------------------------------------------------+
- */
-SYM_CODE_START(interrupt_entry)
- UNWIND_HINT_FUNC
- ASM_CLAC
- cld
-
- testb $3, CS-ORIG_RAX+8(%rsp)
- jz 1f
- SWAPGS
- FENCE_SWAPGS_USER_ENTRY
- /*
- * Switch to the thread stack. The IRET frame and orig_ax are
- * on the stack, as well as the return address. RDI..R12 are
- * not (yet) on the stack and space has not (yet) been
- * allocated for them.
- */
- pushq %rdi
-
- /* Need to switch before accessing the thread stack. */
- SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
- movq %rsp, %rdi
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-
- /*
- * We have RDI, return address, and orig_ax on the stack on
- * top of the IRET frame. That means offset=24
- */
- UNWIND_HINT_IRET_REGS base=%rdi offset=24
-
- pushq 7*8(%rdi) /* regs->ss */
- pushq 6*8(%rdi) /* regs->rsp */
- pushq 5*8(%rdi) /* regs->eflags */
- pushq 4*8(%rdi) /* regs->cs */
- pushq 3*8(%rdi) /* regs->ip */
- pushq 2*8(%rdi) /* regs->orig_ax */
- pushq 8(%rdi) /* return address */
- UNWIND_HINT_FUNC
-
- movq (%rdi), %rdi
- jmp 2f
-1:
- FENCE_SWAPGS_KERNEL_ENTRY
-2:
- PUSH_AND_CLEAR_REGS save_ret=1
- ENCODE_FRAME_POINTER 8
-
- testb $3, CS+8(%rsp)
- jz 1f
-
- /*
- * IRQ from user mode.
- *
- * We need to tell lockdep that IRQs are off. We can't do this until
- * we fix gsbase, and we should do it before enter_from_user_mode
- * (which can take locks). Since TRACE_IRQS_OFF is idempotent,
- * the simplest way to handle it is to just call it twice if
- * we enter from user mode. There's no reason to optimize this since
- * TRACE_IRQS_OFF is a no-op if lockdep is off.
- */
- TRACE_IRQS_OFF
-
- CALL_enter_from_user_mode
-
-1:
- ENTER_IRQ_STACK old_rsp=%rdi save_ret=1
- /* We entered an interrupt context - irqs are off: */
- TRACE_IRQS_OFF
-
- ret
-SYM_CODE_END(interrupt_entry)
-_ASM_NOKPROBE(interrupt_entry)
-
SYM_CODE_START_LOCAL(common_interrupt_return)
-ret_from_intr:
- DISABLE_INTERRUPTS(CLBR_ANY)
- TRACE_IRQS_OFF
-
- LEAVE_IRQ_STACK
-
- testb $3, CS(%rsp)
- jz retint_kernel
-
- /* Interrupt came from user space */
-.Lretint_user:
- mov %rsp,%rdi
- call prepare_exit_to_usermode
-
SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
#ifdef CONFIG_DEBUG_ENTRY
/* Assert that pt_regs indicates user mode. */
@@ -802,23 +701,6 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_
INTERRUPT_RETURN
-/* Returning to kernel space */
-retint_kernel:
-#ifdef CONFIG_PREEMPTION
- /* Interrupts are off */
- /* Check if we need preemption */
- btl $9, EFLAGS(%rsp) /* were interrupts off? */
- jnc 1f
- cmpl $0, PER_CPU_VAR(__preempt_count)
- jnz 1f
- call preempt_schedule_irq
-1:
-#endif
- /*
- * The iretq could re-enable interrupts:
- */
- TRACE_IRQS_IRETQ
-
SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL)
#ifdef CONFIG_DEBUG_ENTRY
/* Assert that pt_regs indicates kernel mode. */
@@ -932,32 +814,6 @@ SYM_CODE_END(common_interrupt_return)
_ASM_NOKPROBE(common_interrupt_return)
/*
- * APIC interrupts.
- */
-.macro apicinterrupt3 num sym do_sym
-SYM_CODE_START(\sym)
- UNWIND_HINT_IRET_REGS
- pushq $~(\num)
-.Lcommon_\sym:
- call interrupt_entry
- UNWIND_HINT_REGS indirect=1
- call \do_sym /* rdi points to pt_regs */
- jmp ret_from_intr
-SYM_CODE_END(\sym)
-_ASM_NOKPROBE(\sym)
-.endm
-
-/* Make sure APIC interrupt handlers end up in the irqentry section: */
-#define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
-#define POP_SECTION_IRQENTRY .popsection
-
-.macro apicinterrupt num sym do_sym
-PUSH_SECTION_IRQENTRY
-apicinterrupt3 \num \sym \do_sym
-POP_SECTION_IRQENTRY
-.endm
-
-/*
* Reload gs selector with exception handling
* edi: new selector
*/
--- a/arch/x86/include/asm/entry_arch.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This file is designed to contain the BUILD_INTERRUPT specifications for
- * all of the extra named interrupt vectors used by the architecture.
- * Usually this is the Inter Process Interrupts (IPIs)
- */
-
-/*
- * The following vectors are part of the Linux architecture, there
- * is no hardware IRQ pin equivalent for them, they are triggered
- * through the ICC by us (IPIs)
- */
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -29,15 +29,16 @@
#ifdef CONFIG_PARAVIRT_XXL
#include <asm/asm-offsets.h>
#include <asm/paravirt.h>
+#define GET_CR2_INTO(reg) GET_CR2_INTO_AX ; _ASM_MOV %_ASM_AX, reg
#else
#define INTERRUPT_RETURN iretq
+#define GET_CR2_INTO(reg) _ASM_MOV %cr2, reg
#endif
-/* we are not able to switch in one step to the final KERNEL ADDRESS SPACE
+/*
+ * We are not able to switch in one step to the final KERNEL ADDRESS SPACE
* because we need identity-mapped pages.
- *
*/
-
#define l4_index(x) (((x) >> 39) & 511)
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 26/31] x86/entry/64: Remove IRQ stack switching ASM
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (24 preceding siblings ...)
2020-05-05 13:54 ` [patch V4 part 5 25/31] x86/entry: Remove the apic/BUILD interrupt leftovers Thomas Gleixner
@ 2020-05-05 13:54 ` Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 27/31] x86/entry: Make enter_from_user_mode() static Thomas Gleixner
` (4 subsequent siblings)
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:54 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/entry_64.S | 96 ----------------------------------------------
1 file changed, 96 deletions(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -371,102 +371,6 @@ SYM_CODE_END(ret_from_fork)
#endif
.endm
-/*
- * Enters the IRQ stack if we're not already using it. NMI-safe. Clobbers
- * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
- * Requires kernel GSBASE.
- *
- * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
- */
-.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0
- DEBUG_ENTRY_ASSERT_IRQS_OFF
-
- .if \save_ret
- /*
- * If save_ret is set, the original stack contains one additional
- * entry -- the return address. Therefore, move the address one
- * entry below %rsp to \old_rsp.
- */
- leaq 8(%rsp), \old_rsp
- .else
- movq %rsp, \old_rsp
- .endif
-
- .if \regs
- UNWIND_HINT_REGS base=\old_rsp
- .endif
-
- incl PER_CPU_VAR(irq_count)
- jnz .Lirq_stack_push_old_rsp_\@
-
- /*
- * Right now, if we just incremented irq_count to zero, we've
- * claimed the IRQ stack but we haven't switched to it yet.
- *
- * If anything is added that can interrupt us here without using IST,
- * it must be *extremely* careful to limit its stack usage. This
- * could include kprobes and a hypothetical future IST-less #DB
- * handler.
- *
- * The OOPS unwinder relies on the word at the top of the IRQ
- * stack linking back to the previous RSP for the entire time we're
- * on the IRQ stack. For this to work reliably, we need to write
- * it before we actually move ourselves to the IRQ stack.
- */
-
- movq \old_rsp, PER_CPU_VAR(irq_stack_backing_store + IRQ_STACK_SIZE - 8)
- movq PER_CPU_VAR(hardirq_stack_ptr), %rsp
-
-#ifdef CONFIG_DEBUG_ENTRY
- /*
- * If the first movq above becomes wrong due to IRQ stack layout
- * changes, the only way we'll notice is if we try to unwind right
- * here. Assert that we set up the stack right to catch this type
- * of bug quickly.
- */
- cmpq -8(%rsp), \old_rsp
- je .Lirq_stack_okay\@
- ud2
- .Lirq_stack_okay\@:
-#endif
-
-.Lirq_stack_push_old_rsp_\@:
- pushq \old_rsp
-
- .if \regs
- UNWIND_HINT_REGS indirect=1
- .endif
-
- .if \save_ret
- /*
- * Push the return address to the stack. This return address can
- * be found at the "real" original RSP, which was offset by 8 at
- * the beginning of this macro.
- */
- pushq -8(\old_rsp)
- .endif
-.endm
-
-/*
- * Undoes ENTER_IRQ_STACK.
- */
-.macro LEAVE_IRQ_STACK regs=1
- DEBUG_ENTRY_ASSERT_IRQS_OFF
- /* We need to be off the IRQ stack before decrementing irq_count. */
- popq %rsp
-
- .if \regs
- UNWIND_HINT_REGS
- .endif
-
- /*
- * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming
- * the irq stack but we're not on it.
- */
-
- decl PER_CPU_VAR(irq_count)
-.endm
-
/**
* idtentry_body - Macro to emit code calling the C function
* @cfunc: C function to be called
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 27/31] x86/entry: Make enter_from_user_mode() static
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (25 preceding siblings ...)
2020-05-05 13:54 ` [patch V4 part 5 26/31] x86/entry/64: Remove IRQ stack switching ASM Thomas Gleixner
@ 2020-05-05 13:54 ` Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 28/31] x86/entry/32: Remove redundant irq disable code Thomas Gleixner
` (3 subsequent siblings)
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:54 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
The ASM users are gone and all remaining callers are local to
arch/x86/entry/common.c, so make the function static.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/common.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -54,7 +54,7 @@
* 2) Invoke context tracking if enabled to reactivate RCU
* 3) Trace interrupts off state
*/
-__visible noinstr void enter_from_user_mode(void)
+static noinstr void enter_from_user_mode(void)
{
enum ctx_state state = ct_state();
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 28/31] x86/entry/32: Remove redundant irq disable code
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (26 preceding siblings ...)
2020-05-05 13:54 ` [patch V4 part 5 27/31] x86/entry: Make enter_from_user_mode() static Thomas Gleixner
@ 2020-05-05 13:54 ` Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 29/31] x86/entry/64: Remove TRACE_IRQS_*_DEBUG Thomas Gleixner
` (2 subsequent siblings)
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:54 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
From: Thomas Gleixner <tglx@linutronix.de>
All exceptions/interrupts return with interrupts disabled now. No point in
disabling them again in ASM.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/entry_32.S | 76 ----------------------------------------------
1 file changed, 76 deletions(-)
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -51,34 +51,6 @@
.section .entry.text, "ax"
-/*
- * We use macros for low-level operations which need to be overridden
- * for paravirtualization. The following will never clobber any registers:
- * INTERRUPT_RETURN (aka. "iret")
- * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
- * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
- *
- * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
- * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
- * Allowing a register to be clobbered can shrink the paravirt replacement
- * enough to patch inline, increasing performance.
- */
-
-#ifdef CONFIG_PREEMPTION
-# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
-#else
-# define preempt_stop(clobbers)
-#endif
-
-.macro TRACE_IRQS_IRET
-#ifdef CONFIG_TRACE_IRQFLAGS
- testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off?
- jz 1f
- TRACE_IRQS_ON
-1:
-#endif
-.endm
-
#define PTI_SWITCH_MASK (1 << PAGE_SHIFT)
/*
@@ -881,38 +853,6 @@ SYM_CODE_START(ret_from_fork)
SYM_CODE_END(ret_from_fork)
.popsection
-/*
- * Return to user mode is not as complex as all this looks,
- * but we want the default path for a system call return to
- * go as quickly as possible which is why some of this is
- * less clear than it otherwise should be.
- */
-
- # userspace resumption stub bypassing syscall exit tracing
-SYM_CODE_START_LOCAL(ret_from_exception)
- preempt_stop(CLBR_ANY)
-ret_from_intr:
-#ifdef CONFIG_VM86
- movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
- movb PT_CS(%esp), %al
- andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
-#else
- /*
- * We can be coming here from child spawned by kernel_thread().
- */
- movl PT_CS(%esp), %eax
- andl $SEGMENT_RPL_MASK, %eax
-#endif
- cmpl $USER_RPL, %eax
- jb restore_all_kernel # not returning to v8086 or userspace
-
- DISABLE_INTERRUPTS(CLBR_ANY)
- TRACE_IRQS_OFF
- movl %esp, %eax
- call prepare_exit_to_usermode
- jmp restore_all_switch_stack
-SYM_CODE_END(ret_from_exception)
-
SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
/*
* All code from here through __end_SYSENTER_singlestep_region is subject
@@ -1147,22 +1087,6 @@ SYM_FUNC_START(entry_INT80_32)
*/
INTERRUPT_RETURN
-restore_all_kernel:
-#ifdef CONFIG_PREEMPTION
- DISABLE_INTERRUPTS(CLBR_ANY)
- cmpl $0, PER_CPU_VAR(__preempt_count)
- jnz .Lno_preempt
- testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
- jz .Lno_preempt
- call preempt_schedule_irq
-.Lno_preempt:
-#endif
- TRACE_IRQS_IRET
- PARANOID_EXIT_TO_KERNEL_MODE
- BUG_IF_WRONG_CR3
- RESTORE_REGS 4
- jmp .Lirq_return
-
.section .fixup, "ax"
SYM_CODE_START(asm_exc_iret_error)
pushl $0 # no error code
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 29/31] x86/entry/64: Remove TRACE_IRQS_*_DEBUG
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (27 preceding siblings ...)
2020-05-05 13:54 ` [patch V4 part 5 28/31] x86/entry/32: Remove redundant irq disable code Thomas Gleixner
@ 2020-05-05 13:54 ` Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 30/31] x86/entry: Move paranoid irq tracing out of ASM code Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 31/31] x86/entry: Remove the TRACE_IRQS cruft Thomas Gleixner
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:54 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon, Peter Zijlstra (Intel)
From: Peter Zijlstra <peterz@infradead.org>
Since INT3/#BP no longer runs on an IST, the TRACE_IRQS_*_DEBUG workaround
is no longer required.
Tested by running lockdep+ftrace as described in the initial commit:
5963e317b1e9 ("ftrace/x86: Do not change stacks in DEBUG when calling lockdep")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
arch/x86/entry/entry_64.S | 48 ++--------------------------------------------
1 file changed, 3 insertions(+), 45 deletions(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -68,44 +68,6 @@ SYM_CODE_END(native_usergs_sysret64)
.endm
/*
- * When dynamic function tracer is enabled it will add a breakpoint
- * to all locations that it is about to modify, sync CPUs, update
- * all the code, sync CPUs, then remove the breakpoints. In this time
- * if lockdep is enabled, it might jump back into the debug handler
- * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF).
- *
- * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to
- * make sure the stack pointer does not get reset back to the top
- * of the debug stack, and instead just reuses the current stack.
- */
-#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
-
-.macro TRACE_IRQS_OFF_DEBUG
- call debug_stack_set_zero
- TRACE_IRQS_OFF
- call debug_stack_reset
-.endm
-
-.macro TRACE_IRQS_ON_DEBUG
- call debug_stack_set_zero
- TRACE_IRQS_ON
- call debug_stack_reset
-.endm
-
-.macro TRACE_IRQS_IRETQ_DEBUG
- btl $9, EFLAGS(%rsp) /* interrupts off? */
- jnc 1f
- TRACE_IRQS_ON_DEBUG
-1:
-.endm
-
-#else
-# define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF
-# define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON
-# define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ
-#endif
-
-/*
* 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
*
* This is the only entry point used for 64-bit system calls. The
@@ -501,11 +463,7 @@ SYM_CODE_START(\asmsym)
UNWIND_HINT_REGS
- .if \vector == X86_TRAP_DB
- TRACE_IRQS_OFF_DEBUG
- .else
- TRACE_IRQS_OFF
- .endif
+ TRACE_IRQS_OFF
movq %rsp, %rdi /* pt_regs pointer */
@@ -885,7 +843,7 @@ SYM_CODE_END(paranoid_entry)
SYM_CODE_START_LOCAL(paranoid_exit)
UNWIND_HINT_REGS
DISABLE_INTERRUPTS(CLBR_ANY)
- TRACE_IRQS_OFF_DEBUG
+ TRACE_IRQS_OFF
testl %ebx, %ebx /* swapgs needed? */
jnz .Lparanoid_exit_no_swapgs
TRACE_IRQS_IRETQ
@@ -894,7 +852,7 @@ SYM_CODE_START_LOCAL(paranoid_exit)
SWAPGS_UNSAFE_STACK
jmp restore_regs_and_return_to_kernel
.Lparanoid_exit_no_swapgs:
- TRACE_IRQS_IRETQ_DEBUG
+ TRACE_IRQS_IRETQ
/* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
jmp restore_regs_and_return_to_kernel
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 30/31] x86/entry: Move paranoid irq tracing out of ASM code
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (28 preceding siblings ...)
2020-05-05 13:54 ` [patch V4 part 5 29/31] x86/entry/64: Remove TRACE_IRQS_*_DEBUG Thomas Gleixner
@ 2020-05-05 13:54 ` Thomas Gleixner
2020-05-05 13:54 ` [patch V4 part 5 31/31] x86/entry: Remove the TRACE_IRQS cruft Thomas Gleixner
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:54 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/entry_64.S | 13 -------------
arch/x86/kernel/nmi.c | 11 +++++++----
2 files changed, 7 insertions(+), 17 deletions(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -16,7 +16,6 @@
*
* Some macro usage:
* - SYM_FUNC_START/END:Define functions in the symbol table.
- * - TRACE_IRQ_*: Trace hardirq state for lock debugging.
* - idtentry: Define exception entry points.
*/
#include <linux/linkage.h>
@@ -107,11 +106,6 @@ SYM_CODE_END(native_usergs_sysret64)
SYM_CODE_START(entry_SYSCALL_64)
UNWIND_HINT_EMPTY
- /*
- * Interrupts are off on entry.
- * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
- * it is too small to ever cause noticeable irq latency.
- */
swapgs
/* tss.sp2 is scratch space. */
@@ -463,8 +457,6 @@ SYM_CODE_START(\asmsym)
UNWIND_HINT_REGS
- TRACE_IRQS_OFF
-
movq %rsp, %rdi /* pt_regs pointer */
.if \vector == X86_TRAP_DB
@@ -842,17 +834,13 @@ SYM_CODE_END(paranoid_entry)
*/
SYM_CODE_START_LOCAL(paranoid_exit)
UNWIND_HINT_REGS
- DISABLE_INTERRUPTS(CLBR_ANY)
- TRACE_IRQS_OFF
testl %ebx, %ebx /* swapgs needed? */
jnz .Lparanoid_exit_no_swapgs
- TRACE_IRQS_IRETQ
/* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
SWAPGS_UNSAFE_STACK
jmp restore_regs_and_return_to_kernel
.Lparanoid_exit_no_swapgs:
- TRACE_IRQS_IRETQ
/* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
jmp restore_regs_and_return_to_kernel
@@ -1254,7 +1242,6 @@ SYM_CODE_START(asm_exc_nmi)
call paranoid_entry
UNWIND_HINT_REGS
- /* paranoidentry exc_nmi(), 0; without TRACE_IRQS_OFF */
movq %rsp, %rdi
movq $-1, %rsi
call exc_nmi
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -334,6 +334,8 @@ static noinstr void default_do_nmi(struc
__this_cpu_write(last_nmi_rip, regs->ip);
instr_begin();
+ trace_hardirqs_off_prepare();
+
handled = nmi_handle(NMI_LOCAL, regs);
__this_cpu_add(nmi_stats.normal, handled);
if (handled) {
@@ -347,8 +349,7 @@ static noinstr void default_do_nmi(struc
*/
if (handled > 1)
__this_cpu_write(swallow_nmi, true);
- instr_end();
- return;
+ goto out;
}
/*
@@ -380,8 +381,7 @@ static noinstr void default_do_nmi(struc
#endif
__this_cpu_add(nmi_stats.external, 1);
raw_spin_unlock(&nmi_reason_lock);
- instr_end();
- return;
+ goto out;
}
raw_spin_unlock(&nmi_reason_lock);
@@ -419,6 +419,9 @@ static noinstr void default_do_nmi(struc
__this_cpu_add(nmi_stats.swallow, 1);
else
unknown_nmi_error(reason, regs);
+out:
+ if (regs->flags & X86_EFLAGS_IF)
+ trace_hardirqs_on_prepare();
instr_end();
}
^ permalink raw reply [flat|nested] 49+ messages in thread
* [patch V4 part 5 31/31] x86/entry: Remove the TRACE_IRQS cruft
2020-05-05 13:53 [patch V4 part 5 00/31] x86/entry: Entry/exception code rework, Thomas Gleixner
` (29 preceding siblings ...)
2020-05-05 13:54 ` [patch V4 part 5 30/31] x86/entry: Move paranoid irq tracing out of ASM code Thomas Gleixner
@ 2020-05-05 13:54 ` Thomas Gleixner
30 siblings, 0 replies; 49+ messages in thread
From: Thomas Gleixner @ 2020-05-05 13:54 UTC (permalink / raw)
To: LKML
Cc: x86, Paul E. McKenney, Andy Lutomirski, Alexandre Chartre,
Frederic Weisbecker, Paolo Bonzini, Sean Christopherson,
Masami Hiramatsu, Petr Mladek, Steven Rostedt, Joel Fernandes,
Boris Ostrovsky, Juergen Gross, Brian Gerst, Mathieu Desnoyers,
Josh Poimboeuf, Will Deacon
The TRACE_IRQS_* macros and the trace_hardirqs_{on,off}_thunk helpers have
no users left. Remove them.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/entry/entry_64.S | 13 -------------
arch/x86/entry/thunk_64.S | 9 +--------
arch/x86/include/asm/irqflags.h | 10 ----------
3 files changed, 1 insertion(+), 31 deletions(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -53,19 +53,6 @@ SYM_CODE_START(native_usergs_sysret64)
SYM_CODE_END(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */
-.macro TRACE_IRQS_FLAGS flags:req
-#ifdef CONFIG_TRACE_IRQFLAGS
- btl $9, \flags /* interrupts off? */
- jnc 1f
- TRACE_IRQS_ON
-1:
-#endif
-.endm
-
-.macro TRACE_IRQS_IRETQ
- TRACE_IRQS_FLAGS EFLAGS(%rsp)
-.endm
-
/*
* 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
*
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -3,7 +3,6 @@
* Save registers before calling assembly functions. This avoids
* disturbance of register allocation in some inline assembly constructs.
* Copyright 2001,2002 by Andi Kleen, SuSE Labs.
- * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc.
*/
#include <linux/linkage.h>
#include "calling.h"
@@ -70,11 +69,6 @@ SYM_FUNC_START_NOALIGN(\name)
SYM_FUNC_END(\name)
.endm
-#ifdef CONFIG_TRACE_IRQFLAGS
- THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller, put_ret_addr_in_rdi=1
- THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller, put_ret_addr_in_rdi=1
-#endif
-
#ifdef CONFIG_PREEMPTION
THUNK preempt_schedule_thunk, preempt_schedule
EXPORT_SYMBOL(preempt_schedule_thunk)
@@ -83,8 +77,7 @@ SYM_FUNC_END(\name)
EXPORT_SYMBOL(preempt_schedule_notrace_thunk)
#endif
-#if defined(CONFIG_TRACE_IRQFLAGS) \
- || defined(CONFIG_PREEMPTION)
+#ifdef CONFIG_PREEMPTION
SYM_CODE_START_LOCAL_NOALIGN(.L_restore)
popq %r11
popq %r10
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -171,14 +171,4 @@ static inline int arch_irqs_disabled(voi
}
#endif /* !__ASSEMBLY__ */
-#ifdef __ASSEMBLY__
-#ifdef CONFIG_TRACE_IRQFLAGS
-# define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
-# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
-#else
-# define TRACE_IRQS_ON
-# define TRACE_IRQS_OFF
-#endif
-#endif /* __ASSEMBLY__ */
-
#endif
^ permalink raw reply [flat|nested] 49+ messages in thread