linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v1] xen: get rid of paravirt op adjust_exception_frame
@ 2017-07-24 14:28 Juergen Gross
  2017-07-26 12:59 ` Boris Ostrovsky
  2017-07-26 13:48 ` Andy Lutomirski
  0 siblings, 2 replies; 11+ messages in thread
From: Juergen Gross @ 2017-07-24 14:28 UTC (permalink / raw)
  To: linux-kernel, xen-devel, x86
  Cc: boris.ostrovsky, hpa, tglx, mingo, luto, Juergen Gross

When running as Xen pv-guest the exception frame on the stack contains
%r11 and %rcx additional to the other data pushed by the processor.

Instead of having a paravirt op being called for each exception type
prepend the Xen specific code to each exception entry. When running as
Xen pv-guest just use the exception entry with prepended instructions,
otherwise use the entry without the Xen specific code.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/entry/calling.h              |  6 ++++++
 arch/x86/entry/entry_64.S             | 19 ++-----------------
 arch/x86/entry/entry_64_compat.S      |  3 +--
 arch/x86/include/asm/desc.h           |  7 +++++++
 arch/x86/include/asm/paravirt.h       |  5 -----
 arch/x86/include/asm/paravirt_types.h |  4 ----
 arch/x86/kernel/asm-offsets_64.c      |  1 -
 arch/x86/kernel/paravirt.c            |  3 ---
 arch/x86/xen/enlighten_pv.c           |  8 ++++++--
 arch/x86/xen/irq.c                    |  3 ---
 arch/x86/xen/setup.c                  |  3 ++-
 arch/x86/xen/smp_pv.c                 |  2 +-
 arch/x86/xen/xen-asm_64.S             |  6 ------
 arch/x86/xen/xen-ops.h                |  2 +-
 14 files changed, 26 insertions(+), 46 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 05ed3d393da7..8b315ee49c93 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -227,3 +227,9 @@ For 32-bit we have the following conventions - kernel is built with
 .Lafter_call_\@:
 #endif
 .endm
+
+#ifdef CONFIG_XEN_PV
+#define PV_ENTRY(sym) ENTRY(_xen_##sym); pop %rcx; pop %r11; .globl sym; sym:
+#else
+#define PV_ENTRY(sym) ENTRY(sym)
+#endif
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index a9a8027a6c0e..94b6b56fa005 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -738,14 +738,13 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
 #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
 
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
-ENTRY(\sym)
+PV_ENTRY(\sym)
 	/* Sanity check */
 	.if \shift_ist != -1 && \paranoid == 0
 	.error "using shift_ist requires paranoid=1"
 	.endif
 
 	ASM_CLAC
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
 
 	.ifeq \has_error_code
 	pushq	$-1				/* ORIG_RAX: no syscall to restart */
@@ -967,8 +966,6 @@ ENTRY(xen_failsafe_callback)
 	movq	8(%rsp), %r11
 	addq	$0x30, %rsp
 	pushq	$0				/* RIP */
-	pushq	%r11
-	pushq	%rcx
 	jmp	general_protection
 1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
 	movq	(%rsp), %rcx
@@ -1159,19 +1156,7 @@ ENTRY(error_exit)
 END(error_exit)
 
 /* Runs on exception stack */
-ENTRY(nmi)
-	/*
-	 * Fix up the exception frame if we're on Xen.
-	 * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
-	 * one value to the stack on native, so it may clobber the rdx
-	 * scratch slot, but it won't clobber any of the important
-	 * slots past it.
-	 *
-	 * Xen is a different story, because the Xen frame itself overlaps
-	 * the "NMI executing" variable.
-	 */
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
-
+PV_ENTRY(nmi)
 	/*
 	 * We allow breakpoints in NMIs. If a breakpoint occurs, then
 	 * the iretq it performs will take us out of NMI context.
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index e1721dafbcb1..9fd8c8f6004e 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -290,11 +290,10 @@ END(entry_SYSCALL_compat)
  * edi  arg5
  * ebp  arg6
  */
-ENTRY(entry_INT80_compat)
+PV_ENTRY(entry_INT80_compat)
 	/*
 	 * Interrupts are off on entry.
 	 */
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	ASM_CLAC			/* Do this early to minimize exposure */
 	SWAPGS
 
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index d0a21b12dd58..ff19be44877a 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -85,9 +85,16 @@ static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu)
 
 #ifdef CONFIG_X86_64
 
+#ifdef CONFIG_XEN_PV
+extern unsigned int pv_idt_prologue;
+#else
+#define pv_idt_prologue 0
+#endif
+
 static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
 			     unsigned dpl, unsigned ist, unsigned seg)
 {
+	func -= pv_idt_prologue;
 	gate->offset_low	= PTR_LOW(func);
 	gate->segment		= __KERNEL_CS;
 	gate->ist		= ist;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 9ccac1926587..c25dd22f7c70 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -960,11 +960,6 @@ extern void default_banner(void);
 #define GET_CR2_INTO_RAX				\
 	call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
 
-#define PARAVIRT_ADJUST_EXCEPTION_FRAME					\
-	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
-		  CLBR_NONE,						\
-		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))
-
 #define USERGS_SYSRET64							\
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
 		  CLBR_NONE,						\
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 9ffc36bfe4cd..c55106726938 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -195,10 +195,6 @@ struct pv_irq_ops {
 
 	void (*safe_halt)(void);
 	void (*halt)(void);
-
-#ifdef CONFIG_X86_64
-	void (*adjust_exception_frame)(void);
-#endif
 } __no_randomize_layout;
 
 struct pv_mmu_ops {
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 99332f550c48..cf42206926af 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -20,7 +20,6 @@ static char syscalls_ia32[] = {
 int main(void)
 {
 #ifdef CONFIG_PARAVIRT
-	OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
 	OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
 	OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
 	BLANK();
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index bc0a849589bb..a14df9eecfed 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -319,9 +319,6 @@ __visible struct pv_irq_ops pv_irq_ops = {
 	.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
 	.safe_halt = native_safe_halt,
 	.halt = native_halt,
-#ifdef CONFIG_X86_64
-	.adjust_exception_frame = paravirt_nop,
-#endif
 };
 
 __visible struct pv_cpu_ops pv_cpu_ops = {
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 811e4ddb3f37..27e0d964be7b 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -88,6 +88,7 @@
 #include "pmu.h"
 
 void *xen_initial_gdt;
+unsigned int pv_idt_prologue;
 
 static int xen_cpu_up_prepare_pv(unsigned int cpu);
 static int xen_cpu_dead_pv(unsigned int cpu);
@@ -589,7 +590,7 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
 
 	info->vector = vector;
 
-	addr = gate_offset(*val);
+	addr = gate_offset(*val) + pv_idt_prologue;
 #ifdef CONFIG_X86_64
 	/*
 	 * Look for known traps using IST, and substitute them
@@ -626,7 +627,7 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
 			return 0;
 	}
 #endif	/* CONFIG_X86_64 */
-	info->address = addr;
+	info->address = addr - pv_idt_prologue;
 
 	info->cs = gate_segment(*val);
 	info->flags = val->dpl;
@@ -1246,6 +1247,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	pv_info = xen_info;
 	pv_init_ops = xen_init_ops;
 	pv_cpu_ops = xen_cpu_ops;
+#ifdef CONFIG_X86_64
+	pv_idt_prologue = 3; /* size of pop %rcx; pop %r11; */
+#endif
 
 	x86_platform.get_nmi_reason = xen_get_nmi_reason;
 
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 33e92955e09d..d4eff5676cfa 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -123,9 +123,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
 
 	.safe_halt = xen_safe_halt,
 	.halt = xen_halt,
-#ifdef CONFIG_X86_64
-	.adjust_exception_frame = xen_adjust_exception_frame,
-#endif
 };
 
 void __init xen_init_irq_ops(void)
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index c81046323ebc..2f56dd849ea3 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -1012,7 +1012,8 @@ void __init xen_pvmmu_arch_setup(void)
 	HYPERVISOR_vm_assist(VMASST_CMD_enable,
 			     VMASST_TYPE_pae_extended_cr3);
 
-	if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
+	if (register_callback(CALLBACKTYPE_event,
+			      xen_hypervisor_callback - pv_idt_prologue) ||
 	    register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
 		BUG();
 
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 51471408fdd1..4be2c5e08dac 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -323,7 +323,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	ctxt->gs_base_kernel = per_cpu_offset(cpu);
 #endif
 	ctxt->event_callback_eip    =
-		(unsigned long)xen_hypervisor_callback;
+		(unsigned long)xen_hypervisor_callback - pv_idt_prologue;
 	ctxt->failsafe_callback_eip =
 		(unsigned long)xen_failsafe_callback;
 	ctxt->user_regs.cs = __KERNEL_CS;
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index c3df43141e70..8db45fdba96d 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -22,12 +22,6 @@
 
 #include "xen-asm.h"
 
-ENTRY(xen_adjust_exception_frame)
-	mov 8+0(%rsp), %rcx
-	mov 8+8(%rsp), %r11
-	ret $16
-ENDPROC(xen_adjust_exception_frame)
-
 hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
 /*
  * Xen64 iret frame:
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 0d5004477db6..f08f7cd722ab 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -17,6 +17,7 @@ void xen_syscall32_target(void);
 #endif
 
 extern void *xen_initial_gdt;
+extern unsigned int pv_idt_prologue;
 
 struct trap_info;
 void xen_copy_trap_info(struct trap_info *traps);
@@ -145,7 +146,6 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long);
 __visible void xen_iret(void);
 __visible void xen_sysret32(void);
 __visible void xen_sysret64(void);
-__visible void xen_adjust_exception_frame(void);
 
 extern int xen_panic_handler_init(void);
 
-- 
2.12.3

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v1] xen: get rid of paravirt op adjust_exception_frame
  2017-07-24 14:28 [PATCH v1] xen: get rid of paravirt op adjust_exception_frame Juergen Gross
@ 2017-07-26 12:59 ` Boris Ostrovsky
  2017-07-26 13:48 ` Andy Lutomirski
  1 sibling, 0 replies; 11+ messages in thread
From: Boris Ostrovsky @ 2017-07-26 12:59 UTC (permalink / raw)
  To: Juergen Gross, linux-kernel, xen-devel, x86; +Cc: hpa, tglx, mingo, luto



On 7/24/2017 10:28 AM, Juergen Gross wrote:
> When running as Xen pv-guest the exception frame on the stack contains
> %r11 and %rcx additional to the other data pushed by the processor.
>
> Instead of having a paravirt op being called for each exception type
> prepend the Xen specific code to each exception entry. When running as
> Xen pv-guest just use the exception entry with prepended instructions,
> otherwise use the entry without the Xen specific code.
>
> Signed-off-by: Juergen Gross <jgross@suse.com>

Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>

(I'd s/xen/x86/ in subject to get x86 maintainers' attention ;-))

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v1] xen: get rid of paravirt op adjust_exception_frame
  2017-07-24 14:28 [PATCH v1] xen: get rid of paravirt op adjust_exception_frame Juergen Gross
  2017-07-26 12:59 ` Boris Ostrovsky
@ 2017-07-26 13:48 ` Andy Lutomirski
  2017-07-26 14:01   ` [Xen-devel] " Andrew Cooper
                     ` (2 more replies)
  1 sibling, 3 replies; 11+ messages in thread
From: Andy Lutomirski @ 2017-07-26 13:48 UTC (permalink / raw)
  To: Juergen Gross
  Cc: linux-kernel, xen-devel, X86 ML, Boris Ostrovsky, H. Peter Anvin,
	Thomas Gleixner, Ingo Molnar

On Mon, Jul 24, 2017 at 7:28 AM, Juergen Gross <jgross@suse.com> wrote:
> When running as Xen pv-guest the exception frame on the stack contains
> %r11 and %rcx additional to the other data pushed by the processor.
>
> Instead of having a paravirt op being called for each exception type
> prepend the Xen specific code to each exception entry. When running as
> Xen pv-guest just use the exception entry with prepended instructions,
> otherwise use the entry without the Xen specific code.

I think this is a nice cleanup, but I'm wondering if it would be even
nicer if the Xen part was kept out-of-line.  That is, could Xen have
little stubs like:

xen_alignment_check:
  pop %rcx
  pop %r11
  jmp alignment_check

rather than using the macros in entry_64.S that you have?  Then you
could adjust set_trap_gate instead of pack_gate and maybe even do
something like:

#define set_trap_gate(..., name, ...) set_native_or_xen_trap_gate(...,
name, xen_##name, ...)

>  /* Runs on exception stack */
> -ENTRY(nmi)
> -       /*
> -        * Fix up the exception frame if we're on Xen.
> -        * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
> -        * one value to the stack on native, so it may clobber the rdx
> -        * scratch slot, but it won't clobber any of the important
> -        * slots past it.
> -        *
> -        * Xen is a different story, because the Xen frame itself overlaps
> -        * the "NMI executing" variable.
> -        */

I would keep this comment.  The Xen frame really is in the way AFAICT.


--Andy

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Xen-devel] [PATCH v1] xen: get rid of paravirt op adjust_exception_frame
  2017-07-26 13:48 ` Andy Lutomirski
@ 2017-07-26 14:01   ` Andrew Cooper
  2017-07-26 14:09     ` Andy Lutomirski
  2017-07-26 15:12   ` Juergen Gross
  2017-07-26 15:50   ` Juergen Gross
  2 siblings, 1 reply; 11+ messages in thread
From: Andrew Cooper @ 2017-07-26 14:01 UTC (permalink / raw)
  To: Andy Lutomirski, Juergen Gross
  Cc: X86 ML, linux-kernel, Ingo Molnar, H. Peter Anvin, xen-devel,
	Boris Ostrovsky, Thomas Gleixner

On 26/07/17 14:48, Andy Lutomirski wrote:
>
>>  /* Runs on exception stack */
>> -ENTRY(nmi)
>> -       /*
>> -        * Fix up the exception frame if we're on Xen.
>> -        * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
>> -        * one value to the stack on native, so it may clobber the rdx
>> -        * scratch slot, but it won't clobber any of the important
>> -        * slots past it.
>> -        *
>> -        * Xen is a different story, because the Xen frame itself overlaps
>> -        * the "NMI executing" variable.
>> -        */
> I would keep this comment.  The Xen frame really is in the way AFAICT.

(For reasons best explained by the original authors) there is only ever
a single stack which a PV guest registers with Xen, which functions
equivalently to tss.sp0.  There is no support for stack switching via
task switch or IST.

Therefore, nested NMIs won't clobber the top of this stack.

~Andrew

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Xen-devel] [PATCH v1] xen: get rid of paravirt op adjust_exception_frame
  2017-07-26 14:01   ` [Xen-devel] " Andrew Cooper
@ 2017-07-26 14:09     ` Andy Lutomirski
  2017-07-26 14:31       ` Andrew Cooper
  0 siblings, 1 reply; 11+ messages in thread
From: Andy Lutomirski @ 2017-07-26 14:09 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Juergen Gross, X86 ML, linux-kernel, Ingo Molnar, H. Peter Anvin,
	xen-devel, Boris Ostrovsky, Thomas Gleixner

On Wed, Jul 26, 2017 at 7:01 AM, Andrew Cooper
<andrew.cooper3@citrix.com> wrote:
> On 26/07/17 14:48, Andy Lutomirski wrote:
>>
>>>  /* Runs on exception stack */
>>> -ENTRY(nmi)
>>> -       /*
>>> -        * Fix up the exception frame if we're on Xen.
>>> -        * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
>>> -        * one value to the stack on native, so it may clobber the rdx
>>> -        * scratch slot, but it won't clobber any of the important
>>> -        * slots past it.
>>> -        *
>>> -        * Xen is a different story, because the Xen frame itself overlaps
>>> -        * the "NMI executing" variable.
>>> -        */
>> I would keep this comment.  The Xen frame really is in the way AFAICT.
>
> (For reasons best explained by the original authors) there is only ever
> a single stack which a PV guest registers with Xen, which functions
> equivalently to tss.sp0.  There is no support for stack switching via
> task switch or IST.
>
> Therefore, nested NMIs won't clobber the top of this stack.
>

Does that mean that nested NMIs on Xen just nest normally without
clobbering each other?  If so, that's neat, although I wonder how we
don't get crashes due to this:

/* Normal 64-bit system call target */
ENTRY(xen_syscall_target)
        undo_xen_syscall
        <-- NMI right here
        jmp entry_SYSCALL_64_after_swapgs
ENDPROC(xen_syscall_target)

I think it would be nice if Xen could instead enter the native syscall
path a bit later like this:

ENTRY(entry_SYSCALL_64)
        /*
         * Interrupts are off on entry.
         * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
         * it is too small to ever cause noticeable irq latency.
         */
        SWAPGS_UNSAFE_STACK
        /*
         * A hypervisor implementation might want to use a label
         * after the swapgs, so that it can do the swapgs
         * for the guest and jump here on syscall.
         */
GLOBAL(entry_SYSCALL_64_after_swapgs)

        movq    %rsp, PER_CPU_VAR(rsp_scratch)
        movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp

        TRACE_IRQS_OFF

        /* Construct struct pt_regs on stack */
        pushq   $__USER_DS                      /* pt_regs->ss */
        pushq   PER_CPU_VAR(rsp_scratch)        /* pt_regs->sp */
        pushq   %r11                            /* pt_regs->flags */
        pushq   $__USER_CS                      /* pt_regs->cs */
        pushq   %rcx                            /* pt_regs->ip */

<-- Xen enters here

then we wouldn't have all this funny business.  And Xen could
completely skip the nmi nesting code.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Xen-devel] [PATCH v1] xen: get rid of paravirt op adjust_exception_frame
  2017-07-26 14:09     ` Andy Lutomirski
@ 2017-07-26 14:31       ` Andrew Cooper
  0 siblings, 0 replies; 11+ messages in thread
From: Andrew Cooper @ 2017-07-26 14:31 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Juergen Gross, X86 ML, linux-kernel, Ingo Molnar, H. Peter Anvin,
	xen-devel, Boris Ostrovsky, Thomas Gleixner

On 26/07/17 15:09, Andy Lutomirski wrote:
> On Wed, Jul 26, 2017 at 7:01 AM, Andrew Cooper
> <andrew.cooper3@citrix.com> wrote:
>> On 26/07/17 14:48, Andy Lutomirski wrote:
>>>>  /* Runs on exception stack */
>>>> -ENTRY(nmi)
>>>> -       /*
>>>> -        * Fix up the exception frame if we're on Xen.
>>>> -        * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
>>>> -        * one value to the stack on native, so it may clobber the rdx
>>>> -        * scratch slot, but it won't clobber any of the important
>>>> -        * slots past it.
>>>> -        *
>>>> -        * Xen is a different story, because the Xen frame itself overlaps
>>>> -        * the "NMI executing" variable.
>>>> -        */
>>> I would keep this comment.  The Xen frame really is in the way AFAICT.
>> (For reasons best explained by the original authors) there is only ever
>> a single stack which a PV guest registers with Xen, which functions
>> equivalently to tss.sp0.  There is no support for stack switching via
>> task switch or IST.
>>
>> Therefore, nested NMIs won't clobber the top of this stack.
>>
> Does that mean that nested NMIs on Xen just nest normally without
> clobbering each other?

Yes.

> If so, that's neat, although I wonder how we
> don't get crashes due to this:
>
> /* Normal 64-bit system call target */
> ENTRY(xen_syscall_target)
>         undo_xen_syscall
>         <-- NMI right here
>         jmp entry_SYSCALL_64_after_swapgs
> ENDPROC(xen_syscall_target)

I'm going to go out on a limb here and say "because no has hit that
condition yet" (or at least managed to diagnose such a crash).

PV domU's don't get given NMIs.  PV dom0 might, depending on how Xen is
handling the NMI itself.  On XenServer at least, dom0 never gets handed
an NMI.

I expect is a sufficiently rarely used path that noone has noticed if it
is indeed broken.

~Andrew

>
> I think it would be nice if Xen could instead enter the native syscall
> path a bit later like this:
>
> ENTRY(entry_SYSCALL_64)
>         /*
>          * Interrupts are off on entry.
>          * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
>          * it is too small to ever cause noticeable irq latency.
>          */
>         SWAPGS_UNSAFE_STACK
>         /*
>          * A hypervisor implementation might want to use a label
>          * after the swapgs, so that it can do the swapgs
>          * for the guest and jump here on syscall.
>          */
> GLOBAL(entry_SYSCALL_64_after_swapgs)
>
>         movq    %rsp, PER_CPU_VAR(rsp_scratch)
>         movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
>
>         TRACE_IRQS_OFF
>
>         /* Construct struct pt_regs on stack */
>         pushq   $__USER_DS                      /* pt_regs->ss */
>         pushq   PER_CPU_VAR(rsp_scratch)        /* pt_regs->sp */
>         pushq   %r11                            /* pt_regs->flags */
>         pushq   $__USER_CS                      /* pt_regs->cs */
>         pushq   %rcx                            /* pt_regs->ip */
>
> <-- Xen enters here
>
> then we wouldn't have all this funny business.  And Xen could
> completely skip the nmi nesting code.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v1] xen: get rid of paravirt op adjust_exception_frame
  2017-07-26 13:48 ` Andy Lutomirski
  2017-07-26 14:01   ` [Xen-devel] " Andrew Cooper
@ 2017-07-26 15:12   ` Juergen Gross
  2017-07-26 15:50   ` Juergen Gross
  2 siblings, 0 replies; 11+ messages in thread
From: Juergen Gross @ 2017-07-26 15:12 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-kernel, xen-devel, X86 ML, Boris Ostrovsky, H. Peter Anvin,
	Thomas Gleixner, Ingo Molnar

On 26/07/17 15:48, Andy Lutomirski wrote:
> On Mon, Jul 24, 2017 at 7:28 AM, Juergen Gross <jgross@suse.com> wrote:
>> When running as Xen pv-guest the exception frame on the stack contains
>> %r11 and %rcx additional to the other data pushed by the processor.
>>
>> Instead of having a paravirt op being called for each exception type
>> prepend the Xen specific code to each exception entry. When running as
>> Xen pv-guest just use the exception entry with prepended instructions,
>> otherwise use the entry without the Xen specific code.
> 
> I think this is a nice cleanup, but I'm wondering if it would be even
> nicer if the Xen part was kept out-of-line.  That is, could Xen have
> little stubs like:
> 
> xen_alignment_check:
>   pop %rcx
>   pop %r11
>   jmp alignment_check
> 
> rather than using the macros in entry_64.S that you have?  Then you
> could adjust set_trap_gate instead of pack_gate and maybe even do
> something like:
> 
> #define set_trap_gate(..., name, ...) set_native_or_xen_trap_gate(...,
> name, xen_##name, ...)

Okay.

>>  /* Runs on exception stack */
>> -ENTRY(nmi)
>> -       /*
>> -        * Fix up the exception frame if we're on Xen.
>> -        * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
>> -        * one value to the stack on native, so it may clobber the rdx
>> -        * scratch slot, but it won't clobber any of the important
>> -        * slots past it.
>> -        *
>> -        * Xen is a different story, because the Xen frame itself overlaps
>> -        * the "NMI executing" variable.
>> -        */
> 
> I would keep this comment.  The Xen frame really is in the way AFAICT.

Taking Andrew's comments into account I can drop it?


Juergen

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v1] xen: get rid of paravirt op adjust_exception_frame
  2017-07-26 13:48 ` Andy Lutomirski
  2017-07-26 14:01   ` [Xen-devel] " Andrew Cooper
  2017-07-26 15:12   ` Juergen Gross
@ 2017-07-26 15:50   ` Juergen Gross
  2017-07-26 17:57     ` Andy Lutomirski
  2 siblings, 1 reply; 11+ messages in thread
From: Juergen Gross @ 2017-07-26 15:50 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-kernel, xen-devel, X86 ML, Boris Ostrovsky, H. Peter Anvin,
	Thomas Gleixner, Ingo Molnar

On 26/07/17 15:48, Andy Lutomirski wrote:
> On Mon, Jul 24, 2017 at 7:28 AM, Juergen Gross <jgross@suse.com> wrote:
>> When running as Xen pv-guest the exception frame on the stack contains
>> %r11 and %rcx additional to the other data pushed by the processor.
>>
>> Instead of having a paravirt op being called for each exception type
>> prepend the Xen specific code to each exception entry. When running as
>> Xen pv-guest just use the exception entry with prepended instructions,
>> otherwise use the entry without the Xen specific code.
> 
> I think this is a nice cleanup, but I'm wondering if it would be even
> nicer if the Xen part was kept out-of-line.  That is, could Xen have
> little stubs like:
> 
> xen_alignment_check:
>   pop %rcx
>   pop %r11
>   jmp alignment_check
> 
> rather than using the macros in entry_64.S that you have?  Then you
> could adjust set_trap_gate instead of pack_gate and maybe even do
> something like:
> 
> #define set_trap_gate(..., name, ...) set_native_or_xen_trap_gate(...,
> name, xen_##name, ...)

I think I'll have something like:

#define pv_trap_entry(name) (xen_pv_domain() ? xen_ ## name : name)

and use it like:

set_intr_gate(X86_TRAP_AC, pv_trap_entry(alignment_check));

This will avoid having to define macros for all variants of
set_intr_gate(), e.g. set_intr_gate_ist(), set_system_intr_gate().

Do you have any objections?


Juergen

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v1] xen: get rid of paravirt op adjust_exception_frame
  2017-07-26 15:50   ` Juergen Gross
@ 2017-07-26 17:57     ` Andy Lutomirski
  2017-07-26 18:43       ` Juergen Gross
  0 siblings, 1 reply; 11+ messages in thread
From: Andy Lutomirski @ 2017-07-26 17:57 UTC (permalink / raw)
  To: Juergen Gross
  Cc: linux-kernel, xen-devel, X86 ML, Boris Ostrovsky, H. Peter Anvin,
	Thomas Gleixner, Ingo Molnar



> On Jul 26, 2017, at 11:50 AM, Juergen Gross <jgross@suse.com> wrote:
> 
>> On 26/07/17 15:48, Andy Lutomirski wrote:
>>> On Mon, Jul 24, 2017 at 7:28 AM, Juergen Gross <jgross@suse.com> wrote:
>>> When running as Xen pv-guest the exception frame on the stack contains
>>> %r11 and %rcx additional to the other data pushed by the processor.
>>> 
>>> Instead of having a paravirt op being called for each exception type
>>> prepend the Xen specific code to each exception entry. When running as
>>> Xen pv-guest just use the exception entry with prepended instructions,
>>> otherwise use the entry without the Xen specific code.
>> 
>> I think this is a nice cleanup, but I'm wondering if it would be even
>> nicer if the Xen part was kept out-of-line.  That is, could Xen have
>> little stubs like:
>> 
>> xen_alignment_check:
>>  pop %rcx
>>  pop %r11
>>  jmp alignment_check
>> 
>> rather than using the macros in entry_64.S that you have?  Then you
>> could adjust set_trap_gate instead of pack_gate and maybe even do
>> something like:
>> 
>> #define set_trap_gate(..., name, ...) set_native_or_xen_trap_gate(...,
>> name, xen_##name, ...)
> 
> I think I'll have something like:
> 
> #define pv_trap_entry(name) (xen_pv_domain() ? xen_ ## name : name)
> 
> and use it like:
> 
> set_intr_gate(X86_TRAP_AC, pv_trap_entry(alignment_check));
> 
> This will avoid having to define macros for all variants of
> set_intr_gate(), e.g. set_intr_gate_ist(), set_system_intr_gate().
> 
> Do you have any objections?
> 

Sounds good to me.

FWIW, I have no real objection to putting the Xen entry right before the native entry and falling through.  I don't love the ip -= 3 bit, though, and I think that the PV_ENTRY macro is too magical.

This might be okay, though:

XEN_PV_ENTRY_FALLTHROUGH(foo)
ENTRY(foo)
  code here



> 
> Juergen

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v1] xen: get rid of paravirt op adjust_exception_frame
  2017-07-26 17:57     ` Andy Lutomirski
@ 2017-07-26 18:43       ` Juergen Gross
  2017-07-26 20:15         ` Andy Lutomirski
  0 siblings, 1 reply; 11+ messages in thread
From: Juergen Gross @ 2017-07-26 18:43 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-kernel, xen-devel, X86 ML, Boris Ostrovsky, H. Peter Anvin,
	Thomas Gleixner, Ingo Molnar

On 26/07/17 19:57, Andy Lutomirski wrote:
> 
> 
>> On Jul 26, 2017, at 11:50 AM, Juergen Gross <jgross@suse.com> wrote:
>>
>>> On 26/07/17 15:48, Andy Lutomirski wrote:
>>>> On Mon, Jul 24, 2017 at 7:28 AM, Juergen Gross <jgross@suse.com> wrote:
>>>> When running as Xen pv-guest the exception frame on the stack contains
>>>> %r11 and %rcx additional to the other data pushed by the processor.
>>>>
>>>> Instead of having a paravirt op being called for each exception type
>>>> prepend the Xen specific code to each exception entry. When running as
>>>> Xen pv-guest just use the exception entry with prepended instructions,
>>>> otherwise use the entry without the Xen specific code.
>>>
>>> I think this is a nice cleanup, but I'm wondering if it would be even
>>> nicer if the Xen part was kept out-of-line.  That is, could Xen have
>>> little stubs like:
>>>
>>> xen_alignment_check:
>>>  pop %rcx
>>>  pop %r11
>>>  jmp alignment_check
>>>
>>> rather than using the macros in entry_64.S that you have?  Then you
>>> could adjust set_trap_gate instead of pack_gate and maybe even do
>>> something like:
>>>
>>> #define set_trap_gate(..., name, ...) set_native_or_xen_trap_gate(...,
>>> name, xen_##name, ...)
>>
>> I think I'll have something like:
>>
>> #define pv_trap_entry(name) (xen_pv_domain() ? xen_ ## name : name)
>>
>> and use it like:
>>
>> set_intr_gate(X86_TRAP_AC, pv_trap_entry(alignment_check));
>>
>> This will avoid having to define macros for all variants of
>> set_intr_gate(), e.g. set_intr_gate_ist(), set_system_intr_gate().
>>
>> Do you have any objections?
>>
> 
> Sounds good to me.
> 
> FWIW, I have no real objection to putting the Xen entry right before the native entry and falling through.  I don't love the ip -= 3 bit, though, and I think that the PV_ENTRY macro is too magical.
> 
> This might be okay, though:
> 
> XEN_PV_ENTRY_FALLTHROUGH(foo)
> ENTRY(foo)

ENTRY() aligns on 16 byte boundary. So I have to avoid ENTRY(foo) above
in the Xen case when I want to fall through.

So either I have to do something like PV_ENTRY (I could avoid the magic
"3" by using the xen_foo entry via pv_trap_entry()), or I need the stub
with "jmp" for Xen.


Juergen

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v1] xen: get rid of paravirt op adjust_exception_frame
  2017-07-26 18:43       ` Juergen Gross
@ 2017-07-26 20:15         ` Andy Lutomirski
  0 siblings, 0 replies; 11+ messages in thread
From: Andy Lutomirski @ 2017-07-26 20:15 UTC (permalink / raw)
  To: Juergen Gross
  Cc: linux-kernel, xen-devel, X86 ML, Boris Ostrovsky, H. Peter Anvin,
	Thomas Gleixner, Ingo Molnar



> On Jul 26, 2017, at 2:43 PM, Juergen Gross <jgross@suse.com> wrote:
> 
>> On 26/07/17 19:57, Andy Lutomirski wrote:
>> 
>> 
>>>> On Jul 26, 2017, at 11:50 AM, Juergen Gross <jgross@suse.com> wrote:
>>>> 
>>>>> On 26/07/17 15:48, Andy Lutomirski wrote:
>>>>> On Mon, Jul 24, 2017 at 7:28 AM, Juergen Gross <jgross@suse.com> wrote:
>>>>> When running as Xen pv-guest the exception frame on the stack contains
>>>>> %r11 and %rcx additional to the other data pushed by the processor.
>>>>> 
>>>>> Instead of having a paravirt op being called for each exception type
>>>>> prepend the Xen specific code to each exception entry. When running as
>>>>> Xen pv-guest just use the exception entry with prepended instructions,
>>>>> otherwise use the entry without the Xen specific code.
>>>> 
>>>> I think this is a nice cleanup, but I'm wondering if it would be even
>>>> nicer if the Xen part was kept out-of-line.  That is, could Xen have
>>>> little stubs like:
>>>> 
>>>> xen_alignment_check:
>>>> pop %rcx
>>>> pop %r11
>>>> jmp alignment_check
>>>> 
>>>> rather than using the macros in entry_64.S that you have?  Then you
>>>> could adjust set_trap_gate instead of pack_gate and maybe even do
>>>> something like:
>>>> 
>>>> #define set_trap_gate(..., name, ...) set_native_or_xen_trap_gate(...,
>>>> name, xen_##name, ...)
>>> 
>>> I think I'll have something like:
>>> 
>>> #define pv_trap_entry(name) (xen_pv_domain() ? xen_ ## name : name)
>>> 
>>> and use it like:
>>> 
>>> set_intr_gate(X86_TRAP_AC, pv_trap_entry(alignment_check));
>>> 
>>> This will avoid having to define macros for all variants of
>>> set_intr_gate(), e.g. set_intr_gate_ist(), set_system_intr_gate().
>>> 
>>> Do you have any objections?
>>> 
>> 
>> Sounds good to me.
>> 
>> FWIW, I have no real objection to putting the Xen entry right before the native entry and falling through.  I don't love the ip -= 3 bit, though, and I think that the PV_ENTRY macro is too magical.
>> 
>> This might be okay, though:
>> 
>> XEN_PV_ENTRY_FALLTHROUGH(foo)
>> ENTRY(foo)
> 
> ENTRY() aligns on 16 byte boundary. So I have to avoid ENTRY(foo) above
> in the Xen case when I want to fall through.
> 
> So either I have to do something like PV_ENTRY (I could avoid the magic
> "3" by using the xen_foo entry via pv_trap_entry()), or I need the stub
> with "jmp" for Xen.

Hmm.  Either the 16 byte alignment is pointless or the PV_ENTRT macro is wrong.

Anyway, the jmp is probably the best approach.  Then we could eventually make a .text.xen_pv section and discard it on non-xen_pv boots.

> 
> 
> Juergen

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2017-07-26 20:15 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-24 14:28 [PATCH v1] xen: get rid of paravirt op adjust_exception_frame Juergen Gross
2017-07-26 12:59 ` Boris Ostrovsky
2017-07-26 13:48 ` Andy Lutomirski
2017-07-26 14:01   ` [Xen-devel] " Andrew Cooper
2017-07-26 14:09     ` Andy Lutomirski
2017-07-26 14:31       ` Andrew Cooper
2017-07-26 15:12   ` Juergen Gross
2017-07-26 15:50   ` Juergen Gross
2017-07-26 17:57     ` Andy Lutomirski
2017-07-26 18:43       ` Juergen Gross
2017-07-26 20:15         ` Andy Lutomirski

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).