LKML Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 0/2] x86_64: LRET to userspace
@ 2014-07-23 15:34 Andy Lutomirski
  2014-07-23 15:34 ` [PATCH 1/2] x86_64,entry,xen: Do not invoke espfix64 on Xen Andy Lutomirski
  2014-07-23 15:34 ` [PATCH 2/2] x86_64,entry: Use lret to return to userspace when possible Andy Lutomirski
  0 siblings, 2 replies; 6+ messages in thread
From: Andy Lutomirski @ 2014-07-23 15:34 UTC (permalink / raw)
  To: H. Peter Anvin, Linus Torvalds, linux-kernel, x86, Borislav Petkov
  Cc: Andy Lutomirski

This series is based on tip/perf/urgent.  It should apply to any
recent kernel, but my test case [1] will OOPS without the fix in
tip/perf/urgent.

The first patch or something like it is needed for 3.16, regardless
of any lret hackery -- I think it's making its way toward a pull
request this week, but I haven't seen it land in -tip yet.

I'm repeating it here because the LRET stuff won't work on paravirt,
and patch 1 puts the machinery for that in place.

tl;dr Read patch 2.  To test, apply patch 1 and consider basing on
tip/perf/urgent.

[1] sigreturn_32 from https://gitorious.org/linux-test-utils/linux-clock-tests/

Andy Lutomirski (2):
  x86_64,entry,xen: Do not invoke espfix64 on Xen
  x86_64,entry: Use lret to return to userspace when possible

 arch/x86/include/asm/irqflags.h     |   3 +-
 arch/x86/include/asm/paravirt.h     |   4 ++
 arch/x86/include/asm/traps.h        |   6 ++
 arch/x86/kernel/cpu/mcheck/mce.c    |   2 +
 arch/x86/kernel/entry_64.S          | 121 ++++++++++++++++++++++++++++++------
 arch/x86/kernel/nmi.c               |  21 +++++++
 arch/x86/kernel/paravirt_patch_64.c |   2 -
 7 files changed, 136 insertions(+), 23 deletions(-)

-- 
1.9.3


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/2] x86_64,entry,xen: Do not invoke espfix64 on Xen
  2014-07-23 15:34 [PATCH 0/2] x86_64: LRET to userspace Andy Lutomirski
@ 2014-07-23 15:34 ` Andy Lutomirski
  2014-07-28 22:33   ` [tip:x86/urgent] x86_64/entry/xen: " tip-bot for Andy Lutomirski
  2014-07-23 15:34 ` [PATCH 2/2] x86_64,entry: Use lret to return to userspace when possible Andy Lutomirski
  1 sibling, 1 reply; 6+ messages in thread
From: Andy Lutomirski @ 2014-07-23 15:34 UTC (permalink / raw)
  To: H. Peter Anvin, Linus Torvalds, linux-kernel, x86, Borislav Petkov
  Cc: Andy Lutomirski

This moves the espfix64 logic into native_iret.  To make this work,
it gets rid of the native patch for INTERRUPT_RETURN:
INTERRUPT_RETURN on native kernels is now 'jmp native_iret'.

This changes the 16-bit SS behavior on Xen from OOPSing to leaking
some bits of the Xen hypervisor's RSP (I think).

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/include/asm/irqflags.h     |  2 +-
 arch/x86/kernel/entry_64.S          | 28 ++++++++++------------------
 arch/x86/kernel/paravirt_patch_64.c |  2 --
 3 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index bba3cf8..0a8b519 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -129,7 +129,7 @@ static inline notrace unsigned long arch_local_irq_save(void)
 
 #define PARAVIRT_ADJUST_EXCEPTION_FRAME	/*  */
 
-#define INTERRUPT_RETURN	iretq
+#define INTERRUPT_RETURN	jmp native_iret
 #define USERGS_SYSRET64				\
 	swapgs;					\
 	sysretq;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b25ca96..c844f08 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -830,27 +830,24 @@ restore_args:
 	RESTORE_ARGS 1,8,1
 
 irq_return:
+	INTERRUPT_RETURN
+
+ENTRY(native_iret)
 	/*
 	 * Are we returning to a stack segment from the LDT?  Note: in
 	 * 64-bit mode SS:RSP on the exception stack is always valid.
 	 */
 #ifdef CONFIG_X86_ESPFIX64
 	testb $4,(SS-RIP)(%rsp)
-	jnz irq_return_ldt
+	jnz native_irq_return_ldt
 #endif
 
-irq_return_iret:
-	INTERRUPT_RETURN
-	_ASM_EXTABLE(irq_return_iret, bad_iret)
-
-#ifdef CONFIG_PARAVIRT
-ENTRY(native_iret)
+native_irq_return_iret:
 	iretq
-	_ASM_EXTABLE(native_iret, bad_iret)
-#endif
+	_ASM_EXTABLE(native_irq_return_iret, bad_iret)
 
 #ifdef CONFIG_X86_ESPFIX64
-irq_return_ldt:
+native_irq_return_ldt:
 	pushq_cfi %rax
 	pushq_cfi %rdi
 	SWAPGS
@@ -872,7 +869,7 @@ irq_return_ldt:
 	SWAPGS
 	movq %rax,%rsp
 	popq_cfi %rax
-	jmp irq_return_iret
+	jmp native_irq_return_iret
 #endif
 
 	.section .fixup,"ax"
@@ -956,13 +953,8 @@ __do_double_fault:
 	cmpl $__KERNEL_CS,CS(%rdi)
 	jne do_double_fault
 	movq RIP(%rdi),%rax
-	cmpq $irq_return_iret,%rax
-#ifdef CONFIG_PARAVIRT
-	je 1f
-	cmpq $native_iret,%rax
-#endif
+	cmpq $native_irq_return_iret,%rax
 	jne do_double_fault		/* This shouldn't happen... */
-1:
 	movq PER_CPU_VAR(kernel_stack),%rax
 	subq $(6*8-KERNEL_STACK_OFFSET),%rax	/* Reset to original stack */
 	movq %rax,RSP(%rdi)
@@ -1428,7 +1420,7 @@ error_sti:
  */
 error_kernelspace:
 	incl %ebx
-	leaq irq_return_iret(%rip),%rcx
+	leaq native_irq_return_iret(%rip),%rcx
 	cmpq %rcx,RIP+8(%rsp)
 	je error_swapgs
 	movl %ecx,%eax	/* zero extend */
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 3f08f34..a1da673 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -6,7 +6,6 @@ DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
 DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
 DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq");
 DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
-DEF_NATIVE(pv_cpu_ops, iret, "iretq");
 DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
@@ -50,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_irq_ops, save_fl);
 		PATCH_SITE(pv_irq_ops, irq_enable);
 		PATCH_SITE(pv_irq_ops, irq_disable);
-		PATCH_SITE(pv_cpu_ops, iret);
 		PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
 		PATCH_SITE(pv_cpu_ops, usergs_sysret32);
 		PATCH_SITE(pv_cpu_ops, usergs_sysret64);
-- 
1.9.3


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 2/2] x86_64,entry: Use lret to return to userspace when possible
  2014-07-23 15:34 [PATCH 0/2] x86_64: LRET to userspace Andy Lutomirski
  2014-07-23 15:34 ` [PATCH 1/2] x86_64,entry,xen: Do not invoke espfix64 on Xen Andy Lutomirski
@ 2014-07-23 15:34 ` Andy Lutomirski
  1 sibling, 0 replies; 6+ messages in thread
From: Andy Lutomirski @ 2014-07-23 15:34 UTC (permalink / raw)
  To: H. Peter Anvin, Linus Torvalds, linux-kernel, x86, Borislav Petkov
  Cc: Andy Lutomirski

IRET serializes, but LRET does not.  Try to use LRET to return
to userspace when possible.  It's possible if the saved RF and TF
are clear, IF is set, and espfix isn't needed.

This cuts about 23ns off of the IRET-to-userspace path on my
machine.  (YMMV -- this was in a tight loop, and I can imagine
the performance hit from serialization to be somewhat higher
in real code.)

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---

I've tested normal code, iret faults, lret faults, and returns to funny
SS values.

I haven't explicitly exercised this code under heavy NMI load, nor
have I tested it on Xen (although, unless I screwed up, it shouldn't
do anything on Xen).

Benchmark away.  I know that Linus had a test for this stuff.  I doubt
that this will be as spectacular as my old sysret trampoline hack, but
it could still be a nice speedup.

 arch/x86/include/asm/irqflags.h  |  3 +-
 arch/x86/include/asm/paravirt.h  |  4 ++
 arch/x86/include/asm/traps.h     |  6 +++
 arch/x86/kernel/cpu/mcheck/mce.c |  2 +
 arch/x86/kernel/entry_64.S       | 93 +++++++++++++++++++++++++++++++++++++++-
 arch/x86/kernel/nmi.c            | 21 +++++++++
 6 files changed, 126 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 0a8b519..04c45bb 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -129,7 +129,8 @@ static inline notrace unsigned long arch_local_irq_save(void)
 
 #define PARAVIRT_ADJUST_EXCEPTION_FRAME	/*  */
 
-#define INTERRUPT_RETURN	jmp native_iret
+#define INTERRUPT_RETURN		jmp native_iret
+#define INTERRUPT_RETURN_UNBLOCK_NMI	jmp native_irq_return_need_iret
 #define USERGS_SYSRET64				\
 	swapgs;					\
 	sysretq;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index cd6e161..3716b3d 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -913,6 +913,10 @@ extern void default_banner(void);
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,	\
 		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
 
+#define INTERRUPT_RETURN_UNBLOCK_NMI					\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,	\
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
+
 #define DISABLE_INTERRUPTS(clobbers)					\
 	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
 		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index bc8352e..2e3dfe8 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -134,4 +134,10 @@ enum {
 	X86_TRAP_IRET = 32,	/* 32, IRET Exception */
 };
 
+#ifdef CONFIG_X86_64
+extern void fixup_lret_nmi(struct pt_regs *regs);
+#else
+static inline void fixup_lret_nmi(struct pt_regs *regs) {}
+#endif
+
 #endif /* _ASM_X86_TRAPS_H */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index bb92f38..0bb9b9b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -43,6 +43,7 @@
 #include <linux/export.h>
 
 #include <asm/processor.h>
+#include <asm/traps.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
 
@@ -1168,6 +1169,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
 out:
 	sync_core();
+	fixup_lret_nmi(regs);
 }
 EXPORT_SYMBOL_GPL(do_machine_check);
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c844f08..a23b302 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -834,6 +834,81 @@ irq_return:
 
 ENTRY(native_iret)
 	/*
+	 * This implements "iret" the Platonic ideal, not "iret" the
+	 * instruction.  Specifically, it will pop RIP, CS, FLAGS,
+	 * RSP, and SS and load them, correctly, into the CPU state.
+	 * (IRET screws up RSP depending on SS).  It tries to avoid
+	 * serializing (IRET always serializes).
+	 *
+	 * This code does *not* promise to unblock NMIs.  Use
+	 * INTERRUPT_RETURN_UNBLOCK_NMI if you need NMIs to be unblocked.
+	 */
+
+	/*
+	 * Only IRET can set RF correctly, and our sti trick
+	 * is incompatible with TF being set.
+	 */
+	testl $(X86_EFLAGS_RF|X86_EFLAGS_TF), (EFLAGS-RIP)(%rsp)
+	jnz native_irq_return_need_iret
+
+	/*
+	 * While it's technically possible to be in userspace with IF
+	 * clear (using iopl(2)), it's so unlikely that there's no point
+	 * in optimizing it.
+	 */
+	testl $X86_EFLAGS_IF, (EFLAGS-RIP)(%rsp)
+	jz native_irq_return_need_iret
+
+	/*
+	 * Returning without IRET to kernel space is possible, but
+	 * the considerations are different and we're not ready for that
+	 * yet.
+	 */
+	testl $3, (CS-RIP)(%rsp)
+	jz native_irq_return_need_iret
+
+#ifdef CONFIG_X86_ESPFIX64
+	/* lret has the same bug^Wfeature as iret wrt 16-bit SS. */
+	testb $4,(SS-RIP)(%rsp)
+	jnz native_irq_return_ldt
+#endif
+
+	/*
+	 * Rearrange the stack to pretend we got here via a call gate
+	 * (yes, really), and do a long return.
+	 */
+	pushq (SS     - RIP + 0*8)(%rsp)
+	pushq (RSP    - RIP + 1*8)(%rsp)
+	pushq (CS     - RIP + 2*8)(%rsp)
+	pushq (RIP    - RIP + 3*8)(%rsp)
+	pushq (EFLAGS - RIP + 4*8)(%rsp)
+	andl $~X86_EFLAGS_IF, (%rsp)	/* Clear saved IF. */
+	popfq				/* Restore all regs except IF. */
+
+.global native_sti_before_lret_to_userspace
+native_sti_before_lret_to_userspace:
+	sti				/* Restore IF. */
+
+	/*
+	 * This relies on the one-instruction interrupt grace period here
+	 * between sti and lret.  A non-paranoid interrupt here will
+	 * explode because GS is wrong.  More subtly, we may be on an IST
+	 * stack, and if we enable interrupts before leaving the IST stack,
+	 * we could cause a recursive IST interrupt, which would blow away
+	 * our stack frame.
+	 *
+	 * NMI and MCE are safe here -- see fixup_lret_nmi.
+	 */
+
+.global native_lret_to_userspace
+native_lret_to_userspace:
+	lretq
+
+	/* This fixup is special -- see error_lret. */
+	_ASM_EXTABLE(native_lret_to_userspace, bad_iret)
+
+native_irq_return_need_iret:
+	/*
 	 * Are we returning to a stack segment from the LDT?  Note: in
 	 * 64-bit mode SS:RSP on the exception stack is always valid.
 	 */
@@ -883,6 +958,8 @@ bad_iret:
 	 * We are now running with the kernel GS after exception recovery.
 	 * But error_entry expects us to have user GS to match the user %cs,
 	 * so swap back.
+	 *
+	 * lret faults land here, too.
 	 */
 	pushq $0
 
@@ -1412,7 +1489,7 @@ error_sti:
 	ret
 
 /*
- * There are two places in the kernel that can potentially fault with
+ * There are three places in the kernel that can potentially fault with
  * usergs. Handle them here. The exception handlers after iret run with
  * kernel gs again, so don't set the user space flag. B stepping K8s
 * sometimes report a truncated RIP for IRET exceptions returning to
@@ -1428,6 +1505,8 @@ error_kernelspace:
 	je bstep_iret
 	cmpq $gs_change,RIP+8(%rsp)
 	je error_swapgs
+	cmpq $native_lret_to_userspace,RIP+8(%rsp)
+	je error_lret
 	jmp error_sti
 
 bstep_iret:
@@ -1435,6 +1514,16 @@ bstep_iret:
 	movq %rcx,RIP+8(%rsp)
 	jmp error_swapgs
 	CFI_ENDPROC
+
+error_lret:
+	/*
+	 * We can't return from this fault with IF set because we'll lose
+	 * the sti grace period.  Fix up the fault so that it looks just
+	 * like an iret fault instead.
+	 */
+	addq $4*8,RSP+8(%rsp)			/* pop the lret frame */
+	andl $~X86_EFLAGS_IF,EFLAGS+8(%rsp)	/* clear IF */
+	jmp error_swapgs			/* return w/ kernel GS */
 END(error_entry)
 
 
@@ -1706,7 +1795,7 @@ nmi_restore:
 
 	/* Clear the NMI executing stack variable */
 	movq $0, 5*8(%rsp)
-	jmp irq_return
+	INTERRUPT_RETURN_UNBLOCK_NMI
 	CFI_ENDPROC
 END(nmi)
 
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index c3e985d..ca8be8e 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -306,12 +306,33 @@ NOKPROBE_SYMBOL(unknown_nmi_error);
 static DEFINE_PER_CPU(bool, swallow_nmi);
 static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
 
+#ifdef CONFIG_X86_64
+void fixup_lret_nmi(struct pt_regs *regs)
+{
+	/*
+	 * There is no architectural guarantee that an NMI or MCE can't
+	 * happen between sti and lret.  To avoid returning to the lret
+	 * instruction with interrupts on, we back up one instruction.
+	 */
+	extern const char native_lret_to_userspace[];
+	extern const char native_sti_before_lret_to_userspace[];
+
+	if (!user_mode_vm(regs) &&
+	    regs->ip == (unsigned long)native_lret_to_userspace) {
+		regs->ip = (unsigned long)native_sti_before_lret_to_userspace;
+		regs->flags &= ~X86_EFLAGS_IF;
+	}
+}
+#endif
+
 static void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
 	int handled;
 	bool b2b = false;
 
+	fixup_lret_nmi(regs);
+
 	/*
 	 * CPU-specific NMI must be processed before non-CPU-specific
 	 * NMI, otherwise we may lose it, because the CPU-specific
-- 
1.9.3


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [tip:x86/urgent] x86_64/entry/xen: Do not invoke espfix64 on Xen
  2014-07-23 15:34 ` [PATCH 1/2] x86_64,entry,xen: Do not invoke espfix64 on Xen Andy Lutomirski
@ 2014-07-28 22:33   ` tip-bot for Andy Lutomirski
  2014-07-29  3:39     ` Andy Lutomirski
  0 siblings, 1 reply; 6+ messages in thread
From: tip-bot for Andy Lutomirski @ 2014-07-28 22:33 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, luto, hpa, mingo, stable, tglx, hpa

Commit-ID:  7209a75d2009dbf7745e2fd354abf25c3deb3ca3
Gitweb:     http://git.kernel.org/tip/7209a75d2009dbf7745e2fd354abf25c3deb3ca3
Author:     Andy Lutomirski <luto@amacapital.net>
AuthorDate: Wed, 23 Jul 2014 08:34:11 -0700
Committer:  H. Peter Anvin <hpa@linux.intel.com>
CommitDate: Mon, 28 Jul 2014 15:25:40 -0700

x86_64/entry/xen: Do not invoke espfix64 on Xen

This moves the espfix64 logic into native_iret.  To make this work,
it gets rid of the native patch for INTERRUPT_RETURN:
INTERRUPT_RETURN on native kernels is now 'jmp native_iret'.

This changes the 16-bit SS behavior on Xen from OOPSing to leaking
some bits of the Xen hypervisor's RSP (I think).

[ hpa: this is a nonzero cost on native, but probably not enough to
  measure. Xen needs to fix this in their own code, probably doing
  something equivalent to espfix64. ]

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/7b8f1d8ef6597cb16ae004a43c56980a7de3cf94.1406129132.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: <stable@vger.kernel.org>
---
 arch/x86/include/asm/irqflags.h     |  2 +-
 arch/x86/kernel/entry_64.S          | 28 ++++++++++------------------
 arch/x86/kernel/paravirt_patch_64.c |  2 --
 3 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index bba3cf8..0a8b519 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -129,7 +129,7 @@ static inline notrace unsigned long arch_local_irq_save(void)
 
 #define PARAVIRT_ADJUST_EXCEPTION_FRAME	/*  */
 
-#define INTERRUPT_RETURN	iretq
+#define INTERRUPT_RETURN	jmp native_iret
 #define USERGS_SYSRET64				\
 	swapgs;					\
 	sysretq;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b25ca96..c844f08 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -830,27 +830,24 @@ restore_args:
 	RESTORE_ARGS 1,8,1
 
 irq_return:
+	INTERRUPT_RETURN
+
+ENTRY(native_iret)
 	/*
 	 * Are we returning to a stack segment from the LDT?  Note: in
 	 * 64-bit mode SS:RSP on the exception stack is always valid.
 	 */
 #ifdef CONFIG_X86_ESPFIX64
 	testb $4,(SS-RIP)(%rsp)
-	jnz irq_return_ldt
+	jnz native_irq_return_ldt
 #endif
 
-irq_return_iret:
-	INTERRUPT_RETURN
-	_ASM_EXTABLE(irq_return_iret, bad_iret)
-
-#ifdef CONFIG_PARAVIRT
-ENTRY(native_iret)
+native_irq_return_iret:
 	iretq
-	_ASM_EXTABLE(native_iret, bad_iret)
-#endif
+	_ASM_EXTABLE(native_irq_return_iret, bad_iret)
 
 #ifdef CONFIG_X86_ESPFIX64
-irq_return_ldt:
+native_irq_return_ldt:
 	pushq_cfi %rax
 	pushq_cfi %rdi
 	SWAPGS
@@ -872,7 +869,7 @@ irq_return_ldt:
 	SWAPGS
 	movq %rax,%rsp
 	popq_cfi %rax
-	jmp irq_return_iret
+	jmp native_irq_return_iret
 #endif
 
 	.section .fixup,"ax"
@@ -956,13 +953,8 @@ __do_double_fault:
 	cmpl $__KERNEL_CS,CS(%rdi)
 	jne do_double_fault
 	movq RIP(%rdi),%rax
-	cmpq $irq_return_iret,%rax
-#ifdef CONFIG_PARAVIRT
-	je 1f
-	cmpq $native_iret,%rax
-#endif
+	cmpq $native_irq_return_iret,%rax
 	jne do_double_fault		/* This shouldn't happen... */
-1:
 	movq PER_CPU_VAR(kernel_stack),%rax
 	subq $(6*8-KERNEL_STACK_OFFSET),%rax	/* Reset to original stack */
 	movq %rax,RSP(%rdi)
@@ -1428,7 +1420,7 @@ error_sti:
  */
 error_kernelspace:
 	incl %ebx
-	leaq irq_return_iret(%rip),%rcx
+	leaq native_irq_return_iret(%rip),%rcx
 	cmpq %rcx,RIP+8(%rsp)
 	je error_swapgs
 	movl %ecx,%eax	/* zero extend */
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 3f08f34..a1da673 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -6,7 +6,6 @@ DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
 DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
 DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq");
 DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
-DEF_NATIVE(pv_cpu_ops, iret, "iretq");
 DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
@@ -50,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_irq_ops, save_fl);
 		PATCH_SITE(pv_irq_ops, irq_enable);
 		PATCH_SITE(pv_irq_ops, irq_disable);
-		PATCH_SITE(pv_cpu_ops, iret);
 		PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
 		PATCH_SITE(pv_cpu_ops, usergs_sysret32);
 		PATCH_SITE(pv_cpu_ops, usergs_sysret64);

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [tip:x86/urgent] x86_64/entry/xen: Do not invoke espfix64 on Xen
  2014-07-28 22:33   ` [tip:x86/urgent] x86_64/entry/xen: " tip-bot for Andy Lutomirski
@ 2014-07-29  3:39     ` Andy Lutomirski
  2014-07-29  3:47       ` H. Peter Anvin
  0 siblings, 1 reply; 6+ messages in thread
From: Andy Lutomirski @ 2014-07-29  3:39 UTC (permalink / raw)
  To: Ingo Molnar, H. Peter Anvin, Andy Lutomirski, linux-kernel,
	Thomas Gleixner, stable, H. Peter Anvin
  Cc: linux-tip-commits

On Mon, Jul 28, 2014 at 3:33 PM, tip-bot for Andy Lutomirski
<tipbot@zytor.com> wrote:
> Commit-ID:  7209a75d2009dbf7745e2fd354abf25c3deb3ca3
> Gitweb:     http://git.kernel.org/tip/7209a75d2009dbf7745e2fd354abf25c3deb3ca3
> Author:     Andy Lutomirski <luto@amacapital.net>
> AuthorDate: Wed, 23 Jul 2014 08:34:11 -0700
> Committer:  H. Peter Anvin <hpa@linux.intel.com>
> CommitDate: Mon, 28 Jul 2014 15:25:40 -0700
>
> x86_64/entry/xen: Do not invoke espfix64 on Xen
>
> This moves the espfix64 logic into native_iret.  To make this work,
> it gets rid of the native patch for INTERRUPT_RETURN:
> INTERRUPT_RETURN on native kernels is now 'jmp native_iret'.
>
> This changes the 16-bit SS behavior on Xen from OOPSing to leaking
> some bits of the Xen hypervisor's RSP (I think).
>
> [ hpa: this is a nonzero cost on native, but probably not enough to
>   measure. Xen needs to fix this in their own code, probably doing
>   something equivalent to espfix64. ]

Any plan to either fix or remove the buggy bsp-only initialization of
espfix64 on paravirt?  It's harmless now, but IMO it's still ugly.  Or
should this wait for 3.17?

--Andy

>
> Signed-off-by: Andy Lutomirski <luto@amacapital.net>
> Link: http://lkml.kernel.org/r/7b8f1d8ef6597cb16ae004a43c56980a7de3cf94.1406129132.git.luto@amacapital.net
> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
> Cc: <stable@vger.kernel.org>
> ---
>  arch/x86/include/asm/irqflags.h     |  2 +-
>  arch/x86/kernel/entry_64.S          | 28 ++++++++++------------------
>  arch/x86/kernel/paravirt_patch_64.c |  2 --
>  3 files changed, 11 insertions(+), 21 deletions(-)
>
> diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
> index bba3cf8..0a8b519 100644
> --- a/arch/x86/include/asm/irqflags.h
> +++ b/arch/x86/include/asm/irqflags.h
> @@ -129,7 +129,7 @@ static inline notrace unsigned long arch_local_irq_save(void)
>
>  #define PARAVIRT_ADJUST_EXCEPTION_FRAME        /*  */
>
> -#define INTERRUPT_RETURN       iretq
> +#define INTERRUPT_RETURN       jmp native_iret
>  #define USERGS_SYSRET64                                \
>         swapgs;                                 \
>         sysretq;
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index b25ca96..c844f08 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -830,27 +830,24 @@ restore_args:
>         RESTORE_ARGS 1,8,1
>
>  irq_return:
> +       INTERRUPT_RETURN
> +
> +ENTRY(native_iret)
>         /*
>          * Are we returning to a stack segment from the LDT?  Note: in
>          * 64-bit mode SS:RSP on the exception stack is always valid.
>          */
>  #ifdef CONFIG_X86_ESPFIX64
>         testb $4,(SS-RIP)(%rsp)
> -       jnz irq_return_ldt
> +       jnz native_irq_return_ldt
>  #endif
>
> -irq_return_iret:
> -       INTERRUPT_RETURN
> -       _ASM_EXTABLE(irq_return_iret, bad_iret)
> -
> -#ifdef CONFIG_PARAVIRT
> -ENTRY(native_iret)
> +native_irq_return_iret:
>         iretq
> -       _ASM_EXTABLE(native_iret, bad_iret)
> -#endif
> +       _ASM_EXTABLE(native_irq_return_iret, bad_iret)
>
>  #ifdef CONFIG_X86_ESPFIX64
> -irq_return_ldt:
> +native_irq_return_ldt:
>         pushq_cfi %rax
>         pushq_cfi %rdi
>         SWAPGS
> @@ -872,7 +869,7 @@ irq_return_ldt:
>         SWAPGS
>         movq %rax,%rsp
>         popq_cfi %rax
> -       jmp irq_return_iret
> +       jmp native_irq_return_iret
>  #endif
>
>         .section .fixup,"ax"
> @@ -956,13 +953,8 @@ __do_double_fault:
>         cmpl $__KERNEL_CS,CS(%rdi)
>         jne do_double_fault
>         movq RIP(%rdi),%rax
> -       cmpq $irq_return_iret,%rax
> -#ifdef CONFIG_PARAVIRT
> -       je 1f
> -       cmpq $native_iret,%rax
> -#endif
> +       cmpq $native_irq_return_iret,%rax
>         jne do_double_fault             /* This shouldn't happen... */
> -1:
>         movq PER_CPU_VAR(kernel_stack),%rax
>         subq $(6*8-KERNEL_STACK_OFFSET),%rax    /* Reset to original stack */
>         movq %rax,RSP(%rdi)
> @@ -1428,7 +1420,7 @@ error_sti:
>   */
>  error_kernelspace:
>         incl %ebx
> -       leaq irq_return_iret(%rip),%rcx
> +       leaq native_irq_return_iret(%rip),%rcx
>         cmpq %rcx,RIP+8(%rsp)
>         je error_swapgs
>         movl %ecx,%eax  /* zero extend */
> diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
> index 3f08f34..a1da673 100644
> --- a/arch/x86/kernel/paravirt_patch_64.c
> +++ b/arch/x86/kernel/paravirt_patch_64.c
> @@ -6,7 +6,6 @@ DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
>  DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
>  DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq");
>  DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
> -DEF_NATIVE(pv_cpu_ops, iret, "iretq");
>  DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
>  DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
>  DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
> @@ -50,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
>                 PATCH_SITE(pv_irq_ops, save_fl);
>                 PATCH_SITE(pv_irq_ops, irq_enable);
>                 PATCH_SITE(pv_irq_ops, irq_disable);
> -               PATCH_SITE(pv_cpu_ops, iret);
>                 PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
>                 PATCH_SITE(pv_cpu_ops, usergs_sysret32);
>                 PATCH_SITE(pv_cpu_ops, usergs_sysret64);



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [tip:x86/urgent] x86_64/entry/xen: Do not invoke espfix64 on Xen
  2014-07-29  3:39     ` Andy Lutomirski
@ 2014-07-29  3:47       ` H. Peter Anvin
  0 siblings, 0 replies; 6+ messages in thread
From: H. Peter Anvin @ 2014-07-29  3:47 UTC (permalink / raw)
  To: Andy Lutomirski, Ingo Molnar, linux-kernel, Thomas Gleixner,
	stable, H. Peter Anvin
  Cc: linux-tip-commits

I would like to absolutely minimize changes at this point.

On July 28, 2014 8:39:43 PM PDT, Andy Lutomirski <luto@amacapital.net> wrote:
>On Mon, Jul 28, 2014 at 3:33 PM, tip-bot for Andy Lutomirski
><tipbot@zytor.com> wrote:
>> Commit-ID:  7209a75d2009dbf7745e2fd354abf25c3deb3ca3
>> Gitweb:    
>http://git.kernel.org/tip/7209a75d2009dbf7745e2fd354abf25c3deb3ca3
>> Author:     Andy Lutomirski <luto@amacapital.net>
>> AuthorDate: Wed, 23 Jul 2014 08:34:11 -0700
>> Committer:  H. Peter Anvin <hpa@linux.intel.com>
>> CommitDate: Mon, 28 Jul 2014 15:25:40 -0700
>>
>> x86_64/entry/xen: Do not invoke espfix64 on Xen
>>
>> This moves the espfix64 logic into native_iret.  To make this work,
>> it gets rid of the native patch for INTERRUPT_RETURN:
>> INTERRUPT_RETURN on native kernels is now 'jmp native_iret'.
>>
>> This changes the 16-bit SS behavior on Xen from OOPSing to leaking
>> some bits of the Xen hypervisor's RSP (I think).
>>
>> [ hpa: this is a nonzero cost on native, but probably not enough to
>>   measure. Xen needs to fix this in their own code, probably doing
>>   something equivalent to espfix64. ]
>
>Any plan to either fix or remove the buggy bsp-only initialization of
>espfix64 on paravirt?  It's harmless now, but IMO it's still ugly.  Or
>should this wait for 3.17?
>
>--Andy
>
>>
>> Signed-off-by: Andy Lutomirski <luto@amacapital.net>
>> Link:
>http://lkml.kernel.org/r/7b8f1d8ef6597cb16ae004a43c56980a7de3cf94.1406129132.git.luto@amacapital.net
>> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
>> Cc: <stable@vger.kernel.org>
>> ---
>>  arch/x86/include/asm/irqflags.h     |  2 +-
>>  arch/x86/kernel/entry_64.S          | 28
>++++++++++------------------
>>  arch/x86/kernel/paravirt_patch_64.c |  2 --
>>  3 files changed, 11 insertions(+), 21 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/irqflags.h
>b/arch/x86/include/asm/irqflags.h
>> index bba3cf8..0a8b519 100644
>> --- a/arch/x86/include/asm/irqflags.h
>> +++ b/arch/x86/include/asm/irqflags.h
>> @@ -129,7 +129,7 @@ static inline notrace unsigned long
>arch_local_irq_save(void)
>>
>>  #define PARAVIRT_ADJUST_EXCEPTION_FRAME        /*  */
>>
>> -#define INTERRUPT_RETURN       iretq
>> +#define INTERRUPT_RETURN       jmp native_iret
>>  #define USERGS_SYSRET64                                \
>>         swapgs;                                 \
>>         sysretq;
>> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
>> index b25ca96..c844f08 100644
>> --- a/arch/x86/kernel/entry_64.S
>> +++ b/arch/x86/kernel/entry_64.S
>> @@ -830,27 +830,24 @@ restore_args:
>>         RESTORE_ARGS 1,8,1
>>
>>  irq_return:
>> +       INTERRUPT_RETURN
>> +
>> +ENTRY(native_iret)
>>         /*
>>          * Are we returning to a stack segment from the LDT?  Note:
>in
>>          * 64-bit mode SS:RSP on the exception stack is always valid.
>>          */
>>  #ifdef CONFIG_X86_ESPFIX64
>>         testb $4,(SS-RIP)(%rsp)
>> -       jnz irq_return_ldt
>> +       jnz native_irq_return_ldt
>>  #endif
>>
>> -irq_return_iret:
>> -       INTERRUPT_RETURN
>> -       _ASM_EXTABLE(irq_return_iret, bad_iret)
>> -
>> -#ifdef CONFIG_PARAVIRT
>> -ENTRY(native_iret)
>> +native_irq_return_iret:
>>         iretq
>> -       _ASM_EXTABLE(native_iret, bad_iret)
>> -#endif
>> +       _ASM_EXTABLE(native_irq_return_iret, bad_iret)
>>
>>  #ifdef CONFIG_X86_ESPFIX64
>> -irq_return_ldt:
>> +native_irq_return_ldt:
>>         pushq_cfi %rax
>>         pushq_cfi %rdi
>>         SWAPGS
>> @@ -872,7 +869,7 @@ irq_return_ldt:
>>         SWAPGS
>>         movq %rax,%rsp
>>         popq_cfi %rax
>> -       jmp irq_return_iret
>> +       jmp native_irq_return_iret
>>  #endif
>>
>>         .section .fixup,"ax"
>> @@ -956,13 +953,8 @@ __do_double_fault:
>>         cmpl $__KERNEL_CS,CS(%rdi)
>>         jne do_double_fault
>>         movq RIP(%rdi),%rax
>> -       cmpq $irq_return_iret,%rax
>> -#ifdef CONFIG_PARAVIRT
>> -       je 1f
>> -       cmpq $native_iret,%rax
>> -#endif
>> +       cmpq $native_irq_return_iret,%rax
>>         jne do_double_fault             /* This shouldn't happen...
>*/
>> -1:
>>         movq PER_CPU_VAR(kernel_stack),%rax
>>         subq $(6*8-KERNEL_STACK_OFFSET),%rax    /* Reset to original
>stack */
>>         movq %rax,RSP(%rdi)
>> @@ -1428,7 +1420,7 @@ error_sti:
>>   */
>>  error_kernelspace:
>>         incl %ebx
>> -       leaq irq_return_iret(%rip),%rcx
>> +       leaq native_irq_return_iret(%rip),%rcx
>>         cmpq %rcx,RIP+8(%rsp)
>>         je error_swapgs
>>         movl %ecx,%eax  /* zero extend */
>> diff --git a/arch/x86/kernel/paravirt_patch_64.c
>b/arch/x86/kernel/paravirt_patch_64.c
>> index 3f08f34..a1da673 100644
>> --- a/arch/x86/kernel/paravirt_patch_64.c
>> +++ b/arch/x86/kernel/paravirt_patch_64.c
>> @@ -6,7 +6,6 @@ DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
>>  DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
>>  DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq");
>>  DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
>> -DEF_NATIVE(pv_cpu_ops, iret, "iretq");
>>  DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
>>  DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
>>  DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
>> @@ -50,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void
>*ibuf,
>>                 PATCH_SITE(pv_irq_ops, save_fl);
>>                 PATCH_SITE(pv_irq_ops, irq_enable);
>>                 PATCH_SITE(pv_irq_ops, irq_disable);
>> -               PATCH_SITE(pv_cpu_ops, iret);
>>                 PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
>>                 PATCH_SITE(pv_cpu_ops, usergs_sysret32);
>>                 PATCH_SITE(pv_cpu_ops, usergs_sysret64);

-- 
Sent from my mobile phone.  Please pardon brevity and lack of formatting.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, back to index

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-07-23 15:34 [PATCH 0/2] x86_64: LRET to userspace Andy Lutomirski
2014-07-23 15:34 ` [PATCH 1/2] x86_64,entry,xen: Do not invoke espfix64 on Xen Andy Lutomirski
2014-07-28 22:33   ` [tip:x86/urgent] x86_64/entry/xen: " tip-bot for Andy Lutomirski
2014-07-29  3:39     ` Andy Lutomirski
2014-07-29  3:47       ` H. Peter Anvin
2014-07-23 15:34 ` [PATCH 2/2] x86_64,entry: Use lret to return to userspace when possible Andy Lutomirski

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git
	git clone --mirror https://lore.kernel.org/lkml/9 lkml/git/9.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git