All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] FSGSBASE fix, test, and a semi-related cleanup
@ 2019-07-02  3:43 Andy Lutomirski
  2019-07-02  3:43 ` [PATCH 1/3] selftests/x86: Test SYSCALL and SYSENTER manually with TF set Andy Lutomirski
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Andy Lutomirski @ 2019-07-02  3:43 UTC (permalink / raw)
  To: LKML
  Cc: x86, Bae, Chang Seok, Borislav Petkov, Peter Zijlstra, Andy Lutomirski

In -tip, if FSGSBASE and PTI are on, the kernel crashes if SYSENTER
happens with TF set.  It also crashes under if a non-NMI paranoid
entry happens for any other reason from kernel mode with user GSBASE
and user CR3, e.g. due to MOV SS shenanigans.

This series fixes the bug.  It also adds another test to make sure
we exercise SYSENTER with TF set regardless of what vendor's CPU
we're on, although the test isn't needed to detect the bug: the
single_step_syscall_32 and mov_ss_trap_* tests also trigger it.  And
it compiles ignore_sysret out on IA32_EMULATION kernels -- I wasted
a couple minutes while debugging this wondering whether I was
accidentally triggering ignore_sysret.

Andy Lutomirski (3):
  selftests/x86: Test SYSCALL and SYSENTER manually with TF set
  x86/entry/64: Don't compile ignore_sysret if 32-bit emulation is
    enabled
  x86/entry/64: Fix and clean up paranoid_exit

 arch/x86/entry/entry_64.S                     |  39 +++---
 tools/testing/selftests/x86/Makefile          |   5 +-
 .../testing/selftests/x86/syscall_arg_fault.c | 112 +++++++++++++++++-
 3 files changed, 133 insertions(+), 23 deletions(-)

-- 
2.21.0


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/3] selftests/x86: Test SYSCALL and SYSENTER manually with TF set
  2019-07-02  3:43 [PATCH 0/3] FSGSBASE fix, test, and a semi-related cleanup Andy Lutomirski
@ 2019-07-02  3:43 ` Andy Lutomirski
  2019-07-02  6:49   ` [tip:x86/cpu] " tip-bot for Andy Lutomirski
  2019-07-02  3:43 ` [PATCH 2/3] x86/entry/64: Don't compile ignore_sysret if 32-bit emulation is enabled Andy Lutomirski
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 8+ messages in thread
From: Andy Lutomirski @ 2019-07-02  3:43 UTC (permalink / raw)
  To: LKML
  Cc: x86, Bae, Chang Seok, Borislav Petkov, Peter Zijlstra,
	Andy Lutomirski, Vegard Nossum, Paolo Bonzini, kvm

Make sure that we exercise both variants of the nasty
TF-in-compat-syscall regardless of what vendor's CPU is running the
tests.

Also change the intentional signal after SYSCALL to use ud2, which
is a lot more comprehensible.

This crashes the kernel due to an FSGSBASE bug right now.

This test *also* detects a bug in KVM when run on an Intel host.
KVM people, feel free to use it to help debug.  There's a bunch of
code in this test to warn instead of infinite looping when the bug
gets triggered.

Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Cc: "Bae, Chang Seok" <chang.seok.bae@intel.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: kvm@vger.kernel.org
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 tools/testing/selftests/x86/Makefile          |   5 +-
 .../testing/selftests/x86/syscall_arg_fault.c | 112 +++++++++++++++++-
 2 files changed, 110 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 186520198de7..fa07d526fe39 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -12,8 +12,9 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
 			check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
-			protection_keys test_vdso test_vsyscall mov_ss_trap
-TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
+			protection_keys test_vdso test_vsyscall mov_ss_trap \
+			syscall_arg_fault
+TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
 TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c
index 4e25d38c8bbd..bc0ecc2e862e 100644
--- a/tools/testing/selftests/x86/syscall_arg_fault.c
+++ b/tools/testing/selftests/x86/syscall_arg_fault.c
@@ -15,9 +15,30 @@
 #include <setjmp.h>
 #include <errno.h>
 
+#ifdef __x86_64__
+# define WIDTH "q"
+#else
+# define WIDTH "l"
+#endif
+
 /* Our sigaltstack scratch space. */
 static unsigned char altstack_data[SIGSTKSZ];
 
+static unsigned long get_eflags(void)
+{
+	unsigned long eflags;
+	asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
+	return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+	asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
+		      : : "rm" (eflags) : "flags");
+}
+
+#define X86_EFLAGS_TF (1UL << 8)
+
 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
 		       int flags)
 {
@@ -35,13 +56,22 @@ static sigjmp_buf jmpbuf;
 
 static volatile sig_atomic_t n_errs;
 
+#ifdef __x86_64__
+#define REG_AX REG_RAX
+#define REG_IP REG_RIP
+#else
+#define REG_AX REG_EAX
+#define REG_IP REG_EIP
+#endif
+
 static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void)
 {
 	ucontext_t *ctx = (ucontext_t*)ctx_void;
+	long ax = (long)ctx->uc_mcontext.gregs[REG_AX];
 
-	if (ctx->uc_mcontext.gregs[REG_EAX] != -EFAULT) {
-		printf("[FAIL]\tAX had the wrong value: 0x%x\n",
-		       ctx->uc_mcontext.gregs[REG_EAX]);
+	if (ax != -EFAULT && ax != -ENOSYS) {
+		printf("[FAIL]\tAX had the wrong value: 0x%lx\n",
+		       (unsigned long)ax);
 		n_errs++;
 	} else {
 		printf("[OK]\tSeems okay\n");
@@ -50,9 +80,42 @@ static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void)
 	siglongjmp(jmpbuf, 1);
 }
 
+static volatile sig_atomic_t sigtrap_consecutive_syscalls;
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+	/*
+	 * KVM has some bugs that can cause us to stop making progress.
+	 * detect them and complain, but don't infinite loop or fail the
+	 * test.
+	 */
+
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+	unsigned short *ip = (unsigned short *)ctx->uc_mcontext.gregs[REG_IP];
+
+	if (*ip == 0x340f || *ip == 0x050f) {
+		/* The trap was on SYSCALL or SYSENTER */
+		sigtrap_consecutive_syscalls++;
+		if (sigtrap_consecutive_syscalls > 3) {
+			printf("[WARN]\tGot stuck single-stepping -- you probably have a KVM bug\n");
+			siglongjmp(jmpbuf, 1);
+		}
+	} else {
+		sigtrap_consecutive_syscalls = 0;
+	}
+}
+
 static void sigill(int sig, siginfo_t *info, void *ctx_void)
 {
-	printf("[SKIP]\tIllegal instruction\n");
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+	unsigned short *ip = (unsigned short *)ctx->uc_mcontext.gregs[REG_IP];
+
+	if (*ip == 0x0b0f) {
+		/* one of the ud2 instructions faulted */
+		printf("[OK]\tSYSCALL returned normally\n");
+	} else {
+		printf("[SKIP]\tIllegal instruction\n");
+	}
 	siglongjmp(jmpbuf, 1);
 }
 
@@ -120,9 +183,48 @@ int main()
 			"movl $-1, %%ebp\n\t"
 			"movl $-1, %%esp\n\t"
 			"syscall\n\t"
-			"pushl $0"	/* make sure we segfault cleanly */
+			"ud2"		/* make sure we recover cleanly */
+			: : : "memory", "flags");
+	}
+
+	printf("[RUN]\tSYSENTER with TF and invalid state\n");
+	sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
+
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		sigtrap_consecutive_syscalls = 0;
+		set_eflags(get_eflags() | X86_EFLAGS_TF);
+		asm volatile (
+			"movl $-1, %%eax\n\t"
+			"movl $-1, %%ebx\n\t"
+			"movl $-1, %%ecx\n\t"
+			"movl $-1, %%edx\n\t"
+			"movl $-1, %%esi\n\t"
+			"movl $-1, %%edi\n\t"
+			"movl $-1, %%ebp\n\t"
+			"movl $-1, %%esp\n\t"
+			"sysenter"
+			: : : "memory", "flags");
+	}
+	set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+
+	printf("[RUN]\tSYSCALL with TF and invalid state\n");
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		sigtrap_consecutive_syscalls = 0;
+		set_eflags(get_eflags() | X86_EFLAGS_TF);
+		asm volatile (
+			"movl $-1, %%eax\n\t"
+			"movl $-1, %%ebx\n\t"
+			"movl $-1, %%ecx\n\t"
+			"movl $-1, %%edx\n\t"
+			"movl $-1, %%esi\n\t"
+			"movl $-1, %%edi\n\t"
+			"movl $-1, %%ebp\n\t"
+			"movl $-1, %%esp\n\t"
+			"syscall\n\t"
+			"ud2"		/* make sure we recover cleanly */
 			: : : "memory", "flags");
 	}
+	set_eflags(get_eflags() & ~X86_EFLAGS_TF);
 
 	return 0;
 }
-- 
2.21.0


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 2/3] x86/entry/64: Don't compile ignore_sysret if 32-bit emulation is enabled
  2019-07-02  3:43 [PATCH 0/3] FSGSBASE fix, test, and a semi-related cleanup Andy Lutomirski
  2019-07-02  3:43 ` [PATCH 1/3] selftests/x86: Test SYSCALL and SYSENTER manually with TF set Andy Lutomirski
@ 2019-07-02  3:43 ` Andy Lutomirski
  2019-07-02  6:49   ` [tip:x86/cpu] " tip-bot for Andy Lutomirski
  2019-07-02  3:43 ` [PATCH 3/3] x86/entry/64: Fix and clean up paranoid_exit Andy Lutomirski
  2019-07-02  3:57 ` [PATCH 0/3] FSGSBASE fix, test, and a semi-related cleanup Andy Lutomirski
  3 siblings, 1 reply; 8+ messages in thread
From: Andy Lutomirski @ 2019-07-02  3:43 UTC (permalink / raw)
  To: LKML
  Cc: x86, Bae, Chang Seok, Borislav Petkov, Peter Zijlstra, Andy Lutomirski

It's only used if !CONFIG_IA32_EMULATION, so disable it in normal
configs.  This will save a few bytes of text and reduce confusion.

Cc: "Bae, Chang Seok" <chang.seok.bae@intel.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/entry/entry_64.S | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 7f9f5119d6b1..54b1b0468b2b 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1743,11 +1743,17 @@ nmi_restore:
 	iretq
 END(nmi)
 
+#ifndef CONFIG_IA32_EMULATION
+/*
+ * This handles SYSCALL from 32-bit code.  There is no way to program
+ * MSRs to fully disable 32-bit SYSCALL.
+ */
 ENTRY(ignore_sysret)
 	UNWIND_HINT_EMPTY
 	mov	$-ENOSYS, %eax
 	sysret
 END(ignore_sysret)
+#endif
 
 ENTRY(rewind_stack_do_exit)
 	UNWIND_HINT_FUNC
-- 
2.21.0


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 3/3] x86/entry/64: Fix and clean up paranoid_exit
  2019-07-02  3:43 [PATCH 0/3] FSGSBASE fix, test, and a semi-related cleanup Andy Lutomirski
  2019-07-02  3:43 ` [PATCH 1/3] selftests/x86: Test SYSCALL and SYSENTER manually with TF set Andy Lutomirski
  2019-07-02  3:43 ` [PATCH 2/3] x86/entry/64: Don't compile ignore_sysret if 32-bit emulation is enabled Andy Lutomirski
@ 2019-07-02  3:43 ` Andy Lutomirski
  2019-07-02  6:50   ` [tip:x86/cpu] " tip-bot for Andy Lutomirski
  2019-07-02  3:57 ` [PATCH 0/3] FSGSBASE fix, test, and a semi-related cleanup Andy Lutomirski
  3 siblings, 1 reply; 8+ messages in thread
From: Andy Lutomirski @ 2019-07-02  3:43 UTC (permalink / raw)
  To: LKML
  Cc: x86, Bae, Chang Seok, Borislav Petkov, Peter Zijlstra,
	Andy Lutomirski, Vegard Nossum, Thomas Gleixner, H . Peter Anvin,
	Andi Kleen, Ravi Shankar

paranoid_exit needs to restore CR3 before GSBASE.  Doing it in the
opposite order crashes if the exception came from a context with
user GSBASE and user CR3 -- RESTORE_CR3 cannot resture user CR3 if
run with user GSBASE.  This results in infinitely recursing
exceptions if user code does SYSENTER with TF set if both FSGSBASE
and PTI are enabled.

The old code worked if user code just set TF without SYSENTER
because #DB from user mode is special cased in idtentry and
paranoid_exit doesn't run.

Fix it by cleaning up the spaghetti code.  All that paranoid_exit
needs to do is to disable IRQs, handle IRQ tracing, then restore
CR3, and restore GSBASE.  Simply do those actions in that order.

Fixes: 708078f65721 ("x86/entry/64: Handle FSGSBASE enabled paranoid entry/exit")
Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ravi Shankar <ravi.v.shankar@intel.com>
Cc: "Bae, Chang Seok" <chang.seok.bae@intel.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/entry/entry_64.S | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 54b1b0468b2b..670306f588bf 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1256,31 +1256,32 @@ END(paranoid_entry)
 ENTRY(paranoid_exit)
 	UNWIND_HINT_REGS
 	DISABLE_INTERRUPTS(CLBR_ANY)
-	TRACE_IRQS_OFF_DEBUG
 
-	/* Handle GS depending on FSGSBASE availability */
-	ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "nop",X86_FEATURE_FSGSBASE
+	/*
+	 * The order of operations is important.  IRQ tracing requires
+	 * kernel GSBASE and CR3.  RESTORE_CR3 requires kernel GS base.
+	 *
+	 * NB to anyone to tries to optimize this code: this code does
+	 * not execute at all for exceptions coming from user mode.  Those
+	 * exceptions go through error_exit instead.
+	 */
+	TRACE_IRQS_IRETQ_DEBUG
+	RESTORE_CR3	scratch_reg=%rax save_reg=%r14
+
+	/* Handle the three GSBASE cases. */
+	ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "", X86_FEATURE_FSGSBASE
 
 	/* With FSGSBASE enabled, unconditionally restore GSBASE */
 	wrgsbase	%rbx
-	jmp	.Lparanoid_exit_no_swapgs;
+	jmp	restore_regs_and_return_to_kernel
 
 .Lparanoid_exit_checkgs:
 	/* On non-FSGSBASE systems, conditionally do SWAPGS */
 	testl	%ebx, %ebx
-	jnz	.Lparanoid_exit_no_swapgs
-	TRACE_IRQS_IRETQ
-	/* Always restore stashed CR3 value (see paranoid_entry) */
-	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
-	SWAPGS_UNSAFE_STACK
-	jmp	.Lparanoid_exit_restore
-
-.Lparanoid_exit_no_swapgs:
-	TRACE_IRQS_IRETQ_DEBUG
-	/* Always restore stashed CR3 value (see paranoid_entry) */
-	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
+	jnz	restore_regs_and_return_to_kernel
 
-.Lparanoid_exit_restore:
+	/* We are returning to a context with user GSBASE. */
+	SWAPGS_UNSAFE_STACK
 	jmp	restore_regs_and_return_to_kernel
 END(paranoid_exit)
 
-- 
2.21.0


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 0/3] FSGSBASE fix, test, and a semi-related cleanup
  2019-07-02  3:43 [PATCH 0/3] FSGSBASE fix, test, and a semi-related cleanup Andy Lutomirski
                   ` (2 preceding siblings ...)
  2019-07-02  3:43 ` [PATCH 3/3] x86/entry/64: Fix and clean up paranoid_exit Andy Lutomirski
@ 2019-07-02  3:57 ` Andy Lutomirski
  3 siblings, 0 replies; 8+ messages in thread
From: Andy Lutomirski @ 2019-07-02  3:57 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: LKML, X86 ML, Bae, Chang Seok, Borislav Petkov, Peter Zijlstra

On Mon, Jul 1, 2019 at 8:43 PM Andy Lutomirski <luto@kernel.org> wrote:
>
> In -tip, if FSGSBASE and PTI are on, the kernel crashes if SYSENTER
> happens with TF set.  It also crashes under if a non-NMI paranoid
> entry happens for any other reason from kernel mode with user GSBASE
> and user CR3, e.g. due to MOV SS shenanigans.
>
> This series fixes the bug.  It also adds another test to make sure
> we exercise SYSENTER with TF set regardless of what vendor's CPU
> we're on, although the test isn't needed to detect the bug: the
> single_step_syscall_32 and mov_ss_trap_* tests also trigger it.  And
> it compiles ignore_sysret out on IA32_EMULATION kernels -- I wasted
> a couple minutes while debugging this wondering whether I was
> accidentally triggering ignore_sysret.

I forgot to mention: even with this applied, the x86/cpu tree is not
ready for prime time.  The fsgsbase test case fails on released
kernels and crashes on x86/cpu.  I haven't gotten to the bottom of it
yet.  The test code looks a bit dubious, but that doesn't necessarily
mean the kernel is okay.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [tip:x86/cpu] selftests/x86: Test SYSCALL and SYSENTER manually with TF set
  2019-07-02  3:43 ` [PATCH 1/3] selftests/x86: Test SYSCALL and SYSENTER manually with TF set Andy Lutomirski
@ 2019-07-02  6:49   ` tip-bot for Andy Lutomirski
  0 siblings, 0 replies; 8+ messages in thread
From: tip-bot for Andy Lutomirski @ 2019-07-02  6:49 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: bp, peterz, pbonzini, linux-kernel, tglx, chang.seok.bae, hpa,
	mingo, vegard.nossum, luto

Commit-ID:  9402eaf4c11f0b892eda7b2bcb4654ab34ce34f9
Gitweb:     https://git.kernel.org/tip/9402eaf4c11f0b892eda7b2bcb4654ab34ce34f9
Author:     Andy Lutomirski <luto@kernel.org>
AuthorDate: Mon, 1 Jul 2019 20:43:19 -0700
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Tue, 2 Jul 2019 08:45:20 +0200

selftests/x86: Test SYSCALL and SYSENTER manually with TF set

Make sure that both variants of the nasty TF-in-compat-syscall are
exercised regardless of what vendor's CPU is running the tests.

Also change the intentional signal after SYSCALL to use ud2, which
is a lot more comprehensible.

This crashes the kernel due to an FSGSBASE bug right now.

This test *also* detects a bug in KVM when run on an Intel host.  KVM
people, feel free to use it to help debug.  There's a bunch of code in this
test to warn instead of going into an infinite looping when the bug gets
triggered.

Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc:  "BaeChang Seok" <chang.seok.bae@intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: kvm@vger.kernel.org
Cc: "Bae, Chang Seok" <chang.seok.bae@intel.com>
Link: https://lkml.kernel.org/r/5f5de10441ab2e3005538b4c33be9b1965d1bb63.1562035429.git.luto@kernel.org

---
 tools/testing/selftests/x86/Makefile            |   5 +-
 tools/testing/selftests/x86/syscall_arg_fault.c | 112 ++++++++++++++++++++++--
 2 files changed, 110 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 186520198de7..fa07d526fe39 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -12,8 +12,9 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
 			check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
-			protection_keys test_vdso test_vsyscall mov_ss_trap
-TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
+			protection_keys test_vdso test_vsyscall mov_ss_trap \
+			syscall_arg_fault
+TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
 TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c
index 4e25d38c8bbd..bc0ecc2e862e 100644
--- a/tools/testing/selftests/x86/syscall_arg_fault.c
+++ b/tools/testing/selftests/x86/syscall_arg_fault.c
@@ -15,9 +15,30 @@
 #include <setjmp.h>
 #include <errno.h>
 
+#ifdef __x86_64__
+# define WIDTH "q"
+#else
+# define WIDTH "l"
+#endif
+
 /* Our sigaltstack scratch space. */
 static unsigned char altstack_data[SIGSTKSZ];
 
+static unsigned long get_eflags(void)
+{
+	unsigned long eflags;
+	asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
+	return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+	asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
+		      : : "rm" (eflags) : "flags");
+}
+
+#define X86_EFLAGS_TF (1UL << 8)
+
 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
 		       int flags)
 {
@@ -35,13 +56,22 @@ static sigjmp_buf jmpbuf;
 
 static volatile sig_atomic_t n_errs;
 
+#ifdef __x86_64__
+#define REG_AX REG_RAX
+#define REG_IP REG_RIP
+#else
+#define REG_AX REG_EAX
+#define REG_IP REG_EIP
+#endif
+
 static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void)
 {
 	ucontext_t *ctx = (ucontext_t*)ctx_void;
+	long ax = (long)ctx->uc_mcontext.gregs[REG_AX];
 
-	if (ctx->uc_mcontext.gregs[REG_EAX] != -EFAULT) {
-		printf("[FAIL]\tAX had the wrong value: 0x%x\n",
-		       ctx->uc_mcontext.gregs[REG_EAX]);
+	if (ax != -EFAULT && ax != -ENOSYS) {
+		printf("[FAIL]\tAX had the wrong value: 0x%lx\n",
+		       (unsigned long)ax);
 		n_errs++;
 	} else {
 		printf("[OK]\tSeems okay\n");
@@ -50,9 +80,42 @@ static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void)
 	siglongjmp(jmpbuf, 1);
 }
 
+static volatile sig_atomic_t sigtrap_consecutive_syscalls;
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+	/*
+	 * KVM has some bugs that can cause us to stop making progress.
+	 * detect them and complain, but don't infinite loop or fail the
+	 * test.
+	 */
+
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+	unsigned short *ip = (unsigned short *)ctx->uc_mcontext.gregs[REG_IP];
+
+	if (*ip == 0x340f || *ip == 0x050f) {
+		/* The trap was on SYSCALL or SYSENTER */
+		sigtrap_consecutive_syscalls++;
+		if (sigtrap_consecutive_syscalls > 3) {
+			printf("[WARN]\tGot stuck single-stepping -- you probably have a KVM bug\n");
+			siglongjmp(jmpbuf, 1);
+		}
+	} else {
+		sigtrap_consecutive_syscalls = 0;
+	}
+}
+
 static void sigill(int sig, siginfo_t *info, void *ctx_void)
 {
-	printf("[SKIP]\tIllegal instruction\n");
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+	unsigned short *ip = (unsigned short *)ctx->uc_mcontext.gregs[REG_IP];
+
+	if (*ip == 0x0b0f) {
+		/* one of the ud2 instructions faulted */
+		printf("[OK]\tSYSCALL returned normally\n");
+	} else {
+		printf("[SKIP]\tIllegal instruction\n");
+	}
 	siglongjmp(jmpbuf, 1);
 }
 
@@ -120,9 +183,48 @@ int main()
 			"movl $-1, %%ebp\n\t"
 			"movl $-1, %%esp\n\t"
 			"syscall\n\t"
-			"pushl $0"	/* make sure we segfault cleanly */
+			"ud2"		/* make sure we recover cleanly */
+			: : : "memory", "flags");
+	}
+
+	printf("[RUN]\tSYSENTER with TF and invalid state\n");
+	sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
+
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		sigtrap_consecutive_syscalls = 0;
+		set_eflags(get_eflags() | X86_EFLAGS_TF);
+		asm volatile (
+			"movl $-1, %%eax\n\t"
+			"movl $-1, %%ebx\n\t"
+			"movl $-1, %%ecx\n\t"
+			"movl $-1, %%edx\n\t"
+			"movl $-1, %%esi\n\t"
+			"movl $-1, %%edi\n\t"
+			"movl $-1, %%ebp\n\t"
+			"movl $-1, %%esp\n\t"
+			"sysenter"
+			: : : "memory", "flags");
+	}
+	set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+
+	printf("[RUN]\tSYSCALL with TF and invalid state\n");
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		sigtrap_consecutive_syscalls = 0;
+		set_eflags(get_eflags() | X86_EFLAGS_TF);
+		asm volatile (
+			"movl $-1, %%eax\n\t"
+			"movl $-1, %%ebx\n\t"
+			"movl $-1, %%ecx\n\t"
+			"movl $-1, %%edx\n\t"
+			"movl $-1, %%esi\n\t"
+			"movl $-1, %%edi\n\t"
+			"movl $-1, %%ebp\n\t"
+			"movl $-1, %%esp\n\t"
+			"syscall\n\t"
+			"ud2"		/* make sure we recover cleanly */
 			: : : "memory", "flags");
 	}
+	set_eflags(get_eflags() & ~X86_EFLAGS_TF);
 
 	return 0;
 }

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [tip:x86/cpu] x86/entry/64: Don't compile ignore_sysret if 32-bit emulation is enabled
  2019-07-02  3:43 ` [PATCH 2/3] x86/entry/64: Don't compile ignore_sysret if 32-bit emulation is enabled Andy Lutomirski
@ 2019-07-02  6:49   ` tip-bot for Andy Lutomirski
  0 siblings, 0 replies; 8+ messages in thread
From: tip-bot for Andy Lutomirski @ 2019-07-02  6:49 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: peterz, mingo, bp, linux-kernel, luto, tglx, hpa, chang.seok.bae

Commit-ID:  dffb3f9db6b593f3ed6ab4c8d8f10e0aa6aa7a88
Gitweb:     https://git.kernel.org/tip/dffb3f9db6b593f3ed6ab4c8d8f10e0aa6aa7a88
Author:     Andy Lutomirski <luto@kernel.org>
AuthorDate: Mon, 1 Jul 2019 20:43:20 -0700
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Tue, 2 Jul 2019 08:45:20 +0200

x86/entry/64: Don't compile ignore_sysret if 32-bit emulation is enabled

It's only used if !CONFIG_IA32_EMULATION, so disable it in normal
configs.  This will save a few bytes of text and reduce confusion.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc:  "BaeChang Seok" <chang.seok.bae@intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Bae, Chang Seok" <chang.seok.bae@intel.com>
Link: https://lkml.kernel.org/r/0f7dafa72fe7194689de5ee8cfe5d83509fabcf5.1562035429.git.luto@kernel.org

---
 arch/x86/entry/entry_64.S | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 7f9f5119d6b1..54b1b0468b2b 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1743,11 +1743,17 @@ nmi_restore:
 	iretq
 END(nmi)
 
+#ifndef CONFIG_IA32_EMULATION
+/*
+ * This handles SYSCALL from 32-bit code.  There is no way to program
+ * MSRs to fully disable 32-bit SYSCALL.
+ */
 ENTRY(ignore_sysret)
 	UNWIND_HINT_EMPTY
 	mov	$-ENOSYS, %eax
 	sysret
 END(ignore_sysret)
+#endif
 
 ENTRY(rewind_stack_do_exit)
 	UNWIND_HINT_FUNC

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [tip:x86/cpu] x86/entry/64: Fix and clean up paranoid_exit
  2019-07-02  3:43 ` [PATCH 3/3] x86/entry/64: Fix and clean up paranoid_exit Andy Lutomirski
@ 2019-07-02  6:50   ` tip-bot for Andy Lutomirski
  0 siblings, 0 replies; 8+ messages in thread
From: tip-bot for Andy Lutomirski @ 2019-07-02  6:50 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: mingo, vegard.nossum, bp, ravi.v.shankar, chang.seok.bae, ak,
	hpa, luto, tglx, peterz, linux-kernel

Commit-ID:  539bca535decb11a0861b6205c6684b8e908589b
Gitweb:     https://git.kernel.org/tip/539bca535decb11a0861b6205c6684b8e908589b
Author:     Andy Lutomirski <luto@kernel.org>
AuthorDate: Mon, 1 Jul 2019 20:43:21 -0700
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Tue, 2 Jul 2019 08:45:20 +0200

x86/entry/64: Fix and clean up paranoid_exit

paranoid_exit needs to restore CR3 before GSBASE.  Doing it in the opposite
order crashes if the exception came from a context with user GSBASE and
user CR3 -- RESTORE_CR3 cannot resture user CR3 if run with user GSBASE.
This results in infinitely recursing exceptions if user code does SYSENTER
with TF set if both FSGSBASE and PTI are enabled.

The old code worked if user code just set TF without SYSENTER because #DB
from user mode is special cased in idtentry and paranoid_exit doesn't run.

Fix it by cleaning up the spaghetti code.  All that paranoid_exit needs to
do is to disable IRQs, handle IRQ tracing, then restore CR3, and restore
GSBASE.  Simply do those actions in that order.

Fixes: 708078f65721 ("x86/entry/64: Handle FSGSBASE enabled paranoid entry/exit")
Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ravi Shankar <ravi.v.shankar@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Link: https://lkml.kernel.org/r/59725ceb08977359489fbed979716949ad45f616.1562035429.git.luto@kernel.org

---
 arch/x86/entry/entry_64.S | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 54b1b0468b2b..670306f588bf 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1256,31 +1256,32 @@ END(paranoid_entry)
 ENTRY(paranoid_exit)
 	UNWIND_HINT_REGS
 	DISABLE_INTERRUPTS(CLBR_ANY)
-	TRACE_IRQS_OFF_DEBUG
 
-	/* Handle GS depending on FSGSBASE availability */
-	ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "nop",X86_FEATURE_FSGSBASE
+	/*
+	 * The order of operations is important.  IRQ tracing requires
+	 * kernel GSBASE and CR3.  RESTORE_CR3 requires kernel GS base.
+	 *
+	 * NB to anyone to tries to optimize this code: this code does
+	 * not execute at all for exceptions coming from user mode.  Those
+	 * exceptions go through error_exit instead.
+	 */
+	TRACE_IRQS_IRETQ_DEBUG
+	RESTORE_CR3	scratch_reg=%rax save_reg=%r14
+
+	/* Handle the three GSBASE cases. */
+	ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "", X86_FEATURE_FSGSBASE
 
 	/* With FSGSBASE enabled, unconditionally restore GSBASE */
 	wrgsbase	%rbx
-	jmp	.Lparanoid_exit_no_swapgs;
+	jmp	restore_regs_and_return_to_kernel
 
 .Lparanoid_exit_checkgs:
 	/* On non-FSGSBASE systems, conditionally do SWAPGS */
 	testl	%ebx, %ebx
-	jnz	.Lparanoid_exit_no_swapgs
-	TRACE_IRQS_IRETQ
-	/* Always restore stashed CR3 value (see paranoid_entry) */
-	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
-	SWAPGS_UNSAFE_STACK
-	jmp	.Lparanoid_exit_restore
-
-.Lparanoid_exit_no_swapgs:
-	TRACE_IRQS_IRETQ_DEBUG
-	/* Always restore stashed CR3 value (see paranoid_entry) */
-	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
+	jnz	restore_regs_and_return_to_kernel
 
-.Lparanoid_exit_restore:
+	/* We are returning to a context with user GSBASE. */
+	SWAPGS_UNSAFE_STACK
 	jmp	restore_regs_and_return_to_kernel
 END(paranoid_exit)
 

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2019-07-02  6:50 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-02  3:43 [PATCH 0/3] FSGSBASE fix, test, and a semi-related cleanup Andy Lutomirski
2019-07-02  3:43 ` [PATCH 1/3] selftests/x86: Test SYSCALL and SYSENTER manually with TF set Andy Lutomirski
2019-07-02  6:49   ` [tip:x86/cpu] " tip-bot for Andy Lutomirski
2019-07-02  3:43 ` [PATCH 2/3] x86/entry/64: Don't compile ignore_sysret if 32-bit emulation is enabled Andy Lutomirski
2019-07-02  6:49   ` [tip:x86/cpu] " tip-bot for Andy Lutomirski
2019-07-02  3:43 ` [PATCH 3/3] x86/entry/64: Fix and clean up paranoid_exit Andy Lutomirski
2019-07-02  6:50   ` [tip:x86/cpu] " tip-bot for Andy Lutomirski
2019-07-02  3:57 ` [PATCH 0/3] FSGSBASE fix, test, and a semi-related cleanup Andy Lutomirski

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.