All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC/PATCH v2 v2 0/6] x86_32: Migrate to new entry/exit paths
@ 2015-07-10  2:17 Andy Lutomirski
  2015-07-10  2:17 ` [RFC/PATCH v2 v2 1/6] x86/selftests, x86/vm86: Improve entry_from_vm86 selftest Andy Lutomirski
                   ` (6 more replies)
  0 siblings, 7 replies; 10+ messages in thread
From: Andy Lutomirski @ 2015-07-10  2:17 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Frédéric Weisbecker, Rik van Riel, Oleg Nesterov,
	Denys Vlasenko, Borislav Petkov, Kees Cook, Brian Gerst,
	Linus Torvalds, Andy Lutomirski

This is a review version of the 32-bit asm-to-C migration.  I think
it works, but it's not yet well enough tested.  I'm a lot more
familiar with the 64-bit asm than the 32-bit asm.

The vm86 stuff especially needs much more careful testing.  Brian,
since you're playing with vm86 now, can you take a look?

Changes from v1:
 - Fix some nasty vm86 issues.  v1 was a regression.  v2 is an improvement
   over the status quo AFAICT.
 - Add patch 1, which is probably worthwhile on its own.
 - Get rid of the temporary ud2 hack.

Andy Lutomirski (6):
  x86/selftests, x86/vm86: Improve entry_from_vm86 selftest
  x86/entry/32: Remove 32-bit syscall audit optimizations
  x86/entry/32: Fix an incorrect comment for work_notifysig_v86
  x86/entry/32: Remove unnecessary asm check for returns to kernel mode
  x86/entry/32: Migrate to C exit path and rework vm86 exit hack
  x86/entry: Remove do_notify_resume, syscall_trace_leave, and their TIF
    masks

 arch/x86/entry/common.c                       | 107 ++++++++++-----------
 arch/x86/entry/entry_32.S                     | 130 +++----------------------
 arch/x86/include/asm/ptrace.h                 |   1 -
 arch/x86/include/asm/signal.h                 |   1 -
 arch/x86/include/asm/thread_info.h            |  18 +---
 arch/x86/kernel/vm86_32.c                     |   6 +-
 tools/testing/selftests/x86/entry_from_vm86.c | 132 ++++++++++++++++++++++++--
 7 files changed, 191 insertions(+), 204 deletions(-)

-- 
2.4.3


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [RFC/PATCH v2 v2 1/6] x86/selftests, x86/vm86: Improve entry_from_vm86 selftest
  2015-07-10  2:17 [RFC/PATCH v2 v2 0/6] x86_32: Migrate to new entry/exit paths Andy Lutomirski
@ 2015-07-10  2:17 ` Andy Lutomirski
  2015-07-13 19:33   ` Andy Lutomirski
  2015-07-21  9:44   ` [tip:x86/asm] " tip-bot for Andy Lutomirski
  2015-07-10  2:17 ` [RFC/PATCH v2 v2 2/6] x86/entry/32: Remove 32-bit syscall audit optimizations Andy Lutomirski
                   ` (5 subsequent siblings)
  6 siblings, 2 replies; 10+ messages in thread
From: Andy Lutomirski @ 2015-07-10  2:17 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Frédéric Weisbecker, Rik van Riel, Oleg Nesterov,
	Denys Vlasenko, Borislav Petkov, Kees Cook, Brian Gerst,
	Linus Torvalds, Andy Lutomirski

The entry_from_vm86 selftest was very weak.  Improve it: test more
types of kernel entries from vm86 mode and test them more carefully.

While we're at it, try to improve behavior on non-SEP CPUs.  The
old code was buggy because I misunderstood the intended semantics
of #UD in vm86, so I didn't handle a possible signal.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 tools/testing/selftests/x86/entry_from_vm86.c | 132 ++++++++++++++++++++++++--
 1 file changed, 124 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c
index 5c38a187677b..f004b2a09916 100644
--- a/tools/testing/selftests/x86/entry_from_vm86.c
+++ b/tools/testing/selftests/x86/entry_from_vm86.c
@@ -28,6 +28,55 @@
 static unsigned long load_addr = 0x10000;
 static int nerrs = 0;
 
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = SIG_DFL;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static sig_atomic_t got_signal;
+
+static void sighandler(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	if (ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_VM ||
+	    (ctx->uc_mcontext.gregs[REG_CS] & 3) != 3) {
+		printf("[FAIL]\tSignal frame should not reflect vm86 mode\n");
+		nerrs++;
+	}
+
+	const char *signame;
+	if (sig == SIGSEGV)
+		signame = "SIGSEGV";
+	else if (sig == SIGILL)
+		signame = "SIGILL";
+	else
+		signame = "unexpected signal";
+
+	printf("[INFO]\t%s: FLAGS = 0x%lx, CS = 0x%hx\n", signame,
+	       (unsigned long)ctx->uc_mcontext.gregs[REG_EFL],
+	       (unsigned short)ctx->uc_mcontext.gregs[REG_CS]);
+
+	got_signal = 1;
+}
+
 asm (
 	".pushsection .rodata\n\t"
 	".type vmcode_bound, @object\n\t"
@@ -38,6 +87,14 @@ asm (
 	"int3\n\t"
 	"vmcode_sysenter:\n\t"
 	"sysenter\n\t"
+	"vmcode_syscall:\n\t"
+	"syscall\n\t"
+	"vmcode_sti:\n\t"
+	"sti\n\t"
+	"vmcode_int3:\n\t"
+	"int3\n\t"
+	"vmcode_int80:\n\t"
+	"int $0x80\n\t"
 	".size vmcode, . - vmcode\n\t"
 	"end_vmcode:\n\t"
 	".code32\n\t"
@@ -45,9 +102,11 @@ asm (
 	);
 
 extern unsigned char vmcode[], end_vmcode[];
-extern unsigned char vmcode_bound[], vmcode_sysenter[];
+extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[],
+	vmcode_sti[], vmcode_int3[], vmcode_int80[];
 
 static void do_test(struct vm86plus_struct *v86, unsigned long eip,
+		    unsigned int rettype, unsigned int retarg,
 		    const char *text)
 {
 	long ret;
@@ -73,13 +132,28 @@ static void do_test(struct vm86plus_struct *v86, unsigned long eip,
 		else
 			sprintf(trapname, "%d", trapno);
 
-		printf("[OK]\tExited vm86 mode due to #%s\n", trapname);
+		printf("[INFO]\tExited vm86 mode due to #%s\n", trapname);
 	} else if (VM86_TYPE(ret) == VM86_UNKNOWN) {
-		printf("[OK]\tExited vm86 mode due to unhandled GP fault\n");
+		printf("[INFO]\tExited vm86 mode due to unhandled GP fault\n");
+	} else if (VM86_TYPE(ret) == VM86_TRAP) {
+		printf("[INFO]\tExited vm86 mode due to a trap (arg=%ld)\n",
+		       VM86_ARG(ret));
+	} else if (VM86_TYPE(ret) == VM86_SIGNAL) {
+		printf("[INFO]\tExited vm86 mode due to a signal\n");
+	} else if (VM86_TYPE(ret) == VM86_STI) {
+		printf("[INFO]\tExited vm86 mode due to STI\n");
 	} else {
-		printf("[OK]\tExited vm86 mode due to type %ld, arg %ld\n",
+		printf("[INFO]\tExited vm86 mode due to type %ld, arg %ld\n",
 		       VM86_TYPE(ret), VM86_ARG(ret));
 	}
+
+	if (rettype == -1 ||
+	    (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) {
+		printf("[OK]\tReturned correctly\n");
+	} else {
+		printf("[FAIL]\tIncorrect return reason\n");
+		nerrs++;
+	}
 }
 
 int main(void)
@@ -105,10 +179,52 @@ int main(void)
 	assert((v86.regs.cs & 3) == 0);	/* Looks like RPL = 0 */
 
 	/* #BR -- should deliver SIG??? */
-	do_test(&v86, vmcode_bound - vmcode, "#BR");
-
-	/* SYSENTER -- should cause #GP or #UD depending on CPU */
-	do_test(&v86, vmcode_sysenter - vmcode, "SYSENTER");
+	do_test(&v86, vmcode_bound - vmcode, VM86_INTx, 5, "#BR");
+
+	/*
+	 * SYSENTER -- should cause #GP or #UD depending on CPU.
+	 * Expected return type -1 means that we shouldn't validate
+	 * the vm86 return value.  This will avoid problems on non-SEP
+	 * CPUs.
+	 */
+	sethandler(SIGILL, sighandler, 0);
+	do_test(&v86, vmcode_sysenter - vmcode, -1, 0, "SYSENTER");
+	clearhandler(SIGILL);
+
+	/*
+	 * SYSCALL would be a disaster in VM86 mode.  Fortunately,
+	 * there is no kernel that both enables SYSCALL and sets
+	 * EFER.SCE, so it's #UD on all systems.  But vm86 is
+	 * buggy (or has a "feature"), so the SIGILL will actually
+	 * be delivered.
+	 */
+	sethandler(SIGILL, sighandler, 0);
+	do_test(&v86, vmcode_syscall - vmcode, VM86_SIGNAL, 0, "SYSCALL");
+	clearhandler(SIGILL);
+
+	/* STI with VIP set */
+	v86.regs.eflags |= X86_EFLAGS_VIP;
+	v86.regs.eflags &= ~X86_EFLAGS_IF;
+	do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set");
+
+	/* INT3 -- should cause #BP */
+	do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3");
+
+	/* INT80 -- should exit with "INTx 0x80" */
+	v86.regs.eax = (unsigned int)-1;
+	do_test(&v86, vmcode_int80 - vmcode, VM86_INTx, 0x80, "int80");
+
+	/* Execute a null pointer */
+	v86.regs.cs = 0;
+	v86.regs.ss = 0;
+	sethandler(SIGSEGV, sighandler, 0);
+	got_signal = 0;
+	do_test(&v86, 0, VM86_SIGNAL, 0, "Execute null pointer");
+	if (!got_signal) {
+		printf("[FAIL]\tDid not receive SIGSEGV\n");
+		nerrs++;
+	}
+	clearhandler(SIGSEGV);
 
 	return (nerrs == 0 ? 0 : 1);
 }
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [RFC/PATCH v2 v2 2/6] x86/entry/32: Remove 32-bit syscall audit optimizations
  2015-07-10  2:17 [RFC/PATCH v2 v2 0/6] x86_32: Migrate to new entry/exit paths Andy Lutomirski
  2015-07-10  2:17 ` [RFC/PATCH v2 v2 1/6] x86/selftests, x86/vm86: Improve entry_from_vm86 selftest Andy Lutomirski
@ 2015-07-10  2:17 ` Andy Lutomirski
  2015-07-10  2:17 ` [RFC/PATCH v2 v2 3/6] x86/entry/32: Fix an incorrect comment for work_notifysig_v86 Andy Lutomirski
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Andy Lutomirski @ 2015-07-10  2:17 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Frédéric Weisbecker, Rik van Riel, Oleg Nesterov,
	Denys Vlasenko, Borislav Petkov, Kees Cook, Brian Gerst,
	Linus Torvalds, Andy Lutomirski

The asm audit optimizations are ugly and obfuscate the code too
much.  Remove them.

This will regress performance if syscall auditing is enabled on
32-bit kernels and sysenter is in use.  If this becomes a problem,
interested parties are encouraged to implement the equivalent of the
64-bit opportunistic sysret optimization.

Alternatively, a case could be made that, on 32-bit kernels, a less
messy asm audit optimization could be done.  32-bit kernels don't have
the complicated partial register saving tricks that 64-bit kernels
have, so the sysenter post-syscall path could just call the audit
hooks directly.  Any reimplementation of this ought to demonstrate
that it only calls the audit hook once per syscall, though, which does
not currently appear to be true.  Someone would have to make the case
that doing so would be better than implementing opportunistic sysexit,
though.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/entry/entry_32.S | 48 ++---------------------------------------------
 1 file changed, 2 insertions(+), 46 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 21dc60a60b5f..90f9e7f6c15e 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -45,16 +45,6 @@
 #include <asm/asm.h>
 #include <asm/smap.h>
 
-/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
-#include <linux/elf-em.h>
-#define AUDIT_ARCH_I386		(EM_386|__AUDIT_ARCH_LE)
-#define __AUDIT_ARCH_LE		0x40000000
-
-#ifndef CONFIG_AUDITSYSCALL
-# define sysenter_audit		syscall_trace_entry
-# define sysexit_audit		syscall_exit_work
-#endif
-
 	.section .entry.text, "ax"
 
 /*
@@ -339,7 +329,7 @@ sysenter_past_esp:
 	GET_THREAD_INFO(%ebp)
 
 	testl	$_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
-	jnz	sysenter_audit
+	jnz	syscall_trace_entry
 sysenter_do_call:
 	cmpl	$(NR_syscalls), %eax
 	jae	sysenter_badsys
@@ -351,7 +341,7 @@ sysenter_after_call:
 	TRACE_IRQS_OFF
 	movl	TI_flags(%ebp), %ecx
 	testl	$_TIF_ALLWORK_MASK, %ecx
-	jnz	sysexit_audit
+	jnz	syscall_exit_work
 sysenter_exit:
 /* if something modifies registers it must also disable sysexit */
 	movl	PT_EIP(%esp), %edx
@@ -362,40 +352,6 @@ sysenter_exit:
 	PTGS_TO_GS
 	ENABLE_INTERRUPTS_SYSEXIT
 
-#ifdef CONFIG_AUDITSYSCALL
-sysenter_audit:
-	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), TI_flags(%ebp)
-	jnz	syscall_trace_entry
-	/* movl	PT_EAX(%esp), %eax already set, syscall number: 1st arg to audit */
-	movl	PT_EBX(%esp), %edx		/* ebx/a0: 2nd arg to audit */
-	/* movl	PT_ECX(%esp), %ecx already set, a1: 3nd arg to audit */
-	pushl	PT_ESI(%esp)			/* a3: 5th arg */
-	pushl	PT_EDX+4(%esp)			/* a2: 4th arg */
-	call	__audit_syscall_entry
-	popl	%ecx				/* get that remapped edx off the stack */
-	popl	%ecx				/* get that remapped esi off the stack */
-	movl	PT_EAX(%esp), %eax		/* reload syscall number */
-	jmp	sysenter_do_call
-
-sysexit_audit:
-	testl	$(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
-	jnz	syscall_exit_work
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_ANY)
-	movl	%eax, %edx			/* second arg, syscall return value */
-	cmpl	$-MAX_ERRNO, %eax		/* is it an error ? */
-	setbe %al				/* 1 if so, 0 if not */
-	movzbl %al, %eax			/* zero-extend that */
-	call	__audit_syscall_exit
-	DISABLE_INTERRUPTS(CLBR_ANY)
-	TRACE_IRQS_OFF
-	movl	TI_flags(%ebp), %ecx
-	testl	$(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
-	jnz	syscall_exit_work
-	movl	PT_EAX(%esp), %eax		/* reload syscall return value */
-	jmp	sysenter_exit
-#endif
-
 .pushsection .fixup, "ax"
 2:	movl	$0, PT_FS(%esp)
 	jmp	1b
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [RFC/PATCH v2 v2 3/6] x86/entry/32: Fix an incorrect comment for work_notifysig_v86
  2015-07-10  2:17 [RFC/PATCH v2 v2 0/6] x86_32: Migrate to new entry/exit paths Andy Lutomirski
  2015-07-10  2:17 ` [RFC/PATCH v2 v2 1/6] x86/selftests, x86/vm86: Improve entry_from_vm86 selftest Andy Lutomirski
  2015-07-10  2:17 ` [RFC/PATCH v2 v2 2/6] x86/entry/32: Remove 32-bit syscall audit optimizations Andy Lutomirski
@ 2015-07-10  2:17 ` Andy Lutomirski
  2015-07-10  2:17 ` [RFC/PATCH v2 v2 4/6] x86/entry/32: Remove unnecessary asm check for returns to kernel mode Andy Lutomirski
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Andy Lutomirski @ 2015-07-10  2:17 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Frédéric Weisbecker, Rik van Riel, Oleg Nesterov,
	Denys Vlasenko, Borislav Petkov, Kees Cook, Brian Gerst,
	Linus Torvalds, Andy Lutomirski

This code path is for returns to user mode only.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/entry/entry_32.S | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 90f9e7f6c15e..a36d5df6a749 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -484,8 +484,7 @@ work_notifysig:					# deal with pending signals and
 #ifdef CONFIG_VM86
 	testl	$X86_EFLAGS_VM, PT_EFLAGS(%esp)
 	movl	%esp, %eax
-	jnz	work_notifysig_v86		# returning to kernel-space or
-						# vm86-space
+	jnz	work_notifysig_v86		# special case for v86
 1:
 #else
 	movl	%esp, %eax
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [RFC/PATCH v2 v2 4/6] x86/entry/32: Remove unnecessary asm check for returns to kernel mode
  2015-07-10  2:17 [RFC/PATCH v2 v2 0/6] x86_32: Migrate to new entry/exit paths Andy Lutomirski
                   ` (2 preceding siblings ...)
  2015-07-10  2:17 ` [RFC/PATCH v2 v2 3/6] x86/entry/32: Fix an incorrect comment for work_notifysig_v86 Andy Lutomirski
@ 2015-07-10  2:17 ` Andy Lutomirski
  2015-07-10  2:17 ` [RFC/PATCH v2 v2 5/6] x86/entry/32: Migrate to C exit path and rework vm86 exit hack Andy Lutomirski
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Andy Lutomirski @ 2015-07-10  2:17 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Frédéric Weisbecker, Rik van Riel, Oleg Nesterov,
	Denys Vlasenko, Borislav Petkov, Kees Cook, Brian Gerst,
	Linus Torvalds, Andy Lutomirski, Al Viro

Unless I missed something, 44fbbb3dc687c ("x86: get rid of calling
do_notify_resume() when returning to kernel mode") was unnecessarily
paranoid.  It split the exit path into resume_userspace and
resume_kernel, after which it was no longer possible to get to
work_notifysig when returning to kernel mode.  The check for kernel
mode in work_notifysig is superfluous.  Remove it.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/entry/entry_32.S | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index a36d5df6a749..66ff9c4055d7 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -491,10 +491,6 @@ work_notifysig:					# deal with pending signals and
 #endif
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	movb	PT_CS(%esp), %bl
-	andb	$SEGMENT_RPL_MASK, %bl
-	cmpb	$USER_RPL, %bl
-	jb	resume_kernel
 	xorl	%edx, %edx
 	call	do_notify_resume
 	jmp	resume_userspace
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [RFC/PATCH v2 v2 5/6] x86/entry/32: Migrate to C exit path and rework vm86 exit hack
  2015-07-10  2:17 [RFC/PATCH v2 v2 0/6] x86_32: Migrate to new entry/exit paths Andy Lutomirski
                   ` (3 preceding siblings ...)
  2015-07-10  2:17 ` [RFC/PATCH v2 v2 4/6] x86/entry/32: Remove unnecessary asm check for returns to kernel mode Andy Lutomirski
@ 2015-07-10  2:17 ` Andy Lutomirski
  2015-07-10  2:17 ` [RFC/PATCH v2 v2 6/6] x86/entry: Remove do_notify_resume, syscall_trace_leave, and their TIF masks Andy Lutomirski
  2015-07-10 14:45 ` [RFC/PATCH v2 v2 0/6] x86_32: Migrate to new entry/exit paths Brian Gerst
  6 siblings, 0 replies; 10+ messages in thread
From: Andy Lutomirski @ 2015-07-10  2:17 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Frédéric Weisbecker, Rik van Riel, Oleg Nesterov,
	Denys Vlasenko, Borislav Petkov, Kees Cook, Brian Gerst,
	Linus Torvalds, Andy Lutomirski

This removes the hybrid asm-and-C implementation of exit work.

This patch modifies a giant hack.  vm86 used to fiddle with
TIF_NOTIFY_RESUME and fix itself up in the exit asm.  The hack was
messy and completely incorrect: it broke vm86 if the syscall slow
path was being used.

Rework the hack.  We now forcibly exit vm86 mode on return to
userspace if we're delivering a signal (this is needed to deliver
the signal correctly) or if a new TIF_EXIT_VM86 flag is set.  The
TIF_NOTIFY_RESUME hack is changed to use TIF_EXIT_VM86 instead.

This makes prepare_exit_to_usermode a bit slower on CONFIG_VM86=y
kernels.  People shouldn't use such kernels if they care about
sanity, security, or performance.

Brian Gerst is planning to further rework vm86 mode to leave pt_regs
where it belongs.  That will allow us to revert the
pt_regs_to_thread_info slowdown and the stack-switching parts of this
code; instead we can just exit normally, as vm86 won't have a
special stack layout any more.

Before this change, the entry_from_vm86 test failed under strace.
Now it passes.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/entry/common.c            | 56 ++++++++++++++++++++++++++-
 arch/x86/entry/entry_32.S          | 79 ++++++--------------------------------
 arch/x86/include/asm/thread_info.h |  2 +
 arch/x86/kernel/vm86_32.c          |  6 +--
 4 files changed, 69 insertions(+), 74 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index febc53086a69..aeaf7d64be0f 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -240,10 +240,51 @@ void syscall_trace_leave(struct pt_regs *regs)
 
 static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
 {
+#ifdef CONFIG_VM86
+	/*
+	 * In VM86 mode, pt_regs isn't in a well-defined place on the
+	 * stack.  Skip the optimization entirely.
+	 */
+	return current_thread_info();
+#else
 	unsigned long top_of_stack =
 		(unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING;
 	return (struct thread_info *)(top_of_stack - THREAD_SIZE);
+#endif
+}
+
+#ifdef CONFIG_VM86
+static void __noreturn exit_vm86_immediately(struct pt_regs *regs)
+{
+	/*
+	 * VM86 sometimes needs to exit back to normal user mode
+	 * (unsurprisingly) and its hack of resetting the stack and
+	 * jumping into the exit asm isn't always usable (also
+	 * unsurprisingly).  Instead, we land in this abomination.
+	 *
+	 * While I can't defend this code as being anything other
+	 * than awful, at least it's more or less self-contained,
+	 * and it's less awful and much less buggy than the even
+	 * worse hack it replaces.  --Andy
+	 */
+	struct pt_regs *regs32;
+
+	clear_tsk_thread_flag(current, TIF_EXIT_VM86);
+	regs32 = save_v86_state((struct kernel_vm86_regs *)regs);
+	local_irq_disable();
+	__asm__ __volatile__(
+		"movl %0,%%esp\n\t"
+		"movl %1,%%ebp\n\t"
+		"jmp resume_userspace"
+		: : "r" (regs32), "r" (current_thread_info()));
+
+	/*
+	 * We don't get here.  Instead we restart
+	 * prepare_exit_to_usermode via resume_userspace.
+	 */
+	unreachable();
 }
+#endif
 
 /* Called with IRQs disabled. */
 __visible void prepare_exit_to_usermode(struct pt_regs *regs)
@@ -264,12 +305,18 @@ __visible void prepare_exit_to_usermode(struct pt_regs *regs)
 			READ_ONCE(pt_regs_to_thread_info(regs)->flags);
 
 		if (!(cached_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME |
-				      _TIF_UPROBE | _TIF_NEED_RESCHED)))
+				      _TIF_UPROBE | _TIF_NEED_RESCHED |
+				      _TIF_EXIT_VM86)))
 			break;
 
 		/* We have work to do. */
 		local_irq_enable();
 
+#ifdef CONFIG_VM86
+		if (cached_flags & _TIF_EXIT_VM86)
+			exit_vm86_immediately(regs);
+#endif
+
 		if (cached_flags & _TIF_NEED_RESCHED)
 			schedule();
 
@@ -277,8 +324,13 @@ __visible void prepare_exit_to_usermode(struct pt_regs *regs)
 			uprobe_notify_resume(regs);
 
 		/* deal with pending signal delivery */
-		if (cached_flags & _TIF_SIGPENDING)
+		if (cached_flags & _TIF_SIGPENDING) {
+#ifdef CONFIG_VM86
+			if (v8086_mode(regs))
+				exit_vm86_immediately(regs);
+#endif
 			do_signal(regs);
+		}
 
 		if (cached_flags & _TIF_NOTIFY_RESUME) {
 			clear_thread_flag(TIF_NOTIFY_RESUME);
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 66ff9c4055d7..b2909bf8cf70 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -256,14 +256,10 @@ ret_from_intr:
 
 ENTRY(resume_userspace)
 	LOCKDEP_SYS_EXIT
-	DISABLE_INTERRUPTS(CLBR_ANY)		# make sure we don't miss an interrupt
-						# setting need_resched or sigpending
-						# between sampling and the iret
+	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
-	movl	TI_flags(%ebp), %ecx
-	andl	$_TIF_WORK_MASK, %ecx		# is there any work to be done on
-						# int/exception return?
-	jne	work_pending
+	movl	%esp, %eax
+	call	prepare_exit_to_usermode
 	jmp	restore_all
 END(ret_from_exception)
 
@@ -341,7 +337,7 @@ sysenter_after_call:
 	TRACE_IRQS_OFF
 	movl	TI_flags(%ebp), %ecx
 	testl	$_TIF_ALLWORK_MASK, %ecx
-	jnz	syscall_exit_work
+	jnz	syscall_exit_work_irqs_off
 sysenter_exit:
 /* if something modifies registers it must also disable sysexit */
 	movl	PT_EIP(%esp), %edx
@@ -377,13 +373,7 @@ syscall_after_call:
 	movl	%eax, PT_EAX(%esp)		# store the return value
 syscall_exit:
 	LOCKDEP_SYS_EXIT
-	DISABLE_INTERRUPTS(CLBR_ANY)		# make sure we don't miss an interrupt
-						# setting need_resched or sigpending
-						# between sampling and the iret
-	TRACE_IRQS_OFF
-	movl	TI_flags(%ebp), %ecx
-	testl	$_TIF_ALLWORK_MASK, %ecx	# current->work
-	jnz	syscall_exit_work
+	jmp	syscall_exit_work
 
 restore_all:
 	TRACE_IRQS_IRET
@@ -460,52 +450,6 @@ ldt_ss:
 #endif
 ENDPROC(entry_INT80_32)
 
-	# perform work that needs to be done immediately before resumption
-	ALIGN
-work_pending:
-	testb	$_TIF_NEED_RESCHED, %cl
-	jz	work_notifysig
-work_resched:
-	call	schedule
-	LOCKDEP_SYS_EXIT
-	DISABLE_INTERRUPTS(CLBR_ANY)		# make sure we don't miss an interrupt
-						# setting need_resched or sigpending
-						# between sampling and the iret
-	TRACE_IRQS_OFF
-	movl	TI_flags(%ebp), %ecx
-	andl	$_TIF_WORK_MASK, %ecx		# is there any work to be done other
-						# than syscall tracing?
-	jz	restore_all
-	testb	$_TIF_NEED_RESCHED, %cl
-	jnz	work_resched
-
-work_notifysig:					# deal with pending signals and
-						# notify-resume requests
-#ifdef CONFIG_VM86
-	testl	$X86_EFLAGS_VM, PT_EFLAGS(%esp)
-	movl	%esp, %eax
-	jnz	work_notifysig_v86		# special case for v86
-1:
-#else
-	movl	%esp, %eax
-#endif
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	xorl	%edx, %edx
-	call	do_notify_resume
-	jmp	resume_userspace
-
-#ifdef CONFIG_VM86
-	ALIGN
-work_notifysig_v86:
-	pushl	%ecx				# save ti_flags for do_notify_resume
-	call	save_v86_state			# %eax contains pt_regs pointer
-	popl	%ecx
-	movl	%eax, %esp
-	jmp	1b
-#endif
-END(work_pending)
-
 	# perform syscall exit tracing
 	ALIGN
 syscall_trace_entry:
@@ -520,15 +464,14 @@ END(syscall_trace_entry)
 
 	# perform syscall exit tracing
 	ALIGN
-syscall_exit_work:
-	testl	$_TIF_WORK_SYSCALL_EXIT, %ecx
-	jz	work_pending
+syscall_exit_work_irqs_off:
 	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_ANY)		# could let syscall_trace_leave() call
-						# schedule() instead
+	ENABLE_INTERRUPTS(CLBR_ANY)
+
+syscall_exit_work:
 	movl	%esp, %eax
-	call	syscall_trace_leave
-	jmp	resume_userspace
+	call	syscall_return_slowpath
+	jmp	restore_all
 END(syscall_exit_work)
 
 syscall_fault:
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 225ee545e1a0..5a60392ce70e 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -95,6 +95,7 @@ struct thread_info {
 #define TIF_SYSCALL_EMU		6	/* syscall emulation active */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
+#define TIF_EXIT_VM86		9	/* deferred vm86 exit */
 #define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
 #define TIF_UPROBE		12	/* breakpointed or singlestepping */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
@@ -119,6 +120,7 @@ struct thread_info {
 #define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
+#define _TIF_EXIT_VM86		(1 << TIF_EXIT_VM86)
 #define _TIF_USER_RETURN_NOTIFY	(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE		(1 << TIF_UPROBE)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index fc9db6ef2a95..46dcef7046b6 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -549,11 +549,9 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
 {
 	if (VMPI.is_vm86pus) {
 		if ((trapno == 3) || (trapno == 1)) {
+			/* Queue up a return to normal userspace. */
 			KVM86->regs32->ax = VM86_TRAP + (trapno << 8);
-			/* setting this flag forces the code in entry_32.S to
-			   the path where we call save_v86_state() and change
-			   the stack pointer to KVM86->regs32 */
-			set_thread_flag(TIF_NOTIFY_RESUME);
+			set_thread_flag(TIF_EXIT_VM86);
 			return 0;
 		}
 		do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [RFC/PATCH v2 v2 6/6] x86/entry: Remove do_notify_resume, syscall_trace_leave, and their TIF masks
  2015-07-10  2:17 [RFC/PATCH v2 v2 0/6] x86_32: Migrate to new entry/exit paths Andy Lutomirski
                   ` (4 preceding siblings ...)
  2015-07-10  2:17 ` [RFC/PATCH v2 v2 5/6] x86/entry/32: Migrate to C exit path and rework vm86 exit hack Andy Lutomirski
@ 2015-07-10  2:17 ` Andy Lutomirski
  2015-07-10 14:45 ` [RFC/PATCH v2 v2 0/6] x86_32: Migrate to new entry/exit paths Brian Gerst
  6 siblings, 0 replies; 10+ messages in thread
From: Andy Lutomirski @ 2015-07-10  2:17 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Frédéric Weisbecker, Rik van Riel, Oleg Nesterov,
	Denys Vlasenko, Borislav Petkov, Kees Cook, Brian Gerst,
	Linus Torvalds, Andy Lutomirski

They are no longer used.  Good riddance!

Deleting the TIF_ macros is really nice.  It was never clear why
there were so many variants.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/entry/common.c            | 57 --------------------------------------
 arch/x86/include/asm/ptrace.h      |  1 -
 arch/x86/include/asm/signal.h      |  1 -
 arch/x86/include/asm/thread_info.h | 16 -----------
 4 files changed, 75 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index aeaf7d64be0f..994ae1ed6e9d 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -207,37 +207,6 @@ long syscall_trace_enter(struct pt_regs *regs)
 		return syscall_trace_enter_phase2(regs, arch, phase1_result);
 }
 
-/* Deprecated. */
-void syscall_trace_leave(struct pt_regs *regs)
-{
-	bool step;
-
-	/*
-	 * We may come here right after calling schedule_user()
-	 * or do_notify_resume(), in which case we can be in RCU
-	 * user mode.
-	 */
-	user_exit();
-
-	audit_syscall_exit(regs);
-
-	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
-		trace_sys_exit(regs, regs->ax);
-
-	/*
-	 * If TIF_SYSCALL_EMU is set, we only get here because of
-	 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
-	 * We already reported this syscall instruction in
-	 * syscall_trace_enter().
-	 */
-	step = unlikely(test_thread_flag(TIF_SINGLESTEP)) &&
-			!test_thread_flag(TIF_SYSCALL_EMU);
-	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
-		tracehook_report_syscall_exit(regs, step);
-
-	user_enter();
-}
-
 static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
 {
 #ifdef CONFIG_VM86
@@ -398,29 +367,3 @@ __visible void syscall_return_slowpath(struct pt_regs *regs)
 	local_irq_disable();
 	prepare_exit_to_usermode(regs);
 }
-
-/*
- * Deprecated notification of userspace execution resumption
- * - triggered by the TIF_WORK_MASK flags
- */
-__visible void
-do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
-{
-	user_exit();
-
-	if (thread_info_flags & _TIF_UPROBE)
-		uprobe_notify_resume(regs);
-
-	/* deal with pending signal delivery */
-	if (thread_info_flags & _TIF_SIGPENDING)
-		do_signal(regs);
-
-	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
-		clear_thread_flag(TIF_NOTIFY_RESUME);
-		tracehook_notify_resume(regs);
-	}
-	if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
-		fire_user_return_notifiers();
-
-	user_enter();
-}
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 5fabf1362942..6271281f947d 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -88,7 +88,6 @@ extern long syscall_trace_enter_phase2(struct pt_regs *, u32 arch,
 				       unsigned long phase1_result);
 
 extern long syscall_trace_enter(struct pt_regs *);
-extern void syscall_trace_leave(struct pt_regs *);
 
 static inline unsigned long regs_return_value(struct pt_regs *regs)
 {
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index b42408bcf6b5..c481be78fcf1 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -31,7 +31,6 @@ typedef sigset_t compat_sigset_t;
 #include <uapi/asm/signal.h>
 #ifndef __ASSEMBLY__
 extern void do_signal(struct pt_regs *regs);
-extern void do_notify_resume(struct pt_regs *, void *, __u32);
 
 #define __ARCH_HAS_SA_RESTORER
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 5a60392ce70e..7bd82e365df6 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -142,27 +142,11 @@ struct thread_info {
 	 _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT |	\
 	 _TIF_NOHZ)
 
-/* work to do in syscall_trace_leave() */
-#define _TIF_WORK_SYSCALL_EXIT	\
-	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP |	\
-	 _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ)
-
-/* work to do on interrupt/exception return */
-#define _TIF_WORK_MASK							\
-	(0x0000FFFF &							\
-	 ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|			\
-	   _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU))
-
 /* work to do on any return to user space */
 #define _TIF_ALLWORK_MASK						\
 	((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT |	\
 	_TIF_NOHZ)
 
-/* Only used for 64 bit */
-#define _TIF_DO_NOTIFY_MASK						\
-	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME |				\
-	 _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE)
-
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
 	(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [RFC/PATCH v2 v2 0/6] x86_32: Migrate to new entry/exit paths
  2015-07-10  2:17 [RFC/PATCH v2 v2 0/6] x86_32: Migrate to new entry/exit paths Andy Lutomirski
                   ` (5 preceding siblings ...)
  2015-07-10  2:17 ` [RFC/PATCH v2 v2 6/6] x86/entry: Remove do_notify_resume, syscall_trace_leave, and their TIF masks Andy Lutomirski
@ 2015-07-10 14:45 ` Brian Gerst
  6 siblings, 0 replies; 10+ messages in thread
From: Brian Gerst @ 2015-07-10 14:45 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List,
	Frédéric Weisbecker, Rik van Riel, Oleg Nesterov,
	Denys Vlasenko, Borislav Petkov, Kees Cook, Linus Torvalds

On Thu, Jul 9, 2015 at 10:17 PM, Andy Lutomirski <luto@kernel.org> wrote:
> This is a review version of the 32-bit asm-to-C migration.  I think
> it works, but it's not yet well enough tested.  I'm a lot more
> familiar with the 64-bit asm than the 32-bit asm.
>
> The vm86 stuff especially needs much more careful testing.  Brian,
> since you're playing with vm86 now, can you take a look?

The vm86 parts of these patches won't be needed after my patches.  I
should have them ready to post this weekend.  I just need to do some
more testing.

--
Brian Gerst

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC/PATCH v2 v2 1/6] x86/selftests, x86/vm86: Improve entry_from_vm86 selftest
  2015-07-10  2:17 ` [RFC/PATCH v2 v2 1/6] x86/selftests, x86/vm86: Improve entry_from_vm86 selftest Andy Lutomirski
@ 2015-07-13 19:33   ` Andy Lutomirski
  2015-07-21  9:44   ` [tip:x86/asm] " tip-bot for Andy Lutomirski
  1 sibling, 0 replies; 10+ messages in thread
From: Andy Lutomirski @ 2015-07-13 19:33 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: X86 ML, linux-kernel, Frédéric Weisbecker,
	Rik van Riel, Oleg Nesterov, Denys Vlasenko, Borislav Petkov,
	Kees Cook, Brian Gerst, Linus Torvalds

Ingo, would it make sense for you to apply just this patch?  It should
be helpful for testing whatever we end up doing with the vm86 code.

Thanks,
Andy

On Thu, Jul 9, 2015 at 7:17 PM, Andy Lutomirski <luto@kernel.org> wrote:
> The entry_from_vm86 selftest was very weak.  Improve it: test more
> types of kernel entries from vm86 mode and test them more carefully.
>
> While we're at it, try to improve behavior on non-SEP CPUs.  The
> old code was buggy because I misunderstood the intended semantics
> of #UD in vm86, so I didn't handle a possible signal.
>
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> ---
>  tools/testing/selftests/x86/entry_from_vm86.c | 132 ++++++++++++++++++++++++--
>  1 file changed, 124 insertions(+), 8 deletions(-)
>
> diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c
> index 5c38a187677b..f004b2a09916 100644
> --- a/tools/testing/selftests/x86/entry_from_vm86.c
> +++ b/tools/testing/selftests/x86/entry_from_vm86.c
> @@ -28,6 +28,55 @@
>  static unsigned long load_addr = 0x10000;
>  static int nerrs = 0;
>
> +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
> +                      int flags)
> +{
> +       struct sigaction sa;
> +       memset(&sa, 0, sizeof(sa));
> +       sa.sa_sigaction = handler;
> +       sa.sa_flags = SA_SIGINFO | flags;
> +       sigemptyset(&sa.sa_mask);
> +       if (sigaction(sig, &sa, 0))
> +               err(1, "sigaction");
> +}
> +
> +static void clearhandler(int sig)
> +{
> +       struct sigaction sa;
> +       memset(&sa, 0, sizeof(sa));
> +       sa.sa_handler = SIG_DFL;
> +       sigemptyset(&sa.sa_mask);
> +       if (sigaction(sig, &sa, 0))
> +               err(1, "sigaction");
> +}
> +
> +static sig_atomic_t got_signal;
> +
> +static void sighandler(int sig, siginfo_t *info, void *ctx_void)
> +{
> +       ucontext_t *ctx = (ucontext_t*)ctx_void;
> +
> +       if (ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_VM ||
> +           (ctx->uc_mcontext.gregs[REG_CS] & 3) != 3) {
> +               printf("[FAIL]\tSignal frame should not reflect vm86 mode\n");
> +               nerrs++;
> +       }
> +
> +       const char *signame;
> +       if (sig == SIGSEGV)
> +               signame = "SIGSEGV";
> +       else if (sig == SIGILL)
> +               signame = "SIGILL";
> +       else
> +               signame = "unexpected signal";
> +
> +       printf("[INFO]\t%s: FLAGS = 0x%lx, CS = 0x%hx\n", signame,
> +              (unsigned long)ctx->uc_mcontext.gregs[REG_EFL],
> +              (unsigned short)ctx->uc_mcontext.gregs[REG_CS]);
> +
> +       got_signal = 1;
> +}
> +
>  asm (
>         ".pushsection .rodata\n\t"
>         ".type vmcode_bound, @object\n\t"
> @@ -38,6 +87,14 @@ asm (
>         "int3\n\t"
>         "vmcode_sysenter:\n\t"
>         "sysenter\n\t"
> +       "vmcode_syscall:\n\t"
> +       "syscall\n\t"
> +       "vmcode_sti:\n\t"
> +       "sti\n\t"
> +       "vmcode_int3:\n\t"
> +       "int3\n\t"
> +       "vmcode_int80:\n\t"
> +       "int $0x80\n\t"
>         ".size vmcode, . - vmcode\n\t"
>         "end_vmcode:\n\t"
>         ".code32\n\t"
> @@ -45,9 +102,11 @@ asm (
>         );
>
>  extern unsigned char vmcode[], end_vmcode[];
> -extern unsigned char vmcode_bound[], vmcode_sysenter[];
> +extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[],
> +       vmcode_sti[], vmcode_int3[], vmcode_int80[];
>
>  static void do_test(struct vm86plus_struct *v86, unsigned long eip,
> +                   unsigned int rettype, unsigned int retarg,
>                     const char *text)
>  {
>         long ret;
> @@ -73,13 +132,28 @@ static void do_test(struct vm86plus_struct *v86, unsigned long eip,
>                 else
>                         sprintf(trapname, "%d", trapno);
>
> -               printf("[OK]\tExited vm86 mode due to #%s\n", trapname);
> +               printf("[INFO]\tExited vm86 mode due to #%s\n", trapname);
>         } else if (VM86_TYPE(ret) == VM86_UNKNOWN) {
> -               printf("[OK]\tExited vm86 mode due to unhandled GP fault\n");
> +               printf("[INFO]\tExited vm86 mode due to unhandled GP fault\n");
> +       } else if (VM86_TYPE(ret) == VM86_TRAP) {
> +               printf("[INFO]\tExited vm86 mode due to a trap (arg=%ld)\n",
> +                      VM86_ARG(ret));
> +       } else if (VM86_TYPE(ret) == VM86_SIGNAL) {
> +               printf("[INFO]\tExited vm86 mode due to a signal\n");
> +       } else if (VM86_TYPE(ret) == VM86_STI) {
> +               printf("[INFO]\tExited vm86 mode due to STI\n");
>         } else {
> -               printf("[OK]\tExited vm86 mode due to type %ld, arg %ld\n",
> +               printf("[INFO]\tExited vm86 mode due to type %ld, arg %ld\n",
>                        VM86_TYPE(ret), VM86_ARG(ret));
>         }
> +
> +       if (rettype == -1 ||
> +           (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) {
> +               printf("[OK]\tReturned correctly\n");
> +       } else {
> +               printf("[FAIL]\tIncorrect return reason\n");
> +               nerrs++;
> +       }
>  }
>
>  int main(void)
> @@ -105,10 +179,52 @@ int main(void)
>         assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */
>
>         /* #BR -- should deliver SIG??? */
> -       do_test(&v86, vmcode_bound - vmcode, "#BR");
> -
> -       /* SYSENTER -- should cause #GP or #UD depending on CPU */
> -       do_test(&v86, vmcode_sysenter - vmcode, "SYSENTER");
> +       do_test(&v86, vmcode_bound - vmcode, VM86_INTx, 5, "#BR");
> +
> +       /*
> +        * SYSENTER -- should cause #GP or #UD depending on CPU.
> +        * Expected return type -1 means that we shouldn't validate
> +        * the vm86 return value.  This will avoid problems on non-SEP
> +        * CPUs.
> +        */
> +       sethandler(SIGILL, sighandler, 0);
> +       do_test(&v86, vmcode_sysenter - vmcode, -1, 0, "SYSENTER");
> +       clearhandler(SIGILL);
> +
> +       /*
> +        * SYSCALL would be a disaster in VM86 mode.  Fortunately,
> +        * there is no kernel that both enables SYSCALL and sets
> +        * EFER.SCE, so it's #UD on all systems.  But vm86 is
> +        * buggy (or has a "feature"), so the SIGILL will actually
> +        * be delivered.
> +        */
> +       sethandler(SIGILL, sighandler, 0);
> +       do_test(&v86, vmcode_syscall - vmcode, VM86_SIGNAL, 0, "SYSCALL");
> +       clearhandler(SIGILL);
> +
> +       /* STI with VIP set */
> +       v86.regs.eflags |= X86_EFLAGS_VIP;
> +       v86.regs.eflags &= ~X86_EFLAGS_IF;
> +       do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set");
> +
> +       /* INT3 -- should cause #BP */
> +       do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3");
> +
> +       /* INT80 -- should exit with "INTx 0x80" */
> +       v86.regs.eax = (unsigned int)-1;
> +       do_test(&v86, vmcode_int80 - vmcode, VM86_INTx, 0x80, "int80");
> +
> +       /* Execute a null pointer */
> +       v86.regs.cs = 0;
> +       v86.regs.ss = 0;
> +       sethandler(SIGSEGV, sighandler, 0);
> +       got_signal = 0;
> +       do_test(&v86, 0, VM86_SIGNAL, 0, "Execute null pointer");
> +       if (!got_signal) {
> +               printf("[FAIL]\tDid not receive SIGSEGV\n");
> +               nerrs++;
> +       }
> +       clearhandler(SIGSEGV);
>
>         return (nerrs == 0 ? 0 : 1);
>  }
> --
> 2.4.3
>



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [tip:x86/asm] x86/selftests, x86/vm86: Improve entry_from_vm86 selftest
  2015-07-10  2:17 ` [RFC/PATCH v2 v2 1/6] x86/selftests, x86/vm86: Improve entry_from_vm86 selftest Andy Lutomirski
  2015-07-13 19:33   ` Andy Lutomirski
@ 2015-07-21  9:44   ` tip-bot for Andy Lutomirski
  1 sibling, 0 replies; 10+ messages in thread
From: tip-bot for Andy Lutomirski @ 2015-07-21  9:44 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: riel, luto, oleg, torvalds, brgerst, mingo, bp, fweisbec, hpa,
	tglx, shuahkh, peterz, keescook, dvlasenk, vda.linux, luto,
	linux-kernel

Commit-ID:  f2a50f8b7da45ff2de93a71393e715a2ab9f3b68
Gitweb:     http://git.kernel.org/tip/f2a50f8b7da45ff2de93a71393e715a2ab9f3b68
Author:     Andy Lutomirski <luto@kernel.org>
AuthorDate: Thu, 9 Jul 2015 19:17:29 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Tue, 21 Jul 2015 10:51:20 +0200

x86/selftests, x86/vm86: Improve entry_from_vm86 selftest

The entry_from_vm86 selftest was very weak.  Improve it: test
more types of kernel entries from vm86 mode and test them more
carefully.

While we're at it, try to improve behavior on non-SEP CPUs.  The
old code was buggy because I misunderstood the intended
semantics of #UD in vm86, so I didn't handle a possible signal.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Denys Vlasenko <vda.linux@googlemail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/d8ef1d7368ac70d8342481563ed50f9a7d2eea6f.1436492057.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/entry_from_vm86.c | 132 ++++++++++++++++++++++++--
 1 file changed, 124 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c
index 5c38a18..f004b2a 100644
--- a/tools/testing/selftests/x86/entry_from_vm86.c
+++ b/tools/testing/selftests/x86/entry_from_vm86.c
@@ -28,6 +28,55 @@
 static unsigned long load_addr = 0x10000;
 static int nerrs = 0;
 
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = SIG_DFL;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static sig_atomic_t got_signal;
+
+static void sighandler(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	if (ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_VM ||
+	    (ctx->uc_mcontext.gregs[REG_CS] & 3) != 3) {
+		printf("[FAIL]\tSignal frame should not reflect vm86 mode\n");
+		nerrs++;
+	}
+
+	const char *signame;
+	if (sig == SIGSEGV)
+		signame = "SIGSEGV";
+	else if (sig == SIGILL)
+		signame = "SIGILL";
+	else
+		signame = "unexpected signal";
+
+	printf("[INFO]\t%s: FLAGS = 0x%lx, CS = 0x%hx\n", signame,
+	       (unsigned long)ctx->uc_mcontext.gregs[REG_EFL],
+	       (unsigned short)ctx->uc_mcontext.gregs[REG_CS]);
+
+	got_signal = 1;
+}
+
 asm (
 	".pushsection .rodata\n\t"
 	".type vmcode_bound, @object\n\t"
@@ -38,6 +87,14 @@ asm (
 	"int3\n\t"
 	"vmcode_sysenter:\n\t"
 	"sysenter\n\t"
+	"vmcode_syscall:\n\t"
+	"syscall\n\t"
+	"vmcode_sti:\n\t"
+	"sti\n\t"
+	"vmcode_int3:\n\t"
+	"int3\n\t"
+	"vmcode_int80:\n\t"
+	"int $0x80\n\t"
 	".size vmcode, . - vmcode\n\t"
 	"end_vmcode:\n\t"
 	".code32\n\t"
@@ -45,9 +102,11 @@ asm (
 	);
 
 extern unsigned char vmcode[], end_vmcode[];
-extern unsigned char vmcode_bound[], vmcode_sysenter[];
+extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[],
+	vmcode_sti[], vmcode_int3[], vmcode_int80[];
 
 static void do_test(struct vm86plus_struct *v86, unsigned long eip,
+		    unsigned int rettype, unsigned int retarg,
 		    const char *text)
 {
 	long ret;
@@ -73,13 +132,28 @@ static void do_test(struct vm86plus_struct *v86, unsigned long eip,
 		else
 			sprintf(trapname, "%d", trapno);
 
-		printf("[OK]\tExited vm86 mode due to #%s\n", trapname);
+		printf("[INFO]\tExited vm86 mode due to #%s\n", trapname);
 	} else if (VM86_TYPE(ret) == VM86_UNKNOWN) {
-		printf("[OK]\tExited vm86 mode due to unhandled GP fault\n");
+		printf("[INFO]\tExited vm86 mode due to unhandled GP fault\n");
+	} else if (VM86_TYPE(ret) == VM86_TRAP) {
+		printf("[INFO]\tExited vm86 mode due to a trap (arg=%ld)\n",
+		       VM86_ARG(ret));
+	} else if (VM86_TYPE(ret) == VM86_SIGNAL) {
+		printf("[INFO]\tExited vm86 mode due to a signal\n");
+	} else if (VM86_TYPE(ret) == VM86_STI) {
+		printf("[INFO]\tExited vm86 mode due to STI\n");
 	} else {
-		printf("[OK]\tExited vm86 mode due to type %ld, arg %ld\n",
+		printf("[INFO]\tExited vm86 mode due to type %ld, arg %ld\n",
 		       VM86_TYPE(ret), VM86_ARG(ret));
 	}
+
+	if (rettype == -1 ||
+	    (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) {
+		printf("[OK]\tReturned correctly\n");
+	} else {
+		printf("[FAIL]\tIncorrect return reason\n");
+		nerrs++;
+	}
 }
 
 int main(void)
@@ -105,10 +179,52 @@ int main(void)
 	assert((v86.regs.cs & 3) == 0);	/* Looks like RPL = 0 */
 
 	/* #BR -- should deliver SIG??? */
-	do_test(&v86, vmcode_bound - vmcode, "#BR");
-
-	/* SYSENTER -- should cause #GP or #UD depending on CPU */
-	do_test(&v86, vmcode_sysenter - vmcode, "SYSENTER");
+	do_test(&v86, vmcode_bound - vmcode, VM86_INTx, 5, "#BR");
+
+	/*
+	 * SYSENTER -- should cause #GP or #UD depending on CPU.
+	 * Expected return type -1 means that we shouldn't validate
+	 * the vm86 return value.  This will avoid problems on non-SEP
+	 * CPUs.
+	 */
+	sethandler(SIGILL, sighandler, 0);
+	do_test(&v86, vmcode_sysenter - vmcode, -1, 0, "SYSENTER");
+	clearhandler(SIGILL);
+
+	/*
+	 * SYSCALL would be a disaster in VM86 mode.  Fortunately,
+	 * there is no kernel that both enables SYSCALL and sets
+	 * EFER.SCE, so it's #UD on all systems.  But vm86 is
+	 * buggy (or has a "feature"), so the SIGILL will actually
+	 * be delivered.
+	 */
+	sethandler(SIGILL, sighandler, 0);
+	do_test(&v86, vmcode_syscall - vmcode, VM86_SIGNAL, 0, "SYSCALL");
+	clearhandler(SIGILL);
+
+	/* STI with VIP set */
+	v86.regs.eflags |= X86_EFLAGS_VIP;
+	v86.regs.eflags &= ~X86_EFLAGS_IF;
+	do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set");
+
+	/* INT3 -- should cause #BP */
+	do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3");
+
+	/* INT80 -- should exit with "INTx 0x80" */
+	v86.regs.eax = (unsigned int)-1;
+	do_test(&v86, vmcode_int80 - vmcode, VM86_INTx, 0x80, "int80");
+
+	/* Execute a null pointer */
+	v86.regs.cs = 0;
+	v86.regs.ss = 0;
+	sethandler(SIGSEGV, sighandler, 0);
+	got_signal = 0;
+	do_test(&v86, 0, VM86_SIGNAL, 0, "Execute null pointer");
+	if (!got_signal) {
+		printf("[FAIL]\tDid not receive SIGSEGV\n");
+		nerrs++;
+	}
+	clearhandler(SIGSEGV);
 
 	return (nerrs == 0 ? 0 : 1);
 }

^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2015-07-21  9:49 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-10  2:17 [RFC/PATCH v2 v2 0/6] x86_32: Migrate to new entry/exit paths Andy Lutomirski
2015-07-10  2:17 ` [RFC/PATCH v2 v2 1/6] x86/selftests, x86/vm86: Improve entry_from_vm86 selftest Andy Lutomirski
2015-07-13 19:33   ` Andy Lutomirski
2015-07-21  9:44   ` [tip:x86/asm] " tip-bot for Andy Lutomirski
2015-07-10  2:17 ` [RFC/PATCH v2 v2 2/6] x86/entry/32: Remove 32-bit syscall audit optimizations Andy Lutomirski
2015-07-10  2:17 ` [RFC/PATCH v2 v2 3/6] x86/entry/32: Fix an incorrect comment for work_notifysig_v86 Andy Lutomirski
2015-07-10  2:17 ` [RFC/PATCH v2 v2 4/6] x86/entry/32: Remove unnecessary asm check for returns to kernel mode Andy Lutomirski
2015-07-10  2:17 ` [RFC/PATCH v2 v2 5/6] x86/entry/32: Migrate to C exit path and rework vm86 exit hack Andy Lutomirski
2015-07-10  2:17 ` [RFC/PATCH v2 v2 6/6] x86/entry: Remove do_notify_resume, syscall_trace_leave, and their TIF masks Andy Lutomirski
2015-07-10 14:45 ` [RFC/PATCH v2 v2 0/6] x86_32: Migrate to new entry/exit paths Brian Gerst

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.