LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Andy Lutomirski <luto@kernel.org>
To: Ingo Molnar <mingo@kernel.org>,
	x86@kernel.org, Shuah Khan <shuah.kh@samsung.com>
Cc: Borislav Petkov <bp@alien8.de>,
	Denys Vlasenko <vda.linux@googlemail.com>,
	linux-kernel@vger.kernel.org, Andy Lutomirski <luto@kernel.org>,
	Oleg Nesterov <oleg@redhat.com>
Subject: [PATCH] x86, selftests: Add single_step_syscall test
Date: Wed, 15 Apr 2015 16:10:07 -0700
Message-ID: <20e68021155f6ab5c60590dcad81d37c68ea2c4f.1429139075.git.luto@kernel.org> (raw)

This is a very simple test that makes system calls with TF set.
This test currently fails when running the 32-bit build on a 64-bit
kernel on an Intel CPU.  This bug is fixed by a patch in -mm called
"ptrace-x86-fix-the-tif_forced_tf-logic-in-handle_signal.patch":

    From: Oleg Nesterov <oleg@redhat.com>
    Subject: ptrace/x86: fix the TIF_FORCED_TF logic in handle_signal()

Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---

This is intended for -tip.  Not sure which subtree.

 tools/testing/selftests/x86/Makefile              |   2 +-
 tools/testing/selftests/x86/run_x86_tests.sh      |   2 +
 tools/testing/selftests/x86/single_step_syscall.c | 181 ++++++++++++++++++++++
 3 files changed, 184 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/x86/single_step_syscall.c

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index f0a7918178dd..ddf63569df5a 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -1,6 +1,6 @@
 .PHONY: all all_32 all_64 check_build32 clean run_tests
 
-TARGETS_C_BOTHBITS := sigreturn
+TARGETS_C_BOTHBITS := sigreturn single_step_syscall
 
 BINARIES_32 := $(TARGETS_C_BOTHBITS:%=%_32)
 BINARIES_64 := $(TARGETS_C_BOTHBITS:%=%_64)
diff --git a/tools/testing/selftests/x86/run_x86_tests.sh b/tools/testing/selftests/x86/run_x86_tests.sh
index 3d3ec65f3e7c..3fc19b376812 100644
--- a/tools/testing/selftests/x86/run_x86_tests.sh
+++ b/tools/testing/selftests/x86/run_x86_tests.sh
@@ -3,9 +3,11 @@
 # This is deliberately minimal.  IMO kselftests should provide a standard
 # script here.
 ./sigreturn_32 || exit 1
+./single_step_syscall_32 || exit 1
 
 if [[ "$uname -p" -eq "x86_64" ]]; then
     ./sigreturn_64 || exit 1
+    ./single_step_syscall_64 || exit 1
 fi
 
 exit 0
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
new file mode 100644
index 000000000000..50c26358e8b7
--- /dev/null
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -0,0 +1,181 @@
+/*
+ * single_step_syscall.c - single-steps various x86 syscalls
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * This is a very simple series of tests that makes system calls with
+ * the TF flag set.  This exercises some nasty kernel code in the
+ * SYSENTER case: SYSENTER does not clear TF, so SYSENTER with TF set
+ * immediately issues #DB from CPL 0.  This requires special handling in
+ * the kernel.
+ */
+
+#define _GNU_SOURCE
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <asm/ldt.h>
+#include <err.h>
+#include <setjmp.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)	/* install `handler` for `sig`; SA_SIGINFO is always OR-ed into `flags` */
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);	/* empty mask: no extra signals are blocked while the handler runs */
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");	/* report the failure and exit with status 1 */
+}
+
+static volatile sig_atomic_t sig_traps;	/* SIGTRAP count: incremented in sigtrap(), reset in check_result() */
+
+#ifdef __x86_64__
+# define REG_IP REG_RIP	/* gregs[] index used by sigtrap() to print the saved ip */
+# define WIDTH "q"	/* suffix pasted onto push/pop/pushf/popf mnemonics in the inline asm */
+#else
+# define REG_IP REG_EIP
+# define WIDTH "l"
+#endif
+
+static unsigned long get_eflags(void)	/* returns the current value of the flags register */
+{	/* NOTE(review): the push writes just below the stack pointer; on x86_64 that may be the ABI red zone - confirm */
+	unsigned long eflags;
+	asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));	/* push the flags, then pop them into eflags */
+	return eflags;
+}
+
+static void set_eflags(unsigned long eflags)	/* loads `eflags` into the flags register */
+{
+	asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
+		      : : "rm" (eflags) : "flags");	/* push the value, popf it; the "flags" clobber tells the compiler flags changed */
+}
+
+#define X86_EFLAGS_TF (1UL << 8)	/* mask for the TF flag: bit 8 of the flags register */
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)	/* SIGTRAP handler: counts each trap in sig_traps */
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	if (get_eflags() & X86_EFLAGS_TF) {	/* TF is not expected to be set while the handler itself runs */
+		set_eflags(get_eflags() & ~X86_EFLAGS_TF);	/* clear TF before printing and exiting */
+		printf("[WARN]\tSIGTRAP handler had TF set\n");
+		_exit(1);
+	}
+
+	sig_traps++;
+
+	if (sig_traps == 10000 || sig_traps == 10001) {	/* warn only at these two counts, not on every trap */
+		printf("[WARN]\tHit %d SIGTRAPs with si_addr 0x%lx, ip 0x%lx\n",
+		       (int)sig_traps,
+		       (unsigned long)info->si_addr,
+		       (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
+	}
+}
+
+static void check_result(void)	/* clear TF, then fail unless traps were seen and TF was still set on entry */
+{
+	unsigned long new_eflags = get_eflags();	/* sample the flags before TF is cleared on the next line */
+	set_eflags(new_eflags & ~X86_EFLAGS_TF);
+
+	if (!sig_traps) {	/* TF was requested, so at least one SIGTRAP must have been counted */
+		printf("[FAIL]\tNo SIGTRAP\n");
+		exit(1);
+	}
+
+	if (!(new_eflags & X86_EFLAGS_TF)) {	/* the code under test must not have dropped TF */
+		printf("[FAIL]\tTF was cleared\n");
+		exit(1);
+	}
+
+	printf("[OK]\tSurvived with TF set and %d traps\n", (int)sig_traps);
+	sig_traps = 0;	/* reset the counter for the next test case */
+}
+
+int main()	/* runs each TF test case in turn; returns 0 only if none of them exits early */
+{
+	int tmp;
+
+	sethandler(SIGTRAP, sigtrap, 0);
+
+	printf("[RUN]\tSet TF and check nop\n");
+	set_eflags(get_eflags() | X86_EFLAGS_TF);
+	asm volatile ("nop");
+	check_result();
+
+#ifdef __x86_64__
+	printf("[RUN]\tSet TF and check syscall-less opportunistic sysret\n");
+	set_eflags(get_eflags() | X86_EFLAGS_TF);
+	extern unsigned char post_nop[];	/* label defined inside the asm statement below */
+	asm volatile ("pushf" WIDTH "\n\t"
+		      "pop" WIDTH " %%r11\n\t"
+		      "nop\n\t"
+		      "post_nop:"
+		      : : "c" (post_nop) : "r11");	/* r11 = current flags; the "c" register holds post_nop's address */
+	check_result();
+#endif
+
+	printf("[RUN]\tSet TF and check int80\n");
+	set_eflags(get_eflags() | X86_EFLAGS_TF);
+	asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid));	/* passes SYS_getpid as the syscall number; the result in tmp is not checked */
+	check_result();
+
+	/*
+	 * This test is particularly interesting if fast syscalls use
+	 * SYSENTER: it triggers a nasty design flaw in SYSENTER.
+	 * Specifically, SYSENTER does not clear TF, so either SYSENTER
+	 * or the next instruction traps at CPL0.  (Of course, Intel
+	 * mostly forgot to document exactly what happens here.)  So we
+	 * get a CPL0 fault with usergs (on 64-bit kernels) and possibly
+	 * no stack.  The only sane way the kernel can possibly handle
+	 * it is to clear TF on return from the #DB handler, but this
+	 * happens way too early to set TF in the saved pt_regs, so the
+	 * kernel has to do something clever to avoid losing track of
+	 * the TF bit.
+	 *
+	 * Needless to say, we've had bugs in this area.
+	 */
+	syscall(SYS_getpid);  /* Force symbol binding without TF set. */
+	printf("[RUN]\tSet TF and check a fast syscall\n");
+	set_eflags(get_eflags() | X86_EFLAGS_TF);
+	syscall(SYS_getpid);
+	check_result();
+
+	/* Now make sure that another fast syscall doesn't set TF again. */
+	printf("[RUN]\tFast syscall with TF cleared\n");
+	fflush(stdout);  /* Force a syscall */
+	if (get_eflags() & X86_EFLAGS_TF) {
+		printf("[FAIL]\tTF is now set\n");
+		exit(1);
+	}
+	if (sig_traps) {	/* check_result() zeroed the counter, so any trap here is new */
+		printf("[FAIL]\tGot SIGTRAP\n");
+		exit(1);
+	}
+	printf("[OK]\tNothing unexpected happened\n");
+
+	return 0;
+}
-- 
2.3.0


             reply index

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-04-15 23:10 Andy Lutomirski [this message]
2015-04-16 16:16 ` [tip:x86/urgent] " tip-bot for Andy Lutomirski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20e68021155f6ab5c60590dcad81d37c68ea2c4f.1429139075.git.luto@kernel.org \
    --to=luto@kernel.org \
    --cc=bp@alien8.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=oleg@redhat.com \
    --cc=shuah.kh@samsung.com \
    --cc=vda.linux@googlemail.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git