linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.
       [not found] <1473886902-17902-1-git-send-email-khuey@kylehuey.com>
@ 2016-09-14 21:01 ` Kyle Huey
  2016-09-14 21:29   ` Dave Hansen
                     ` (4 more replies)
  2016-09-14 21:01 ` [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo Kyle Huey
  2016-09-14 21:01 ` [PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction Kyle Huey
  2 siblings, 5 replies; 28+ messages in thread
From: Kyle Huey @ 2016-09-14 21:01 UTC (permalink / raw)
  To: Robert O'Callahan
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Al Viro, Anna Schumaker, David Howells, Dmitry V. Levin,
	Eric B Munson, Andy Lutomirski, Peter Zijlstra,
	Michael S. Tsirkin, Jiri Slaby, Andrey Ryabinin, Paul Gortmaker,
	Borislav Petkov, Dmitry Vyukov, Dave Hansen,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT)

Signed-off-by: Kyle Huey <khuey@kylehuey.com>
---
 arch/x86/entry/syscalls/syscall_32.tbl |  1 +
 arch/x86/kernel/process.c              | 80 ++++++++++++++++++++++++++++++++++
 arch/x86/kernel/process_64.c           | 66 ----------------------------
 3 files changed, 81 insertions(+), 66 deletions(-)

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index f848572..3b6965b 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -386,3 +386,4 @@
 377	i386	copy_file_range		sys_copy_file_range
 378	i386	preadv2			sys_preadv2			compat_sys_preadv2
 379	i386	pwritev2		sys_pwritev2			compat_sys_pwritev2
+380	i386	arch_prctl		sys_arch_prctl
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 62c0b0e..0f857c3 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -20,6 +20,7 @@
 #include <linux/cpuidle.h>
 #include <trace/events/power.h>
 #include <linux/hw_breakpoint.h>
+#include <linux/syscalls.h>
 #include <asm/cpu.h>
 #include <asm/apic.h>
 #include <asm/syscalls.h>
@@ -32,6 +33,7 @@
 #include <asm/tlbflush.h>
 #include <asm/mce.h>
 #include <asm/vm86.h>
+#include <asm/prctl.h>
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -567,3 +569,81 @@ unsigned long get_wchan(struct task_struct *p)
 	} while (count++ < 16 && p->state != TASK_RUNNING);
 	return 0;
 }
+
+long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2)
+{
+	int ret = 0;
+	int doit = task == current;
+	int is_32 = IS_ENABLED(CONFIG_IA32_EMULATION) && test_thread_flag(TIF_IA32);
+	int cpu;
+
+	switch (code) {
+#ifdef CONFIG_X86_64
+	case ARCH_SET_GS:
+		if (is_32)
+			return -EINVAL;
+		if (arg2 >= TASK_SIZE_MAX)
+			return -EPERM;
+		cpu = get_cpu();
+		task->thread.gsindex = 0;
+		task->thread.gsbase = arg2;
+		if (doit) {
+			load_gs_index(0);
+			ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
+		}
+		put_cpu();
+		break;
+	case ARCH_SET_FS:
+		if (is_32)
+			return -EINVAL;
+		/* Not strictly needed for fs, but do it for symmetry
+		   with gs */
+		if (arg2 >= TASK_SIZE_MAX)
+			return -EPERM;
+		cpu = get_cpu();
+		task->thread.fsindex = 0;
+		task->thread.fsbase = arg2;
+		if (doit) {
+			/* set the selector to 0 to not confuse __switch_to */
+			loadsegment(fs, 0);
+			ret = wrmsrl_safe(MSR_FS_BASE, arg2);
+		}
+		put_cpu();
+		break;
+	case ARCH_GET_FS: {
+		unsigned long base;
+
+		if (is_32)
+			return -EINVAL;
+		if (doit)
+			rdmsrl(MSR_FS_BASE, base);
+		else
+			base = task->thread.fsbase;
+		ret = put_user(base, (unsigned long __user *)arg2);
+		break;
+	}
+	case ARCH_GET_GS: {
+		unsigned long base;
+
+		if (is_32)
+			return -EINVAL;
+		if (doit)
+			rdmsrl(MSR_KERNEL_GS_BASE, base);
+		else
+			base = task->thread.gsbase;
+		ret = put_user(base, (unsigned long __user *)arg2);
+		break;
+	}
+#endif
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+SYSCALL_DEFINE2(arch_prctl, int, code, unsigned long, arg2)
+{
+	return do_arch_prctl(current, code, arg2);
+}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 63236d8..e8c6302 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -524,72 +524,6 @@ void set_personality_ia32(bool x32)
 }
 EXPORT_SYMBOL_GPL(set_personality_ia32);
 
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
-{
-	int ret = 0;
-	int doit = task == current;
-	int cpu;
-
-	switch (code) {
-	case ARCH_SET_GS:
-		if (addr >= TASK_SIZE_MAX)
-			return -EPERM;
-		cpu = get_cpu();
-		task->thread.gsindex = 0;
-		task->thread.gsbase = addr;
-		if (doit) {
-			load_gs_index(0);
-			ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
-		}
-		put_cpu();
-		break;
-	case ARCH_SET_FS:
-		/* Not strictly needed for fs, but do it for symmetry
-		   with gs */
-		if (addr >= TASK_SIZE_MAX)
-			return -EPERM;
-		cpu = get_cpu();
-		task->thread.fsindex = 0;
-		task->thread.fsbase = addr;
-		if (doit) {
-			/* set the selector to 0 to not confuse __switch_to */
-			loadsegment(fs, 0);
-			ret = wrmsrl_safe(MSR_FS_BASE, addr);
-		}
-		put_cpu();
-		break;
-	case ARCH_GET_FS: {
-		unsigned long base;
-		if (doit)
-			rdmsrl(MSR_FS_BASE, base);
-		else
-			base = task->thread.fsbase;
-		ret = put_user(base, (unsigned long __user *)addr);
-		break;
-	}
-	case ARCH_GET_GS: {
-		unsigned long base;
-		if (doit)
-			rdmsrl(MSR_KERNEL_GS_BASE, base);
-		else
-			base = task->thread.gsbase;
-		ret = put_user(base, (unsigned long __user *)addr);
-		break;
-	}
-
-	default:
-		ret = -EINVAL;
-		break;
-	}
-
-	return ret;
-}
-
-long sys_arch_prctl(int code, unsigned long addr)
-{
-	return do_arch_prctl(current, code, addr);
-}
-
 unsigned long KSTK_ESP(struct task_struct *task)
 {
 	return task_pt_regs(task)->sp;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
       [not found] <1473886902-17902-1-git-send-email-khuey@kylehuey.com>
  2016-09-14 21:01 ` [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32 Kyle Huey
@ 2016-09-14 21:01 ` Kyle Huey
  2016-09-14 21:35   ` Dave Hansen
  2016-09-15 10:05   ` [Xen-devel] " David Vrabel
  2016-09-14 21:01 ` [PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction Kyle Huey
  2 siblings, 2 replies; 28+ messages in thread
From: Kyle Huey @ 2016-09-14 21:01 UTC (permalink / raw)
  To: Robert O'Callahan
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Boris Ostrovsky, David Vrabel, Juergen Gross, Borislav Petkov,
	Andy Lutomirski, Peter Zijlstra, Dave Hansen, Huang Rui,
	Rafael J. Wysocki, Len Brown, Srinivas Pandruvada,
	Aravind Gopalakrishnan, Alexander Shishkin, Vladimir Zapolskiy,
	Kristen Carlson Accardi,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	moderated list:XEN HYPERVISOR INTERFACE

Xen advertises the underlying support for CPUID faulting but not does pass
through writes to the relevant MSR, nor does it virtualize it, so it does
not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO.

Signed-off-by: Kyle Huey <khuey@kylehuey.com>
---
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/include/asm/msr-index.h   |  1 +
 arch/x86/kernel/cpu/scattered.c    | 14 ++++++++++++++
 arch/x86/xen/enlighten.c           |  3 +++
 4 files changed, 19 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 92a8308..78b9d06 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -190,6 +190,7 @@
 
 #define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */
 #define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */
 
 #define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 56f4c66..83908d5 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -41,6 +41,7 @@
 #define MSR_IA32_PERFCTR1		0x000000c2
 #define MSR_FSB_FREQ			0x000000cd
 #define MSR_PLATFORM_INFO		0x000000ce
+#define CPUID_FAULTING_SUPPORT		(1UL << 31)
 
 #define MSR_NHM_SNB_PKG_CST_CFG_CTL	0x000000e2
 #define NHM_C3_AUTO_DEMOTE		(1UL << 25)
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 8cb57df..d502da1 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -24,6 +24,17 @@ enum cpuid_regs {
 	CR_EBX
 };
 
+static int supports_cpuid_faulting(void)
+{
+	unsigned int lo, hi;
+
+	if (rdmsr_safe(MSR_PLATFORM_INFO, &lo, &hi) == 0 &&
+	    (lo & CPUID_FAULTING_SUPPORT))
+		return 1;
+	else
+		return 0;
+}
+
 void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 {
 	u32 max_level;
@@ -54,4 +65,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 		if (regs[cb->reg] & (1 << cb->bit))
 			set_cpu_cap(c, cb->feature);
 	}
+
+	if (supports_cpuid_faulting())
+		set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
 }
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b86ebb1..2c47f0c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1050,6 +1050,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
 #endif
 			val &= ~X2APIC_ENABLE;
 		break;
+	case MSR_PLATFORM_INFO:
+		val &= ~CPUID_FAULTING_SUPPORT;
+		break;
 	}
 	return val;
 }
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
       [not found] <1473886902-17902-1-git-send-email-khuey@kylehuey.com>
  2016-09-14 21:01 ` [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32 Kyle Huey
  2016-09-14 21:01 ` [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo Kyle Huey
@ 2016-09-14 21:01 ` Kyle Huey
  2016-09-15  1:29   ` Andy Lutomirski
  2 siblings, 1 reply; 28+ messages in thread
From: Kyle Huey @ 2016-09-14 21:01 UTC (permalink / raw)
  To: Robert O'Callahan
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Shuah Khan, Peter Zijlstra (Intel),
	Borislav Petkov, Rafael J. Wysocki, Len Brown,
	Srinivas Pandruvada, Aravind Gopalakrishnan, Alexander Shishkin,
	Vladimir Zapolskiy, Andy Lutomirski, Frederic Weisbecker,
	Dmitry Safonov, Kees Cook, Michael S. Tsirkin, Andrey Ryabinin,
	Jiri Slaby, Paul Gortmaker, Denys Vlasenko, Dave Hansen,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	open list:KERNEL SELFTEST FRAMEWORK

Intel supports faulting on the CPUID instruction in newer processors. Bit
31 of MSR_PLATFORM_INFO advertises support for this feature. It is
documented in detail in Section 2.3.2 of
http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf

Signed-off-by: Kyle Huey <khuey@kylehuey.com>
---
 arch/x86/include/asm/msr-index.h          |   1 +
 arch/x86/include/asm/thread_info.h        |   4 +-
 arch/x86/include/uapi/asm/prctl.h         |   6 +
 arch/x86/kernel/process.c                 |  81 +++++++++++
 tools/testing/selftests/x86/Makefile      |   2 +-
 tools/testing/selftests/x86/cpuid-fault.c | 223 ++++++++++++++++++++++++++++++
 6 files changed, 315 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/x86/cpuid-fault.c

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 83908d5..4aebec2 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -53,6 +53,7 @@
 #define MSR_MTRRcap			0x000000fe
 #define MSR_IA32_BBL_CR_CTL		0x00000119
 #define MSR_IA32_BBL_CR_CTL3		0x0000011e
+#define MSR_MISC_FEATURES_ENABLES	0x00000140
 
 #define MSR_IA32_SYSENTER_CS		0x00000174
 #define MSR_IA32_SYSENTER_ESP		0x00000175
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 8b7c8d8..ec93976 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -93,6 +93,7 @@ struct thread_info {
 #define TIF_SECCOMP		8	/* secure computing */
 #define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
 #define TIF_UPROBE		12	/* breakpointed or singlestepping */
+#define TIF_NOCPUID		15	/* CPUID is not accessible in userland */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* IA32 compatibility process */
 #define TIF_FORK		18	/* ret_from_fork */
@@ -117,6 +118,7 @@ struct thread_info {
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_USER_RETURN_NOTIFY	(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE		(1 << TIF_UPROBE)
+#define _TIF_NOCPUID		(1 << TIF_NOCPUID)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
 #define _TIF_FORK		(1 << TIF_FORK)
@@ -146,7 +148,7 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
-	(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+	(_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 3ac5032..c087e55 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -6,4 +6,10 @@
 #define ARCH_GET_FS 0x1003
 #define ARCH_GET_GS 0x1004
 
+/* Get/set the process' ability to use the CPUID instruction */
+#define ARCH_GET_CPUID 0x1005
+#define ARCH_SET_CPUID 0x1006
+# define ARCH_CPUID_ENABLE		1	/* allow the use of the CPUID instruction */
+# define ARCH_CPUID_SIGSEGV		2	/* throw a SIGSEGV instead of reading the CPUID */
+
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 0f857c3..5fc8e9d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -193,6 +193,69 @@ int set_tsc_mode(unsigned int val)
 	return 0;
 }
 
+static void switch_cpuid_faulting(bool on)
+{
+	if (on)
+		msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0);
+	else
+		msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0);
+}
+
+static void disable_cpuid(void)
+{
+	preempt_disable();
+	if (!test_and_set_thread_flag(TIF_NOCPUID))
+		/*
+		 * Must flip the CPU state synchronously with
+		 * TIF_NOCPUID in the current running context.
+		 */
+		switch_cpuid_faulting(true);
+	preempt_enable();
+}
+
+static void enable_cpuid(void)
+{
+	preempt_disable();
+	if (test_and_clear_thread_flag(TIF_NOCPUID))
+		/*
+		 * Must flip the CPU state synchronously with
+		 * TIF_NOCPUID in the current running context.
+		 */
+		switch_cpuid_faulting(false);
+	preempt_enable();
+}
+
+int get_cpuid_mode(unsigned long adr)
+{
+	unsigned int val;
+
+	if (test_thread_flag(TIF_NOCPUID))
+		val = ARCH_CPUID_SIGSEGV;
+	else
+		val = ARCH_CPUID_ENABLE;
+
+	return put_user(val, (unsigned int __user *)adr);
+}
+
+int set_cpuid_mode(struct task_struct *task, unsigned long val)
+{
+	/* Only disable/enable_cpuid() if it is supported on this hardware. */
+	bool cpuid_fault_supported = static_cpu_has(X86_FEATURE_CPUID_FAULT);
+
+	if (val == ARCH_CPUID_ENABLE && cpuid_fault_supported) {
+		if (task_no_new_privs(task) && test_thread_flag(TIF_NOCPUID))
+			return -EACCES;
+
+		enable_cpuid();
+	} else if (val == ARCH_CPUID_SIGSEGV && cpuid_fault_supported) {
+		disable_cpuid();
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		      struct tss_struct *tss)
 {
@@ -212,6 +275,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		update_debugctlmsr(debugctl);
 	}
 
+	if (test_tsk_thread_flag(prev_p, TIF_NOCPUID) ^
+	    test_tsk_thread_flag(next_p, TIF_NOCPUID)) {
+		/* prev and next are different */
+		if (test_tsk_thread_flag(next_p, TIF_NOCPUID))
+			switch_cpuid_faulting(true);
+		else
+			switch_cpuid_faulting(false);
+	}
+
 	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
 	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
 		/* prev and next are different */
@@ -635,6 +707,15 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2)
 		break;
 	}
 #endif
+	case ARCH_GET_CPUID: {
+		ret = get_cpuid_mode(arg2);
+		break;
+	}
+	case ARCH_SET_CPUID: {
+		ret = set_cpuid_mode(task, arg2);
+		break;
+	}
+
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 4f747ee..fbf34d3 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -5,7 +5,7 @@ include ../lib.mk
 .PHONY: all all_32 all_64 warn_32bit_failure clean
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
-			check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test
+			check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test cpuid-fault
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
diff --git a/tools/testing/selftests/x86/cpuid-fault.c b/tools/testing/selftests/x86/cpuid-fault.c
new file mode 100644
index 0000000..d72b723
--- /dev/null
+++ b/tools/testing/selftests/x86/cpuid-fault.c
@@ -0,0 +1,223 @@
+
+/*
+ * Tests for arch_prctl(ARCH_GET_CPUID, ...) / prctl(ARCH_SET_CPUID, ...)
+ *
+ * Basic test to test behaviour of ARCH_GET_CPUID and ARCH_SET_CPUID
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+#include <cpuid.h>
+#include <errno.h>
+#include <sys/wait.h>
+
+#include <sys/prctl.h>
+#include <linux/prctl.h>
+
+const char *cpuid_names[] = {
+	[0] = "[not set]",
+	[ARCH_CPUID_ENABLE] = "ARCH_CPUID_ENABLE",
+	[ARCH_CPUID_SIGSEGV] = "ARCH_CPUID_SIGSEGV",
+};
+
+int arch_prctl(int code, unsigned long arg2)
+{
+	return syscall(SYS_arch_prctl, code, arg2);
+}
+
+int cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx,
+	  unsigned int *edx)
+{
+	return __get_cpuid(0, eax, ebx, ecx, edx);
+}
+
+int do_child_exec_test(int eax, int ebx, int ecx, int edx)
+{i
+	int cpuid_val = 0, child = 0, status = 0;
+
+	printf("arch_prctl(ARCH_GET_CPUID, &cpuid_val); ");
+	fflush(stdout);
+
+	if (arch_prctl(ARCH_GET_CPUID, (unsigned long)&cpuid_val) != 0)
+		exit(42);
+
+	printf("cpuid_val == %s\n", cpuid_names[cpuid_val]);
+	if (cpuid_val != ARCH_CPUID_SIGSEGV)
+		exit(42);
+
+	if ((child = fork()) == 0)
+		execl("/proc/self/exe", "cpuid-fault", NULL);
+
+	/* That will blow up almost immediately, since dl/libc use cpuid. */
+	if (child != waitpid(child, &status, 0))
+		exit(42);
+
+	if (!WIFSIGNALED(status) || WTERMSIG(status) != SIGSEGV)
+		exit(42);
+
+	return 0;
+}
+
+int child_received_signal;
+
+void child_sigsegv_cb(int sig)
+{
+	int cpuid_val = 0;
+
+	child_received_signal = 1;
+	printf("[ SIG_SEGV ]\n");
+	printf("arch_prctl(ARCH_GET_CPUID, &cpuid_val); ");
+	fflush(stdout);
+
+	if (arch_prctl(ARCH_GET_CPUID, (unsigned long)&cpuid_val) != 0)
+		exit(42);
+
+	printf("cpuid_val == %s\n", cpuid_names[cpuid_val]);
+	printf("arch_prctl(ARCH_SET_CPUID, ARCH_CPUID_ENABLE)\n");
+	fflush(stdout);
+	if (arch_prctl(ARCH_SET_CPUID, ARCH_CPUID_ENABLE) != 0)
+		exit(errno);
+
+	printf("cpuid() == ");
+}
+
+int do_child_test(void)
+{
+	unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
+
+	signal(SIGSEGV, child_sigsegv_cb);
+
+	/* the child starts out with cpuid disabled, the signal handler
+	 * attempts to enable and retry
+	 */
+	printf("cpuid() == ");
+	fflush(stdout);
+	cpuid(&eax, &ebx, &ecx, &edx);
+	printf("{%x, %x, %x, %x}\n", eax, ebx, ecx, edx);
+	return child_received_signal ? 0 : 42;
+}
+
+void sigsegv_cb(int sig)
+{
+	int cpuid_val = 0;
+
+	printf("[ SIG_SEGV ]\n");
+	printf("arch_prctl(ARCH_GET_CPUID, &cpuid_val); ");
+	fflush(stdout);
+
+	if (arch_prctl(ARCH_GET_CPUID, (unsigned long)&cpuid_val) != 0)
+		exit(42);
+
+	printf("cpuid_val == %s\n", cpuid_names[cpuid_val]);
+	printf("arch_prctl(ARC_SET_CPUID, ARCH_CPUID_ENABLE)\n");
+	fflush(stdout);
+	if (arch_prctl(ARCH_SET_CPUID, ARCH_CPUID_ENABLE) != 0)
+		exit(42);
+
+	printf("cpuid() == ");
+}
+
+int main(void)
+{
+	int cpuid_val = 0, child = 0, status = 0;
+	unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
+
+	signal(SIGSEGV, sigsegv_cb);
+
+	cpuid(&eax, &ebx, &ecx, &edx);
+	printf("cpuid() == {%x, %x, %x, %x}\n", eax, ebx, ecx, edx);
+	printf("arch_prctl(ARCH_GET_CPUID, &cpuid_val); ");
+	fflush(stdout);
+
+	if (arch_prctl(ARCH_GET_CPUID, (unsigned long)&cpuid_val) != 0) {
+		if (errno == EINVAL) {
+			printf("ARCH_GET_CPUID is unsupported on this system.");
+			fflush(stdout);
+			exit(0); /* no ARCH_GET_CPUID on this system */
+		} else {
+			exit(42);
+		}
+	}
+
+	printf("cpuid_val == %s\n", cpuid_names[cpuid_val]);
+	cpuid(&eax, &ebx, &ecx, &edx);
+	printf("cpuid() == {%x, %x, %x, %x}\n", eax, ebx, ecx, edx);
+	printf("arch_prctl(ARCH_SET_CPUID, ARCH_CPUID_ENABLE)\n");
+	fflush(stdout);
+
+	if (arch_prctl(ARCH_SET_CPUID, ARCH_CPUID_ENABLE) != 0) {
+		if (errno == EINVAL) {
+			printf("ARCH_SET_CPUID is unsupported on this system.");
+			fflush(stdout);
+			exit(0); /* no ARCH_SET_CPUID on this system */
+		} else {
+			exit(42);
+		}
+	}
+
+
+	cpuid(&eax, &ebx, &ecx, &edx);
+	printf("cpuid() == {%x, %x, %x, %x}\n", eax, ebx, ecx, edx);
+	printf("arch_prctl(ARCH_SET_CPUID, ARCH_CPUID_SIGSEGV)\n");
+	fflush(stdout);
+
+	if (arch_prctl(ARCH_SET_CPUID, ARCH_CPUID_SIGSEGV) == -1)
+		exit(42);
+
+	printf("cpuid() == ");
+	fflush(stdout);
+	eax = ebx = ecx = edx = 0;
+	cpuid(&eax, &ebx, &ecx, &edx);
+	printf("{%x, %x, %x, %x}\n", eax, ebx, ecx, edx);
+	printf("arch_prctl(ARCH_SET_CPUID, ARCH_CPUID_SIGSEGV)\n");
+	fflush(stdout);
+
+	if (arch_prctl(ARCH_SET_CPUID, ARCH_CPUID_SIGSEGV) == -1)
+		exit(42);
+
+	printf("do_child_test\n");
+	fflush(stdout);
+	if ((child = fork()) == 0)
+		return do_child_test();
+
+	if (child != waitpid(child, &status, 0))
+		exit(42);
+
+	if (WEXITSTATUS(status) != 0)
+		exit(42);
+
+	printf("prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)\n");
+	fflush(stdout);
+	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) != 0)
+		exit(42);
+
+	printf("do_child_test\n");
+	fflush(stdout);
+	if ((child = fork()) == 0)
+		return do_child_test();
+
+	if (child != waitpid(child, &status, 0))
+		exit(42);
+
+	if (WEXITSTATUS(status) != EACCES)
+		exit(42);
+
+	printf("do_child_exec_test\n");
+	fflush(stdout);
+	if ((child = fork()) == 0)
+		return do_child_exec_test(eax, ebx, ecx, edx);
+
+	if (child != waitpid(child, &status, 0))
+		exit(42);
+
+	if (WEXITSTATUS(status) != 0)
+		exit(42);
+
+	printf("All tests passed!\n");
+	fflush(stdout);
+	exit(EXIT_SUCCESS);
+}
+
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.
  2016-09-14 21:01 ` [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32 Kyle Huey
@ 2016-09-14 21:29   ` Dave Hansen
  2016-09-14 21:35     ` Kyle Huey
  2016-09-14 22:23   ` kbuild test robot
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 28+ messages in thread
From: Dave Hansen @ 2016-09-14 21:29 UTC (permalink / raw)
  To: Kyle Huey, Robert O'Callahan
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Al Viro, Anna Schumaker, David Howells, Dmitry V. Levin,
	Eric B Munson, Andy Lutomirski, Peter Zijlstra,
	Michael S. Tsirkin, Jiri Slaby, Andrey Ryabinin, Paul Gortmaker,
	Borislav Petkov, Dmitry Vyukov,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT)

On 09/14/2016 02:01 PM, Kyle Huey wrote:
> Signed-off-by: Kyle Huey <khuey@kylehuey.com>
> ---
>  arch/x86/entry/syscalls/syscall_32.tbl |  1 +
>  arch/x86/kernel/process.c              | 80 ++++++++++++++++++++++++++++++++++
>  arch/x86/kernel/process_64.c           | 66 ----------------------------
>  3 files changed, 81 insertions(+), 66 deletions(-)

Could you explain a bit about what is going on here?  Is it just a plain
old code move, _why_ you had to do it this way, etc...?

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.
  2016-09-14 21:29   ` Dave Hansen
@ 2016-09-14 21:35     ` Kyle Huey
  2016-09-14 21:46       ` Dave Hansen
  0 siblings, 1 reply; 28+ messages in thread
From: Kyle Huey @ 2016-09-14 21:35 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Robert O'Callahan, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Al Viro, Anna Schumaker, David Howells, Dmitry V. Levin,
	Eric B Munson, Andy Lutomirski, Peter Zijlstra,
	Michael S. Tsirkin, Jiri Slaby, Andrey Ryabinin, Paul Gortmaker,
	Borislav Petkov, Dmitry Vyukov,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT)

On Wed, Sep 14, 2016 at 2:29 PM, Dave Hansen
<dave.hansen@linux.intel.com> wrote:
> On 09/14/2016 02:01 PM, Kyle Huey wrote:
>> Signed-off-by: Kyle Huey <khuey@kylehuey.com>
>> ---
>>  arch/x86/entry/syscalls/syscall_32.tbl |  1 +
>>  arch/x86/kernel/process.c              | 80 ++++++++++++++++++++++++++++++++++
>>  arch/x86/kernel/process_64.c           | 66 ----------------------------
>>  3 files changed, 81 insertions(+), 66 deletions(-)
>
> Could you explain a bit about what is going on here?  Is it just a plain
> old code move, _why_ you had to do it this way, etc...?

Sure.  In the subsequent patches in this series I add an arch_prctl
that is available for both 64 and 32 bit programs/kernels.  Since
process_64.c is only built for 64 bit kernels, this syscall can't stay
there anymore.

It's not quite a plain move.  To leave the existing arch_prctls only
accessible to 64 bit callers, I added the is_32 bit and the four early
returns for each existing ARCH_BLAH.  These cases are now
conditionally compiled out in a 32 bit kernel, so we only have to
handle the 32 bit process on a 64 bit kernel case at runtime.

I considered doing this instead with a compat wrapper for the syscall
on 32 bit systems that would filter these arch_prctls before getting
to do_arch_prctl. I didn't see any prior art for it, so decided not to
proceed that way.

- Kyle

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
  2016-09-14 21:01 ` [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo Kyle Huey
@ 2016-09-14 21:35   ` Dave Hansen
  2016-09-14 22:03     ` Kyle Huey
  2016-09-15 10:07     ` David Vrabel
  2016-09-15 10:05   ` [Xen-devel] " David Vrabel
  1 sibling, 2 replies; 28+ messages in thread
From: Dave Hansen @ 2016-09-14 21:35 UTC (permalink / raw)
  To: Kyle Huey, Robert O'Callahan
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Boris Ostrovsky, David Vrabel, Juergen Gross, Borislav Petkov,
	Andy Lutomirski, Peter Zijlstra, Huang Rui, Rafael J. Wysocki,
	Len Brown, Srinivas Pandruvada, Aravind Gopalakrishnan,
	Alexander Shishkin, Vladimir Zapolskiy, Kristen Carlson Accardi,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	moderated list:XEN HYPERVISOR INTERFACE

On 09/14/2016 02:01 PM, Kyle Huey wrote:
> Xen advertises the underlying support for CPUID faulting but not does pass
> through writes to the relevant MSR, nor does it virtualize it, so it does
> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO.

That needs to make it into a comment, please.

That *is* a Xen bug, right?

> Signed-off-by: Kyle Huey <khuey@kylehuey.com>
> ---
>  arch/x86/include/asm/cpufeatures.h |  1 +
>  arch/x86/include/asm/msr-index.h   |  1 +
>  arch/x86/kernel/cpu/scattered.c    | 14 ++++++++++++++
>  arch/x86/xen/enlighten.c           |  3 +++
>  4 files changed, 19 insertions(+)
> 
> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
> index 92a8308..78b9d06 100644
> --- a/arch/x86/include/asm/cpufeatures.h
> +++ b/arch/x86/include/asm/cpufeatures.h
> @@ -190,6 +190,7 @@
>  
>  #define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */
>  #define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
> +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */
>  
>  #define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
>  #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index 56f4c66..83908d5 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -41,6 +41,7 @@
>  #define MSR_IA32_PERFCTR1		0x000000c2
>  #define MSR_FSB_FREQ			0x000000cd
>  #define MSR_PLATFORM_INFO		0x000000ce
> +#define CPUID_FAULTING_SUPPORT		(1UL << 31)
>  
>  #define MSR_NHM_SNB_PKG_CST_CFG_CTL	0x000000e2
>  #define NHM_C3_AUTO_DEMOTE		(1UL << 25)
> diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
> index 8cb57df..d502da1 100644
> --- a/arch/x86/kernel/cpu/scattered.c
> +++ b/arch/x86/kernel/cpu/scattered.c
> @@ -24,6 +24,17 @@ enum cpuid_regs {
>  	CR_EBX
>  };
>  
> +static int supports_cpuid_faulting(void)
> +{
> +	unsigned int lo, hi;
> +
> +	if (rdmsr_safe(MSR_PLATFORM_INFO, &lo, &hi) == 0 &&
> +	    (lo & CPUID_FAULTING_SUPPORT))
> +		return 1;
> +	else
> +		return 0;
> +}

Is any of this useful to optimize away at compile-time?  We have config
options for when we're running as a guest, and this seems like a feature
that isn't available when running on bare metal.

> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index b86ebb1..2c47f0c 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -1050,6 +1050,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
>  #endif
>  			val &= ~X2APIC_ENABLE;
>  		break;
> +	case MSR_PLATFORM_INFO:
> +		val &= ~CPUID_FAULTING_SUPPORT;
> +		break;
>  	}
>  	return val;
>  }

Does this mean that Xen guests effectively can't take advantage of this
feature?

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.
  2016-09-14 21:35     ` Kyle Huey
@ 2016-09-14 21:46       ` Dave Hansen
  2016-09-14 21:56         ` Kyle Huey
  0 siblings, 1 reply; 28+ messages in thread
From: Dave Hansen @ 2016-09-14 21:46 UTC (permalink / raw)
  To: Kyle Huey
  Cc: Robert O'Callahan, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Al Viro, Anna Schumaker, David Howells, Dmitry V. Levin,
	Eric B Munson, Andy Lutomirski, Peter Zijlstra,
	Michael S. Tsirkin, Jiri Slaby, Andrey Ryabinin, Paul Gortmaker,
	Borislav Petkov, Dmitry Vyukov,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT)

On 09/14/2016 02:35 PM, Kyle Huey wrote:
> It's not quite a plain move.  To leave the existing arch_prctls only
> accessible to 64 bit callers, I added the is_32 bit and the four early
> returns for each existing ARCH_BLAH.  These cases are now
> conditionally compiled out in a 32 bit kernel, so we only have to
> handle the 32 bit process on a 64 bit kernel case at runtime.

I think it would make a lot of sense to do the move and the modification
in two patches.

Oh, and arch_prctl() really *is* 64-bit only.  I didn't realize that.
That would have been nice to call out in the changelog, too.  It's
totally non-obvious.

You're going to owe some manpage updates after this too, I guess.  It
says: "arch_prctl() is supported only on Linux/x86-64 for 64-bit
programs currently."

FWIW, I don't think it would be horrible to leave the existing
do_arch_prctl() code in process_64.h and call it
do_64_bit_only_something_arch_prctl(), and only call in to it from the
generic do_arch_prctl().  You really have one reason for all the "if
(is_32)"'s and it would be nice to document why in one single place.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.
  2016-09-14 21:46       ` Dave Hansen
@ 2016-09-14 21:56         ` Kyle Huey
  0 siblings, 0 replies; 28+ messages in thread
From: Kyle Huey @ 2016-09-14 21:56 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Robert O'Callahan, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Al Viro, Anna Schumaker, David Howells, Dmitry V. Levin,
	Eric B Munson, Andy Lutomirski, Peter Zijlstra,
	Michael S. Tsirkin, Jiri Slaby, Andrey Ryabinin, Paul Gortmaker,
	Borislav Petkov, Dmitry Vyukov,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT)

On Wed, Sep 14, 2016 at 2:46 PM, Dave Hansen
<dave.hansen@linux.intel.com> wrote:
> On 09/14/2016 02:35 PM, Kyle Huey wrote:
>> It's not quite a plain move.  To leave the existing arch_prctls only
>> accessible to 64 bit callers, I added the is_32 bit and the four early
>> returns for each existing ARCH_BLAH.  These cases are now
>> conditionally compiled out in a 32 bit kernel, so we only have to
>> handle the 32 bit process on a 64 bit kernel case at runtime.
>
> I think it would make a lot of sense to do the move and the modification
> in two patches.

Ok.

> Oh, and arch_prctl() really *is* 64-bit only.  I didn't realize that.
> That would have been nice to call out in the changelog, too.  It's
> totally non-obvious.

Ok.

> You're going to owe some manpage updates after this too, I guess.  It
> says: "arch_prctl() is supported only on Linux/x86-64 for 64-bit
> programs currently."

Indeed. There's a patch at the end of the series (sent to LKML, but
you're not directly CCd on it) with a suggested manpage patch.

> FWIW, I don't think it would be horrible to leave the existing
> do_arch_prctl() code in process_64.h and call it
> do_64_bit_only_something_arch_prctl(), and only call in to it from the
> generic do_arch_prctl().  You really have one reason for all the "if
> (is_32)"'s and it would be nice to document why in one single place.

Yeah, that seems like a good idea.

- Kyle

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
  2016-09-14 21:35   ` Dave Hansen
@ 2016-09-14 22:03     ` Kyle Huey
  2016-09-15  1:17       ` Andy Lutomirski
  2016-09-15 10:07     ` David Vrabel
  1 sibling, 1 reply; 28+ messages in thread
From: Kyle Huey @ 2016-09-14 22:03 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Robert O'Callahan, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Boris Ostrovsky, David Vrabel, Juergen Gross, Borislav Petkov,
	Andy Lutomirski, Peter Zijlstra, Huang Rui, Rafael J. Wysocki,
	Len Brown, Srinivas Pandruvada, Aravind Gopalakrishnan,
	Alexander Shishkin, Vladimir Zapolskiy, Kristen Carlson Accardi,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	moderated list:XEN HYPERVISOR INTERFACE

On Wed, Sep 14, 2016 at 2:35 PM, Dave Hansen
<dave.hansen@linux.intel.com> wrote:
> On 09/14/2016 02:01 PM, Kyle Huey wrote:
>> Xen advertises the underlying support for CPUID faulting but not does pass
>> through writes to the relevant MSR, nor does it virtualize it, so it does
>> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO.
>
> That needs to make it into a comment, please.
>
> That *is* a Xen bug, right?

Yes.  Xen needs to either not advertise the feature or actually
support it.  This came up in the prior thread ("[PATCH] prctl,x86 Add
PR_[GET|SET]_CPUID for controlling the CPUID instruction.").

>> Signed-off-by: Kyle Huey <khuey@kylehuey.com>
>> ---
>>  arch/x86/include/asm/cpufeatures.h |  1 +
>>  arch/x86/include/asm/msr-index.h   |  1 +
>>  arch/x86/kernel/cpu/scattered.c    | 14 ++++++++++++++
>>  arch/x86/xen/enlighten.c           |  3 +++
>>  4 files changed, 19 insertions(+)
>>
>> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
>> index 92a8308..78b9d06 100644
>> --- a/arch/x86/include/asm/cpufeatures.h
>> +++ b/arch/x86/include/asm/cpufeatures.h
>> @@ -190,6 +190,7 @@
>>
>>  #define X86_FEATURE_CPB              ( 7*32+ 2) /* AMD Core Performance Boost */
>>  #define X86_FEATURE_EPB              ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
>> +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 4) /* Intel CPUID faulting */
>>
>>  #define X86_FEATURE_HW_PSTATE        ( 7*32+ 8) /* AMD HW-PState */
>>  #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
>> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
>> index 56f4c66..83908d5 100644
>> --- a/arch/x86/include/asm/msr-index.h
>> +++ b/arch/x86/include/asm/msr-index.h
>> @@ -41,6 +41,7 @@
>>  #define MSR_IA32_PERFCTR1            0x000000c2
>>  #define MSR_FSB_FREQ                 0x000000cd
>>  #define MSR_PLATFORM_INFO            0x000000ce
>> +#define CPUID_FAULTING_SUPPORT               (1UL << 31)
>>
>>  #define MSR_NHM_SNB_PKG_CST_CFG_CTL  0x000000e2
>>  #define NHM_C3_AUTO_DEMOTE           (1UL << 25)
>> diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
>> index 8cb57df..d502da1 100644
>> --- a/arch/x86/kernel/cpu/scattered.c
>> +++ b/arch/x86/kernel/cpu/scattered.c
>> @@ -24,6 +24,17 @@ enum cpuid_regs {
>>       CR_EBX
>>  };
>>
>> +static int supports_cpuid_faulting(void)
>> +{
>> +     unsigned int lo, hi;
>> +
>> +     if (rdmsr_safe(MSR_PLATFORM_INFO, &lo, &hi) == 0 &&
>> +         (lo & CPUID_FAULTING_SUPPORT))
>> +             return 1;
>> +     else
>> +             return 0;
>> +}
>
> Is any of this useful to optimize away at compile-time?  We have config
> options for when we're running as a guest, and this seems like a feature
> that isn't available when running on bare metal.

On the contrary, this is only available when we're on bare metal.
Neither Xen nor KVM virtualize CPUID faulting (although KVM correctly
suppresses MSR_PLATFORM_INFO's report of support for it).

>> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
>> index b86ebb1..2c47f0c 100644
>> --- a/arch/x86/xen/enlighten.c
>> +++ b/arch/x86/xen/enlighten.c
>> @@ -1050,6 +1050,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
>>  #endif
>>                       val &= ~X2APIC_ENABLE;
>>               break;
>> +     case MSR_PLATFORM_INFO:
>> +             val &= ~CPUID_FAULTING_SUPPORT;
>> +             break;
>>       }
>>       return val;
>>  }
>
> Does this mean that Xen guests effectively can't take advantage of this
> feature?

Yes.

- Kyle

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.
  2016-09-14 21:01 ` [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32 Kyle Huey
  2016-09-14 21:29   ` Dave Hansen
@ 2016-09-14 22:23   ` kbuild test robot
  2016-09-15  0:01   ` kbuild test robot
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 28+ messages in thread
From: kbuild test robot @ 2016-09-14 22:23 UTC (permalink / raw)
  To: Kyle Huey
  Cc: kbuild-all, Robert O'Callahan, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Al Viro, Anna Schumaker, David Howells, Dmitry V. Levin,
	Eric B Munson, Andy Lutomirski, Peter Zijlstra,
	Michael S. Tsirkin, Jiri Slaby, Andrey Ryabinin, Paul Gortmaker,
	Borislav Petkov, Dmitry Vyukov, Dave Hansen,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT)

[-- Attachment #1: Type: text/plain, Size: 1858 bytes --]

Hi Kyle,

[auto build test WARNING on linus/master]
[also build test WARNING on v4.8-rc6]
[cannot apply to tip/x86/core next-20160914]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
[Suggest to use git(>=2.9.0) format-patch --base=<commit> (or --base=auto for convenience) to record what (public, well-known) commit your patch series was built on]
[Check https://git-scm.com/docs/git-format-patch for more information]

url:    https://github.com/0day-ci/linux/commits/Kyle-Huey/syscalls-x86-Expose-arch_prctl-on-x86-32/20160915-052851
config: i386-tinyconfig (attached as .config)
compiler: gcc-6 (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All warnings (new ones prefixed by >>):

   arch/x86/kernel/process.c: In function 'do_arch_prctl':
>> arch/x86/kernel/process.c:578:6: warning: unused variable 'cpu' [-Wunused-variable]
     int cpu;
         ^~~
>> arch/x86/kernel/process.c:577:6: warning: unused variable 'is_32' [-Wunused-variable]
     int is_32 = IS_ENABLED(CONFIG_IA32_EMULATION) && test_thread_flag(TIF_IA32);
         ^~~~~
>> arch/x86/kernel/process.c:576:6: warning: unused variable 'doit' [-Wunused-variable]
     int doit = task == current;
         ^~~~

vim +/cpu +578 arch/x86/kernel/process.c

   570		return 0;
   571	}
   572	
   573	long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2)
   574	{
   575		int ret = 0;
 > 576		int doit = task == current;
 > 577		int is_32 = IS_ENABLED(CONFIG_IA32_EMULATION) && test_thread_flag(TIF_IA32);
 > 578		int cpu;
   579	
   580		switch (code) {
   581	#ifdef CONFIG_X86_64

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 6336 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.
  2016-09-14 21:01 ` [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32 Kyle Huey
  2016-09-14 21:29   ` Dave Hansen
  2016-09-14 22:23   ` kbuild test robot
@ 2016-09-15  0:01   ` kbuild test robot
  2016-09-15  0:01   ` kbuild test robot
  2016-09-15  1:14   ` Andy Lutomirski
  4 siblings, 0 replies; 28+ messages in thread
From: kbuild test robot @ 2016-09-15  0:01 UTC (permalink / raw)
  To: Kyle Huey
  Cc: kbuild-all, Robert O'Callahan, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Al Viro, Anna Schumaker, David Howells, Dmitry V. Levin,
	Eric B Munson, Andy Lutomirski, Peter Zijlstra,
	Michael S. Tsirkin, Jiri Slaby, Andrey Ryabinin, Paul Gortmaker,
	Borislav Petkov, Dmitry Vyukov, Dave Hansen,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT)

[-- Attachment #1: Type: text/plain, Size: 1464 bytes --]

Hi Kyle,

[auto build test WARNING on linus/master]
[also build test WARNING on v4.8-rc6]
[cannot apply to tip/x86/core next-20160914]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
[Suggest to use git(>=2.9.0) format-patch --base=<commit> (or --base=auto for convenience) to record what (public, well-known) commit your patch series was built on]
[Check https://git-scm.com/docs/git-format-patch for more information]

url:    https://github.com/0day-ci/linux/commits/Kyle-Huey/syscalls-x86-Expose-arch_prctl-on-x86-32/20160915-052851
config: microblaze-mmu_defconfig (attached as .config)
compiler: microblaze-linux-gcc (GCC) 4.9.0
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=microblaze 

All warnings (new ones prefixed by >>):

   <stdin>:1307:2: warning: #warning syscall copy_file_range not implemented [-Wcpp]
   <stdin>:1310:2: warning: #warning syscall preadv2 not implemented [-Wcpp]
   <stdin>:1313:2: warning: #warning syscall pwritev2 not implemented [-Wcpp]
>> <stdin>:1316:2: warning: #warning syscall arch_prctl not implemented [-Wcpp]

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 12303 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.
  2016-09-14 21:01 ` [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32 Kyle Huey
                     ` (2 preceding siblings ...)
  2016-09-15  0:01   ` kbuild test robot
@ 2016-09-15  0:01   ` kbuild test robot
  2016-09-15  1:14   ` Andy Lutomirski
  4 siblings, 0 replies; 28+ messages in thread
From: kbuild test robot @ 2016-09-15  0:01 UTC (permalink / raw)
  To: Kyle Huey
  Cc: kbuild-all, Robert O'Callahan, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Al Viro, Anna Schumaker, David Howells, Dmitry V. Levin,
	Eric B Munson, Andy Lutomirski, Peter Zijlstra,
	Michael S. Tsirkin, Jiri Slaby, Andrey Ryabinin, Paul Gortmaker,
	Borislav Petkov, Dmitry Vyukov, Dave Hansen,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT)

[-- Attachment #1: Type: text/plain, Size: 1111 bytes --]

Hi Kyle,

[auto build test ERROR on linus/master]
[also build test ERROR on v4.8-rc6]
[cannot apply to tip/x86/core next-20160914]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
[Suggest to use git(>=2.9.0) format-patch --base=<commit> (or --base=auto for convenience) to record what (public, well-known) commit your patch series was built on]
[Check https://git-scm.com/docs/git-format-patch for more information]

url:    https://github.com/0day-ci/linux/commits/Kyle-Huey/syscalls-x86-Expose-arch_prctl-on-x86-32/20160915-052851
config: um-i386_defconfig (attached as .config)
compiler: gcc-6 (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
        # save the attached .config to linux build tree
        make ARCH=um SUBARCH=i386

All errors (new ones prefixed by >>):

>> arch/x86/um/built-in.o:(.rodata+0x6b0): undefined reference to `sys_arch_prctl'
   collect2: error: ld returned 1 exit status

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 7597 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32.
  2016-09-14 21:01 ` [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32 Kyle Huey
                     ` (3 preceding siblings ...)
  2016-09-15  0:01   ` kbuild test robot
@ 2016-09-15  1:14   ` Andy Lutomirski
  4 siblings, 0 replies; 28+ messages in thread
From: Andy Lutomirski @ 2016-09-15  1:14 UTC (permalink / raw)
  To: Kyle Huey
  Cc: Robert O'Callahan, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Al Viro, Anna Schumaker, David Howells, Dmitry V. Levin,
	Eric B Munson, Andy Lutomirski, Peter Zijlstra,
	Michael S. Tsirkin, Jiri Slaby, Andrey Ryabinin, Paul Gortmaker,
	Borislav Petkov, Dmitry Vyukov, Dave Hansen,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT)

On Wed, Sep 14, 2016 at 2:01 PM, Kyle Huey <me@kylehuey.com> wrote:
> Signed-off-by: Kyle Huey <khuey@kylehuey.com>
> ---
>  arch/x86/entry/syscalls/syscall_32.tbl |  1 +
>  arch/x86/kernel/process.c              | 80 ++++++++++++++++++++++++++++++++++
>  arch/x86/kernel/process_64.c           | 66 ----------------------------
>  3 files changed, 81 insertions(+), 66 deletions(-)
>
> diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
> index f848572..3b6965b 100644
> --- a/arch/x86/entry/syscalls/syscall_32.tbl
> +++ b/arch/x86/entry/syscalls/syscall_32.tbl
> @@ -386,3 +386,4 @@
>  377    i386    copy_file_range         sys_copy_file_range
>  378    i386    preadv2                 sys_preadv2                     compat_sys_preadv2
>  379    i386    pwritev2                sys_pwritev2                    compat_sys_pwritev2
> +380    i386    arch_prctl              sys_arch_prctl
> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
> index 62c0b0e..0f857c3 100644
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -20,6 +20,7 @@
>  #include <linux/cpuidle.h>
>  #include <trace/events/power.h>
>  #include <linux/hw_breakpoint.h>
> +#include <linux/syscalls.h>
>  #include <asm/cpu.h>
>  #include <asm/apic.h>
>  #include <asm/syscalls.h>
> @@ -32,6 +33,7 @@
>  #include <asm/tlbflush.h>
>  #include <asm/mce.h>
>  #include <asm/vm86.h>
> +#include <asm/prctl.h>
>
>  /*
>   * per-CPU TSS segments. Threads are completely 'soft' on Linux,
> @@ -567,3 +569,81 @@ unsigned long get_wchan(struct task_struct *p)
>         } while (count++ < 16 && p->state != TASK_RUNNING);
>         return 0;
>  }
> +
> +long do_arch_prctl(struct task_struct *task, int code, unsigned long arg2)
> +{
> +       int ret = 0;
> +       int doit = task == current;
> +       int is_32 = IS_ENABLED(CONFIG_IA32_EMULATION) && test_thread_flag(TIF_IA32);

This should be in_compat_syscall().

Also, this code is sufficiently twisted that I think it would be
better to have a common function that handles common prctls and defers
to a 64-bit-specific function if needed, or vice versa.  Vice versa
might be easier -- have a do_arch_prctl_common() that is listed as the
compat entry and have the 64-bit entry call it for unhandled prctls.

--Andy

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
  2016-09-14 22:03     ` Kyle Huey
@ 2016-09-15  1:17       ` Andy Lutomirski
  2016-09-15  2:20         ` Kyle Huey
  2016-09-15 20:38         ` H. Peter Anvin
  0 siblings, 2 replies; 28+ messages in thread
From: Andy Lutomirski @ 2016-09-15  1:17 UTC (permalink / raw)
  To: Kyle Huey
  Cc: Dave Hansen, Robert O'Callahan, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Boris Ostrovsky, David Vrabel, Juergen Gross, Borislav Petkov,
	Andy Lutomirski, Peter Zijlstra, Huang Rui, Rafael J. Wysocki,
	Len Brown, Srinivas Pandruvada, Aravind Gopalakrishnan,
	Alexander Shishkin, Vladimir Zapolskiy, Kristen Carlson Accardi,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	moderated list:XEN HYPERVISOR INTERFACE

On Wed, Sep 14, 2016 at 3:03 PM, Kyle Huey <me@kylehuey.com> wrote:
> On Wed, Sep 14, 2016 at 2:35 PM, Dave Hansen
> <dave.hansen@linux.intel.com> wrote:
>> On 09/14/2016 02:01 PM, Kyle Huey wrote:

>> Is any of this useful to optimize away at compile-time?  We have config
>> options for when we're running as a guest, and this seems like a feature
>> that isn't available when running on bare metal.
>
> On the contrary, this is only available when we're on bare metal.
> Neither Xen nor KVM virtualize CPUID faulting (although KVM correctly
> suppresses MSR_PLATFORM_INFO's report of support for it).

KVM could easily support this.  If rr starts using it, I think KVM
*should* add support, possibly even for older CPUs that don't support
the feature in hardware.

It's too bad that x86 doesn't give us the instruction bytes on a
fault.  Otherwise we could lazily switch this feature.

--Andy

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
  2016-09-14 21:01 ` [PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction Kyle Huey
@ 2016-09-15  1:29   ` Andy Lutomirski
  2016-09-15  1:47     ` Kyle Huey
  0 siblings, 1 reply; 28+ messages in thread
From: Andy Lutomirski @ 2016-09-15  1:29 UTC (permalink / raw)
  To: Kyle Huey
  Cc: Robert O'Callahan, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Shuah Khan, Peter Zijlstra (Intel),
	Borislav Petkov, Rafael J. Wysocki, Len Brown,
	Srinivas Pandruvada, Aravind Gopalakrishnan, Alexander Shishkin,
	Vladimir Zapolskiy, Andy Lutomirski, Frederic Weisbecker,
	Dmitry Safonov, Kees Cook, Michael S. Tsirkin, Andrey Ryabinin,
	Jiri Slaby, Paul Gortmaker, Denys Vlasenko, Dave Hansen,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	open list:KERNEL SELFTEST FRAMEWORK

On Wed, Sep 14, 2016 at 2:01 PM, Kyle Huey <me@kylehuey.com> wrote:
> Intel supports faulting on the CPUID instruction in newer processors. Bit
> 31 of MSR_PLATFORM_INFO advertises support for this feature. It is
> documented in detail in Section 2.3.2 of
> http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf
>
> Signed-off-by: Kyle Huey <khuey@kylehuey.com>
> ---
>  arch/x86/include/asm/msr-index.h          |   1 +
>  arch/x86/include/asm/thread_info.h        |   4 +-
>  arch/x86/include/uapi/asm/prctl.h         |   6 +
>  arch/x86/kernel/process.c                 |  81 +++++++++++
>  tools/testing/selftests/x86/Makefile      |   2 +-
>  tools/testing/selftests/x86/cpuid-fault.c | 223 ++++++++++++++++++++++++++++++
>  6 files changed, 315 insertions(+), 2 deletions(-)
>  create mode 100644 tools/testing/selftests/x86/cpuid-fault.c
>
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index 83908d5..4aebec2 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -53,6 +53,7 @@
>  #define MSR_MTRRcap                    0x000000fe
>  #define MSR_IA32_BBL_CR_CTL            0x00000119
>  #define MSR_IA32_BBL_CR_CTL3           0x0000011e
> +#define MSR_MISC_FEATURES_ENABLES      0x00000140
>
>  #define MSR_IA32_SYSENTER_CS           0x00000174
>  #define MSR_IA32_SYSENTER_ESP          0x00000175
> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
> index 8b7c8d8..ec93976 100644
> --- a/arch/x86/include/asm/thread_info.h
> +++ b/arch/x86/include/asm/thread_info.h
> @@ -93,6 +93,7 @@ struct thread_info {
>  #define TIF_SECCOMP            8       /* secure computing */
>  #define TIF_USER_RETURN_NOTIFY 11      /* notify kernel of userspace return */
>  #define TIF_UPROBE             12      /* breakpointed or singlestepping */
> +#define TIF_NOCPUID            15      /* CPUID is not accessible in userland */
>  #define TIF_NOTSC              16      /* TSC is not accessible in userland */
>  #define TIF_IA32               17      /* IA32 compatibility process */
>  #define TIF_FORK               18      /* ret_from_fork */
> @@ -117,6 +118,7 @@ struct thread_info {
>  #define _TIF_SECCOMP           (1 << TIF_SECCOMP)
>  #define _TIF_USER_RETURN_NOTIFY        (1 << TIF_USER_RETURN_NOTIFY)
>  #define _TIF_UPROBE            (1 << TIF_UPROBE)
> +#define _TIF_NOCPUID           (1 << TIF_NOCPUID)
>  #define _TIF_NOTSC             (1 << TIF_NOTSC)
>  #define _TIF_IA32              (1 << TIF_IA32)
>  #define _TIF_FORK              (1 << TIF_FORK)
> @@ -146,7 +148,7 @@ struct thread_info {
>
>  /* flags to check in __switch_to() */
>  #define _TIF_WORK_CTXSW                                                        \
> -       (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
> +       (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
>
>  #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
>  #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
> diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
> index 3ac5032..c087e55 100644
> --- a/arch/x86/include/uapi/asm/prctl.h
> +++ b/arch/x86/include/uapi/asm/prctl.h
> @@ -6,4 +6,10 @@
>  #define ARCH_GET_FS 0x1003
>  #define ARCH_GET_GS 0x1004
>
> +/* Get/set the process' ability to use the CPUID instruction */
> +#define ARCH_GET_CPUID 0x1005
> +#define ARCH_SET_CPUID 0x1006
> +# define ARCH_CPUID_ENABLE             1       /* allow the use of the CPUID instruction */
> +# define ARCH_CPUID_SIGSEGV            2       /* throw a SIGSEGV instead of reading the CPUID */
> +
>  #endif /* _ASM_X86_PRCTL_H */
> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
> index 0f857c3..5fc8e9d 100644
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -193,6 +193,69 @@ int set_tsc_mode(unsigned int val)
>         return 0;
>  }
>
> +static void switch_cpuid_faulting(bool on)
> +{
> +       if (on)
> +               msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0);
> +       else
> +               msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0);
> +}
> +

I don't know if we care (yet?), but this is going to be unnecessarily
slow because of the implicit rdmsr.  You could add a percpu shadow
copy of MISC_FEATURES_ENABLES, initialized during boot, and avoid the
rdmsr.

> +
> +int set_cpuid_mode(struct task_struct *task, unsigned long val)
> +{
> +       /* Only disable/enable_cpuid() if it is supported on this hardware. */
> +       bool cpuid_fault_supported = static_cpu_has(X86_FEATURE_CPUID_FAULT);
> +
> +       if (val == ARCH_CPUID_ENABLE && cpuid_fault_supported) {
> +               if (task_no_new_privs(task) && test_thread_flag(TIF_NOCPUID))
> +                       return -EACCES;

This check seems confused.  If this flag were preserved on execve,
it's the SIGSEGV mode that would need the check.

> @@ -212,6 +275,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
>                 update_debugctlmsr(debugctl);
>         }
>
> +       if (test_tsk_thread_flag(prev_p, TIF_NOCPUID) ^
> +           test_tsk_thread_flag(next_p, TIF_NOCPUID)) {
> +               /* prev and next are different */
> +               if (test_tsk_thread_flag(next_p, TIF_NOCPUID))
> +                       switch_cpuid_faulting(true);
> +               else
> +                       switch_cpuid_faulting(false);
> +       }
> +

Off-topic and not needed for this patch, but IMO we should move all of
this context switch junk out of ti.flags and into thread_struct
somewhere.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
  2016-09-15  1:29   ` Andy Lutomirski
@ 2016-09-15  1:47     ` Kyle Huey
  2016-09-15  1:54       ` Andy Lutomirski
  0 siblings, 1 reply; 28+ messages in thread
From: Kyle Huey @ 2016-09-15  1:47 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Robert O'Callahan, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Shuah Khan, Peter Zijlstra (Intel),
	Borislav Petkov, Rafael J. Wysocki, Len Brown,
	Srinivas Pandruvada, Aravind Gopalakrishnan, Alexander Shishkin,
	Vladimir Zapolskiy, Andy Lutomirski, Frederic Weisbecker,
	Dmitry Safonov, Kees Cook, Michael S. Tsirkin, Andrey Ryabinin,
	Jiri Slaby, Paul Gortmaker, Denys Vlasenko, Dave Hansen,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	open list:KERNEL SELFTEST FRAMEWORK

On Wed, Sep 14, 2016 at 6:29 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Wed, Sep 14, 2016 at 2:01 PM, Kyle Huey <me@kylehuey.com> wrote:
>> Intel supports faulting on the CPUID instruction in newer processors. Bit
>> 31 of MSR_PLATFORM_INFO advertises support for this feature. It is
>> documented in detail in Section 2.3.2 of
>> http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf
>>
>> Signed-off-by: Kyle Huey <khuey@kylehuey.com>
>> ---
>>  arch/x86/include/asm/msr-index.h          |   1 +
>>  arch/x86/include/asm/thread_info.h        |   4 +-
>>  arch/x86/include/uapi/asm/prctl.h         |   6 +
>>  arch/x86/kernel/process.c                 |  81 +++++++++++
>>  tools/testing/selftests/x86/Makefile      |   2 +-
>>  tools/testing/selftests/x86/cpuid-fault.c | 223 ++++++++++++++++++++++++++++++
>>  6 files changed, 315 insertions(+), 2 deletions(-)
>>  create mode 100644 tools/testing/selftests/x86/cpuid-fault.c
>>
>> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
>> index 83908d5..4aebec2 100644
>> --- a/arch/x86/include/asm/msr-index.h
>> +++ b/arch/x86/include/asm/msr-index.h
>> @@ -53,6 +53,7 @@
>>  #define MSR_MTRRcap                    0x000000fe
>>  #define MSR_IA32_BBL_CR_CTL            0x00000119
>>  #define MSR_IA32_BBL_CR_CTL3           0x0000011e
>> +#define MSR_MISC_FEATURES_ENABLES      0x00000140
>>
>>  #define MSR_IA32_SYSENTER_CS           0x00000174
>>  #define MSR_IA32_SYSENTER_ESP          0x00000175
>> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
>> index 8b7c8d8..ec93976 100644
>> --- a/arch/x86/include/asm/thread_info.h
>> +++ b/arch/x86/include/asm/thread_info.h
>> @@ -93,6 +93,7 @@ struct thread_info {
>>  #define TIF_SECCOMP            8       /* secure computing */
>>  #define TIF_USER_RETURN_NOTIFY 11      /* notify kernel of userspace return */
>>  #define TIF_UPROBE             12      /* breakpointed or singlestepping */
>> +#define TIF_NOCPUID            15      /* CPUID is not accessible in userland */
>>  #define TIF_NOTSC              16      /* TSC is not accessible in userland */
>>  #define TIF_IA32               17      /* IA32 compatibility process */
>>  #define TIF_FORK               18      /* ret_from_fork */
>> @@ -117,6 +118,7 @@ struct thread_info {
>>  #define _TIF_SECCOMP           (1 << TIF_SECCOMP)
>>  #define _TIF_USER_RETURN_NOTIFY        (1 << TIF_USER_RETURN_NOTIFY)
>>  #define _TIF_UPROBE            (1 << TIF_UPROBE)
>> +#define _TIF_NOCPUID           (1 << TIF_NOCPUID)
>>  #define _TIF_NOTSC             (1 << TIF_NOTSC)
>>  #define _TIF_IA32              (1 << TIF_IA32)
>>  #define _TIF_FORK              (1 << TIF_FORK)
>> @@ -146,7 +148,7 @@ struct thread_info {
>>
>>  /* flags to check in __switch_to() */
>>  #define _TIF_WORK_CTXSW                                                        \
>> -       (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
>> +       (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
>>
>>  #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
>>  #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
>> diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
>> index 3ac5032..c087e55 100644
>> --- a/arch/x86/include/uapi/asm/prctl.h
>> +++ b/arch/x86/include/uapi/asm/prctl.h
>> @@ -6,4 +6,10 @@
>>  #define ARCH_GET_FS 0x1003
>>  #define ARCH_GET_GS 0x1004
>>
>> +/* Get/set the process' ability to use the CPUID instruction */
>> +#define ARCH_GET_CPUID 0x1005
>> +#define ARCH_SET_CPUID 0x1006
>> +# define ARCH_CPUID_ENABLE             1       /* allow the use of the CPUID instruction */
>> +# define ARCH_CPUID_SIGSEGV            2       /* throw a SIGSEGV instead of reading the CPUID */
>> +
>>  #endif /* _ASM_X86_PRCTL_H */
>> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
>> index 0f857c3..5fc8e9d 100644
>> --- a/arch/x86/kernel/process.c
>> +++ b/arch/x86/kernel/process.c
>> @@ -193,6 +193,69 @@ int set_tsc_mode(unsigned int val)
>>         return 0;
>>  }
>>
>> +static void switch_cpuid_faulting(bool on)
>> +{
>> +       if (on)
>> +               msr_set_bit(MSR_MISC_FEATURES_ENABLES, 0);
>> +       else
>> +               msr_clear_bit(MSR_MISC_FEATURES_ENABLES, 0);
>> +}
>> +
>
> I don't know if we care (yet?), but this is going to be unnecessarily
> slow because of the implicit rdmsr.  You could add a percpu shadow
> copy of MISC_FEATURES_ENABLES, initialized during boot, and avoid the
> rdmsr.
>
>> +
>> +int set_cpuid_mode(struct task_struct *task, unsigned long val)
>> +{
>> +       /* Only disable/enable_cpuid() if it is supported on this hardware. */
>> +       bool cpuid_fault_supported = static_cpu_has(X86_FEATURE_CPUID_FAULT);
>> +
>> +       if (val == ARCH_CPUID_ENABLE && cpuid_fault_supported) {
>> +               if (task_no_new_privs(task) && test_thread_flag(TIF_NOCPUID))
>> +                       return -EACCES;
>
> This check seems confused.  If this flag were preserved on execve,
> it's the SIGSEGV mode that would need the check.

Not sure I follow this one.  no_new_privs should block transitions
from SIGSEGV to ENABLE, right?  That's what this check does.

>> @@ -212,6 +275,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
>>                 update_debugctlmsr(debugctl);
>>         }
>>
>> +       if (test_tsk_thread_flag(prev_p, TIF_NOCPUID) ^
>> +           test_tsk_thread_flag(next_p, TIF_NOCPUID)) {
>> +               /* prev and next are different */
>> +               if (test_tsk_thread_flag(next_p, TIF_NOCPUID))
>> +                       switch_cpuid_faulting(true);
>> +               else
>> +                       switch_cpuid_faulting(false);
>> +       }
>> +
>
> Off-topic and not needed for this patch, but IMO we should move all of
> this context switch junk out of ti.flags and into thread_struct
> somewhere.

- Kyle

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
  2016-09-15  1:47     ` Kyle Huey
@ 2016-09-15  1:54       ` Andy Lutomirski
  2016-09-15  2:19         ` Kyle Huey
  0 siblings, 1 reply; 28+ messages in thread
From: Andy Lutomirski @ 2016-09-15  1:54 UTC (permalink / raw)
  To: Kyle Huey
  Cc: Robert O'Callahan, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Shuah Khan, Peter Zijlstra (Intel),
	Borislav Petkov, Rafael J. Wysocki, Len Brown,
	Srinivas Pandruvada, Aravind Gopalakrishnan, Alexander Shishkin,
	Vladimir Zapolskiy, Andy Lutomirski, Frederic Weisbecker,
	Dmitry Safonov, Kees Cook, Michael S. Tsirkin, Andrey Ryabinin,
	Jiri Slaby, Paul Gortmaker, Denys Vlasenko, Dave Hansen,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	open list:KERNEL SELFTEST FRAMEWORK

On Wed, Sep 14, 2016 at 6:47 PM, Kyle Huey <me@kylehuey.com> wrote:
> On Wed, Sep 14, 2016 at 6:29 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>> On Wed, Sep 14, 2016 at 2:01 PM, Kyle Huey <me@kylehuey.com> wrote:

>>> +
>>> +int set_cpuid_mode(struct task_struct *task, unsigned long val)
>>> +{
>>> +       /* Only disable/enable_cpuid() if it is supported on this hardware. */
>>> +       bool cpuid_fault_supported = static_cpu_has(X86_FEATURE_CPUID_FAULT);
>>> +
>>> +       if (val == ARCH_CPUID_ENABLE && cpuid_fault_supported) {
>>> +               if (task_no_new_privs(task) && test_thread_flag(TIF_NOCPUID))
>>> +                       return -EACCES;
>>
>> This check seems confused.  If this flag were preserved on execve,
>> it's the SIGSEGV mode that would need the check.
>
> Not sure I follow this one.  no_new_privs should block transitions
> from SIGSEGV to ENABLE, right?  That's what this check does.

It's the other way around entirely: if you make a change to your
process context such that a subseqently execve()'d setuid program
might malfunction, you've just done something dangerous.  This is only
okay, at least in newly-supported instances, if you are either
privileged or if you have no_new_privs set.  Having privilege makes it
okay: unprivileged programs can't use it to subvert setuid programs.
no_new_privs makes it safe as well: if no_new_privs is set, you can't
gain privilege via execve(), so there's no attack surface.  So, if you
have execve() keep ARCH_CPUID_SIGSEGV set, then setting it that way in
the first place should require privilege or no_new_privs.

I personally favor resetting to ARCH_CPUID_ENABLE on execve() and not
worrying about no_new_privs.

Does that make sense?

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction
  2016-09-15  1:54       ` Andy Lutomirski
@ 2016-09-15  2:19         ` Kyle Huey
  0 siblings, 0 replies; 28+ messages in thread
From: Kyle Huey @ 2016-09-15  2:19 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Robert O'Callahan, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Shuah Khan, Peter Zijlstra (Intel),
	Borislav Petkov, Rafael J. Wysocki, Len Brown,
	Srinivas Pandruvada, Aravind Gopalakrishnan, Alexander Shishkin,
	Vladimir Zapolskiy, Andy Lutomirski, Frederic Weisbecker,
	Dmitry Safonov, Kees Cook, Michael S. Tsirkin, Andrey Ryabinin,
	Jiri Slaby, Paul Gortmaker, Denys Vlasenko, Dave Hansen,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	open list:KERNEL SELFTEST FRAMEWORK

On Wed, Sep 14, 2016 at 6:54 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Wed, Sep 14, 2016 at 6:47 PM, Kyle Huey <me@kylehuey.com> wrote:
>> On Wed, Sep 14, 2016 at 6:29 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>>> On Wed, Sep 14, 2016 at 2:01 PM, Kyle Huey <me@kylehuey.com> wrote:
>
>>>> +
>>>> +int set_cpuid_mode(struct task_struct *task, unsigned long val)
>>>> +{
>>>> +       /* Only disable/enable_cpuid() if it is supported on this hardware. */
>>>> +       bool cpuid_fault_supported = static_cpu_has(X86_FEATURE_CPUID_FAULT);
>>>> +
>>>> +       if (val == ARCH_CPUID_ENABLE && cpuid_fault_supported) {
>>>> +               if (task_no_new_privs(task) && test_thread_flag(TIF_NOCPUID))
>>>> +                       return -EACCES;
>>>
>>> This check seems confused.  If this flag were preserved on execve,
>>> it's the SIGSEGV mode that would need the check.
>>
>> Not sure I follow this one.  no_new_privs should block transitions
>> from SIGSEGV to ENABLE, right?  That's what this check does.
>
> It's the other way around entirely: if you make a change to your
> process context such that a subseqently execve()'d setuid program
> might malfunction, you've just done something dangerous.  This is only
> okay, at least in newly-supported instances, if you are either
> privileged or if you have no_new_privs set.  Having privilege makes it
> okay: unprivileged programs can't use it to subvert setuid programs.
> no_new_privs makes it safe as well: if no_new_privs is set, you can't
> gain privilege via execve(), so there's no attack surface.  So, if you
> have execve() keep ARCH_CPUID_SIGSEGV set, then setting it that way in
> the first place should require privilege or no_new_privs.
>
> I personally favor resetting to ARCH_CPUID_ENABLE on execve() and not
> worrying about no_new_privs.
>
> Does that make sense?

Yes, ok.  Robert and I agree that resetting does make the most sense.
Using this usefully requires a ptrace supervisor (to catch the traps),
which can easily inject a call to arch_prctl to reenable
ARCH_CPUID_SIGSEGV when desired.

- Kyle

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
  2016-09-15  1:17       ` Andy Lutomirski
@ 2016-09-15  2:20         ` Kyle Huey
  2016-09-15 20:38         ` H. Peter Anvin
  1 sibling, 0 replies; 28+ messages in thread
From: Kyle Huey @ 2016-09-15  2:20 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Dave Hansen, Robert O'Callahan, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Boris Ostrovsky, David Vrabel, Juergen Gross, Borislav Petkov,
	Andy Lutomirski, Peter Zijlstra, Huang Rui, Rafael J. Wysocki,
	Len Brown, Srinivas Pandruvada, Aravind Gopalakrishnan,
	Alexander Shishkin, Vladimir Zapolskiy, Kristen Carlson Accardi,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	moderated list:XEN HYPERVISOR INTERFACE

On Wed, Sep 14, 2016 at 6:17 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Wed, Sep 14, 2016 at 3:03 PM, Kyle Huey <me@kylehuey.com> wrote:
>> On Wed, Sep 14, 2016 at 2:35 PM, Dave Hansen
>> <dave.hansen@linux.intel.com> wrote:
>>> On 09/14/2016 02:01 PM, Kyle Huey wrote:
>
>>> Is any of this useful to optimize away at compile-time?  We have config
>>> options for when we're running as a guest, and this seems like a feature
>>> that isn't available when running on bare metal.
>>
>> On the contrary, this is only available when we're on bare metal.
>> Neither Xen nor KVM virtualize CPUID faulting (although KVM correctly
>> suppresses MSR_PLATFORM_INFO's report of support for it).
>
> KVM could easily support this.  If rr starts using it, I think KVM
> *should* add support, possibly even for older CPUs that don't support
> the feature in hardware.
>
> It's too bad that x86 doesn't give us the instruction bytes on a
> fault.  Otherwise we could lazily switch this feature.

We are *very* interested in having KVM and Xen support virtualization
of this feature.  I am planning to work on KVM after I get this series
of patches in :)

- Kyle

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Xen-devel] [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
  2016-09-14 21:01 ` [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo Kyle Huey
  2016-09-14 21:35   ` Dave Hansen
@ 2016-09-15 10:05   ` David Vrabel
  2016-09-15 10:25     ` Jan Beulich
  1 sibling, 1 reply; 28+ messages in thread
From: David Vrabel @ 2016-09-15 10:05 UTC (permalink / raw)
  To: Kyle Huey, Robert O'Callahan
  Cc: Juergen Gross, Len Brown, Dave Hansen, Rafael J. Wysocki,
	Kristen Carlson Accardi, Peter Zijlstra,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, Huang Rui,
	open list:X86 ARCHITECTURE 32-BIT AND 64-BIT, Alexander Shishkin,
	Ingo Molnar, Aravind Gopalakrishnan, David Vrabel,
	Andy Lutomirski, H. Peter Anvin, Srinivas Pandruvada,
	moderated list:XEN HYPERVISOR INTERFACE, Boris Ostrovsky,
	Borislav Petkov, Thomas Gleixner, Vladimir Zapolskiy

On 14/09/16 22:01, Kyle Huey wrote:
> Xen advertises the underlying support for CPUID faulting but not does pass
> through writes to the relevant MSR, nor does it virtualize it, so it does
> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO.

Could you clarify in the commit message that it is PV guests that are
affected.

> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -1050,6 +1050,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
>  #endif
>  			val &= ~X2APIC_ENABLE;
>  		break;
> +	case MSR_PLATFORM_INFO:
> +		val &= ~CPUID_FAULTING_SUPPORT;
> +		break;
>  	}
>  	return val;
>  }

Acked-by: David Vrabel <david.vrabel@citrix.com>

David

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
  2016-09-14 21:35   ` Dave Hansen
  2016-09-14 22:03     ` Kyle Huey
@ 2016-09-15 10:07     ` David Vrabel
  1 sibling, 0 replies; 28+ messages in thread
From: David Vrabel @ 2016-09-15 10:07 UTC (permalink / raw)
  To: Dave Hansen, Kyle Huey, Robert O'Callahan
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Boris Ostrovsky, Juergen Gross, Borislav Petkov, Andy Lutomirski,
	Peter Zijlstra, Huang Rui, Rafael J. Wysocki, Len Brown,
	Srinivas Pandruvada, Aravind Gopalakrishnan, Alexander Shishkin,
	Vladimir Zapolskiy, Kristen Carlson Accardi,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	moderated list:XEN HYPERVISOR INTERFACE, Andrew Cooper

On 14/09/16 22:35, Dave Hansen wrote:
> On 09/14/2016 02:01 PM, Kyle Huey wrote:
>> Xen advertises the underlying support for CPUID faulting but not does pass
>> through writes to the relevant MSR, nor does it virtualize it, so it does
>> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO.
> 
> That needs to make it into a comment, please.
> 
> That *is* a Xen bug, right?

This is probably fixed in the latest version of Xen.  Andrew Cooper
would know for sure.

>> --- a/arch/x86/xen/enlighten.c
>> +++ b/arch/x86/xen/enlighten.c
>> @@ -1050,6 +1050,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
>>  #endif
>>  			val &= ~X2APIC_ENABLE;
>>  		break;
>> +	case MSR_PLATFORM_INFO:
>> +		val &= ~CPUID_FAULTING_SUPPORT;
>> +		break;
>>  	}
>>  	return val;
>>  }
> 
> Does this mean that Xen guests effectively can't take advantage of this
> feature?

PV guests only.

David

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Xen-devel] [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
  2016-09-15 10:05   ` [Xen-devel] " David Vrabel
@ 2016-09-15 10:25     ` Jan Beulich
  2016-09-15 19:11       ` Kyle Huey
  0 siblings, 1 reply; 28+ messages in thread
From: Jan Beulich @ 2016-09-15 10:25 UTC (permalink / raw)
  To: David Vrabel
  Cc: Aravind Gopalakrishnan, Huang Rui, Peter Zijlstra, Len Brown,
	Rafael J. Wysocki, Andy Lutomirski,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, Kyle Huey,
	Thomas Gleixner, Alexander Shishkin, DaveHansen,
	Kristen Carlson Accardi, Srinivas Pandruvada,
	moderated list:XEN HYPERVISOR INTERFACE, Vladimir Zapolskiy,
	Robert O'Callahan, Boris Ostrovsky, Ingo Molnar,
	Juergen Gross, Borislav Petkov,
	open list:X86 ARCHITECTURE 32-BIT AND 64-BIT, H. PeterAnvin

>>> On 15.09.16 at 12:05, <david.vrabel@citrix.com> wrote:
> On 14/09/16 22:01, Kyle Huey wrote:
>> Xen advertises the underlying support for CPUID faulting but not does pass
>> through writes to the relevant MSR, nor does it virtualize it, so it does
>> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO.
> 
> Could you clarify in the commit message that it is PV guests that are
> affected.

What makes you think HVM ones aren't?

Jan

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Xen-devel] [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
  2016-09-15 10:25     ` Jan Beulich
@ 2016-09-15 19:11       ` Kyle Huey
  2016-09-15 19:37         ` Andy Lutomirski
  2016-09-15 19:41         ` Boris Ostrovsky
  0 siblings, 2 replies; 28+ messages in thread
From: Kyle Huey @ 2016-09-15 19:11 UTC (permalink / raw)
  To: Jan Beulich
  Cc: David Vrabel, Aravind Gopalakrishnan, Huang Rui, Peter Zijlstra,
	Len Brown, Rafael J. Wysocki, Andy Lutomirski,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, Thomas Gleixner,
	Alexander Shishkin, DaveHansen, Kristen Carlson Accardi,
	Srinivas Pandruvada, moderated list:XEN HYPERVISOR INTERFACE,
	Vladimir Zapolskiy, Robert O'Callahan, Boris Ostrovsky,
	Ingo Molnar, Juergen Gross, Borislav Petkov,
	open list:X86 ARCHITECTURE 32-BIT AND 64-BIT, H. PeterAnvin

On Thu, Sep 15, 2016 at 3:25 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>> On 15.09.16 at 12:05, <david.vrabel@citrix.com> wrote:
>> On 14/09/16 22:01, Kyle Huey wrote:
>>> Xen advertises the underlying support for CPUID faulting but not does pass
>>> through writes to the relevant MSR, nor does it virtualize it, so it does
>>> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO.
>>
>> Could you clarify in the commit message that it is PV guests that are
>> affected.
>
> What makes you think HVM ones aren't?

Testing on EC2, HVM guests are affected as well.  Not sure what to do
about that.

- Kyle

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Xen-devel] [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
  2016-09-15 19:11       ` Kyle Huey
@ 2016-09-15 19:37         ` Andy Lutomirski
  2016-09-15 23:36           ` Kyle Huey
  2016-09-15 19:41         ` Boris Ostrovsky
  1 sibling, 1 reply; 28+ messages in thread
From: Andy Lutomirski @ 2016-09-15 19:37 UTC (permalink / raw)
  To: Kyle Huey
  Cc: Jan Beulich, David Vrabel, Aravind Gopalakrishnan, Huang Rui,
	Peter Zijlstra, Len Brown, Rafael J. Wysocki, Andy Lutomirski,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, Thomas Gleixner,
	Alexander Shishkin, DaveHansen, Kristen Carlson Accardi,
	Srinivas Pandruvada, moderated list:XEN HYPERVISOR INTERFACE,
	Vladimir Zapolskiy, Robert O'Callahan, Boris Ostrovsky,
	Ingo Molnar, Juergen Gross, Borislav Petkov,
	open list:X86 ARCHITECTURE 32-BIT AND 64-BIT, H. PeterAnvin

On Thu, Sep 15, 2016 at 12:11 PM, Kyle Huey <me@kylehuey.com> wrote:
> On Thu, Sep 15, 2016 at 3:25 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>> On 15.09.16 at 12:05, <david.vrabel@citrix.com> wrote:
>>> On 14/09/16 22:01, Kyle Huey wrote:
>>>> Xen advertises the underlying support for CPUID faulting but not does pass
>>>> through writes to the relevant MSR, nor does it virtualize it, so it does
>>>> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO.
>>>
>>> Could you clarify in the commit message that it is PV guests that are
>>> affected.
>>
>> What makes you think HVM ones aren't?
>
> Testing on EC2, HVM guests are affected as well.  Not sure what to do
> about that.
>

It's kind of nasty, but it shouldn't be *too* hard to probe for this
thing during early boot.  Allocate a page somewhere that has the user
bit set, put something like this in it:

cpuid
inc %eax  /* return 1 */
movw %ax, %ss /* force %GP to get out of here */

Call it like this from asm (real asm, not inline):

FRAME_BEGIN
pushq %rbx

xorl %eax, %eax

/* Push return frame */
pushq %ss
pushq %rsp
addq $8, (%rsp)
pushfq
pushq %cs
pushq $end_of_cpuid_faulting_test

/* Call it! */
pushq $__USER_DS
pushq $0
pushq $X86_EFLAGS_FIXED  /* leave IF off when running the CPL3 stub */
pushq $__USER_CS
pushq [address of userspace stub]
INTERRUPT_RETURN

end_of_cpuid_faulting_test:
pop %rbx

FRAME_END

Run this after the main GDT is loaded but while the #GP vector is
temporarily pointing to:

movq SS-RIP(%rsp), %rsp  /* pop the real return frame */
INTERRUPT_RETURN

and with interrupts off.  The function should return 0 if CPUID
faulting works and 1 if it doesn't.

Yeah, this is gross, but it should work.  I'm not sure how okay I am
with putting this crap in the kernel...

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Xen-devel] [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
  2016-09-15 19:11       ` Kyle Huey
  2016-09-15 19:37         ` Andy Lutomirski
@ 2016-09-15 19:41         ` Boris Ostrovsky
  1 sibling, 0 replies; 28+ messages in thread
From: Boris Ostrovsky @ 2016-09-15 19:41 UTC (permalink / raw)
  To: Kyle Huey, Jan Beulich
  Cc: Rafael J. Wysocki, Peter Zijlstra, DaveHansen, Huang Rui,
	Kristen Carlson Accardi, H. PeterAnvin, Srinivas Pandruvada,
	Thomas Gleixner, maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	Robert O'Callahan, Alexander Shishkin, Ingo Molnar,
	moderated list:XEN HYPERVISOR INTERFACE, Borislav Petkov,
	Len Brown, Andy Lutomirski, Juergen Gross,
	open list:X86 ARCHITECTURE 32-BIT AND 64-BIT,
	Aravind Gopalakrishnan, David Vrabel, Vladimir Zapolskiy

On 09/15/2016 03:11 PM, Kyle Huey wrote:
> On Thu, Sep 15, 2016 at 3:25 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>> On 15.09.16 at 12:05, <david.vrabel@citrix.com> wrote:
>>> On 14/09/16 22:01, Kyle Huey wrote:
>>>> Xen advertises the underlying support for CPUID faulting but not does pass
>>>> through writes to the relevant MSR, nor does it virtualize it, so it does
>>>> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO.
>>> Could you clarify in the commit message that it is PV guests that are
>>> affected.
>> What makes you think HVM ones aren't?
> Testing on EC2, HVM guests are affected as well.  Not sure what to do
> about that.

You could clear capability bit in xen_set_cpu_features() but of course
this assumes you will never again read MSR_PLATFORM_INFO.

-boris

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
  2016-09-15  1:17       ` Andy Lutomirski
  2016-09-15  2:20         ` Kyle Huey
@ 2016-09-15 20:38         ` H. Peter Anvin
  2016-09-15 23:18           ` Andy Lutomirski
  1 sibling, 1 reply; 28+ messages in thread
From: H. Peter Anvin @ 2016-09-15 20:38 UTC (permalink / raw)
  To: Andy Lutomirski, Kyle Huey
  Cc: Dave Hansen, Robert O'Callahan, Thomas Gleixner, Ingo Molnar,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Boris Ostrovsky, David Vrabel, Juergen Gross, Borislav Petkov,
	Andy Lutomirski, Peter Zijlstra, Huang Rui, Rafael J. Wysocki,
	Len Brown, Srinivas Pandruvada, Aravind Gopalakrishnan,
	Alexander Shishkin, Vladimir Zapolskiy, Kristen Carlson Accardi,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	moderated list:XEN HYPERVISOR INTERFACE

On September 14, 2016 6:17:51 PM PDT, Andy Lutomirski <luto@amacapital.net> wrote:
>On Wed, Sep 14, 2016 at 3:03 PM, Kyle Huey <me@kylehuey.com> wrote:
>> On Wed, Sep 14, 2016 at 2:35 PM, Dave Hansen
>> <dave.hansen@linux.intel.com> wrote:
>>> On 09/14/2016 02:01 PM, Kyle Huey wrote:
>
>>> Is any of this useful to optimize away at compile-time?  We have
>config
>>> options for when we're running as a guest, and this seems like a
>feature
>>> that isn't available when running on bare metal.
>>
>> On the contrary, this is only available when we're on bare metal.
>> Neither Xen nor KVM virtualize CPUID faulting (although KVM correctly
>> suppresses MSR_PLATFORM_INFO's report of support for it).
>
>KVM could easily support this.  If rr starts using it, I think KVM
>*should* add support, possibly even for older CPUs that don't support
>the feature in hardware.
>
>It's too bad that x86 doesn't give us the instruction bytes on a
>fault.  Otherwise we could lazily switch this feature.
>
>--Andy

You can "always" examine the instruction bytes in memory... have to make sure you properly consider the impact of race conditions though.
-- 
Sent from my Android device with K-9 Mail. Please excuse brevity and formatting.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
  2016-09-15 20:38         ` H. Peter Anvin
@ 2016-09-15 23:18           ` Andy Lutomirski
  0 siblings, 0 replies; 28+ messages in thread
From: Andy Lutomirski @ 2016-09-15 23:18 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Kyle Huey, Dave Hansen, Robert O'Callahan, Thomas Gleixner,
	Ingo Molnar, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Boris Ostrovsky, David Vrabel, Juergen Gross, Borislav Petkov,
	Andy Lutomirski, Peter Zijlstra, Huang Rui, Rafael J. Wysocki,
	Len Brown, Srinivas Pandruvada, Aravind Gopalakrishnan,
	Alexander Shishkin, Vladimir Zapolskiy, Kristen Carlson Accardi,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	moderated list:XEN HYPERVISOR INTERFACE

On Thu, Sep 15, 2016 at 1:38 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> On September 14, 2016 6:17:51 PM PDT, Andy Lutomirski <luto@amacapital.net> wrote:
>>On Wed, Sep 14, 2016 at 3:03 PM, Kyle Huey <me@kylehuey.com> wrote:
>>> On Wed, Sep 14, 2016 at 2:35 PM, Dave Hansen
>>> <dave.hansen@linux.intel.com> wrote:
>>>> On 09/14/2016 02:01 PM, Kyle Huey wrote:
>>
>>>> Is any of this useful to optimize away at compile-time?  We have
>>config
>>>> options for when we're running as a guest, and this seems like a
>>feature
>>>> that isn't available when running on bare metal.
>>>
>>> On the contrary, this is only available when we're on bare metal.
>>> Neither Xen nor KVM virtualize CPUID faulting (although KVM correctly
>>> suppresses MSR_PLATFORM_INFO's report of support for it).
>>
>>KVM could easily support this.  If rr starts using it, I think KVM
>>*should* add support, possibly even for older CPUs that don't support
>>the feature in hardware.
>>
>>It's too bad that x86 doesn't give us the instruction bytes on a
>>fault.  Otherwise we could lazily switch this feature.
>>
>>--Andy
>
> You can "always" examine the instruction bytes in memory... have to make sure you properly consider the impact of race conditions though.

I'd rather avoid needing to worry about those race conditions if at
all possible, though.  Intel and AMD both have fancy "decode assists"
and such -- it would be quite nice IMO if we could get the same data
exposed in the handlers of synchronous faults.

If Intel or AMD were to do this for real, presumably the rule would be
that any fault-class exception caused by a validly-decoded instruction
at CPL3 (so #PF and #GP would count but #DB probably wouldn't, and #DF
wouldn't either unless the initial fault did) would stash away the
faulting instruction and other entries would instead stash away
"nothing here".  Some pair of MSRs or new instruction would read out
information.  Then we could accurately emulate CPUID, we could
accurately emulate page-faulting instructions if we cared, etc.  All
of the relevant hardware must already mostly exist because VMX and SVM
both have this capability.

--Andy

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Xen-devel] [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo
  2016-09-15 19:37         ` Andy Lutomirski
@ 2016-09-15 23:36           ` Kyle Huey
  0 siblings, 0 replies; 28+ messages in thread
From: Kyle Huey @ 2016-09-15 23:36 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Jan Beulich, David Vrabel, Aravind Gopalakrishnan, Huang Rui,
	Peter Zijlstra, Len Brown, Rafael J. Wysocki, Andy Lutomirski,
	maintainer:X86 ARCHITECTURE 32-BIT AND 64-BIT, Thomas Gleixner,
	Alexander Shishkin, DaveHansen, Kristen Carlson Accardi,
	Srinivas Pandruvada, moderated list:XEN HYPERVISOR INTERFACE,
	Vladimir Zapolskiy, Robert O'Callahan, Boris Ostrovsky,
	Ingo Molnar, Juergen Gross, Borislav Petkov,
	open list:X86 ARCHITECTURE 32-BIT AND 64-BIT, H. PeterAnvin

On Thu, Sep 15, 2016 at 12:37 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Thu, Sep 15, 2016 at 12:11 PM, Kyle Huey <me@kylehuey.com> wrote:
>> On Thu, Sep 15, 2016 at 3:25 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>> On 15.09.16 at 12:05, <david.vrabel@citrix.com> wrote:
>>>> On 14/09/16 22:01, Kyle Huey wrote:
>>>>> Xen advertises the underlying support for CPUID faulting but not does pass
>>>>> through writes to the relevant MSR, nor does it virtualize it, so it does
>>>>> not actually work. For now mask off the relevant bit on MSR_PLATFORM_INFO.
>>>>
>>>> Could you clarify in the commit message that it is PV guests that are
>>>> affected.
>>>
>>> What makes you think HVM ones aren't?
>>
>> Testing on EC2, HVM guests are affected as well.  Not sure what to do
>> about that.
>>
>
> It's kind of nasty, but it shouldn't be *too* hard to probe for this
> thing during early boot.  Allocate a page somewhere that has the user
> bit set, put something like this in it:
>
> cpuid
> inc %eax  /* return 1 */
> movw %ax, %ss /* force %GP to get out of here */
>
> Call it like this from asm (real asm, not inline):
>
> FRAME_BEGIN
> pushq %rbx
>
> xorl %eax, %eax
>
> /* Push return frame */
> pushq %ss
> pushq %rsp
> addq $8, (%rsp)
> pushfq
> pushq %cs
> pushq $end_of_cpuid_faulting_test
>
> /* Call it! */
> pushq $__USER_DS
> pushq $0
> pushq $X86_EFLAGS_FIXED  /* leave IF off when running the CPL3 stub */
> pushq $__USER_CS
> pushq [address of userspace stub]
> INTERRUPT_RETURN
>
> end_of_cpuid_faulting_test:
> pop %rbx
>
> FRAME_END
>
> Run this after the main GDT is loaded but while the #GP vector is
> temporarily pointing to:
>
> movq SS-RIP(%rsp), %rsp  /* pop the real return frame */
> INTERRUPT_RETURN
>
> and with interrupts off.  The function should return 0 if CPUID
> faulting works and 1 if it doesn't.
>
> Yeah, this is gross, but it should work.  I'm not sure how okay I am
> with putting this crap in the kernel...

This is rather heroic :)

I think it's more trouble than it's worth though.  The latest series I
submitted doesn't try to handle this.  Instead I'll patch Xen to fix
the bug.

- Kyle

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2016-09-15 23:36 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1473886902-17902-1-git-send-email-khuey@kylehuey.com>
2016-09-14 21:01 ` [PATCH v2 1/3] syscalls,x86 Expose arch_prctl on x86-32 Kyle Huey
2016-09-14 21:29   ` Dave Hansen
2016-09-14 21:35     ` Kyle Huey
2016-09-14 21:46       ` Dave Hansen
2016-09-14 21:56         ` Kyle Huey
2016-09-14 22:23   ` kbuild test robot
2016-09-15  0:01   ` kbuild test robot
2016-09-15  0:01   ` kbuild test robot
2016-09-15  1:14   ` Andy Lutomirski
2016-09-14 21:01 ` [PATCH v2 2/3] x86 Test and expose CPUID faulting capabilities in /proc/cpuinfo Kyle Huey
2016-09-14 21:35   ` Dave Hansen
2016-09-14 22:03     ` Kyle Huey
2016-09-15  1:17       ` Andy Lutomirski
2016-09-15  2:20         ` Kyle Huey
2016-09-15 20:38         ` H. Peter Anvin
2016-09-15 23:18           ` Andy Lutomirski
2016-09-15 10:07     ` David Vrabel
2016-09-15 10:05   ` [Xen-devel] " David Vrabel
2016-09-15 10:25     ` Jan Beulich
2016-09-15 19:11       ` Kyle Huey
2016-09-15 19:37         ` Andy Lutomirski
2016-09-15 23:36           ` Kyle Huey
2016-09-15 19:41         ` Boris Ostrovsky
2016-09-14 21:01 ` [PATCH v2 3/3] x86,arch_prctl Add ARCH_[GET|SET]_CPUID for controlling the CPUID instruction Kyle Huey
2016-09-15  1:29   ` Andy Lutomirski
2016-09-15  1:47     ` Kyle Huey
2016-09-15  1:54       ` Andy Lutomirski
2016-09-15  2:19         ` Kyle Huey

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).