linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch 0/8] x86/fpu: Mop up XSAVES and related damage
@ 2021-06-02  9:55 Thomas Gleixner
  2021-06-02  9:55 ` [patch 1/8] selftests/x86: Test signal frame XSTATE header corruption handling Thomas Gleixner
                   ` (9 more replies)
  0 siblings, 10 replies; 33+ messages in thread
From: Thomas Gleixner @ 2021-06-02  9:55 UTC (permalink / raw)
  To: LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck, Yu-cheng Yu

syszbot reported a warnon for XRSTOR raising #GP:

 https://lore.kernel.org/r/0000000000004c453905c30f8334@google.com

with a syzcaller reproducer and a conclusive bisect result.

It took a while to destill a simple C reproducer out of it which allowed to
pin point the root cause: The recent addition of supervisor XSTATEs broke
the signal restore path for the case where the signal handler wreckaged the
XSTATE on stack because it does not sanitize the XSTATE header which causes
a subsequent XRSTOR to fail and #GP.

The following series addresses the problem and fixes related issues which
were found while inspecting the related changes.

Thanks to Andy and Dave for working on this with me!

Thanks,

	tglx
---
 arch/x86/include/asm/fpu/xstate.h                     |    4 
 arch/x86/kernel/fpu/core.c                            |   62 ++++++---
 arch/x86/kernel/fpu/regset.c                          |   43 ++----
 arch/x86/kernel/fpu/signal.c                          |   30 +++-
 arch/x86/kernel/fpu/xstate.c                          |   95 +++++----------
 b/tools/testing/selftests/x86/corrupt_xstate_header.c |  114 ++++++++++++++++++
 tools/testing/selftests/x86/Makefile                  |    3 
 7 files changed, 234 insertions(+), 117 deletions(-)




^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 1/8] selftests/x86: Test signal frame XSTATE header corruption handling
  2021-06-02  9:55 [patch 0/8] x86/fpu: Mop up XSAVES and related damage Thomas Gleixner
@ 2021-06-02  9:55 ` Thomas Gleixner
  2021-06-02 12:38   ` Borislav Petkov
  2021-06-02 15:59   ` [patch V2 " Thomas Gleixner
  2021-06-02  9:55 ` [patch 2/8] x86/fpu: Prevent state corruption in __fpu__restore_sig() Thomas Gleixner
                   ` (8 subsequent siblings)
  9 siblings, 2 replies; 33+ messages in thread
From: Thomas Gleixner @ 2021-06-02  9:55 UTC (permalink / raw)
  To: LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck, Yu-cheng Yu

From: Andy Lutomirski <luto@kernel.org>

This is very heavily based on some code from Thomas Gleixner.  On a system
without XSAVES, it triggers the WARN_ON():

    Bad FPU state detected at copy_kernel_to_fpregs+0x2f/0x40, reinitializing FPU registers.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 tools/testing/selftests/x86/Makefile                |    3 
 tools/testing/selftests/x86/corrupt_xstate_header.c |  114 ++++++++++++++++++++
 2 files changed, 116 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/x86/corrupt_xstate_header.c

--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -17,7 +17,8 @@ TARGETS_C_BOTHBITS := single_step_syscal
 TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
-TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
+			corrupt_xstate_header
 # Some selftests require 32bit support enabled also on 64bit systems
 TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
 
--- /dev/null
+++ b/tools/testing/selftests/x86/corrupt_xstate_header.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Corrupt the XSTATE header in a signal frame
+ *
+ * Based on analysis and a test case from Thomas Gleixner.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sched.h>
+#include <signal.h>
+#include <err.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/wait.h>
+
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+			   unsigned int *ecx, unsigned int *edx)
+{
+	asm volatile(
+		"cpuid;"
+		: "=a" (*eax),
+		  "=b" (*ebx),
+		  "=c" (*ecx),
+		  "=d" (*edx)
+		: "0" (*eax), "2" (*ecx));
+}
+
+static inline int xsave_enabled(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	eax = 0x1;
+	ecx = 0x0;
+	__cpuid(&eax, &ebx, &ecx, &edx);
+
+	/* Is CR4.OSXSAVE enabled ? */
+	return ecx & (1U << 27);
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void sigusr1(int sig, siginfo_t *info, void *uc_void)
+{
+	ucontext_t *uc = uc_void;
+	uint8_t *fpstate = (uint8_t *)uc->uc_mcontext.fpregs;
+	uint64_t *xfeatures = (uint64_t *)(fpstate + 512);
+
+	printf("\tWreckage XSTATE header\n");
+	/* Wreckage the first reserved byte in the header */
+	*(xfeatures + 2) = 0xfffffff;
+}
+
+static void sigsegv(int sig, siginfo_t *info, void *uc_void)
+{
+	printf("\tGot SIGSEGV\n");
+}
+
+int main()
+{
+	cpu_set_t set;
+
+	sethandler(SIGUSR1, sigusr1, 0);
+	sethandler(SIGSEGV, sigsegv, 0);
+
+	if (!xsave_enabled()) {
+		printf("[SKIP] CR4.OSXSAVE disabled.\n");
+		return 0;
+	}
+
+	CPU_ZERO(&set);
+	CPU_SET(0, &set);
+
+	/*
+	 * Enforce that the child runs on the same CPU
+	 * which in turn forces a schedule.
+	 */
+	sched_setaffinity(getpid(), sizeof(set), &set);
+
+	printf("[RUN]\tSend ourselves a signal\n");
+	raise(SIGUSR1);
+
+	printf("[OK]\tBack from the signal.  Now schedule.\n");
+	pid_t child = fork();
+	if (child < 0)
+		err(1, "fork");
+	if (child == 0)
+		return 0;
+	if (child)
+		waitpid(child, NULL, 0);
+	printf("[OK]\tBack in the main thread.\n");
+
+	/*
+	 * We could try to confirm that extended state is still preserved
+	 * when we schedule.  For now, the only indication of failure is
+	 * a warning in the kernel logs.
+	 */
+
+	return 0;
+}


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 2/8] x86/fpu: Prevent state corruption in __fpu__restore_sig()
  2021-06-02  9:55 [patch 0/8] x86/fpu: Mop up XSAVES and related damage Thomas Gleixner
  2021-06-02  9:55 ` [patch 1/8] selftests/x86: Test signal frame XSTATE header corruption handling Thomas Gleixner
@ 2021-06-02  9:55 ` Thomas Gleixner
  2021-06-02 13:12   ` Borislav Petkov
  2021-06-02 15:58   ` [patch V2 " Thomas Gleixner
  2021-06-02  9:55 ` [patch 3/8] x86/fpu: Invalidate FPU state after a failed XRSTOR from a user buffer Thomas Gleixner
                   ` (7 subsequent siblings)
  9 siblings, 2 replies; 33+ messages in thread
From: Thomas Gleixner @ 2021-06-02  9:55 UTC (permalink / raw)
  To: LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck,
	Yu-cheng Yu, syzbot+2067e764dbcd10721e2e, stable

From: Thomas Gleixner <tglx@linutronix.de>

The non-compacted slowpath uses __copy_from_user() and copies the entire
user buffer into the kernel buffer, verbatim.  This means that the kernel
buffer may now contain entirely invalid state on which XRSTOR will #GP.
validate_user_xstate_header() can detect some of that corruption, but that
leaves the onus on callers to clear the buffer.

Prior to XSAVES support it was possible just to reinitialize the buffer,
completely, but with supervisor states that is not longer possible as the
buffer clearing code split got it backwards. Fixing that is possible, but
not corrupting the state in the first place is more robust.

Avoid corruption of the kernel XSAVE buffer by using copy_user_to_xstate()
which validates the XSAVE header contents before copying the actual states
to the kernel. copy_user_to_xstate() was previously only called for
compacted-format kernel buffers, but it works for both compacted and
non-compacted forms.

Using it for the non-compacted form is slower because of multiple
__copy_from_user() operations, but that cost is less important than robust
code in an already slow path.

[ Changelog polished by Dave Hansen ]

Fixes: b860eb8dce59 ("x86/fpu/xstate: Define new functions for clearing fpregs and xstates")
Reported-by: syzbot+2067e764dbcd10721e2e@syzkaller.appspotmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
---
 arch/x86/include/asm/fpu/xstate.h |    4 ----
 arch/x86/kernel/fpu/signal.c      |    9 +--------
 arch/x86/kernel/fpu/xstate.c      |    2 +-
 3 files changed, 2 insertions(+), 13 deletions(-)

--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -112,8 +112,4 @@ void copy_supervisor_to_kernel(struct xr
 void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask);
 void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask);
 
-
-/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
-int validate_user_xstate_header(const struct xstate_header *hdr);
-
 #endif
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -405,14 +405,7 @@ static int __fpu__restore_sig(void __use
 	if (use_xsave() && !fx_only) {
 		u64 init_bv = xfeatures_mask_user() & ~user_xfeatures;
 
-		if (using_compacted_format()) {
-			ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
-		} else {
-			ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
-
-			if (!ret && state_size > offsetof(struct xregs_state, header))
-				ret = validate_user_xstate_header(&fpu->state.xsave.header);
-		}
+		ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
 		if (ret)
 			goto err_out;
 
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -515,7 +515,7 @@ int using_compacted_format(void)
 }
 
 /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
-int validate_user_xstate_header(const struct xstate_header *hdr)
+static int validate_user_xstate_header(const struct xstate_header *hdr)
 {
 	/* No unknown or supervisor features may be set */
 	if (hdr->xfeatures & ~xfeatures_mask_user())


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 3/8] x86/fpu: Invalidate FPU state after a failed XRSTOR from a user buffer
  2021-06-02  9:55 [patch 0/8] x86/fpu: Mop up XSAVES and related damage Thomas Gleixner
  2021-06-02  9:55 ` [patch 1/8] selftests/x86: Test signal frame XSTATE header corruption handling Thomas Gleixner
  2021-06-02  9:55 ` [patch 2/8] x86/fpu: Prevent state corruption in __fpu__restore_sig() Thomas Gleixner
@ 2021-06-02  9:55 ` Thomas Gleixner
  2021-06-02 15:06   ` Borislav Petkov
  2021-06-02  9:55 ` [patch 4/8] x86/fpu: Limit xstate copy size in xstateregs_set() Thomas Gleixner
                   ` (6 subsequent siblings)
  9 siblings, 1 reply; 33+ messages in thread
From: Thomas Gleixner @ 2021-06-02  9:55 UTC (permalink / raw)
  To: LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck,
	Yu-cheng Yu, stable

From: Andy Lutomirski <luto@kernel.org>

If XRSTOR fails due to sufficiently complicated paging errors (e.g.
concurrent TLB invalidation), it may fault with #PF but still modify
portions of the user extended state.

If this happens in __fpu_restore_sig() with a victim task's FPU registers
loaded and the task is preempted by the victim task, the victim task
resumes with partially corrupt extended state.

Invalidate the FPU registers when XRSTOR fails to prevent this scenario.

Fixes: 1d731e731c4c ("x86/fpu: Add a fastpath to __fpu__restore_sig()")
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
---
 arch/x86/kernel/fpu/signal.c |   21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -369,6 +369,27 @@ static int __fpu__restore_sig(void __use
 			fpregs_unlock();
 			return 0;
 		}
+
+		if (test_thread_flag(TIF_NEED_FPU_LOAD)) {
+			/*
+			 * The FPU registers do not belong to current, and
+			 * we just did an FPU restore operation, restricted
+			 * to the user portion of the register file, and
+			 * failed.  In the event that the ucode was
+			 * unfriendly and modified the registers at all, we
+			 * need to make sure that we aren't corrupting an
+			 * innocent non-current task's registers.
+			 */
+			__cpu_invalidate_fpregs_state();
+		} else {
+			/*
+			 * As above, we may have just clobbered current's
+			 * user FPU state.  We will either successfully
+			 * load it or clear it below, so no action is
+			 * required here.
+			 */
+		}
+
 		fpregs_unlock();
 	} else {
 		/*


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 4/8] x86/fpu: Limit xstate copy size in xstateregs_set()
  2021-06-02  9:55 [patch 0/8] x86/fpu: Mop up XSAVES and related damage Thomas Gleixner
                   ` (2 preceding siblings ...)
  2021-06-02  9:55 ` [patch 3/8] x86/fpu: Invalidate FPU state after a failed XRSTOR from a user buffer Thomas Gleixner
@ 2021-06-02  9:55 ` Thomas Gleixner
  2021-06-02 17:44   ` Borislav Petkov
  2021-06-02  9:55 ` [patch 5/8] x86/fpu: Sanitize xstateregs_set() Thomas Gleixner
                   ` (5 subsequent siblings)
  9 siblings, 1 reply; 33+ messages in thread
From: Thomas Gleixner @ 2021-06-02  9:55 UTC (permalink / raw)
  To: LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck,
	Yu-cheng Yu, stable

If the count argument is larger than the xstate size, this will happily
copy beyond the end of xstate.

Fixes: 91c3dba7dbc1 ("x86/fpu/xstate: Fix PTRACE frames for XSAVES")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
---
 arch/x86/kernel/fpu/regset.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -117,7 +117,7 @@ int xstateregs_set(struct task_struct *t
 	/*
 	 * A whole standard-format XSAVE buffer is needed:
 	 */
-	if ((pos != 0) || (count < fpu_user_xstate_size))
+	if (pos != 0 || count != fpu_user_xstate_size)
 		return -EFAULT;
 
 	xsave = &fpu->state.xsave;


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 5/8] x86/fpu: Sanitize xstateregs_set()
  2021-06-02  9:55 [patch 0/8] x86/fpu: Mop up XSAVES and related damage Thomas Gleixner
                   ` (3 preceding siblings ...)
  2021-06-02  9:55 ` [patch 4/8] x86/fpu: Limit xstate copy size in xstateregs_set() Thomas Gleixner
@ 2021-06-02  9:55 ` Thomas Gleixner
  2021-06-02 16:01   ` [patch V2 " Thomas Gleixner
  2021-06-03 17:24   ` [patch " Andy Lutomirski
  2021-06-02  9:55 ` [patch 6/8] x86/fpu: Add address range checks to copy_user_to_xstate() Thomas Gleixner
                   ` (4 subsequent siblings)
  9 siblings, 2 replies; 33+ messages in thread
From: Thomas Gleixner @ 2021-06-02  9:55 UTC (permalink / raw)
  To: LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck, Yu-cheng Yu

xstateregs_set() operates on a stopped task and tries to copy the provided
buffer into the tasks fpu.state.xsave buffer.

Any error while copying or invalid state detected after copying results in
wiping the target tasks FPU state completely including supervisor states.

That's just wrong. The caller supplied invalid data or has a problem with
unmapped memory, so there is absolutely no justification to wreckage the
target.

Fix this with the following modifications:

 1) If data has to be copied from userspace, allocate a buffer and copy from
    user first.

 2) Use copy_kernel_to_xstate() unconditionally so that header checking
    works correctly.

 3) Return on error without wreckaging the target state.

This prevents corrupting supervisor states and lets the caller deal with
the problem it caused in the first place.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/fpu/regset.c |   41 +++++++++++++++++------------------------
 arch/x86/kernel/fpu/xstate.c |   10 ++++++----
 2 files changed, 23 insertions(+), 28 deletions(-)

--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -6,8 +6,12 @@
 #include <asm/fpu/signal.h>
 #include <asm/fpu/regset.h>
 #include <asm/fpu/xstate.h>
+
+#include <linux/vmalloc.h>
+
 #include <linux/sched/task_stack.h>
 
+
 /*
  * The xstateregs_active() routine is the same as the regset_fpregs_active() routine,
  * as the "regset->n" for the xstate regset will be updated based on the feature
@@ -107,8 +111,8 @@ int xstateregs_set(struct task_struct *t
 		  unsigned int pos, unsigned int count,
 		  const void *kbuf, const void __user *ubuf)
 {
+	struct xregs_state *xsave, *xbuf = NULL;
 	struct fpu *fpu = &target->thread.fpu;
-	struct xregs_state *xsave;
 	int ret;
 
 	if (!boot_cpu_has(X86_FEATURE_XSAVE))
@@ -120,32 +124,21 @@ int xstateregs_set(struct task_struct *t
 	if (pos != 0 || count != fpu_user_xstate_size)
 		return -EFAULT;
 
-	xsave = &fpu->state.xsave;
-
-	fpu__prepare_write(fpu);
-
-	if (using_compacted_format()) {
-		if (kbuf)
-			ret = copy_kernel_to_xstate(xsave, kbuf);
-		else
-			ret = copy_user_to_xstate(xsave, ubuf);
-	} else {
-		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
-		if (!ret)
-			ret = validate_user_xstate_header(&xsave->header);
+	if (!kbuf) {
+		xbuf = vmalloc(count);
+		if (!xbuf)
+			return -ENOMEM;
+		ret = user_regset_copyin(&pos, &count, NULL, &ubuf, xbuf, 0, -1);
+		if (ret)
+			goto out;
 	}
 
-	/*
-	 * mxcsr reserved bits must be masked to zero for security reasons.
-	 */
-	xsave->i387.mxcsr &= mxcsr_feature_mask;
-
-	/*
-	 * In case of failure, mark all states as init:
-	 */
-	if (ret)
-		fpstate_init(&fpu->state);
+	xsave = &fpu->state.xsave;
+	fpu__prepare_write(fpu);
+	ret = copy_kernel_to_xstate(xsave, kbuf ? kbuf : xbuf);
 
+out:
+	vfree(xbuf);
 	return ret;
 }
 
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1172,14 +1172,16 @@ int copy_kernel_to_xstate(struct xregs_s
 	 */
 	xsave->header.xfeatures |= hdr.xfeatures;
 
+	/* mxcsr reserved bits must be masked to zero for security reasons. */
+	xsave->i387.mxcsr &= mxcsr_feature_mask;
+
 	return 0;
 }
 
 /*
- * Convert from a ptrace or sigreturn standard-format user-space buffer to
- * kernel XSAVES format and copy to the target thread. This is called from
- * xstateregs_set(), as well as potentially from the sigreturn() and
- * rt_sigreturn() system calls.
+ * Convert from a sigreturn standard-format user-space buffer to kernel
+ * XSAVES format and copy to the target thread. This is called from the
+ * sigreturn() and rt_sigreturn() system calls.
  */
 int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 {


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 6/8] x86/fpu: Add address range checks to copy_user_to_xstate()
  2021-06-02  9:55 [patch 0/8] x86/fpu: Mop up XSAVES and related damage Thomas Gleixner
                   ` (4 preceding siblings ...)
  2021-06-02  9:55 ` [patch 5/8] x86/fpu: Sanitize xstateregs_set() Thomas Gleixner
@ 2021-06-02  9:55 ` Thomas Gleixner
  2021-06-02  9:55 ` [patch 7/8] x86/fpu: Clean up the fpu__clear() variants Thomas Gleixner
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 33+ messages in thread
From: Thomas Gleixner @ 2021-06-02  9:55 UTC (permalink / raw)
  To: LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck, Yu-cheng Yu

From: Andy Lutomirski <luto@kernel.org>

copy_user_to_xstate() uses __copy_from_user(), which provides a negligible
speedup.  Fortunately, both call sites are at least almost correct.
__fpu__restore_sig() checks access_ok() with a length of
xstate_sigframe_size() and ptrace regset access uses fpu_user_xstate_size.
These should be valid upper bounds on the length, so, at worst, this would
cause spurious failures and not accesses to kernel memory.

Nonetheless, this is far more fragile than necessary and none of these
callers are in a hotpath. 

Use copy_from_user() instead.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/fpu/xstate.c |    6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1192,7 +1192,7 @@ int copy_user_to_xstate(struct xregs_sta
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(hdr);
 
-	if (__copy_from_user(&hdr, ubuf + offset, size))
+	if (copy_from_user(&hdr, ubuf + offset, size))
 		return -EFAULT;
 
 	if (validate_user_xstate_header(&hdr))
@@ -1207,7 +1207,7 @@ int copy_user_to_xstate(struct xregs_sta
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			if (__copy_from_user(dst, ubuf + offset, size))
+			if (copy_from_user(dst, ubuf + offset, size))
 				return -EFAULT;
 		}
 	}
@@ -1215,7 +1215,7 @@ int copy_user_to_xstate(struct xregs_sta
 	if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
 		offset = offsetof(struct fxregs_state, mxcsr);
 		size = MXCSR_AND_FLAGS_SIZE;
-		if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size))
+		if (copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size))
 			return -EFAULT;
 	}
 


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 7/8] x86/fpu: Clean up the fpu__clear() variants
  2021-06-02  9:55 [patch 0/8] x86/fpu: Mop up XSAVES and related damage Thomas Gleixner
                   ` (5 preceding siblings ...)
  2021-06-02  9:55 ` [patch 6/8] x86/fpu: Add address range checks to copy_user_to_xstate() Thomas Gleixner
@ 2021-06-02  9:55 ` Thomas Gleixner
  2021-06-02  9:55 ` [patch 8/8] x86/fpu: Deduplicate copy_xxx_to_xstate() Thomas Gleixner
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 33+ messages in thread
From: Thomas Gleixner @ 2021-06-02  9:55 UTC (permalink / raw)
  To: LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck, Yu-cheng Yu

From: Andy Lutomirski <luto@kernel.org>

fpu__clear() currently resets both register state and kernel XSAVE buffer
state.  It has two modes: one for all state (supervisor and user) and
another for user state only.  fpu__clear_all() uses the "all state"
(user_only=0) mode, while a number of signal paths use the user_only=1
mode.

Make fpu__clear() work only for user state (user_only=1) and remove the
"all state" (user_only=0) code.  Rename it to match so it can be used by
the signal paths.

Replace the "all state" (user_only=0) fpu__clear() functionality.  Use the
TIF_NEED_FPU_LOAD functionality instead of making any actual hardware
registers changes in this path.

[ Changelog polished by Dave Hansen ]

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/fpu/core.c |   62 ++++++++++++++++++++++++++++++---------------
 1 file changed, 42 insertions(+), 20 deletions(-)

--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -354,45 +354,67 @@ static inline void copy_init_fpstate_to_
 }
 
 /*
- * Clear the FPU state back to init state.
- *
- * Called by sys_execve(), by the signal handler code and by various
- * error paths.
+ * Reset current's user FPU states to the init states.  current's
+ * supervisor states, if any, are not modified by this function.  The
+ * caller guarantees that the XSTATE header in memory is intact.
  */
-static void fpu__clear(struct fpu *fpu, bool user_only)
+void fpu__clear_user_states(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu);
 
 	if (!static_cpu_has(X86_FEATURE_FPU)) {
-		fpu__drop(fpu);
-		fpu__initialize(fpu);
+		fpu__clear_all(fpu);
 		return;
 	}
 
 	fpregs_lock();
 
-	if (user_only) {
-		if (!fpregs_state_valid(fpu, smp_processor_id()) &&
-		    xfeatures_mask_supervisor())
-			copy_kernel_to_xregs(&fpu->state.xsave,
-					     xfeatures_mask_supervisor());
-		copy_init_fpstate_to_fpregs(xfeatures_mask_user());
-	} else {
-		copy_init_fpstate_to_fpregs(xfeatures_mask_all);
+	/*
+	 * Ensure that current's supervisor states are loaded into
+	 * their corresponding registers.
+	 */
+	if (xfeatures_mask_supervisor() &&
+	    !fpregs_state_valid(fpu, smp_processor_id())) {
+		copy_kernel_to_xregs(&fpu->state.xsave,
+				     xfeatures_mask_supervisor());
 	}
 
+	/* Reset user states in registers. */
+	copy_init_fpstate_to_fpregs(xfeatures_mask_user());
+
+	/*
+	 * Now all FPU registers have their desired values.  Inform the
+	 * FPU state machine that current's FPU registers are in the
+	 * hardware registers.
+	 */
 	fpregs_mark_activate();
+
 	fpregs_unlock();
 }
 
-void fpu__clear_user_states(struct fpu *fpu)
-{
-	fpu__clear(fpu, true);
-}
 
+/*
+ * Reset current's FPU registers (user and supervisor) to their INIT values.
+ * This is used by execve(); out of an abundance of caution, it completely
+ * wipes and resets the XSTATE buffer in memory.
+ *
+ * Note that XSAVE (unlike XSAVES) expects the XSTATE buffer in memory to
+ * be valid, so there are certain forms of corruption of the XSTATE buffer
+ * in memory that would survive initializing the FPU registers and XSAVEing
+ * them to memory.
+ */
 void fpu__clear_all(struct fpu *fpu)
 {
-	fpu__clear(fpu, false);
+	fpregs_lock();
+	fpu__drop(fpu);
+	/*
+	 * This does not change the actual hardware registers; when
+	 * fpu__clear_all() returns, TIF_NEED_FPU_LOAD will be set, and a
+	 * subsequent exit to user mode will reload the hardware registers
+	 * from memory.
+	 */
+	fpu__initialize(fpu);
+	fpregs_unlock();
 }
 
 /*


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch 8/8] x86/fpu: Deduplicate copy_xxx_to_xstate()
  2021-06-02  9:55 [patch 0/8] x86/fpu: Mop up XSAVES and related damage Thomas Gleixner
                   ` (6 preceding siblings ...)
  2021-06-02  9:55 ` [patch 7/8] x86/fpu: Clean up the fpu__clear() variants Thomas Gleixner
@ 2021-06-02  9:55 ` Thomas Gleixner
  2021-06-03 16:56   ` Andy Lutomirski
  2021-06-02 21:28 ` [patch 0/8] x86/fpu: Mop up XSAVES and related damage Yu, Yu-cheng
  2021-06-04 22:04 ` Dave Hansen
  9 siblings, 1 reply; 33+ messages in thread
From: Thomas Gleixner @ 2021-06-02  9:55 UTC (permalink / raw)
  To: LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck, Yu-cheng Yu

copy_user_to_xstate() and copy_kernel_to_xstate() are almost identical
except for the copy function.

Unify them.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/fpu/xstate.c |   83 +++++++++++++++----------------------------
 1 file changed, 29 insertions(+), 54 deletions(-)

--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1124,20 +1124,30 @@ void copy_xstate_to_kernel(struct membuf
 	fill_gap(&to, &last, size);
 }
 
-/*
- * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format
- * and copy to the target thread. This is called from xstateregs_set().
- */
-int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
+static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
+			    const void *kbuf, const void __user *ubuf)
+{
+	if (kbuf) {
+		memcpy(dst, kbuf + offset, size);
+	} else {
+		if (copy_from_user(dst, ubuf + offset, size))
+			return -EFAULT;
+	}
+	return 0;
+}
+
+static int copy_to_xstate(struct xregs_state *xsave, const void *kbuf,
+			  const void __user *ubuf)
 {
 	unsigned int offset, size;
-	int i;
 	struct xstate_header hdr;
+	int i;
 
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(hdr);
 
-	memcpy(&hdr, kbuf + offset, size);
+	if (copy_from_buffer(&hdr, offset, size, kbuf, ubuf))
+		return -EFAULT;
 
 	if (validate_user_xstate_header(&hdr))
 		return -EINVAL;
@@ -1151,7 +1161,8 @@ int copy_kernel_to_xstate(struct xregs_s
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			memcpy(dst, kbuf + offset, size);
+			if (copy_from_buffer(dst, offset, size, kbuf, ubuf))
+				return -EFAULT;
 		}
 	}
 
@@ -1179,58 +1190,22 @@ int copy_kernel_to_xstate(struct xregs_s
 }
 
 /*
+ * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format
+ * and copy to the target thread. This is called from xstateregs_set().
+ */
+int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
+{
+	return copy_to_xstate(xsave, kbuf, NULL);
+}
+
+/*
  * Convert from a sigreturn standard-format user-space buffer to kernel
  * XSAVES format and copy to the target thread. This is called from the
  * sigreturn() and rt_sigreturn() system calls.
  */
 int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 {
-	unsigned int offset, size;
-	int i;
-	struct xstate_header hdr;
-
-	offset = offsetof(struct xregs_state, header);
-	size = sizeof(hdr);
-
-	if (copy_from_user(&hdr, ubuf + offset, size))
-		return -EFAULT;
-
-	if (validate_user_xstate_header(&hdr))
-		return -EINVAL;
-
-	for (i = 0; i < XFEATURE_MAX; i++) {
-		u64 mask = ((u64)1 << i);
-
-		if (hdr.xfeatures & mask) {
-			void *dst = __raw_xsave_addr(xsave, i);
-
-			offset = xstate_offsets[i];
-			size = xstate_sizes[i];
-
-			if (copy_from_user(dst, ubuf + offset, size))
-				return -EFAULT;
-		}
-	}
-
-	if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
-		offset = offsetof(struct fxregs_state, mxcsr);
-		size = MXCSR_AND_FLAGS_SIZE;
-		if (copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size))
-			return -EFAULT;
-	}
-
-	/*
-	 * The state that came in from userspace was user-state only.
-	 * Mask all the user states out of 'xfeatures':
-	 */
-	xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
-
-	/*
-	 * Add back in the features that came in from userspace:
-	 */
-	xsave->header.xfeatures |= hdr.xfeatures;
-
-	return 0;
+	return copy_to_xstate(xsave, NULL, ubuf);
 }
 
 /*


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 1/8] selftests/x86: Test signal frame XSTATE header corruption handling
  2021-06-02  9:55 ` [patch 1/8] selftests/x86: Test signal frame XSTATE header corruption handling Thomas Gleixner
@ 2021-06-02 12:38   ` Borislav Petkov
  2021-06-02 14:15     ` Thomas Gleixner
  2021-06-02 15:59   ` [patch V2 " Thomas Gleixner
  1 sibling, 1 reply; 33+ messages in thread
From: Borislav Petkov @ 2021-06-02 12:38 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck,
	Yu-cheng Yu

On Wed, Jun 02, 2021 at 11:55:44AM +0200, Thomas Gleixner wrote:
> From: Andy Lutomirski <luto@kernel.org>
> 
> This is very heavily based on some code from Thomas Gleixner.  On a system
> without XSAVES, it triggers the WARN_ON():
> 
>     Bad FPU state detected at copy_kernel_to_fpregs+0x2f/0x40, reinitializing FPU registers.

That triggers

[  149.497274] corrupt_xstate_[1627] bad frame in rt_sigreturn frame:00000000dad08ab1 ip:7f031449ffe1 sp:7ffd0c5c59f0 orax:ffffffffffffffff in libpthread-2.31.so[7f0314493000+10000]

on an AMD laptop here.

> +static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
> +			   unsigned int *ecx, unsigned int *edx)
> +{
> +	asm volatile(
> +		"cpuid;"
> +		: "=a" (*eax),
> +		  "=b" (*ebx),
> +		  "=c" (*ecx),
> +		  "=d" (*edx)
> +		: "0" (*eax), "2" (*ecx));
> +}
> +
> +static inline int xsave_enabled(void)
> +{
> +	unsigned int eax, ebx, ecx, edx;
> +
> +	eax = 0x1;
> +	ecx = 0x0;
> +	__cpuid(&eax, &ebx, &ecx, &edx);
> +
> +	/* Is CR4.OSXSAVE enabled ? */
> +	return ecx & (1U << 27);
> +}

One fine day someone should sit down and unify all those auxillary
functions used in the selftests into a lib...

> +
> +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
> +		       int flags)
> +{
> +	struct sigaction sa;
> +
> +	memset(&sa, 0, sizeof(sa));
> +	sa.sa_sigaction = handler;
> +	sa.sa_flags = SA_SIGINFO | flags;
> +	sigemptyset(&sa.sa_mask);
> +	if (sigaction(sig, &sa, 0))
> +		err(1, "sigaction");
> +}
> +
> +static void sigusr1(int sig, siginfo_t *info, void *uc_void)
> +{
> +	ucontext_t *uc = uc_void;
> +	uint8_t *fpstate = (uint8_t *)uc->uc_mcontext.fpregs;
> +	uint64_t *xfeatures = (uint64_t *)(fpstate + 512);
> +
> +	printf("\tWreckage XSTATE header\n");
> +	/* Wreckage the first reserved byte in the header */
> +	*(xfeatures + 2) = 0xfffffff;
> +}
> +
> +static void sigsegv(int sig, siginfo_t *info, void *uc_void)
> +{
> +	printf("\tGot SIGSEGV\n");
> +}
> +
> +int main()

ERROR: Bad function definition - int main() should probably be int main(void)

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 2/8] x86/fpu: Prevent state corruption in __fpu__restore_sig()
  2021-06-02  9:55 ` [patch 2/8] x86/fpu: Prevent state corruption in __fpu__restore_sig() Thomas Gleixner
@ 2021-06-02 13:12   ` Borislav Petkov
  2021-06-02 14:46     ` Thomas Gleixner
  2021-06-02 15:58   ` [patch V2 " Thomas Gleixner
  1 sibling, 1 reply; 33+ messages in thread
From: Borislav Petkov @ 2021-06-02 13:12 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck,
	Yu-cheng Yu, syzbot+2067e764dbcd10721e2e, stable

On Wed, Jun 02, 2021 at 11:55:45AM +0200, Thomas Gleixner wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
> 
> The non-compacted slowpath uses __copy_from_user() and copies the entire
> user buffer into the kernel buffer, verbatim.  This means that the kernel
> buffer may now contain entirely invalid state on which XRSTOR will #GP.
> validate_user_xstate_header() can detect some of that corruption, but that
> leaves the onus on callers to clear the buffer.
> 
> Prior to XSAVES support it was possible just to reinitialize the buffer,
> completely, but with supervisor states that is not longer possible as the
> buffer clearing code split got it backwards. Fixing that is possible, but
> not corrupting the state in the first place is more robust.
> 
> Avoid corruption of the kernel XSAVE buffer by using copy_user_to_xstate()
> which validates the XSAVE header contents before copying the actual states
> to the kernel. copy_user_to_xstate() was previously only called for
> compacted-format kernel buffers, but it works for both compacted and
> non-compacted forms.
> 
> Using it for the non-compacted form is slower because of multiple
> __copy_from_user() operations, but that cost is less important than robust
> code in an already slow path.
> 
> [ Changelog polished by Dave Hansen ]

Nice polishing!

> Fixes: b860eb8dce59 ("x86/fpu/xstate: Define new functions for clearing fpregs and xstates")
> Reported-by: syzbot+2067e764dbcd10721e2e@syzkaller.appspotmail.com
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: stable@vger.kernel.org
> ---
>  arch/x86/include/asm/fpu/xstate.h |    4 ----
>  arch/x86/kernel/fpu/signal.c      |    9 +--------
>  arch/x86/kernel/fpu/xstate.c      |    2 +-
>  3 files changed, 2 insertions(+), 13 deletions(-)
> 
> --- a/arch/x86/include/asm/fpu/xstate.h
> +++ b/arch/x86/include/asm/fpu/xstate.h
> @@ -112,8 +112,4 @@ void copy_supervisor_to_kernel(struct xr
>  void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask);
>  void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask);
>  
> -
> -/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
> -int validate_user_xstate_header(const struct xstate_header *hdr);
> -
>  #endif
> --- a/arch/x86/kernel/fpu/signal.c
> +++ b/arch/x86/kernel/fpu/signal.c
> @@ -405,14 +405,7 @@ static int __fpu__restore_sig(void __use
>  	if (use_xsave() && !fx_only) {
>  		u64 init_bv = xfeatures_mask_user() & ~user_xfeatures;
>  
> -		if (using_compacted_format()) {
> -			ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
> -		} else {
> -			ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
> -
> -			if (!ret && state_size > offsetof(struct xregs_state, header))
> -				ret = validate_user_xstate_header(&fpu->state.xsave.header);
> -		}
> +		ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
>  		if (ret)
>  			goto err_out;
>  
> --- a/arch/x86/kernel/fpu/xstate.c
> +++ b/arch/x86/kernel/fpu/xstate.c
> @@ -515,7 +515,7 @@ int using_compacted_format(void)
>  }
>  
>  /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
> -int validate_user_xstate_header(const struct xstate_header *hdr)
> +static int validate_user_xstate_header(const struct xstate_header *hdr)

Can't do that yet - that one is still called from regset.c:

arch/x86/kernel/fpu/regset.c: In function ‘xstateregs_set’:
arch/x86/kernel/fpu/regset.c:135:10: error: implicit declaration of function ‘validate_user_xstate_header’ [-Werror=implicit-function-declaration]
  135 |    ret = validate_user_xstate_header(&xsave->header);
      |          ^~~~~~~~~~~~~~~~~~~~~~~~~~~
cc1: some warnings being treated as errors

Maybe after the 5th patch which kills that usage too.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 1/8] selftests/x86: Test signal frame XSTATE header corruption handling
  2021-06-02 12:38   ` Borislav Petkov
@ 2021-06-02 14:15     ` Thomas Gleixner
  2021-06-03 13:16       ` Shuah Khan
  0 siblings, 1 reply; 33+ messages in thread
From: Thomas Gleixner @ 2021-06-02 14:15 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: LKML, x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck,
	Yu-cheng Yu, Shuah Khan

On Wed, Jun 02 2021 at 14:38, Borislav Petkov wrote:
> On Wed, Jun 02, 2021 at 11:55:44AM +0200, Thomas Gleixner wrote:
>> From: Andy Lutomirski <luto@kernel.org>
>> 
>> This is very heavily based on some code from Thomas Gleixner.  On a system
>> without XSAVES, it triggers the WARN_ON():
>> 
>>     Bad FPU state detected at copy_kernel_to_fpregs+0x2f/0x40, reinitializing FPU registers.
>
> That triggers
>
> [  149.497274] corrupt_xstate_[1627] bad frame in rt_sigreturn frame:00000000dad08ab1 ip:7f031449ffe1 sp:7ffd0c5c59f0 orax:ffffffffffffffff in libpthread-2.31.so[7f0314493000+10000]
>
> on an AMD laptop here.

Yes, that's the ratelimited printk in the signal code.

>> +static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
>> +			   unsigned int *ecx, unsigned int *edx)
>> +{
>> +	asm volatile(
>> +		"cpuid;"
>> +		: "=a" (*eax),
>> +		  "=b" (*ebx),
>> +		  "=c" (*ecx),
>> +		  "=d" (*edx)
>> +		: "0" (*eax), "2" (*ecx));
>> +}
>> +
>> +static inline int xsave_enabled(void)
>> +{
>> +	unsigned int eax, ebx, ecx, edx;
>> +
>> +	eax = 0x1;
>> +	ecx = 0x0;
>> +	__cpuid(&eax, &ebx, &ecx, &edx);
>> +
>> +	/* Is CR4.OSXSAVE enabled ? */
>> +	return ecx & (1U << 27);
>> +}
>
> One fine day someone should sit down and unify all those auxillary
> functions used in the selftests into a lib...

Yes please. Shuah, that would be a great newcomer task...

>> +
>> +int main()
>
> ERROR: Bad function definition - int main() should probably be int main(void)

Bah, I thought I had fixed that.


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 2/8] x86/fpu: Prevent state corruption in __fpu__restore_sig()
  2021-06-02 13:12   ` Borislav Petkov
@ 2021-06-02 14:46     ` Thomas Gleixner
  0 siblings, 0 replies; 33+ messages in thread
From: Thomas Gleixner @ 2021-06-02 14:46 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: LKML, x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck,
	Yu-cheng Yu, syzbot+2067e764dbcd10721e2e, stable

On Wed, Jun 02 2021 at 15:12, Borislav Petkov wrote:
>>  /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
>> -int validate_user_xstate_header(const struct xstate_header *hdr)
>> +static int validate_user_xstate_header(const struct xstate_header *hdr)
>
> Can't do that yet - that one is still called from regset.c:
>
> arch/x86/kernel/fpu/regset.c: In function ‘xstateregs_set’:
> arch/x86/kernel/fpu/regset.c:135:10: error: implicit declaration of function ‘validate_user_xstate_header’ [-Werror=implicit-function-declaration]
>   135 |    ret = validate_user_xstate_header(&xsave->header);
>       |          ^~~~~~~~~~~~~~~~~~~~~~~~~~~
> cc1: some warnings being treated as errors
>
> Maybe after the 5th patch which kills that usage too.

Gah, yes. I had the patches ordered differently and then failed to do a
full step by step recompile after reshuffling them. Fixed localy.

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 3/8] x86/fpu: Invalidate FPU state after a failed XRSTOR from a user buffer
  2021-06-02  9:55 ` [patch 3/8] x86/fpu: Invalidate FPU state after a failed XRSTOR from a user buffer Thomas Gleixner
@ 2021-06-02 15:06   ` Borislav Petkov
  2021-06-03 17:30     ` Andy Lutomirski
  0 siblings, 1 reply; 33+ messages in thread
From: Borislav Petkov @ 2021-06-02 15:06 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck,
	Yu-cheng Yu, stable

On Wed, Jun 02, 2021 at 11:55:46AM +0200, Thomas Gleixner wrote:
> From: Andy Lutomirski <luto@kernel.org>
> 
> If XRSTOR fails due to sufficiently complicated paging errors (e.g.
> concurrent TLB invalidation),

I can't connect "concurrent TLB invalidation" to "sufficiently
complicated paging errors". Can you elaborate pls?

> it may fault with #PF but still modify
> portions of the user extended state.

Yikes, leaky leaky insn.

> If this happens in __fpu_restore_sig() with a victim task's FPU registers
> loaded and the task is preempted by the victim task,

This is probably meaning another task but the only task mentioned here
is a "victim task"?

> the victim task
> resumes with partially corrupt extended state.
> 
> Invalidate the FPU registers when XRSTOR fails to prevent this scenario.
> 
> Fixes: 1d731e731c4c ("x86/fpu: Add a fastpath to __fpu__restore_sig()")
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: stable@vger.kernel.org
> ---
>  arch/x86/kernel/fpu/signal.c |   21 +++++++++++++++++++++
>  1 file changed, 21 insertions(+)
> 
> --- a/arch/x86/kernel/fpu/signal.c
> +++ b/arch/x86/kernel/fpu/signal.c
> @@ -369,6 +369,27 @@ static int __fpu__restore_sig(void __use
>  			fpregs_unlock();
>  			return 0;
>  		}
> +
> +		if (test_thread_flag(TIF_NEED_FPU_LOAD)) {
> +			/*
> +			 * The FPU registers do not belong to current, and
> +			 * we just did an FPU restore operation, restricted

Please get rid of the "we"-personal pronouns formulations.

> +			 * to the user portion of the register file, and

"register file"? That sounds like comment which belongs in microcode but
not in software. :-)

> +			 * failed.  In the event that the ucode was
> +			 * unfriendly and modified the registers at all, we
> +			 * need to make sure that we aren't corrupting an
> +			 * innocent non-current task's registers.
> +			 */
> +			__cpu_invalidate_fpregs_state();
> +		} else {
> +			/*
> +			 * As above, we may have just clobbered current's
> +			 * user FPU state.  We will either successfully
> +			 * load it or clear it below, so no action is
> +			 * required here.
> +			 */
> +		}

I'm wondering if that comment can simply be above the TIF_NEED_FPU_LOAD
testing, standalone, instead of having it in an empty else? And then get
rid of that else.

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch V2 2/8] x86/fpu: Prevent state corruption in __fpu__restore_sig()
  2021-06-02  9:55 ` [patch 2/8] x86/fpu: Prevent state corruption in __fpu__restore_sig() Thomas Gleixner
  2021-06-02 13:12   ` Borislav Petkov
@ 2021-06-02 15:58   ` Thomas Gleixner
  1 sibling, 0 replies; 33+ messages in thread
From: Thomas Gleixner @ 2021-06-02 15:58 UTC (permalink / raw)
  To: LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck,
	Yu-cheng Yu, syzbot+2067e764dbcd10721e2e, stable

From: Thomas Gleixner <tglx@linutronix.de>

The non-compacted slowpath uses __copy_from_user() and copies the entire
user buffer into the kernel buffer, verbatim.  This means that the kernel
buffer may now contain entirely invalid state on which XRSTOR will #GP.
validate_user_xstate_header() can detect some of that corruption, but that
leaves the onus on callers to clear the buffer.

Prior to XSAVES support it was possible just to reinitialize the buffer,
completely, but with supervisor states that is not longer possible as the
buffer clearing code split got it backwards. Fixing that is possible, but
not corrupting the state in the first place is more robust.

Avoid corruption of the kernel XSAVE buffer by using copy_user_to_xstate()
which validates the XSAVE header contents before copying the actual states
to the kernel. copy_user_to_xstate() was previously only called for
compacted-format kernel buffers, but it works for both compacted and
non-compacted forms.

Using it for the non-compacted form is slower because of multiple
__copy_from_user() operations, but that cost is less important than robust
code in an already slow path.

[ Changelog polished by Dave Hansen ]

Fixes: b860eb8dce59 ("x86/fpu/xstate: Define new functions for clearing fpregs and xstates")
Reported-by: syzbot+2067e764dbcd10721e2e@syzkaller.appspotmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
---
V2: Removed the make validate_user_xstate_header() static hunks (Borislav)
---
 arch/x86/kernel/fpu/signal.c |    9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -405,14 +405,7 @@ static int __fpu__restore_sig(void __use
 	if (use_xsave() && !fx_only) {
 		u64 init_bv = xfeatures_mask_user() & ~user_xfeatures;
 
-		if (using_compacted_format()) {
-			ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
-		} else {
-			ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
-
-			if (!ret && state_size > offsetof(struct xregs_state, header))
-				ret = validate_user_xstate_header(&fpu->state.xsave.header);
-		}
+		ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
 		if (ret)
 			goto err_out;
 

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch V2 1/8] selftests/x86: Test signal frame XSTATE header corruption handling
  2021-06-02  9:55 ` [patch 1/8] selftests/x86: Test signal frame XSTATE header corruption handling Thomas Gleixner
  2021-06-02 12:38   ` Borislav Petkov
@ 2021-06-02 15:59   ` Thomas Gleixner
  2021-06-02 16:02     ` [patch V2a " Thomas Gleixner
  1 sibling, 1 reply; 33+ messages in thread
From: Thomas Gleixner @ 2021-06-02 15:59 UTC (permalink / raw)
  To: LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck, Yu-cheng Yu

From: Andy Lutomirski <luto@kernel.org>

This is very heavily based on some code from Thomas Gleixner.  On a system
without XSAVES, it triggers the WARN_ON():

    Bad FPU state detected at copy_kernel_to_fpregs+0x2f/0x40, reinitializing FPU registers.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
V2: main() -> main(void) (Borislav)
---
 tools/testing/selftests/x86/Makefile                |    3 
 tools/testing/selftests/x86/corrupt_xstate_header.c |  114 ++++++++++++++++++++
 2 files changed, 116 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/x86/corrupt_xstate_header.c

--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -17,7 +17,8 @@ TARGETS_C_BOTHBITS := single_step_syscal
 TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
-TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
+			corrupt_xstate_header
 # Some selftests require 32bit support enabled also on 64bit systems
 TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
 
--- /dev/null
+++ b/tools/testing/selftests/x86/corrupt_xstate_header.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Corrupt the XSTATE header in a signal frame
+ *
+ * Based on analysis and a test case from Thomas Gleixner.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sched.h>
+#include <signal.h>
+#include <err.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/wait.h>
+
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+			   unsigned int *ecx, unsigned int *edx)
+{
+	asm volatile(
+		"cpuid;"
+		: "=a" (*eax),
+		  "=b" (*ebx),
+		  "=c" (*ecx),
+		  "=d" (*edx)
+		: "0" (*eax), "2" (*ecx));
+}
+
+static inline int xsave_enabled(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	eax = 0x1;
+	ecx = 0x0;
+	__cpuid(&eax, &ebx, &ecx, &edx);
+
+	/* Is CR4.OSXSAVE enabled ? */
+	return ecx & (1U << 27);
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void sigusr1(int sig, siginfo_t *info, void *uc_void)
+{
+	ucontext_t *uc = uc_void;
+	uint8_t *fpstate = (uint8_t *)uc->uc_mcontext.fpregs;
+	uint64_t *xfeatures = (uint64_t *)(fpstate + 512);
+
+	printf("\tWreckage XSTATE header\n");
+	/* Wreckage the first reserved byte in the header */
+	*(xfeatures + 2) = 0xfffffff;
+}
+
+static void sigsegv(int sig, siginfo_t *info, void *uc_void)
+{
+	printf("\tGot SIGSEGV\n");
+}
+
+int main()
+{
+	cpu_set_t set;
+
+	sethandler(SIGUSR1, sigusr1, 0);
+	sethandler(SIGSEGV, sigsegv, 0);
+
+	if (!xsave_enabled()) {
+		printf("[SKIP] CR4.OSXSAVE disabled.\n");
+		return 0;
+	}
+
+	CPU_ZERO(&set);
+	CPU_SET(0, &set);
+
+	/*
+	 * Enforce that the child runs on the same CPU
+	 * which in turn forces a schedule.
+	 */
+	sched_setaffinity(getpid(), sizeof(set), &set);
+
+	printf("[RUN]\tSend ourselves a signal\n");
+	raise(SIGUSR1);
+
+	printf("[OK]\tBack from the signal.  Now schedule.\n");
+	pid_t child = fork();
+	if (child < 0)
+		err(1, "fork");
+	if (child == 0)
+		return 0;
+	if (child)
+		waitpid(child, NULL, 0);
+	printf("[OK]\tBack in the main thread.\n");
+
+	/*
+	 * We could try to confirm that extended state is still preserved
+	 * when we schedule.  For now, the only indication of failure is
+	 * a warning in the kernel logs.
+	 */
+
+	return 0;
+}

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch V2 5/8] x86/fpu: Sanitize xstateregs_set()
  2021-06-02  9:55 ` [patch 5/8] x86/fpu: Sanitize xstateregs_set() Thomas Gleixner
@ 2021-06-02 16:01   ` Thomas Gleixner
  2021-06-03 11:32     ` Borislav Petkov
  2021-06-03 17:24   ` [patch " Andy Lutomirski
  1 sibling, 1 reply; 33+ messages in thread
From: Thomas Gleixner @ 2021-06-02 16:01 UTC (permalink / raw)
  To: LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck, Yu-cheng Yu

xstateregs_set() operates on a stopped task and tries to copy the provided
buffer into the tasks fpu.state.xsave buffer.

Any error while copying or invalid state detected after copying results in
wiping the target tasks FPU state completely including supervisor states.

That's just wrong. The caller supplied invalid data or has a problem with
unmapped memory, so there is absolutely no justification to wreckage the
target.

Fix this with the following modifications:

 1) If data has to be copied from userspace, allocate a buffer and copy from
    user first.

 2) Use copy_kernel_to_xstate() unconditionally so that header checking
    works correctly.

 3) Return on error without wreckaging the target state.

This prevents corrupting supervisor states and lets the caller deal with
the problem it caused in the first place.

Make validate_user_xstate_header() static as this was the last user
outside of xstate.c

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
V2: Move the validate_user_xstate_header() static here
---
 arch/x86/include/asm/fpu/xstate.h |    4 ---
 arch/x86/kernel/fpu/regset.c      |   41 +++++++++++++++-----------------------
 arch/x86/kernel/fpu/xstate.c      |   12 ++++++-----
 3 files changed, 24 insertions(+), 33 deletions(-)

--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -112,8 +112,4 @@ void copy_supervisor_to_kernel(struct xr
 void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask);
 void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask);
 
-
-/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
-int validate_user_xstate_header(const struct xstate_header *hdr);
-
 #endif
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -6,8 +6,12 @@
 #include <asm/fpu/signal.h>
 #include <asm/fpu/regset.h>
 #include <asm/fpu/xstate.h>
+
+#include <linux/vmalloc.h>
+
 #include <linux/sched/task_stack.h>
 
+
 /*
  * The xstateregs_active() routine is the same as the regset_fpregs_active() routine,
  * as the "regset->n" for the xstate regset will be updated based on the feature
@@ -107,8 +111,8 @@ int xstateregs_set(struct task_struct *t
 		  unsigned int pos, unsigned int count,
 		  const void *kbuf, const void __user *ubuf)
 {
+	struct xregs_state *xsave, *xbuf = NULL;
 	struct fpu *fpu = &target->thread.fpu;
-	struct xregs_state *xsave;
 	int ret;
 
 	if (!boot_cpu_has(X86_FEATURE_XSAVE))
@@ -120,32 +124,21 @@ int xstateregs_set(struct task_struct *t
 	if (pos != 0 || count != fpu_user_xstate_size)
 		return -EFAULT;
 
-	xsave = &fpu->state.xsave;
-
-	fpu__prepare_write(fpu);
-
-	if (using_compacted_format()) {
-		if (kbuf)
-			ret = copy_kernel_to_xstate(xsave, kbuf);
-		else
-			ret = copy_user_to_xstate(xsave, ubuf);
-	} else {
-		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
-		if (!ret)
-			ret = validate_user_xstate_header(&xsave->header);
+	if (!kbuf) {
+		xbuf = vmalloc(count);
+		if (!xbuf)
+			return -ENOMEM;
+		ret = user_regset_copyin(&pos, &count, NULL, &ubuf, xbuf, 0, -1);
+		if (ret)
+			goto out;
 	}
 
-	/*
-	 * mxcsr reserved bits must be masked to zero for security reasons.
-	 */
-	xsave->i387.mxcsr &= mxcsr_feature_mask;
-
-	/*
-	 * In case of failure, mark all states as init:
-	 */
-	if (ret)
-		fpstate_init(&fpu->state);
+	xsave = &fpu->state.xsave;
+	fpu__prepare_write(fpu);
+	ret = copy_kernel_to_xstate(xsave, kbuf ? kbuf : xbuf);
 
+out:
+	vfree(xbuf);
 	return ret;
 }
 
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -515,7 +515,7 @@ int using_compacted_format(void)
 }
 
 /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
-int validate_user_xstate_header(const struct xstate_header *hdr)
+static int validate_user_xstate_header(const struct xstate_header *hdr)
 {
 	/* No unknown or supervisor features may be set */
 	if (hdr->xfeatures & ~xfeatures_mask_user())
@@ -1172,14 +1172,16 @@ int copy_kernel_to_xstate(struct xregs_s
 	 */
 	xsave->header.xfeatures |= hdr.xfeatures;
 
+	/* mxcsr reserved bits must be masked to zero for security reasons. */
+	xsave->i387.mxcsr &= mxcsr_feature_mask;
+
 	return 0;
 }
 
 /*
- * Convert from a ptrace or sigreturn standard-format user-space buffer to
- * kernel XSAVES format and copy to the target thread. This is called from
- * xstateregs_set(), as well as potentially from the sigreturn() and
- * rt_sigreturn() system calls.
+ * Convert from a sigreturn standard-format user-space buffer to kernel
+ * XSAVES format and copy to the target thread. This is called from the
+ * sigreturn() and rt_sigreturn() system calls.
  */
 int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 {

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [patch V2a 1/8] selftests/x86: Test signal frame XSTATE header corruption handling
  2021-06-02 15:59   ` [patch V2 " Thomas Gleixner
@ 2021-06-02 16:02     ` Thomas Gleixner
  0 siblings, 0 replies; 33+ messages in thread
From: Thomas Gleixner @ 2021-06-02 16:02 UTC (permalink / raw)
  To: LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck, Yu-cheng Yu

From: Andy Lutomirski <luto@kernel.org>

This is very heavily based on some code from Thomas Gleixner.  On a system
without XSAVES, it triggers the WARN_ON():

    Bad FPU state detected at copy_kernel_to_fpregs+0x2f/0x40, reinitializing FPU registers.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
V2a: main() -> main(void) for real ....
---
 tools/testing/selftests/x86/Makefile                |    3 
 tools/testing/selftests/x86/corrupt_xstate_header.c |  114 ++++++++++++++++++++
 2 files changed, 116 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/x86/corrupt_xstate_header.c

--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -17,7 +17,8 @@ TARGETS_C_BOTHBITS := single_step_syscal
 TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
-TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
+			corrupt_xstate_header
 # Some selftests require 32bit support enabled also on 64bit systems
 TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
 
--- /dev/null
+++ b/tools/testing/selftests/x86/corrupt_xstate_header.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Corrupt the XSTATE header in a signal frame
+ *
+ * Based on analysis and a test case from Thomas Gleixner.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sched.h>
+#include <signal.h>
+#include <err.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/wait.h>
+
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+			   unsigned int *ecx, unsigned int *edx)
+{
+	asm volatile(
+		"cpuid;"
+		: "=a" (*eax),
+		  "=b" (*ebx),
+		  "=c" (*ecx),
+		  "=d" (*edx)
+		: "0" (*eax), "2" (*ecx));
+}
+
+static inline int xsave_enabled(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	eax = 0x1;
+	ecx = 0x0;
+	__cpuid(&eax, &ebx, &ecx, &edx);
+
+	/* Is CR4.OSXSAVE enabled ? */
+	return ecx & (1U << 27);
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void sigusr1(int sig, siginfo_t *info, void *uc_void)
+{
+	ucontext_t *uc = uc_void;
+	uint8_t *fpstate = (uint8_t *)uc->uc_mcontext.fpregs;
+	uint64_t *xfeatures = (uint64_t *)(fpstate + 512);
+
+	printf("\tWreckage XSTATE header\n");
+	/* Wreckage the first reserved byte in the header */
+	*(xfeatures + 2) = 0xfffffff;
+}
+
+static void sigsegv(int sig, siginfo_t *info, void *uc_void)
+{
+	printf("\tGot SIGSEGV\n");
+}
+
+int main(void)
+{
+	cpu_set_t set;
+
+	sethandler(SIGUSR1, sigusr1, 0);
+	sethandler(SIGSEGV, sigsegv, 0);
+
+	if (!xsave_enabled()) {
+		printf("[SKIP] CR4.OSXSAVE disabled.\n");
+		return 0;
+	}
+
+	CPU_ZERO(&set);
+	CPU_SET(0, &set);
+
+	/*
+	 * Enforce that the child runs on the same CPU
+	 * which in turn forces a schedule.
+	 */
+	sched_setaffinity(getpid(), sizeof(set), &set);
+
+	printf("[RUN]\tSend ourselves a signal\n");
+	raise(SIGUSR1);
+
+	printf("[OK]\tBack from the signal.  Now schedule.\n");
+	pid_t child = fork();
+	if (child < 0)
+		err(1, "fork");
+	if (child == 0)
+		return 0;
+	if (child)
+		waitpid(child, NULL, 0);
+	printf("[OK]\tBack in the main thread.\n");
+
+	/*
+	 * We could try to confirm that extended state is still preserved
+	 * when we schedule.  For now, the only indication of failure is
+	 * a warning in the kernel logs.
+	 */
+
+	return 0;
+}

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 4/8] x86/fpu: Limit xstate copy size in xstateregs_set()
  2021-06-02  9:55 ` [patch 4/8] x86/fpu: Limit xstate copy size in xstateregs_set() Thomas Gleixner
@ 2021-06-02 17:44   ` Borislav Petkov
  0 siblings, 0 replies; 33+ messages in thread
From: Borislav Petkov @ 2021-06-02 17:44 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck,
	Yu-cheng Yu, stable

On Wed, Jun 02, 2021 at 11:55:47AM +0200, Thomas Gleixner wrote:
> If the count argument is larger than the xstate size, this will happily
> copy beyond the end of xstate.
> 
> Fixes: 91c3dba7dbc1 ("x86/fpu/xstate: Fix PTRACE frames for XSAVES")
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: stable@vger.kernel.org
> ---
>  arch/x86/kernel/fpu/regset.c |    2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> --- a/arch/x86/kernel/fpu/regset.c
> +++ b/arch/x86/kernel/fpu/regset.c
> @@ -117,7 +117,7 @@ int xstateregs_set(struct task_struct *t
>  	/*
>  	 * A whole standard-format XSAVE buffer is needed:
>  	 */
> -	if ((pos != 0) || (count < fpu_user_xstate_size))
> +	if (pos != 0 || count != fpu_user_xstate_size)
>  		return -EFAULT;
>  
>  	xsave = &fpu->state.xsave;

Looking at this one, I guess it and all the CC:stable ones should be at
the beginning of the set so that they go to Linus now...

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 0/8] x86/fpu: Mop up XSAVES and related damage
  2021-06-02  9:55 [patch 0/8] x86/fpu: Mop up XSAVES and related damage Thomas Gleixner
                   ` (7 preceding siblings ...)
  2021-06-02  9:55 ` [patch 8/8] x86/fpu: Deduplicate copy_xxx_to_xstate() Thomas Gleixner
@ 2021-06-02 21:28 ` Yu, Yu-cheng
  2021-06-04 14:05   ` Thomas Gleixner
  2021-06-04 22:04 ` Dave Hansen
  9 siblings, 1 reply; 33+ messages in thread
From: Yu, Yu-cheng @ 2021-06-02 21:28 UTC (permalink / raw)
  To: Thomas Gleixner, LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck

On 6/2/2021 2:55 AM, Thomas Gleixner wrote:
> syszbot reported a warnon for XRSTOR raising #GP:
> 
>   https://lore.kernel.org/r/0000000000004c453905c30f8334@google.com
> 
> with a syzcaller reproducer and a conclusive bisect result.
> 
> It took a while to destill a simple C reproducer out of it which allowed to
> pin point the root cause: The recent addition of supervisor XSTATEs broke
> the signal restore path for the case where the signal handler wreckaged the
> XSTATE on stack because it does not sanitize the XSTATE header which causes
> a subsequent XRSTOR to fail and #GP.
> 
> The following series addresses the problem and fixes related issues which
> were found while inspecting the related changes.
> 
> Thanks to Andy and Dave for working on this with me!
> 
> Thanks,
> 
> 	tglx
> ---
>   arch/x86/include/asm/fpu/xstate.h                     |    4
>   arch/x86/kernel/fpu/core.c                            |   62 ++++++---
>   arch/x86/kernel/fpu/regset.c                          |   43 ++----
>   arch/x86/kernel/fpu/signal.c                          |   30 +++-
>   arch/x86/kernel/fpu/xstate.c                          |   95 +++++----------
>   b/tools/testing/selftests/x86/corrupt_xstate_header.c |  114 ++++++++++++++++++
>   tools/testing/selftests/x86/Makefile                  |    3
>   7 files changed, 234 insertions(+), 117 deletions(-)
> 

With the series applied, glibc pkey test fails sometimes.  I will try to 
find out the cause.

The test:

https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/tst-pkey.c

The output:

error: ../sysdeps/unix/sysv/linux/tst-pkey.c:161: not true: 
!check_page_access (i, false)
error: ../sysdeps/unix/sysv/linux/tst-pkey.c:162: not true: 
!check_page_access (i, true)
error: ../sysdeps/unix/sysv/linux/tst-pkey.c:161: not true: 
!check_page_access (i, false)
error: ../sysdeps/unix/sysv/linux/tst-pkey.c:162: not true: 
!check_page_access (i, true)
error: ../sysdeps/unix/sysv/linux/tst-pkey.c:161: not true: 
!check_page_access (i, false)
error: ../sysdeps/unix/sysv/linux/tst-pkey.c:162: not true: 
!check_page_access (i, true)
../sysdeps/unix/sysv/linux/tst-pkey.c:238: numeric comparison failure
    left: 0 (0x0); from: result->access_rights[i]
   right: 1 (0x1); from: PKEY_DISABLE_ACCESS
../sysdeps/unix/sysv/linux/tst-pkey.c:238: numeric comparison failure
    left: 0 (0x0); from: result->access_rights[i]
   right: 1 (0x1); from: PKEY_DISABLE_ACCESS
../sysdeps/unix/sysv/linux/tst-pkey.c:238: numeric comparison failure
    left: 0 (0x0); from: result->access_rights[i]
   right: 1 (0x1); from: PKEY_DISABLE_ACCESS
../sysdeps/unix/sysv/linux/tst-pkey.c:242: numeric comparison failure
    left: 0 (0x0); from: result->access_rights[i]
   right: 1 (0x1); from: PKEY_DISABLE_ACCESS
../sysdeps/unix/sysv/linux/tst-pkey.c:242: numeric comparison failure
    left: 0 (0x0); from: result->access_rights[i]
   right: 1 (0x1); from: PKEY_DISABLE_ACCESS
../sysdeps/unix/sysv/linux/tst-pkey.c:242: numeric comparison failure
    left: 0 (0x0); from: result->access_rights[i]
   right: 1 (0x1); from: PKEY_DISABLE_ACCESS
error: 12 test failures

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch V2 5/8] x86/fpu: Sanitize xstateregs_set()
  2021-06-02 16:01   ` [patch V2 " Thomas Gleixner
@ 2021-06-03 11:32     ` Borislav Petkov
  0 siblings, 0 replies; 33+ messages in thread
From: Borislav Petkov @ 2021-06-03 11:32 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck,
	Yu-cheng Yu

On Wed, Jun 02, 2021 at 06:01:13PM +0200, Thomas Gleixner wrote:
> xstateregs_set() operates on a stopped task and tries to copy the provided
> buffer into the tasks fpu.state.xsave buffer.

task's

> 
> Any error while copying or invalid state detected after copying results in
> wiping the target tasks FPU state completely including supervisor states.

Ditto.

> 
> That's just wrong. The caller supplied invalid data or has a problem with
> unmapped memory, so there is absolutely no justification to wreckage the
> target.
> 
> Fix this with the following modifications:
> 
>  1) If data has to be copied from userspace, allocate a buffer and copy from
>     user first.
> 
>  2) Use copy_kernel_to_xstate() unconditionally so that header checking
>     works correctly.
> 
>  3) Return on error without wreckaging the target state.
> 
> This prevents corrupting supervisor states and lets the caller deal with
> the problem it caused in the first place.
> 
> Make validate_user_xstate_header() static as this was the last user
> outside of xstate.c

Niice.


> @@ -120,32 +124,21 @@ int xstateregs_set(struct task_struct *t
>  	if (pos != 0 || count != fpu_user_xstate_size)
>  		return -EFAULT;
>  
> -	xsave = &fpu->state.xsave;
> -
> -	fpu__prepare_write(fpu);
> -
> -	if (using_compacted_format()) {
> -		if (kbuf)
> -			ret = copy_kernel_to_xstate(xsave, kbuf);
> -		else
> -			ret = copy_user_to_xstate(xsave, ubuf);
> -	} else {
> -		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
> -		if (!ret)
> -			ret = validate_user_xstate_header(&xsave->header);
> +	if (!kbuf) {
> +		xbuf = vmalloc(count);
> +		if (!xbuf)
> +			return -ENOMEM;

<---- newline here.

Btw, AFAICT and if I'm staring properly at the bowels of ptrace, that
ENOMEM would percolate up the chain when ptrace(2) is called.

The ptrace(2) manpage doesn't mention ENOMEM and I guess this is a small
ABI change. I say "small" because it is still a negative error and
properly written luserspace code should check it anyway...

Or am I being overly cautious here?

 ( Frankly, can you blame me, with that madness called xstate?! )

> +		ret = user_regset_copyin(&pos, &count, NULL, &ubuf, xbuf, 0, -1);
> +		if (ret)
> +			goto out;
>  	}
>  
> -	/*
> -	 * mxcsr reserved bits must be masked to zero for security reasons.
> -	 */
> -	xsave->i387.mxcsr &= mxcsr_feature_mask;
> -
> -	/*
> -	 * In case of failure, mark all states as init:
> -	 */
> -	if (ret)
> -		fpstate_init(&fpu->state);
> +	xsave = &fpu->state.xsave;
> +	fpu__prepare_write(fpu);
> +	ret = copy_kernel_to_xstate(xsave, kbuf ? kbuf : xbuf);

Uff, three buffers in a single function. How about we call "xbuf"
"tmp_buf" so that it is even more visible what it is and it differs more
from the other <letter>buf thingies?

>  /*
> - * Convert from a ptrace or sigreturn standard-format user-space buffer to
> - * kernel XSAVES format and copy to the target thread. This is called from
> - * xstateregs_set(), as well as potentially from the sigreturn() and
> - * rt_sigreturn() system calls.
> + * Convert from a sigreturn standard-format user-space buffer to kernel
> + * XSAVES format and copy to the target thread. This is called from the

Yeah, I guess we call that the "compacted" format. Just getting our
terminology simplified because this is one helluva mess, as we all know.

> + * sigreturn() and rt_sigreturn() system calls.
>   */
>  int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
>  {

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 1/8] selftests/x86: Test signal frame XSTATE header corruption handling
  2021-06-02 14:15     ` Thomas Gleixner
@ 2021-06-03 13:16       ` Shuah Khan
  0 siblings, 0 replies; 33+ messages in thread
From: Shuah Khan @ 2021-06-03 13:16 UTC (permalink / raw)
  To: Thomas Gleixner, Borislav Petkov
  Cc: LKML, x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck,
	Yu-cheng Yu, Shuah Khan, Shuah Khan

On 6/2/21 8:15 AM, Thomas Gleixner wrote:
> On Wed, Jun 02 2021 at 14:38, Borislav Petkov wrote:
>> On Wed, Jun 02, 2021 at 11:55:44AM +0200, Thomas Gleixner wrote:
>>> From: Andy Lutomirski <luto@kernel.org>
>>>
>>> This is very heavily based on some code from Thomas Gleixner.  On a system
>>> without XSAVES, it triggers the WARN_ON():
>>>
>>>      Bad FPU state detected at copy_kernel_to_fpregs+0x2f/0x40, reinitializing FPU registers.
>>
>> That triggers
>>
>> [  149.497274] corrupt_xstate_[1627] bad frame in rt_sigreturn frame:00000000dad08ab1 ip:7f031449ffe1 sp:7ffd0c5c59f0 orax:ffffffffffffffff in libpthread-2.31.so[7f0314493000+10000]
>>
>> on an AMD laptop here.
> 
> Yes, that's the ratelimited printk in the signal code.
> 
>>> +static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
>>> +			   unsigned int *ecx, unsigned int *edx)
>>> +{
>>> +	asm volatile(
>>> +		"cpuid;"
>>> +		: "=a" (*eax),
>>> +		  "=b" (*ebx),
>>> +		  "=c" (*ecx),
>>> +		  "=d" (*edx)
>>> +		: "0" (*eax), "2" (*ecx));
>>> +}
>>> +
>>> +static inline int xsave_enabled(void)
>>> +{
>>> +	unsigned int eax, ebx, ecx, edx;
>>> +
>>> +	eax = 0x1;
>>> +	ecx = 0x0;
>>> +	__cpuid(&eax, &ebx, &ecx, &edx);
>>> +
>>> +	/* Is CR4.OSXSAVE enabled ? */
>>> +	return ecx & (1U << 27);
>>> +}
>>
>> One fine day someone should sit down and unify all those auxillary
>> functions used in the selftests into a lib...
> 
> Yes please. Shuah, that would be a great newcomer task...
> 

Yes. I will add to newcomer task list.

thanks,
-- Shuah

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 8/8] x86/fpu: Deduplicate copy_xxx_to_xstate()
  2021-06-02  9:55 ` [patch 8/8] x86/fpu: Deduplicate copy_xxx_to_xstate() Thomas Gleixner
@ 2021-06-03 16:56   ` Andy Lutomirski
  0 siblings, 0 replies; 33+ messages in thread
From: Andy Lutomirski @ 2021-06-03 16:56 UTC (permalink / raw)
  To: Thomas Gleixner, LKML
  Cc: x86, Dave Hansen, Fenghua Yu, Tony Luck, Yu-cheng Yu

On 6/2/21 2:55 AM, Thomas Gleixner wrote:
> copy_user_to_xstate() and copy_kernel_to_xstate() are almost identical
> except for the copy function.
> 
> Unify them.

I keep looking at call sites of these functions and being vaguely
confused.  Maybe copy_to_userabi_xstate() would make the purpose of
these functions clearer?

Otherwise, and even if you don't like my suggestion:

Acked-by: Andy Lutomirski <luto@kernel.org>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 5/8] x86/fpu: Sanitize xstateregs_set()
  2021-06-02  9:55 ` [patch 5/8] x86/fpu: Sanitize xstateregs_set() Thomas Gleixner
  2021-06-02 16:01   ` [patch V2 " Thomas Gleixner
@ 2021-06-03 17:24   ` Andy Lutomirski
  1 sibling, 0 replies; 33+ messages in thread
From: Andy Lutomirski @ 2021-06-03 17:24 UTC (permalink / raw)
  To: Thomas Gleixner, LKML
  Cc: x86, Dave Hansen, Fenghua Yu, Tony Luck, Yu-cheng Yu

On 6/2/21 2:55 AM, Thomas Gleixner wrote:
> That's just wrong. The caller supplied invalid data or has a problem with
> unmapped memory, so there is absolutely no justification to wreckage the
> target.
> 

"Wreckage" is a delightful word, but it likes to be a noun.  One can
wreak the target, wreak havoc upon the target, annihilate the target, or
turn the target's state into system-destroying wreckage :)

--Andy

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 3/8] x86/fpu: Invalidate FPU state after a failed XRSTOR from a user buffer
  2021-06-02 15:06   ` Borislav Petkov
@ 2021-06-03 17:30     ` Andy Lutomirski
  2021-06-03 19:28       ` Borislav Petkov
  0 siblings, 1 reply; 33+ messages in thread
From: Andy Lutomirski @ 2021-06-03 17:30 UTC (permalink / raw)
  To: Borislav Petkov, Thomas Gleixner
  Cc: LKML, x86, Dave Hansen, Fenghua Yu, Tony Luck, Yu-cheng Yu, stable

On 6/2/21 8:06 AM, Borislav Petkov wrote:
> On Wed, Jun 02, 2021 at 11:55:46AM +0200, Thomas Gleixner wrote:
>> From: Andy Lutomirski <luto@kernel.org>
>>
>> If XRSTOR fails due to sufficiently complicated paging errors (e.g.
>> concurrent TLB invalidation),
> 
> I can't connect "concurrent TLB invalidation" to "sufficiently
> complicated paging errors". Can you elaborate pls?

Think "complex microarchitectural conditions".

How about:

As far as I can tell, both Intel and AMD consider it to be
architecturally valid for XRSTOR to fail with #PF but nonetheless change
user state.  The actual conditions under which this might occur are
unclear [1], but it seems plausible that this might be triggered if one
sibling thread unmaps a page and invalidates the shared TLB while
another sibling thread is executing XRSTOR on the page in question.

__fpu__restore_sig() can execute XRSTOR while the hardware registers are
preserved on behalf of a different victim task (using the
fpu_fpregs_owner_ctx mechanism), and, in theory, XRSTOR could fail but
modify the registers.  If this happens, then there is a window in which
__fpu__restore_sig() could schedule out and the victim task could
schedule back in without reloading its own FPU registers.  This would
result in part of the FPU state that __fpu__restore_sig() was attempting
to load leaking into the victim task's user-visible state.

Invalidate preserved FPU registers on XRSTOR failure to prevent this
situation from corrupting any state.

[1] Frequent readers of the errata lists might imagine "complex
microarchitectural conditions".

>> +			 * failed.  In the event that the ucode was
>> +			 * unfriendly and modified the registers at all, we
>> +			 * need to make sure that we aren't corrupting an
>> +			 * innocent non-current task's registers.
>> +			 */
>> +			__cpu_invalidate_fpregs_state();
>> +		} else {
>> +			/*
>> +			 * As above, we may have just clobbered current's
>> +			 * user FPU state.  We will either successfully
>> +			 * load it or clear it below, so no action is
>> +			 * required here.
>> +			 */
>> +		}
> 
> I'm wondering if that comment can simply be above the TIF_NEED_FPU_LOAD
> testing, standalone, instead of having it in an empty else? And then get
> rid of that else.

I'm fine either way.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 3/8] x86/fpu: Invalidate FPU state after a failed XRSTOR from a user buffer
  2021-06-03 17:30     ` Andy Lutomirski
@ 2021-06-03 19:28       ` Borislav Petkov
  0 siblings, 0 replies; 33+ messages in thread
From: Borislav Petkov @ 2021-06-03 19:28 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Thomas Gleixner, LKML, x86, Dave Hansen, Fenghua Yu, Tony Luck,
	Yu-cheng Yu, stable

On Thu, Jun 03, 2021 at 10:30:05AM -0700, Andy Lutomirski wrote:
> Think "complex microarchitectural conditions".

Ah, the magic phrase.

> How about:
> 
> As far as I can tell, both Intel and AMD consider it to be
> architecturally valid for XRSTOR to fail with #PF but nonetheless change
> user state.  The actual conditions under which this might occur are
> unclear [1], but it seems plausible that this might be triggered if one
> sibling thread unmaps a page and invalidates the shared TLB while
> another sibling thread is executing XRSTOR on the page in question.
> 
> __fpu__restore_sig() can execute XRSTOR while the hardware registers are
> preserved on behalf of a different victim task (using the
> fpu_fpregs_owner_ctx mechanism), and, in theory, XRSTOR could fail but
> modify the registers.  If this happens, then there is a window in which
> __fpu__restore_sig() could schedule out and the victim task could
> schedule back in without reloading its own FPU registers.  This would
> result in part of the FPU state that __fpu__restore_sig() was attempting
> to load leaking into the victim task's user-visible state.
> 
> Invalidate preserved FPU registers on XRSTOR failure to prevent this
> situation from corrupting any state.
> 
> [1] Frequent readers of the errata lists might imagine "complex
> microarchitectural conditions".

Yap, very nice, thanks!

> > I'm wondering if that comment can simply be above the TIF_NEED_FPU_LOAD
> > testing, standalone, instead of having it in an empty else? And then get
> > rid of that else.
> 
> I'm fine either way.

Ok, then let's aim for common, no-surprise-there patterns as we're in a
mine field here anyway.

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 0/8] x86/fpu: Mop up XSAVES and related damage
  2021-06-02 21:28 ` [patch 0/8] x86/fpu: Mop up XSAVES and related damage Yu, Yu-cheng
@ 2021-06-04 14:05   ` Thomas Gleixner
  2021-06-04 16:27     ` Yu, Yu-cheng
  2021-06-04 17:46     ` Dave Hansen
  0 siblings, 2 replies; 33+ messages in thread
From: Thomas Gleixner @ 2021-06-04 14:05 UTC (permalink / raw)
  To: Yu, Yu-cheng, LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck

On Wed, Jun 02 2021 at 14:28, Yu-cheng Yu wrote:
> On 6/2/2021 2:55 AM, Thomas Gleixner wrote:
>
> With the series applied, glibc pkey test fails sometimes.  I will try to 
> find out the cause.

That fails not sometimes. It fails always due to patch 7/8. The reason
is that before that patch fpu__clear_all() reinitialized the hardware
which includes writing the initial PKRU value.

But fpu__initialize() does not store the initial PKRU value in the
task's xstate which breaks PKRU. As that patch is not urgent we can
postpone it for not.

But looking at the above, it's not clear to me why that PKRU stuff works
at all (upstream), but I'll figure it out eventually. I'm quite sure
that it does work by pure chance not by design.

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 0/8] x86/fpu: Mop up XSAVES and related damage
  2021-06-04 14:05   ` Thomas Gleixner
@ 2021-06-04 16:27     ` Yu, Yu-cheng
  2021-06-04 17:46     ` Dave Hansen
  1 sibling, 0 replies; 33+ messages in thread
From: Yu, Yu-cheng @ 2021-06-04 16:27 UTC (permalink / raw)
  To: Thomas Gleixner, LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck

On 6/4/2021 7:05 AM, Thomas Gleixner wrote:
> On Wed, Jun 02 2021 at 14:28, Yu-cheng Yu wrote:
>> On 6/2/2021 2:55 AM, Thomas Gleixner wrote:
>>
>> With the series applied, glibc pkey test fails sometimes.  I will try to
>> find out the cause.
> 
> That fails not sometimes. It fails always due to patch 7/8. The reason
> is that before that patch fpu__clear_all() reinitialized the hardware
> which includes writing the initial PKRU value.
> 
> But fpu__initialize() does not store the initial PKRU value in the
> task's xstate which breaks PKRU. As that patch is not urgent we can
> postpone it for not.
> 
> But looking at the above, it's not clear to me why that PKRU stuff works
> at all (upstream), but I'll figure it out eventually. I'm quite sure
> that it does work by pure chance not by design.
>

Thanks for the update.  I will continue looking at it.

Yu-cheng

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 0/8] x86/fpu: Mop up XSAVES and related damage
  2021-06-04 14:05   ` Thomas Gleixner
  2021-06-04 16:27     ` Yu, Yu-cheng
@ 2021-06-04 17:46     ` Dave Hansen
  2021-06-04 18:14       ` Thomas Gleixner
  1 sibling, 1 reply; 33+ messages in thread
From: Dave Hansen @ 2021-06-04 17:46 UTC (permalink / raw)
  To: Thomas Gleixner, Yu, Yu-cheng, LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck

On 6/4/21 7:05 AM, Thomas Gleixner wrote:
> But looking at the above, it's not clear to me why that PKRU stuff works
> at all (upstream), but I'll figure it out eventually. I'm quite sure
> that it does work by pure chance not by design.

The upstream flush_thread() code appears correct and even intentionally
so.  Just how we must eagerly load PKRU on a context switch, the
fpu__clear*() code eagerly "clears" PKRU.  It doesn't actually zero it,
of course, but reverts the register and the fpstate back to the
'init_pkru_value':

flush_thread()->fpu__clear_all()->fpu__clear(user_only=false)
	copy_init_fpstate_to_fpregs()
		copy_kernel_to_xregs(init_fpu) // fpstate
		copy_init_pkru_to_fpregs()
			 write_pkru(init_pkru_value_snapshot) // fpregs

Andy said you have a fix for this, but I think the new fpu__clear_all()
is failing to do the eager write_pkru().

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 0/8] x86/fpu: Mop up XSAVES and related damage
  2021-06-04 17:46     ` Dave Hansen
@ 2021-06-04 18:14       ` Thomas Gleixner
  0 siblings, 0 replies; 33+ messages in thread
From: Thomas Gleixner @ 2021-06-04 18:14 UTC (permalink / raw)
  To: Dave Hansen, Yu, Yu-cheng, LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck

On Fri, Jun 04 2021 at 10:46, Dave Hansen wrote:
> On 6/4/21 7:05 AM, Thomas Gleixner wrote:
>> But looking at the above, it's not clear to me why that PKRU stuff works
>> at all (upstream), but I'll figure it out eventually. I'm quite sure
>> that it does work by pure chance not by design.
>
> The upstream flush_thread() code appears correct and even intentionally
> so.  Just how we must eagerly load PKRU on a context switch, the
> fpu__clear*() code eagerly "clears" PKRU.  It doesn't actually zero it,
> of course, but reverts the register and the fpstate back to the
> 'init_pkru_value':
>
> flush_thread()->fpu__clear_all()->fpu__clear(user_only=false)
> 	copy_init_fpstate_to_fpregs()
> 		copy_kernel_to_xregs(init_fpu) // fpstate
> 		copy_init_pkru_to_fpregs()
> 			 write_pkru(init_pkru_value_snapshot) // fpregs
>
> Andy said you have a fix for this, but I think the new fpu__clear_all()
> is failing to do the eager write_pkru().

Yes, that's the reason and it took some time until I realized that
fpu__initialize() is inconsistent vs. PKRU.

We can't use copy_init_pkru_to_fregs() either because that's not
updating the xsaves area because XFEATURE_PKRU has been cleared.
Yay for consistency!

I'll post a fix soonish after testing it.

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 0/8] x86/fpu: Mop up XSAVES and related damage
  2021-06-02  9:55 [patch 0/8] x86/fpu: Mop up XSAVES and related damage Thomas Gleixner
                   ` (8 preceding siblings ...)
  2021-06-02 21:28 ` [patch 0/8] x86/fpu: Mop up XSAVES and related damage Yu, Yu-cheng
@ 2021-06-04 22:04 ` Dave Hansen
  2021-06-05 10:18   ` Thomas Gleixner
  9 siblings, 1 reply; 33+ messages in thread
From: Dave Hansen @ 2021-06-04 22:04 UTC (permalink / raw)
  To: Thomas Gleixner, LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck, Yu-cheng Yu

The nice Intel 0day folks threw some tests at this series.  It managed
to trigger an oops.  I can't right this moment publish the source for
the test, but it looks pretty trivial.  It basically creates a 0'd XSAVE
buffer, sets XCOMP_BV to:

#define XSAVES_FEATURES ( \
        XFEATURE_MASK_PT | \
        XFEATURE_MASK_SHSTK_USER | \
        XFEATURE_MASK_SHSTK_KERNEL | \
        0x8000000000000000 \
        )

Then passes that buffer in to ptrace(PTRACE_SETREGSET, ...).

The oops is below.  It doesn't *look* to be XSAVES-related.  The oops
looks like it's happening in xstateregs_set() itself as opposed to down
in the code actually concerned with supervisor state.

No bug is jumping out of the code as I took a brief look at it.  The
xbuf versus kbuf code looks a bit wonky, but I can't find a hole in it.

I'll see if I can reproduce this locally.

> [   38.612897] BAT XSAVE
> [   38.612902]
> [   38.619525] os
> [   38.619528]
> [   38.625885] enumeration
> [   38.625890]
> [   38.633098] schedule check
> [   38.633105]
> [   38.640308] fork check
> [   38.640321]
> [   38.647154] signal check
> [   38.647164]
> [   38.654065] thread check
> [   38.654072]
> [   38.661036] multi thread check
> [   38.661042]
> [   75.771095] BUG: kernel NULL pointer dereference, address: 0000000000000000
> [   75.779596] #PF: supervisor read access in kernel mode
> [   75.785980] #PF: error_code(0x0000) - not-present page
> [   75.792321] PGD 0 P4D 0
> [   75.795742] Oops: 0000 [#1] SMP PTI
> [   75.800233] CPU: 94 PID: 2145 Comm: ptrace_sys_stat Not tainted 5.13.0-rc4-00096-g2c38e60b245f #1
> [   75.810753] Hardware name: Intel Corporation BRICKLAND/BRICKLAND, BIOS BRHSXSD1.86B.0067.R02.1507221722 07/22/2015
> [   75.822934] RIP: 0010:xstateregs_set+0x9f/0x140
> [   75.828591] Code: e0 5d 41 5c 41 5d 41 5e 41 5f c3 41 89 cf 4d 89 ce 4c 89 ff e8 32 87 2e 00 48 89 c5 48 85 c0 0f 84 8a 00 00 00 45 85 e4 74 20 <48> 8b 34 25 00 00 00 00 48 85 f6 74 2f 4c 89 fa 48 89 c7 e8 89 78
> [   75.850813] RSP: 0018:ffffc90023a47e38 EFLAGS: 00010202
> [   75.857241] RAX: ffffc9000c9dd000 RBX: ffff88c107824f80 RCX: 8000000000000063
> [   75.865827] RDX: 0000000000000001 RSI: ffff888100000000 RDI: ffff888100203328
> [   75.874376] RBP: ffffc9000c9dd000 R08: ffff88810cdffef0 R09: 8000000000000063
> [   75.882928] R10: 0000000000000ee8 R11: ffff88a08159c3f0 R12: 0000000000000340
> [   75.891427] R13: ffff88c1078273c0 R14: 000055576f8960c0 R15: 0000000000000340
> [   75.899961] FS:  00007f50ad027500(0000) GS:ffff88bfff980000(0000) knlGS:0000000000000000
> [   75.909533] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [   75.916458] CR2: 0000000000000000 CR3: 000000208df04004 CR4: 00000000001706e0
> [   75.924975] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [   75.933434] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [   75.941911] Call Trace:
> [   75.945118]  ptrace_request+0x4cb/0x600
> [   75.949893]  arch_ptrace+0x125/0x300
> [   75.954350]  ? ptrace_check_attach+0x69/0x140
> [   75.959690]  __x64_sys_ptrace+0x80/0x140
> [   75.964532]  do_syscall_64+0x40/0x80
> [   75.969004]  entry_SYSCALL_64_after_hwframe+0x44/0xae
> [   75.975120] RIP: 0033:0x7f50acf5692f
> [   75.979581] Code: c7 44 24 10 18 00 00 00 48 89 44 24 18 48 8d 44 24 30 8b 70 08 48 8b 50 10 4c 0f 43 50 18 48 89 44 24 20 b8 65 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 41 48 85 c0 78 06 41 83 f8 02 76 1e 48 8b 4c
> [   76.001535] RSP: 002b:00007fff18a9f6f0 EFLAGS: 00000202 ORIG_RAX: 0000000000000065
> [   76.010465] RAX: ffffffffffffffda RBX: 0000000000000862 RCX: 00007f50acf5692f
> [   76.018960] RDX: 0000000000000202 RSI: 0000000000000862 RDI: 0000000000004205
> [   76.027433] RBP: 000055576f8960c0 R08: 0000000000004204 R09: 00007f50ad027500
> [   76.035947] R10: 00007fff18a9f770 R11: 0000000000000202 R12: 0000000000000340
> [   76.044428] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
> [   76.052957] Modules linked in: xfs libcrc32c sr_mod sd_mod cdrom t10_pi sg intel_rapl_msr intel_rapl_common sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm mgag200 irqbypass crct10dif_pclmul drm_kms_helper crc32_pclmul ipmi_ssif crc32c_intel ghash_clmulni_intel rapl syscopyarea sysfillrect intel_cstate ahci sysimgblt fb_sys_fops libahci mpt3sas acpi_ipmi intel_uncore raid_class drm libata ipmi_si joydev scsi_transport_sas wmi ipmi_devintf ipmi_msghandler acpi_pad ip_tables
> [   76.104215] CR2: 0000000000000000
> [   76.108641] ---[ end trace dd34396ee89519a4 ]---
> [   76.114448] RIP: 0010:xstateregs_set+0x9f/0x140
> [   76.120185] Code: e0 5d 41 5c 41 5d 41 5e 41 5f c3 41 89 cf 4d 89 ce 4c 89 ff e8 32 87 2e 00 48 89 c5 48 85 c0 0f 84 8a 00 00 00 45 85 e4 74 20 <48> 8b 34 25 00 00 00 00 48 85 f6 74 2f 4c 89 fa 48 89 c7 e8 89 78
> [   76.142571] RSP: 0018:ffffc90023a47e38 EFLAGS: 00010202
> [   76.149152] RAX: ffffc9000c9dd000 RBX: ffff88c107824f80 RCX: 8000000000000063
> [   76.157855] RDX: 0000000000000001 RSI: ffff888100000000 RDI: ffff888100203328
> [   76.166562] RBP: ffffc9000c9dd000 R08: ffff88810cdffef0 R09: 8000000000000063
> [   76.175265] R10: 0000000000000ee8 R11: ffff88a08159c3f0 R12: 0000000000000340
> [   76.183967] R13: ffff88c1078273c0 R14: 000055576f8960c0 R15: 0000000000000340
> [   76.192662] FS:  00007f50ad027500(0000) GS:ffff88bfff980000(0000) knlGS:0000000000000000
> [   76.202425] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [   76.209590] CR2: 0000000000000000 CR3: 000000208df04004 CR4: 00000000001706e0
> [   76.218266] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [   76.226957] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [   76.235597] Kernel panic - not syncing: Fatal exception
> [   76.284889] Kernel Offset: disabled
> ACPI MEMORY or I/O RESET_REG.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 0/8] x86/fpu: Mop up XSAVES and related damage
  2021-06-04 22:04 ` Dave Hansen
@ 2021-06-05 10:18   ` Thomas Gleixner
  2021-06-05 11:56     ` Thomas Gleixner
  0 siblings, 1 reply; 33+ messages in thread
From: Thomas Gleixner @ 2021-06-05 10:18 UTC (permalink / raw)
  To: Dave Hansen, LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck, Yu-cheng Yu

On Fri, Jun 04 2021 at 15:04, Dave Hansen wrote:
> The nice Intel 0day folks threw some tests at this series.  It managed
> to trigger an oops.  I can't right this moment publish the source for
> the test, but it looks pretty trivial.  It basically creates a 0'd XSAVE
> buffer, sets XCOMP_BV to:
>
> #define XSAVES_FEATURES ( \
>         XFEATURE_MASK_PT | \
>         XFEATURE_MASK_SHSTK_USER | \
>         XFEATURE_MASK_SHSTK_KERNEL | \
>         0x8000000000000000 \
>         )
>
> Then passes that buffer in to ptrace(PTRACE_SETREGSET, ...).
>
> The oops is below.  It doesn't *look* to be XSAVES-related.  The oops
> looks like it's happening in xstateregs_set() itself as opposed to down
> in the code actually concerned with supervisor state.
>
> No bug is jumping out of the code as I took a brief look at it.  The
> xbuf versus kbuf code looks a bit wonky, but I can't find a hole in it.

I can....

--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -128,7 +128,7 @@ int xstateregs_set(struct task_struct *t
 		xbuf = vmalloc(count);
 		if (!xbuf)
 			return -ENOMEM;
-		ret = user_regset_copyin(&pos, &count, NULL, &ubuf, xbuf, 0, -1);
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xbuf, 0, -1);
 		if (ret)
 			goto out;
 	}

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [patch 0/8] x86/fpu: Mop up XSAVES and related damage
  2021-06-05 10:18   ` Thomas Gleixner
@ 2021-06-05 11:56     ` Thomas Gleixner
  0 siblings, 0 replies; 33+ messages in thread
From: Thomas Gleixner @ 2021-06-05 11:56 UTC (permalink / raw)
  To: Dave Hansen, LKML
  Cc: x86, Andy Lutomirski, Dave Hansen, Fenghua Yu, Tony Luck, Yu-cheng Yu

On Sat, Jun 05 2021 at 12:18, Thomas Gleixner wrote:
> On Fri, Jun 04 2021 at 15:04, Dave Hansen wrote:
>> No bug is jumping out of the code as I took a brief look at it.  The
>> xbuf versus kbuf code looks a bit wonky, but I can't find a hole in it.
>
> I can....
>
> --- a/arch/x86/kernel/fpu/regset.c
> +++ b/arch/x86/kernel/fpu/regset.c
> @@ -128,7 +128,7 @@ int xstateregs_set(struct task_struct *t
>  		xbuf = vmalloc(count);
>  		if (!xbuf)
>  			return -ENOMEM;
> -		ret = user_regset_copyin(&pos, &count, NULL, &ubuf, xbuf, 0, -1);
> +		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xbuf, 0, -1);
>  		if (ret)
>  			goto out;
>  	}

But this whole user_regset_copyin() is pointless here. See below.

Thanks,

        tglx
---
 include/asm/fpu/xstate.h |    4 ----
 kernel/fpu/regset.c      |   40 ++++++++++++++++------------------------
 kernel/fpu/xstate.c      |   12 +++++++-----
 3 files changed, 23 insertions(+), 33 deletions(-)

--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -112,8 +112,4 @@ void copy_supervisor_to_kernel(struct xr
 void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask);
 void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask);
 
-
-/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
-int validate_user_xstate_header(const struct xstate_header *hdr);
-
 #endif
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -6,6 +6,9 @@
 #include <asm/fpu/signal.h>
 #include <asm/fpu/regset.h>
 #include <asm/fpu/xstate.h>
+
+#include <linux/vmalloc.h>
+
 #include <linux/sched/task_stack.h>
 
 /*
@@ -108,7 +111,7 @@ int xstateregs_set(struct task_struct *t
 		  const void *kbuf, const void __user *ubuf)
 {
 	struct fpu *fpu = &target->thread.fpu;
-	struct xregs_state *xsave;
+	struct xregs_state *xbuf = NULL;
 	int ret;
 
 	if (!boot_cpu_has(X86_FEATURE_XSAVE))
@@ -120,32 +123,21 @@ int xstateregs_set(struct task_struct *t
 	if (pos != 0 || count != fpu_user_xstate_size)
 		return -EFAULT;
 
-	xsave = &fpu->state.xsave;
-
-	fpu__prepare_write(fpu);
-
-	if (using_compacted_format()) {
-		if (kbuf)
-			ret = copy_kernel_to_xstate(xsave, kbuf);
-		else
-			ret = copy_user_to_xstate(xsave, ubuf);
-	} else {
-		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
-		if (!ret)
-			ret = validate_user_xstate_header(&xsave->header);
+	if (!kbuf) {
+		xbuf = vmalloc(count);
+		if (!xbuf)
+			return -ENOMEM;
+		if (copy_from_user(xbuf, ubuf, count)) {
+			ret = -EFAULT;
+			goto out;
+		}
 	}
 
-	/*
-	 * mxcsr reserved bits must be masked to zero for security reasons.
-	 */
-	xsave->i387.mxcsr &= mxcsr_feature_mask;
-
-	/*
-	 * In case of failure, mark all states as init:
-	 */
-	if (ret)
-		fpstate_init(&fpu->state);
+	fpu__prepare_write(fpu);
+	ret = copy_kernel_to_xstate(&fpu->state.xsave, kbuf ? kbuf : xbuf);
 
+out:
+	vfree(xbuf);
 	return ret;
 }
 
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -515,7 +515,7 @@ int using_compacted_format(void)
 }
 
 /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
-int validate_user_xstate_header(const struct xstate_header *hdr)
+static int validate_user_xstate_header(const struct xstate_header *hdr)
 {
 	/* No unknown or supervisor features may be set */
 	if (hdr->xfeatures & ~xfeatures_mask_user())
@@ -1172,14 +1172,16 @@ int copy_kernel_to_xstate(struct xregs_s
 	 */
 	xsave->header.xfeatures |= hdr.xfeatures;
 
+	/* mxcsr reserved bits must be masked to zero for security reasons. */
+	xsave->i387.mxcsr &= mxcsr_feature_mask;
+
 	return 0;
 }
 
 /*
- * Convert from a ptrace or sigreturn standard-format user-space buffer to
- * kernel XSAVES format and copy to the target thread. This is called from
- * xstateregs_set(), as well as potentially from the sigreturn() and
- * rt_sigreturn() system calls.
+ * Convert from a sigreturn standard-format user-space buffer to kernel
+ * XSAVES format and copy to the target thread. This is called from the
+ * sigreturn() and rt_sigreturn() system calls.
  */
 int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 {

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2021-06-05 11:56 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-02  9:55 [patch 0/8] x86/fpu: Mop up XSAVES and related damage Thomas Gleixner
2021-06-02  9:55 ` [patch 1/8] selftests/x86: Test signal frame XSTATE header corruption handling Thomas Gleixner
2021-06-02 12:38   ` Borislav Petkov
2021-06-02 14:15     ` Thomas Gleixner
2021-06-03 13:16       ` Shuah Khan
2021-06-02 15:59   ` [patch V2 " Thomas Gleixner
2021-06-02 16:02     ` [patch V2a " Thomas Gleixner
2021-06-02  9:55 ` [patch 2/8] x86/fpu: Prevent state corruption in __fpu__restore_sig() Thomas Gleixner
2021-06-02 13:12   ` Borislav Petkov
2021-06-02 14:46     ` Thomas Gleixner
2021-06-02 15:58   ` [patch V2 " Thomas Gleixner
2021-06-02  9:55 ` [patch 3/8] x86/fpu: Invalidate FPU state after a failed XRSTOR from a user buffer Thomas Gleixner
2021-06-02 15:06   ` Borislav Petkov
2021-06-03 17:30     ` Andy Lutomirski
2021-06-03 19:28       ` Borislav Petkov
2021-06-02  9:55 ` [patch 4/8] x86/fpu: Limit xstate copy size in xstateregs_set() Thomas Gleixner
2021-06-02 17:44   ` Borislav Petkov
2021-06-02  9:55 ` [patch 5/8] x86/fpu: Sanitize xstateregs_set() Thomas Gleixner
2021-06-02 16:01   ` [patch V2 " Thomas Gleixner
2021-06-03 11:32     ` Borislav Petkov
2021-06-03 17:24   ` [patch " Andy Lutomirski
2021-06-02  9:55 ` [patch 6/8] x86/fpu: Add address range checks to copy_user_to_xstate() Thomas Gleixner
2021-06-02  9:55 ` [patch 7/8] x86/fpu: Clean up the fpu__clear() variants Thomas Gleixner
2021-06-02  9:55 ` [patch 8/8] x86/fpu: Deduplicate copy_xxx_to_xstate() Thomas Gleixner
2021-06-03 16:56   ` Andy Lutomirski
2021-06-02 21:28 ` [patch 0/8] x86/fpu: Mop up XSAVES and related damage Yu, Yu-cheng
2021-06-04 14:05   ` Thomas Gleixner
2021-06-04 16:27     ` Yu, Yu-cheng
2021-06-04 17:46     ` Dave Hansen
2021-06-04 18:14       ` Thomas Gleixner
2021-06-04 22:04 ` Dave Hansen
2021-06-05 10:18   ` Thomas Gleixner
2021-06-05 11:56     ` Thomas Gleixner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).