linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v5] x86/fsgsbase/64: Fix the base write helper functions
@ 2018-11-16 23:27 Chang S. Bae
  2018-11-19  1:55 ` Andy Lutomirski
  2018-11-21 21:28 ` [tip:x86/urgent] " tip-bot for Chang S. Bae
  0 siblings, 2 replies; 6+ messages in thread
From: Chang S. Bae @ 2018-11-16 23:27 UTC (permalink / raw)
  To: Andy Lutomirski, Ingo Molnar
  Cc: Thomas Gleixner, H . Peter Anvin, Andi Kleen, Dave Hansen,
	Ravi Shankar, Chang S . Bae, LKML

The helper functions that purport to write the base should only write
the base. They shouldn't have magic optimizations to change the index.

Change the index explicitly in the caller, instead of including that
code in the helpers.

Consequently, the task write helpers no longer handle the current task.
The range check for a base value is also factored out of the helpers and
into the caller, to minimize code redundancy.

v2: Fix further on the task write functions. Revert the changes on the
task read helpers.

v3: Fix putreg(). Edit the changelog.

v4: Update the task write helper functions and do_arch_prctl_64(). Fix
the comment in putreg().

v5: Fix preempt_disable() calls in do_arch_prctl_64()

Suggested-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
---
 arch/x86/include/asm/fsgsbase.h | 15 ++++--
 arch/x86/kernel/process_64.c    | 86 ++++++++++++++++-----------------
 arch/x86/kernel/ptrace.c        |  9 ++--
 3 files changed, 58 insertions(+), 52 deletions(-)

diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h
index eb377b6e9eed..bca4c743de77 100644
--- a/arch/x86/include/asm/fsgsbase.h
+++ b/arch/x86/include/asm/fsgsbase.h
@@ -16,8 +16,8 @@
  */
 extern unsigned long x86_fsbase_read_task(struct task_struct *task);
 extern unsigned long x86_gsbase_read_task(struct task_struct *task);
-extern int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase);
-extern int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase);
+extern void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase);
+extern void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase);
 
 /* Helper functions for reading/writing FS/GS base */
 
@@ -39,8 +39,15 @@ static inline unsigned long x86_gsbase_read_cpu_inactive(void)
 	return gsbase;
 }
 
-extern void x86_fsbase_write_cpu(unsigned long fsbase);
-extern void x86_gsbase_write_cpu_inactive(unsigned long gsbase);
+static inline void x86_fsbase_write_cpu(unsigned long fsbase)
+{
+	wrmsrl(MSR_FS_BASE, fsbase);
+}
+
+static inline void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
+{
+	wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
+}
 
 #endif /* CONFIG_X86_64 */
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 31b4755369f0..e2c3dbe68d69 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -337,24 +337,6 @@ static unsigned long x86_fsgsbase_read_task(struct task_struct *task,
 	return base;
 }
 
-void x86_fsbase_write_cpu(unsigned long fsbase)
-{
-	/*
-	 * Set the selector to 0 as a notion, that the segment base is
-	 * overwritten, which will be checked for skipping the segment load
-	 * during context switch.
-	 */
-	loadseg(FS, 0);
-	wrmsrl(MSR_FS_BASE, fsbase);
-}
-
-void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
-{
-	/* Set the selector to 0 for the same reason as %fs above. */
-	loadseg(GS, 0);
-	wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
-}
-
 unsigned long x86_fsbase_read_task(struct task_struct *task)
 {
 	unsigned long fsbase;
@@ -383,38 +365,18 @@ unsigned long x86_gsbase_read_task(struct task_struct *task)
 	return gsbase;
 }
 
-int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase)
+void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase)
 {
-	/*
-	 * Not strictly needed for %fs, but do it for symmetry
-	 * with %gs
-	 */
-	if (unlikely(fsbase >= TASK_SIZE_MAX))
-		return -EPERM;
+	WARN_ON_ONCE(task == current);
 
-	preempt_disable();
 	task->thread.fsbase = fsbase;
-	if (task == current)
-		x86_fsbase_write_cpu(fsbase);
-	task->thread.fsindex = 0;
-	preempt_enable();
-
-	return 0;
 }
 
-int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
+void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
 {
-	if (unlikely(gsbase >= TASK_SIZE_MAX))
-		return -EPERM;
+	WARN_ON_ONCE(task == current);
 
-	preempt_disable();
 	task->thread.gsbase = gsbase;
-	if (task == current)
-		x86_gsbase_write_cpu_inactive(gsbase);
-	task->thread.gsindex = 0;
-	preempt_enable();
-
-	return 0;
 }
 
 int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
@@ -758,11 +720,47 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
 
 	switch (option) {
 	case ARCH_SET_GS: {
-		ret = x86_gsbase_write_task(task, arg2);
+		if (unlikely(arg2 >= TASK_SIZE_MAX))
+			return -EPERM;
+
+		preempt_disable();
+		/*
+		 * ARCH_SET_GS has always overwritten the index
+		 * and the base. Zero is the most sensible value
+		 * to put in the index, and is the only value that
+		 * makes any sense if FSGSBASE is unavailable.
+		 */
+		if (task == current) {
+			loadseg(GS, 0);
+			x86_gsbase_write_cpu_inactive(arg2);
+		} else {
+			task->thread.gsindex = 0;
+			x86_gsbase_write_task(task, arg2);
+		}
+		preempt_enable();
 		break;
 	}
 	case ARCH_SET_FS: {
-		ret = x86_fsbase_write_task(task, arg2);
+		/*
+		 * Not strictly needed for %fs, but do it for symmetry
+		 * with %gs
+		 */
+		if (unlikely(arg2 >= TASK_SIZE_MAX))
+			return -EPERM;
+
+		preempt_disable();
+		/*
+		 * Set the selector to 0 for the same reason
+		 * as %gs above.
+		 */
+		if (task == current) {
+			loadseg(FS, 0);
+			x86_fsbase_write_cpu(arg2);
+		} else {
+			task->thread.fsindex = 0;
+			x86_fsbase_write_task(task, arg2);
+		}
+		preempt_enable();
 		break;
 	}
 	case ARCH_GET_FS: {
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index ffae9b9740fd..4b8ee05dd6ad 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -397,11 +397,12 @@ static int putreg(struct task_struct *child,
 		if (value >= TASK_SIZE_MAX)
 			return -EIO;
 		/*
-		 * When changing the FS base, use the same
-		 * mechanism as for do_arch_prctl_64().
+		 * When changing the FS base, use do_arch_prctl_64()
+		 * to set the index to zero and to set the base
+		 * as requested.
 		 */
 		if (child->thread.fsbase != value)
-			return x86_fsbase_write_task(child, value);
+			return do_arch_prctl_64(child, ARCH_SET_FS, value);
 		return 0;
 	case offsetof(struct user_regs_struct,gs_base):
 		/*
@@ -410,7 +411,7 @@ static int putreg(struct task_struct *child,
 		if (value >= TASK_SIZE_MAX)
 			return -EIO;
 		if (child->thread.gsbase != value)
-			return x86_gsbase_write_task(child, value);
+			return do_arch_prctl_64(child, ARCH_SET_GS, value);
 		return 0;
 #endif
 	}
-- 
2.19.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v5] x86/fsgsbase/64: Fix the base write helper functions
  2018-11-16 23:27 [PATCH v5] x86/fsgsbase/64: Fix the base write helper functions Chang S. Bae
@ 2018-11-19  1:55 ` Andy Lutomirski
  2018-11-22 20:56   ` Ingo Molnar
  2018-11-21 21:28 ` [tip:x86/urgent] " tip-bot for Chang S. Bae
  1 sibling, 1 reply; 6+ messages in thread
From: Andy Lutomirski @ 2018-11-19  1:55 UTC (permalink / raw)
  To: Bae, Chang Seok
  Cc: Andrew Lutomirski, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	Andi Kleen, Dave Hansen, Ravi V. Shankar, LKML

On Fri, Nov 16, 2018 at 3:27 PM Chang S. Bae <chang.seok.bae@intel.com> wrote:
>
> The helper functions that purport to write the base should just write it
> only. It shouldn't have magic optimizations to change the index.
>
> Make the index explicitly changed from the caller, instead of including
> the code in the helpers.
>
> Subsequently, the task write helpers do not handle for the current task
> anymore. The range check for a base value is also factored out, to
> minimize code redundancy from the caller.
>
> v2: Fix further on the task write functions. Revert the changes on the
> task read helpers.
>
> v3: Fix putreg(). Edit the changelog.
>
> v4: Update the task write helper functions and do_arch_prctl_64(). Fix
> the comment in putreg().
>
> v5: Fix preempt_disable() calls in do_arch_prctl_64()

Reviewed-by: Andy Lutomirski <luto@kernel.org>

Ingo, Thomas: can we get this in x86/urgent, please?


> diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
> index ffae9b9740fd..4b8ee05dd6ad 100644
> --- a/arch/x86/kernel/ptrace.c
> +++ b/arch/x86/kernel/ptrace.c
> @@ -397,11 +397,12 @@ static int putreg(struct task_struct *child,
>                 if (value >= TASK_SIZE_MAX)
>                         return -EIO;
>                 /*
> -                * When changing the FS base, use the same
> -                * mechanism as for do_arch_prctl_64().
> +                * When changing the FS base, use do_arch_prctl_64()
> +                * to set the index to zero and to set the base
> +                * as requested.
>                  */
>                 if (child->thread.fsbase != value)
> -                       return x86_fsbase_write_task(child, value);
> +                       return do_arch_prctl_64(child, ARCH_SET_FS, value);

FWIW, this logic is and was nonsensical, but it matches historical
behavior, so I guess it's okay.  I suspect that gdb only works by
luck, since fs_base has a *higher* index than fs (and same for gs),
which means that SETREGS with a nonzero fs or gs likely only works
because the target almost always already has fs_base or gs_base == 0,
so we bypass this entire mess.

Sigh.  When you resubmit the full FSGSBASE series, I'll review the new
code extra carefully.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [tip:x86/urgent] x86/fsgsbase/64: Fix the base write helper functions
  2018-11-16 23:27 [PATCH v5] x86/fsgsbase/64: Fix the base write helper functions Chang S. Bae
  2018-11-19  1:55 ` Andy Lutomirski
@ 2018-11-21 21:28 ` tip-bot for Chang S. Bae
  1 sibling, 0 replies; 6+ messages in thread
From: tip-bot for Chang S. Bae @ 2018-11-21 21:28 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: ak, mingo, chang.seok.bae, dave.hansen, hpa, luto, linux-kernel,
	ravi.v.shankar, tglx

Commit-ID:  8b791a31e730a652537635a53b2ac02db8e6da1d
Gitweb:     https://git.kernel.org/tip/8b791a31e730a652537635a53b2ac02db8e6da1d
Author:     Chang S. Bae <chang.seok.bae@intel.com>
AuthorDate: Fri, 16 Nov 2018 15:27:28 -0800
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 21 Nov 2018 22:23:51 +0100

x86/fsgsbase/64: Fix the base write helper functions

The helper functions that purport to write the base should only write
the base. They shouldn't have magic optimizations to change the index.

Make the index explicitly changed in the caller, instead of including the
code in the helpers.

Consequently, the task write helpers no longer handle the current task.
The range check for a base value is also factored out of the helpers and
into the caller, to minimize code redundancy.

Fixes: b1378a561fd1 ("x86/fsgsbase/64: Introduce FS/GS base helper functions")
Suggested-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Ravi Shankar <ravi.v.shankar@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Link: https://lkml.kernel.org/r/20181116232728.23538-1-chang.seok.bae@intel.com

---
 arch/x86/include/asm/fsgsbase.h | 15 +++++--
 arch/x86/kernel/process_64.c    | 86 ++++++++++++++++++++---------------------
 arch/x86/kernel/ptrace.c        |  9 +++--
 3 files changed, 58 insertions(+), 52 deletions(-)

diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h
index eb377b6e9eed..bca4c743de77 100644
--- a/arch/x86/include/asm/fsgsbase.h
+++ b/arch/x86/include/asm/fsgsbase.h
@@ -16,8 +16,8 @@
  */
 extern unsigned long x86_fsbase_read_task(struct task_struct *task);
 extern unsigned long x86_gsbase_read_task(struct task_struct *task);
-extern int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase);
-extern int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase);
+extern void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase);
+extern void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase);
 
 /* Helper functions for reading/writing FS/GS base */
 
@@ -39,8 +39,15 @@ static inline unsigned long x86_gsbase_read_cpu_inactive(void)
 	return gsbase;
 }
 
-extern void x86_fsbase_write_cpu(unsigned long fsbase);
-extern void x86_gsbase_write_cpu_inactive(unsigned long gsbase);
+static inline void x86_fsbase_write_cpu(unsigned long fsbase)
+{
+	wrmsrl(MSR_FS_BASE, fsbase);
+}
+
+static inline void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
+{
+	wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
+}
 
 #endif /* CONFIG_X86_64 */
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 0e0b4288a4b2..74035c2a85b3 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -337,24 +337,6 @@ static unsigned long x86_fsgsbase_read_task(struct task_struct *task,
 	return base;
 }
 
-void x86_fsbase_write_cpu(unsigned long fsbase)
-{
-	/*
-	 * Set the selector to 0 as a notion, that the segment base is
-	 * overwritten, which will be checked for skipping the segment load
-	 * during context switch.
-	 */
-	loadseg(FS, 0);
-	wrmsrl(MSR_FS_BASE, fsbase);
-}
-
-void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
-{
-	/* Set the selector to 0 for the same reason as %fs above. */
-	loadseg(GS, 0);
-	wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
-}
-
 unsigned long x86_fsbase_read_task(struct task_struct *task)
 {
 	unsigned long fsbase;
@@ -383,38 +365,18 @@ unsigned long x86_gsbase_read_task(struct task_struct *task)
 	return gsbase;
 }
 
-int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase)
+void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase)
 {
-	/*
-	 * Not strictly needed for %fs, but do it for symmetry
-	 * with %gs
-	 */
-	if (unlikely(fsbase >= TASK_SIZE_MAX))
-		return -EPERM;
+	WARN_ON_ONCE(task == current);
 
-	preempt_disable();
 	task->thread.fsbase = fsbase;
-	if (task == current)
-		x86_fsbase_write_cpu(fsbase);
-	task->thread.fsindex = 0;
-	preempt_enable();
-
-	return 0;
 }
 
-int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
+void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
 {
-	if (unlikely(gsbase >= TASK_SIZE_MAX))
-		return -EPERM;
+	WARN_ON_ONCE(task == current);
 
-	preempt_disable();
 	task->thread.gsbase = gsbase;
-	if (task == current)
-		x86_gsbase_write_cpu_inactive(gsbase);
-	task->thread.gsindex = 0;
-	preempt_enable();
-
-	return 0;
 }
 
 int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
@@ -758,11 +720,47 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
 
 	switch (option) {
 	case ARCH_SET_GS: {
-		ret = x86_gsbase_write_task(task, arg2);
+		if (unlikely(arg2 >= TASK_SIZE_MAX))
+			return -EPERM;
+
+		preempt_disable();
+		/*
+		 * ARCH_SET_GS has always overwritten the index
+		 * and the base. Zero is the most sensible value
+		 * to put in the index, and is the only value that
+		 * makes any sense if FSGSBASE is unavailable.
+		 */
+		if (task == current) {
+			loadseg(GS, 0);
+			x86_gsbase_write_cpu_inactive(arg2);
+		} else {
+			task->thread.gsindex = 0;
+			x86_gsbase_write_task(task, arg2);
+		}
+		preempt_enable();
 		break;
 	}
 	case ARCH_SET_FS: {
-		ret = x86_fsbase_write_task(task, arg2);
+		/*
+		 * Not strictly needed for %fs, but do it for symmetry
+		 * with %gs
+		 */
+		if (unlikely(arg2 >= TASK_SIZE_MAX))
+			return -EPERM;
+
+		preempt_disable();
+		/*
+		 * Set the selector to 0 for the same reason
+		 * as %gs above.
+		 */
+		if (task == current) {
+			loadseg(FS, 0);
+			x86_fsbase_write_cpu(arg2);
+		} else {
+			task->thread.fsindex = 0;
+			x86_fsbase_write_task(task, arg2);
+		}
+		preempt_enable();
 		break;
 	}
 	case ARCH_GET_FS: {
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index ffae9b9740fd..4b8ee05dd6ad 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -397,11 +397,12 @@ static int putreg(struct task_struct *child,
 		if (value >= TASK_SIZE_MAX)
 			return -EIO;
 		/*
-		 * When changing the FS base, use the same
-		 * mechanism as for do_arch_prctl_64().
+		 * When changing the FS base, use do_arch_prctl_64()
+		 * to set the index to zero and to set the base
+		 * as requested.
 		 */
 		if (child->thread.fsbase != value)
-			return x86_fsbase_write_task(child, value);
+			return do_arch_prctl_64(child, ARCH_SET_FS, value);
 		return 0;
 	case offsetof(struct user_regs_struct,gs_base):
 		/*
@@ -410,7 +411,7 @@ static int putreg(struct task_struct *child,
 		if (value >= TASK_SIZE_MAX)
 			return -EIO;
 		if (child->thread.gsbase != value)
-			return x86_gsbase_write_task(child, value);
+			return do_arch_prctl_64(child, ARCH_SET_GS, value);
 		return 0;
 #endif
 	}

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v5] x86/fsgsbase/64: Fix the base write helper functions
  2018-11-19  1:55 ` Andy Lutomirski
@ 2018-11-22 20:56   ` Ingo Molnar
  2018-11-23  0:42     ` Andy Lutomirski
  0 siblings, 1 reply; 6+ messages in thread
From: Ingo Molnar @ 2018-11-22 20:56 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Bae, Chang Seok, Thomas Gleixner, H. Peter Anvin, Andi Kleen,
	Dave Hansen, Ravi V. Shankar, LKML


* Andy Lutomirski <luto@kernel.org> wrote:

> On Fri, Nov 16, 2018 at 3:27 PM Chang S. Bae <chang.seok.bae@intel.com> wrote:
> >
> > The helper functions that purport to write the base should just write it
> > only. It shouldn't have magic optimizations to change the index.
> >
> > Make the index explicitly changed from the caller, instead of including
> > the code in the helpers.
> >
> > Subsequently, the task write helpers do not handle for the current task
> > anymore. The range check for a base value is also factored out, to
> > minimize code redundancy from the caller.
> >
> > v2: Fix further on the task write functions. Revert the changes on the
> > task read helpers.
> >
> > v3: Fix putreg(). Edit the changelog.
> >
> > v4: Update the task write helper functions and do_arch_prctl_64(). Fix
> > the comment in putreg().
> >
> > v5: Fix preempt_disable() calls in do_arch_prctl_64()
> 
> Reviewed-by: Andy Lutomirski <luto@kernel.org>
> 
> Ingo, Thomas: can we get this in x86/urgent, please?

Sadly this commit introduced a boot failure on both an Intel and an AMD 
64-bit testbox.

Symptoms range from silent bootup hang in early userspace to segfaults 
like this:

[   21.885741] random: systemd: uninitialized urandom read (16 bytes read)
[   21.964778] systemd[1]: segfault at 28 ip 00005584d8d8247d sp 00007ffc7a05aed0 error 4 in systemd[5584d8d0d000+137000]
[   21.977664] Code: c3 4c 89 ff e8 94 78 fa ff eb bb 48 89 c3 eb f1 00 00 00 00 00 00 00 00 00 00 00 00 00 41 55 41 54 55 53 48 89 fd 48 83 ec 28 <64> 48 8b 04 25 28 00 00 00 48 89 44 24 18 31 c0 48 85 ff 74 6e 48
[   22.000004] systemd[1]: segfault at 28 ip 00005584d8db0a3d sp 00007ffc7a05a7e0 error 4 in systemd[5584d8d0d000+137000]
[   22.012869] Code: 49 89 e9 ba 67 01 00 00 bf 04 00 00 00 31 c0 e8 c9 1c 03 00 59 31 c0 5e e9 ff fa ff ff 41 54 55 53 89 fb 48 81 ec 40 01 00 00 <64> 48 8b 04 25 28 00 00 00 48 89 84 24 38 01 00 00 31 c0 e8 fb 92

I've zapped the commit from x86/urgent because it's clearly not ready 
yet.

I used a fairly regular distro .config and a fairly regular distro - 
nothing fancy.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v5] x86/fsgsbase/64: Fix the base write helper functions
  2018-11-22 20:56   ` Ingo Molnar
@ 2018-11-23  0:42     ` Andy Lutomirski
  2018-11-23  1:08       ` Andy Lutomirski
  0 siblings, 1 reply; 6+ messages in thread
From: Andy Lutomirski @ 2018-11-23  0:42 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Lutomirski, Bae, Chang Seok, Thomas Gleixner,
	H. Peter Anvin, Andi Kleen, Dave Hansen, Ravi V. Shankar, LKML

On Thu, Nov 22, 2018 at 12:56 PM Ingo Molnar <mingo@kernel.org> wrote:
>
>
> * Andy Lutomirski <luto@kernel.org> wrote:
>
> > On Fri, Nov 16, 2018 at 3:27 PM Chang S. Bae <chang.seok.bae@intel.com> wrote:
> > >
> > > The helper functions that purport to write the base should just write it
> > > only. It shouldn't have magic optimizations to change the index.
> > >
> > > Make the index explicitly changed from the caller, instead of including
> > > the code in the helpers.
> > >
> > > Subsequently, the task write helpers do not handle for the current task
> > > anymore. The range check for a base value is also factored out, to
> > > minimize code redundancy from the caller.
> > >
> > > v2: Fix further on the task write functions. Revert the changes on the
> > > task read helpers.
> > >
> > > v3: Fix putreg(). Edit the changelog.
> > >
> > > v4: Update the task write helper functions and do_arch_prctl_64(). Fix
> > > the comment in putreg().
> > >
> > > v5: Fix preempt_disable() calls in do_arch_prctl_64()
> >
> > Reviewed-by: Andy Lutomirski <luto@kernel.org>
> >
> > Ingo, Thomas: can we get this in x86/urgent, please?
>
> Sadly this commit introduced a boot failure on both an Intel and an AMD
> 64-bit testbox.
>
> Symptoms range from silent bootup hang in early userspace to segfaults
> like this:
>
> [   21.885741] random: systemd: uninitialized urandom read (16 bytes read)
> [   21.964778] systemd[1]: segfault at 28 ip 00005584d8d8247d sp 00007ffc7a05aed0 error 4 in systemd[5584d8d0d000+137000]
> [   21.977664] Code: c3 4c 89 ff e8 94 78 fa ff eb bb 48 89 c3 eb f1 00 00 00 00 00 00 00 00 00 00 00 00 00 41 55 41 54 55 53 48 89 fd 48 83 ec 28 <64> 48 8b 04 25 28 00 00 00 48 89 44 24 18 31 c0 48 85 ff 74 6e 48
> [   22.000004] systemd[1]: segfault at 28 ip 00005584d8db0a3d sp 00007ffc7a05a7e0 error 4 in systemd[5584d8d0d000+137000]
> [   22.012869] Code: 49 89 e9 ba 67 01 00 00 bf 04 00 00 00 31 c0 e8 c9 1c 03 00 59 31 c0 5e e9 ff fa ff ff 41 54 55 53 89 fb 48 81 ec 40 01 00 00 <64> 48 8b 04 25 28 00 00 00 48 89 84 24 38 01 00 00 31 c0 e8 fb 92
>
> I've zapped the commit from x86/urgent because it's clearly not ready
> yet.
>
> I used a fairly regular distro .config and a fairly regular distro -
> nothing fancy.
>

I can reproduce it.  Off the top of my head, maybe 0day is using a
different, weird glibc configuration?  I think it runs some ancient
version of Yocto.

And I think I've almost root-caused the problem, and I think it's a
preexisting bug exposed by this patch.  Lemme double-check and I'll
send a fix.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v5] x86/fsgsbase/64: Fix the base write helper functions
  2018-11-23  0:42     ` Andy Lutomirski
@ 2018-11-23  1:08       ` Andy Lutomirski
  0 siblings, 0 replies; 6+ messages in thread
From: Andy Lutomirski @ 2018-11-23  1:08 UTC (permalink / raw)
  To: Andrew Lutomirski
  Cc: Ingo Molnar, Bae, Chang Seok, Thomas Gleixner, H. Peter Anvin,
	Andi Kleen, Dave Hansen, Ravi V. Shankar, LKML

[-- Attachment #1: Type: text/plain, Size: 2808 bytes --]

On Thu, Nov 22, 2018 at 4:42 PM Andy Lutomirski <luto@kernel.org> wrote:
>
> On Thu, Nov 22, 2018 at 12:56 PM Ingo Molnar <mingo@kernel.org> wrote:
> >
> >
> > * Andy Lutomirski <luto@kernel.org> wrote:
> >
> > > On Fri, Nov 16, 2018 at 3:27 PM Chang S. Bae <chang.seok.bae@intel.com> wrote:
> > > >
> > > > The helper functions that purport to write the base should just write it
> > > > only. It shouldn't have magic optimizations to change the index.
> > > >
> > > > Make the index explicitly changed from the caller, instead of including
> > > > the code in the helpers.
> > > >
> > > > Subsequently, the task write helpers do not handle for the current task
> > > > anymore. The range check for a base value is also factored out, to
> > > > minimize code redundancy from the caller.
> > > >
> > > > v2: Fix further on the task write functions. Revert the changes on the
> > > > task read helpers.
> > > >
> > > > v3: Fix putreg(). Edit the changelog.
> > > >
> > > > v4: Update the task write helper functions and do_arch_prctl_64(). Fix
> > > > the comment in putreg().
> > > >
> > > > v5: Fix preempt_disable() calls in do_arch_prctl_64()
> > >
> > > Reviewed-by: Andy Lutomirski <luto@kernel.org>
> > >
> > > Ingo, Thomas: can we get this in x86/urgent, please?
> >
> > Sadly this commit introduced a boot failure on both an Intel and an AMD
> > 64-bit testbox.
> >
> > Symptoms range from silent bootup hang in early userspace to segfaults
> > like this:
> >
> > [   21.885741] random: systemd: uninitialized urandom read (16 bytes read)
> > [   21.964778] systemd[1]: segfault at 28 ip 00005584d8d8247d sp 00007ffc7a05aed0 error 4 in systemd[5584d8d0d000+137000]
> > [   21.977664] Code: c3 4c 89 ff e8 94 78 fa ff eb bb 48 89 c3 eb f1 00 00 00 00 00 00 00 00 00 00 00 00 00 41 55 41 54 55 53 48 89 fd 48 83 ec 28 <64> 48 8b 04 25 28 00 00 00 48 89 44 24 18 31 c0 48 85 ff 74 6e 48
> > [   22.000004] systemd[1]: segfault at 28 ip 00005584d8db0a3d sp 00007ffc7a05a7e0 error 4 in systemd[5584d8d0d000+137000]
> > [   22.012869] Code: 49 89 e9 ba 67 01 00 00 bf 04 00 00 00 31 c0 e8 c9 1c 03 00 59 31 c0 5e e9 ff fa ff ff 41 54 55 53 89 fb 48 81 ec 40 01 00 00 <64> 48 8b 04 25 28 00 00 00 48 89 84 24 38 01 00 00 31 c0 e8 fb 92
> >
> > I've zapped the commit from x86/urgent because it's clearly not ready
> > yet.
> >
> > I used a fairly regular distro .config and a fairly regular distro -
> > nothing fancy.
> >
>
> I can reproduce it.  Off the top of my head, maybe 0day is using a
> different, weird glibc configuration?  I think it runs some ancient
> version of Yocto.
>
> And I think I've almost root-caused the problem, and I think it's a
> preexisting bug exposed by this patch.  Lemme double-check and I'll
> send a fix.

Nope, I'm wrong.  Delta fix attached.  Want to just fold this in?

[-- Attachment #2: fix.diff --]
[-- Type: text/x-patch, Size: 1290 bytes --]

commit a5f99bc80c9bdacf3a1902f8922ca9b939d52723
Author: Andy Lutomirski <luto@kernel.org>
Date:   Thu Nov 22 16:53:57 2018 -0800

    x86/fsgsbase/64: Fix do_arch_prctl_64() to work correctly wrt save_base_legacy()
    
    Signed-off-by: Andy Lutomirski <luto@kernel.org>

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 74035c2a85b3..fcf528df5b8d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -733,6 +733,13 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
 		if (task == current) {
 			loadseg(GS, 0);
 			x86_gsbase_write_cpu_inactive(arg2);
+
+			/*
+			 * On non-FSGSBASE systems, save_base_legacy() expects
+			 * that we also fill in thread.gsbase.
+			 */
+			task->thread.gsbase = arg2;
+
 		} else {
 			task->thread.gsindex = 0;
 			x86_gsbase_write_task(task, arg2);
@@ -756,6 +763,12 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
 		if (task == current) {
 			loadseg(FS, 0);
 			x86_fsbase_write_cpu(arg2);
+
+			/*
+			 * On non-FSGSBASE systems, save_base_legacy() expects
+			 * that we also fill in thread.fsbase.
+			 */
+			task->thread.fsbase = arg2;
 		} else {
 			task->thread.fsindex = 0;
 			x86_fsbase_write_task(task, arg2);

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-11-23  1:09 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-11-16 23:27 [PATCH v5] x86/fsgsbase/64: Fix the base write helper functions Chang S. Bae
2018-11-19  1:55 ` Andy Lutomirski
2018-11-22 20:56   ` Ingo Molnar
2018-11-23  0:42     ` Andy Lutomirski
2018-11-23  1:08       ` Andy Lutomirski
2018-11-21 21:28 ` [tip:x86/urgent] " tip-bot for Chang S. Bae

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).