* [PATCH 1/4] x86/fsgsbase/64: Fully initialize FS and GS state in start_thread_common
From: Andy Lutomirski @ 2017-08-01 14:11 UTC
  To: X86 ML; +Cc: Borislav Petkov, Bae, Chang Seok, Andy Lutomirski, stable

execve used to leak FSBASE and GSBASE on AMD CPUs.  Fix it.

The security impact of this bug is small but not quite zero -- it
could weaken ASLR when a privileged task execs a less privileged
program, but only if the program changed bitness across the exec, or the
child binary was highly unusual or actively malicious.  A child
program that was compromised after the exec would not have access to
the leaked base.
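
For illustration, a rough userspace sketch -- not part of this patch
-- of how the leak could be observed.  It assumes an affected AMD CPU
and an environment where RDGSBASE is usable from userspace (i.e.
CR4.FSGSBASE is set, which mainline kernels do not enable as of this
series), so treat it as a thought experiment:

  /* gsleak.c: plant a recognizable GSBASE, then re-exec ourselves.
   * With the bug, the child's live GSBASE still holds the parent's
   * value even though the child never set one.
   */
  #include <asm/prctl.h>
  #include <err.h>
  #include <stdio.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  int main(int argc, char **argv)
  {
          unsigned long base;

          if (argc > 1) {
                  /* Child: read the live GSBASE (needs CR4.FSGSBASE). */
                  asm volatile ("rdgsbase %0" : "=r" (base));
                  printf("GSBASE after exec: 0x%lx\n", base);
                  return 0;
          }

          if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0x123456789000UL))
                  err(1, "ARCH_SET_GS");
          execl("/proc/self/exe", argv[0], "child", (char *)NULL);
          err(1, "execl");
  }

With this patch applied, the child should always observe a base of zero.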

Cc: stable@vger.kernel.org
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/kernel/process_64.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 2987e3991c2b..b544bc027859 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -229,10 +229,19 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
 		    unsigned long new_sp,
 		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
 {
+	WARN_ON_ONCE(regs != current_pt_regs());
+
+	if (static_cpu_has(X86_BUG_NULL_SEG)) {
+		/* Loading zero below won't clear the base. */
+		loadsegment(fs, __USER_DS);
+		load_gs_index(__USER_DS);
+	}
+
 	loadsegment(fs, 0);
 	loadsegment(es, _ds);
 	loadsegment(ds, _ds);
 	load_gs_index(0);
+
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
 	regs->cs		= _cs;
-- 
2.13.3

* [PATCH 2/4] x86/fsgsbase/64: Report FSBASE and GSBASE correctly in core dumps
From: Andy Lutomirski @ 2017-08-01 14:11 UTC
  To: X86 ML; +Cc: Borislav Petkov, Bae, Chang Seok, Andy Lutomirski, stable

In ELF_CORE_COPY_REGS, we're copying from the current task, so
accessing thread.fsbase and thread.gsbase makes no sense.  Just read
the values from the CPU registers.

In practice, the old code would have been correct most of the time
simply because thread.fsbase and thread.gsbase usually matched the
CPU registers.
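
As a concrete example -- not part of this patch -- userspace on an
Intel CPU can desynchronize the saved base from the live register; a
core dump taken after the asm below would previously have reported
the stale saved value:

  #include <asm/prctl.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  static void desync_gsbase(void)
  {
          unsigned short zero = 0;

          /* The kernel records thread.gsbase = 0xdeadbeef000. */
          syscall(SYS_arch_prctl, ARCH_SET_GS, 0xdeadbeef000UL);

          /*
           * On Intel CPUs (no X86_BUG_NULL_SEG), loading the null
           * selector zeroes the live GSBASE, but thread.gsbase is not
           * updated, so the saved and live values now disagree.
           */
          asm volatile ("mov %0, %%gs" : : "rm" (zero));
  }

Note also that, while the kernel runs, the user's GSBASE is parked in
MSR_KERNEL_GS_BASE by SWAPGS, which is why that MSR rather than
MSR_GS_BASE is read for the saved GSBASE slot.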

Cc: stable@vger.kernel.org
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/include/asm/elf.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 1c18d83d3f09..29df2a02beae 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -204,6 +204,7 @@ void set_personality_ia32(bool);
 
 #define ELF_CORE_COPY_REGS(pr_reg, regs)			\
 do {								\
+	unsigned long base;					\
 	unsigned v;						\
 	(pr_reg)[0] = (regs)->r15;				\
 	(pr_reg)[1] = (regs)->r14;				\
@@ -226,8 +227,8 @@ do {								\
 	(pr_reg)[18] = (regs)->flags;				\
 	(pr_reg)[19] = (regs)->sp;				\
 	(pr_reg)[20] = (regs)->ss;				\
-	(pr_reg)[21] = current->thread.fsbase;			\
-	(pr_reg)[22] = current->thread.gsbase;			\
+	rdmsrl(MSR_FS_BASE, base); (pr_reg)[21] = base;		\
+	rdmsrl(MSR_KERNEL_GS_BASE, base); (pr_reg)[22] = base;	\
 	asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v;	\
 	asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v;	\
 	asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v;	\
-- 
2.13.3

* [PATCH 3/4] selftests/x86/fsgsbase: Test selectors 1, 2, and 3
From: Andy Lutomirski @ 2017-08-01 14:11 UTC
  To: X86 ML; +Cc: Borislav Petkov, Bae, Chang Seok, Andy Lutomirski, stable

These selectors are funny cases: legal values that name the null
descriptor rather than a real segment.  Make sure they work.

(Something is screwy with signal handling if a selector is 1, 2, or 3.
Anyone who wants to dive into that rabbit hole is welcome to do so.)
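
To make the encoding explicit: a selector value is laid out as
index:13 | TI:1 | RPL:2, so the values 1, 2, and 3 all name the null
GDT descriptor (index 0, TI = 0) with a nonzero RPL.  The CPU accepts
them into FS/GS much like a null selector, but CPUs disagree on what
happens to the cached base.  A small illustrative helper, not part of
the patch:

  /* Build a segment selector from its fields. */
  static inline unsigned short make_sel(unsigned int index,
                                        unsigned int ti,
                                        unsigned int rpl)
  {
          return (unsigned short)((index << 3) | (ti << 2) | rpl);
  }

  /* make_sel(0, 0, 1) == 1, make_sel(0, 0, 2) == 2, etc. */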

Cc: stable@vger.kernel.org
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 tools/testing/selftests/x86/fsgsbase.c | 41 +++++++++++++++++++++++++++++-----
 1 file changed, 35 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index b4967d875236..f249e042b3b5 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -285,9 +285,12 @@ static void *threadproc(void *ctx)
 	}
 }
 
-static void set_gs_and_switch_to(unsigned long local, unsigned long remote)
+static void set_gs_and_switch_to(unsigned long local,
+				 unsigned short force_sel,
+				 unsigned long remote)
 {
 	unsigned long base;
+	unsigned short sel_pre_sched, sel_post_sched;
 
 	bool hard_zero = false;
 	if (local == HARD_ZERO) {
@@ -297,6 +300,8 @@ static void set_gs_and_switch_to(unsigned long local, unsigned long remote)
 
 	printf("[RUN]\tARCH_SET_GS(0x%lx)%s, then schedule to 0x%lx\n",
 	       local, hard_zero ? " and clear gs" : "", remote);
+	if (force_sel)
+		printf("\tBefore schedule, set selector to 0x%hx\n", force_sel);
 	if (syscall(SYS_arch_prctl, ARCH_SET_GS, local) != 0)
 		err(1, "ARCH_SET_GS");
 	if (hard_zero)
@@ -307,18 +312,35 @@ static void set_gs_and_switch_to(unsigned long local, unsigned long remote)
 		printf("[FAIL]\tGSBASE wasn't set as expected\n");
 	}
 
+	if (force_sel) {
+		asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
+		sel_pre_sched = force_sel;
+		local = read_base(GS);
+
+		/*
+		 * Signal delivery seems to mess up weird selectors.  Put it
+		 * back.
+		 */
+		asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
+	} else {
+		asm volatile ("mov %%gs, %0" : "=rm" (sel_pre_sched));
+	}
+
 	remote_base = remote;
 	ftx = 1;
 	syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
 	while (ftx != 0)
 		syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
 
+	asm volatile ("mov %%gs, %0" : "=rm" (sel_post_sched));
 	base = read_base(GS);
-	if (base == local) {
-		printf("[OK]\tGSBASE remained 0x%lx\n", local);
+	if (base == local && sel_pre_sched == sel_post_sched) {
+		printf("[OK]\tGS/BASE remained 0x%hx/0x%lx\n",
+		       sel_pre_sched, local);
 	} else {
 		nerrs++;
-		printf("[FAIL]\tGSBASE changed to 0x%lx\n", base);
+		printf("[FAIL]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx\n",
+		       sel_pre_sched, local, sel_post_sched, base);
 	}
 }
 
@@ -381,8 +403,15 @@ int main()
 
 	for (int local = 0; local < 4; local++) {
 		for (int remote = 0; remote < 4; remote++) {
-			set_gs_and_switch_to(bases_with_hard_zero[local],
-					     bases_with_hard_zero[remote]);
+			for (unsigned short s = 0; s < 5; s++) {
+				unsigned short sel = s;
+				if (s == 4)
+					asm ("mov %%ss, %0" : "=rm" (sel));
+				set_gs_and_switch_to(
+					bases_with_hard_zero[local],
+					sel,
+					bases_with_hard_zero[remote]);
+			}
 		}
 	}
 
-- 
2.13.3

* [PATCH 4/4] x86/switch_to/64: Rewrite FS/GS switching yet again to fix AMD CPUs
From: Andy Lutomirski @ 2017-08-01 14:11 UTC
  To: X86 ML; +Cc: Borislav Petkov, Bae, Chang Seok, Andy Lutomirski, stable

Switching FS and GS is a mess, and the current code is still subtly
wrong: it assumes that "Loading a nonzero value into FS sets the
index and base", which is false on AMD CPUs if the value being
loaded is 1, 2, or 3.

(The current code came from commit 3e2b68d752c9 ("x86/asm,
sched/x86: Rewrite the FS and GS context switch code"), which made
it better but didn't fully fix it.)

Rewrite it to be much simpler and more obviously correct.  This
should fix it fully on AMD CPUs and shouldn't adversely affect
performance.
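
For reference, a summary (per this series; not an exhaustive
statement about every CPU) of the selector-load behavior the new
code has to cope with:

  /*
   * Effect of "mov sel, %fs" / "mov sel, %gs" in 64-bit mode:
   *
   *   sel == 0:        Intel zeroes the cached base; AMD CPUs with
   *                    X86_BUG_NULL_SEG leave the old base in place.
   *   sel in {1,2,3}:  null descriptor with nonzero RPL; Intel zeroes
   *                    the base, affected AMD CPUs leave it unchanged.
   *   sel > 3:         the base is reloaded from the descriptor table
   *                    on all CPUs, so no MSR write is needed.
   */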

Cc: stable@vger.kernel.org
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/kernel/process_64.c | 227 +++++++++++++++++++++++--------------------
 1 file changed, 122 insertions(+), 105 deletions(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b544bc027859..69c59bb683c4 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -149,6 +149,123 @@ void release_thread(struct task_struct *dead_task)
 	}
 }
 
+enum which_selector {
+	FS,
+	GS
+};
+
+/*
+ * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are
+ * not available.  The goal is to be reasonably fast on non-FSGSBASE systems.
+ * It's forcibly inlined because it'll generate better code and this function
+ * is hot.
+ */
+static __always_inline void save_base_legacy(struct task_struct *prev_p,
+					     unsigned short selector,
+					     enum which_selector which)
+{
+	if (likely(selector == 0)) {
+		/*
+		 * On Intel (without X86_BUG_NULL_SEG), the segment base could
+		 * be the pre-existing saved base or it could be zero.  On AMD
+		 * (with X86_BUG_NULL_SEG), the segment base could be almost
+		 * anything.
+		 *
+		 * This branch is very hot (it's hit twice on almost every
+		 * context switch between 64-bit programs), and avoiding
+		 * the RDMSR helps a lot, so we just assume that whatever
+		 * value is already saved is correct.  This matches historical
+		 * Linux behavior, so it won't break existing applications.
+		 *
+		 * To avoid leaking state, on non-X86_BUG_NULL_SEG CPUs, if we
+		 * report that the base is zero, it needs to actually be zero:
+		 * see the corresponding logic in load_seg_legacy.
+		 */
+	} else {
+		/*
+		 * If the selector is 1, 2, or 3, then the base is zero on
+		 * !X86_BUG_NULL_SEG CPUs and could be anything on
+		 * X86_BUG_NULL_SEG CPUs.  In the latter case, Linux
+		 * has never attempted to preserve the base across context
+		 * switches.
+		 *
+		 * If selector > 3, then it refers to a real segment, and
+		 * saving the base isn't necessary.
+		 */
+		if (which == FS)
+			prev_p->thread.fsbase = 0;
+		else
+			prev_p->thread.gsbase = 0;
+	}
+}
+
+static __always_inline void save_fsgs(struct task_struct *task)
+{
+	savesegment(fs, task->thread.fsindex);
+	savesegment(gs, task->thread.gsindex);
+	save_base_legacy(task, task->thread.fsindex, FS);
+	save_base_legacy(task, task->thread.gsindex, GS);
+}
+
+static __always_inline void loadseg(enum which_selector which,
+				    unsigned short sel)
+{
+	if (which == FS)
+		loadsegment(fs, sel);
+	else
+		load_gs_index(sel);
+}
+
+static __always_inline void load_seg_legacy(unsigned short prev_index,
+					    unsigned long prev_base,
+					    unsigned short next_index,
+					    unsigned long next_base,
+					    enum which_selector which)
+{
+	if (likely(next_index <= 3)) {
+		/*
+		 * The next task is using 64-bit TLS, is not using this
+		 * segment at all, or is having fun with arcane CPU features.
+		 */
+		if (next_base == 0) {
+			/*
+			 * Nasty case: on AMD CPUs, we need to forcibly zero
+			 * the base.
+			 */
+			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
+				loadseg(which, __USER_DS);
+				loadseg(which, next_index);
+			} else {
+				/*
+				 * We could try to exhaustively detect cases
+				 * under which we can skip the segment load,
+				 * but there's really only one case that matters
+				 * for performance: if both the previous and
+				 * next states are fully zeroed, we can skip
+				 * the load.
+				 *
+				 * (This assumes that prev_base == 0 has no
+				 * false positives.  This is the case on
+				 * Intel-style CPUs.)
+				 */
+				if (likely(prev_index | next_index | prev_base))
+					loadseg(which, next_index);
+			}
+		} else {
+			if (prev_index != next_index)
+				loadseg(which, next_index);
+			wrmsrl(which == FS ? MSR_FS_BASE : MSR_KERNEL_GS_BASE,
+			       next_base);
+		}
+	} else {
+		/*
+		 * The next task is using a real segment.  Loading the selector
+		 * is sufficient.
+		 */
+		loadseg(which, next_index);
+	}
+}
+
 int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 		unsigned long arg, struct task_struct *p, unsigned long tls)
 {
@@ -286,7 +403,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	struct fpu *next_fpu = &next->fpu;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
-	unsigned prev_fsindex, prev_gsindex;
 
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
 		     this_cpu_read(irq_count) != -1);
@@ -298,8 +414,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 *
 	 * (e.g. xen_load_tls())
 	 */
-	savesegment(fs, prev_fsindex);
-	savesegment(gs, prev_gsindex);
+	save_fsgs(prev_p);
 
 	/*
 	 * Load TLS before restoring any segments so that segment loads
@@ -338,108 +453,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	if (unlikely(next->ds | prev->ds))
 		loadsegment(ds, next->ds);
 
-	/*
-	 * Switch FS and GS.
-	 *
-	 * These are even more complicated than DS and ES: they have
-	 * 64-bit bases are that controlled by arch_prctl.  The bases
-	 * don't necessarily match the selectors, as user code can do
-	 * any number of things to cause them to be inconsistent.
-	 *
-	 * We don't promise to preserve the bases if the selectors are
-	 * nonzero.  We also don't promise to preserve the base if the
-	 * selector is zero and the base doesn't match whatever was
-	 * most recently passed to ARCH_SET_FS/GS.  (If/when the
-	 * FSGSBASE instructions are enabled, we'll need to offer
-	 * stronger guarantees.)
-	 *
-	 * As an invariant,
-	 * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is
-	 * impossible.
-	 */
-	if (next->fsindex) {
-		/* Loading a nonzero value into FS sets the index and base. */
-		loadsegment(fs, next->fsindex);
-	} else {
-		if (next->fsbase) {
-			/* Next index is zero but next base is nonzero. */
-			if (prev_fsindex)
-				loadsegment(fs, 0);
-			wrmsrl(MSR_FS_BASE, next->fsbase);
-		} else {
-			/* Next base and index are both zero. */
-			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
-				/*
-				 * We don't know the previous base and can't
-				 * find out without RDMSR.  Forcibly clear it.
-				 */
-				loadsegment(fs, __USER_DS);
-				loadsegment(fs, 0);
-			} else {
-				/*
-				 * If the previous index is zero and ARCH_SET_FS
-				 * didn't change the base, then the base is
-				 * also zero and we don't need to do anything.
-				 */
-				if (prev->fsbase || prev_fsindex)
-					loadsegment(fs, 0);
-			}
-		}
-	}
-	/*
-	 * Save the old state and preserve the invariant.
-	 * NB: if prev_fsindex == 0, then we can't reliably learn the base
-	 * without RDMSR because Intel user code can zero it without telling
-	 * us and AMD user code can program any 32-bit value without telling
-	 * us.
-	 */
-	if (prev_fsindex)
-		prev->fsbase = 0;
-	prev->fsindex = prev_fsindex;
-
-	if (next->gsindex) {
-		/* Loading a nonzero value into GS sets the index and base. */
-		load_gs_index(next->gsindex);
-	} else {
-		if (next->gsbase) {
-			/* Next index is zero but next base is nonzero. */
-			if (prev_gsindex)
-				load_gs_index(0);
-			wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase);
-		} else {
-			/* Next base and index are both zero. */
-			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
-				/*
-				 * We don't know the previous base and can't
-				 * find out without RDMSR.  Forcibly clear it.
-				 *
-				 * This contains a pointless SWAPGS pair.
-				 * Fixing it would involve an explicit check
-				 * for Xen or a new pvop.
-				 */
-				load_gs_index(__USER_DS);
-				load_gs_index(0);
-			} else {
-				/*
-				 * If the previous index is zero and ARCH_SET_GS
-				 * didn't change the base, then the base is
-				 * also zero and we don't need to do anything.
-				 */
-				if (prev->gsbase || prev_gsindex)
-					load_gs_index(0);
-			}
-		}
-	}
-	/*
-	 * Save the old state and preserve the invariant.
-	 * NB: if prev_gsindex == 0, then we can't reliably learn the base
-	 * without RDMSR because Intel user code can zero it without telling
-	 * us and AMD user code can program any 32-bit value without telling
-	 * us.
-	 */
-	if (prev_gsindex)
-		prev->gsbase = 0;
-	prev->gsindex = prev_gsindex;
+	load_seg_legacy(prev->fsindex, prev->fsbase,
+			next->fsindex, next->fsbase, FS);
+	load_seg_legacy(prev->gsindex, prev->gsbase,
+			next->gsindex, next->gsbase, GS);
 
 	switch_fpu_finish(next_fpu, cpu);
 
-- 
2.13.3
