* x86/mm: Randomize per-cpu entry area
@ 2022-10-07  8:42 Peter Zijlstra
  2022-10-07 14:44 ` Jason A. Donenfeld
From: Peter Zijlstra @ 2022-10-07  8:42 UTC
  To: x86
  Cc: Jann Horn, Natalie Silvanovich, Seth Jenkins, Kees Cook,
	linux-kernel, Jason A. Donenfeld


Seth found that the CPU entry area, the piece of per-cpu data that is
mapped into the userspace page-tables for kPTI, is not subject to any
randomization -- irrespective of kASLR settings.

On x86_64 a whole P4D (512 GB) of virtual address space is reserved for
this structure, which is plenty large enough to randomize things a
little.

As such, use a straightforward randomization scheme that avoids
duplicates to spread the existing CPUs over the available space.

Reported-by: Seth Jenkins <sethjenkins@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
---
 arch/x86/include/asm/cpu_entry_area.h |    4 --
 arch/x86/include/asm/pgtable_areas.h  |    8 +++++
 arch/x86/kernel/hw_breakpoint.c       |    2 -
 arch/x86/mm/cpu_entry_area.c          |   46 +++++++++++++++++++++++++++++++---
 4 files changed, 50 insertions(+), 10 deletions(-)

--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -130,10 +130,6 @@ struct cpu_entry_area {
 };
 
 #define CPU_ENTRY_AREA_SIZE		(sizeof(struct cpu_entry_area))
-#define CPU_ENTRY_AREA_ARRAY_SIZE	(CPU_ENTRY_AREA_SIZE * NR_CPUS)
-
-/* Total size includes the readonly IDT mapping page as well: */
-#define CPU_ENTRY_AREA_TOTAL_SIZE	(CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE)
 
 DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
 DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
--- a/arch/x86/include/asm/pgtable_areas.h
+++ b/arch/x86/include/asm/pgtable_areas.h
@@ -11,6 +11,12 @@
 
 #define CPU_ENTRY_AREA_RO_IDT_VADDR	((void *)CPU_ENTRY_AREA_RO_IDT)
 
-#define CPU_ENTRY_AREA_MAP_SIZE		(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
+#ifdef CONFIG_X86_32
+#define CPU_ENTRY_AREA_MAP_SIZE		(CPU_ENTRY_AREA_PER_CPU +		\
+					 (CPU_ENTRY_AREA_SIZE * NR_CPUS) -	\
+					 CPU_ENTRY_AREA_BASE)
+#else
+#define CPU_ENTRY_AREA_MAP_SIZE		P4D_SIZE
+#endif
 
 #endif /* _ASM_X86_PGTABLE_AREAS_H */
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -266,7 +266,7 @@ static inline bool within_cpu_entry(unsi
 
 	/* CPU entry erea is always used for CPU entry */
 	if (within_area(addr, end, CPU_ENTRY_AREA_BASE,
-			CPU_ENTRY_AREA_TOTAL_SIZE))
+			CPU_ENTRY_AREA_MAP_SIZE))
 		return true;
 
 	/*
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -15,16 +15,53 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struc
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
 DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
-#endif
 
-#ifdef CONFIG_X86_32
+static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset);
+
+static inline unsigned int cea_offset(unsigned int cpu)
+{
+	return per_cpu(_cea_offset, cpu);
+}
+
+static __init void init_cea_offsets(void)
+{
+	unsigned int max_cea;
+	unsigned int i, j;
+
+	max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE;
+
+	/* O(sodding terrible) */
+	for_each_possible_cpu(i) {
+		unsigned int cea;
+
+again:
+		cea = prandom_u32_max(max_cea);
+
+		for_each_possible_cpu(j) {
+			if (cea_offset(j) == cea)
+				goto again;
+
+			if (i == j)
+				break;
+		}
+
+		per_cpu(_cea_offset, i) = cea;
+	}
+}
+#else /* !X86_64 */
 DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack);
+
+static inline unsigned int cea_offset(unsigned int cpu)
+{
+	return cpu;
+}
+static inline void init_cea_offsets(void) { }
 #endif
 
 /* Is called from entry code, so must be noinstr */
 noinstr struct cpu_entry_area *get_cpu_entry_area(int cpu)
 {
-	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
+	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cea_offset(cpu) * CPU_ENTRY_AREA_SIZE;
 	BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
 
 	return (struct cpu_entry_area *) va;
@@ -205,7 +242,6 @@ static __init void setup_cpu_entry_area_
 
 	/* The +1 is for the readonly IDT: */
 	BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
-	BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
 	BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
 
 	start = CPU_ENTRY_AREA_BASE;
@@ -221,6 +257,8 @@ void __init setup_cpu_entry_areas(void)
 {
 	unsigned int cpu;
 
+	init_cea_offsets();
+
 	setup_cpu_entry_area_ptes();
 
 	for_each_possible_cpu(cpu)
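
For anyone who wants to poke at the placement scheme outside the kernel,
here is a minimal userspace model of the same idea. It is only a sketch:
NR_CPUS, AREA_SIZE and rand_below() are illustrative stand-ins, not the
kernel's definitions, and rand() is obviously not a suitable entropy
source for the real thing.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS		8		/* stand-in, not the kernel's NR_CPUS */
#define AREA_SIZE	(59 * 4096UL)	/* ~sizeof(struct cpu_entry_area), config dependent */
#define MAP_SIZE	(1UL << 39)	/* P4D_SIZE on x86_64 */
#define PG_SIZE		4096UL

static unsigned int cea_offset[NR_CPUS];

/* stand-in for prandom_u32_max(): returns a value in [0, ceil) */
static unsigned int rand_below(unsigned int ceil)
{
	return (unsigned int)(((uint64_t)rand() * ceil) / ((uint64_t)RAND_MAX + 1));
}

/* same duplicate-avoiding retry loop as init_cea_offsets() above */
static void init_offsets(void)
{
	unsigned int max_cea = (MAP_SIZE - PG_SIZE) / AREA_SIZE;
	unsigned int i, j;

	for (i = 0; i < NR_CPUS; i++) {
		unsigned int cea;
again:
		cea = rand_below(max_cea);
		for (j = 0; j < i; j++) {
			if (cea_offset[j] == cea)
				goto again;
		}
		cea_offset[i] = cea;
	}
}

int main(void)
{
	unsigned int cpu;

	srand(1);	/* fixed seed so runs are reproducible */
	init_offsets();

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu %u: slot %u (+%lu KiB into the P4D)\n",
		       cpu, cea_offset[cpu], cea_offset[cpu] * AREA_SIZE / 1024);
	return 0;
}

With roughly 2.3 million slots and only a handful of CPUs the goto-retry
path is almost never taken, which is why the quadratic duplicate check
is harmless in practice.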


* Re: x86/mm: Randomize per-cpu entry area
  2022-10-07  8:42 x86/mm: Randomize per-cpu entry area Peter Zijlstra
@ 2022-10-07 14:44 ` Jason A. Donenfeld
  2022-10-07 15:14   ` Peter Zijlstra
From: Jason A. Donenfeld @ 2022-10-07 14:44 UTC
  To: Peter Zijlstra
  Cc: x86, Jann Horn, Natalie Silvanovich, Seth Jenkins, Kees Cook,
	linux-kernel

On Fri, Oct 07, 2022 at 10:42:36AM +0200, Peter Zijlstra wrote:
> +	max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE;
> +
> +	/* O(sodding terrible) */
> +	for_each_possible_cpu(i) {
> +		unsigned int cea;
> +
> +again:
> +		cea = prandom_u32_max(max_cea);

Just FYI, max_cea is 2274876 (at least in my test rig), which means the
values returned from prandom_u32_max() won't be uniformly distributed.
Right now the kernel doesn't have a function that does rejection
sampling to get uniform distribution, but I could add one if you need.
Alternatively, maybe you don't actually *need* this to be perfectly
distributed -- which seems to be a common perspective -- and so this is
good enough. Your call, but just wanted to make you aware.
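
For concreteness, the rejection-sampling variant being discussed looks
roughly like this (plain C sketch; rand32() is a placeholder for any
uniform 32-bit source, not an existing kernel helper):

#include <stdint.h>

extern uint32_t rand32(void);	/* placeholder: uniform 32-bit source */

/*
 * Uniform value in [0, ceil): throw away draws from the "short" partial
 * range at the top of the 32-bit space that would otherwise make some
 * residues slightly more likely than others.
 */
static uint32_t rand32_below(uint32_t ceil)
{
	uint32_t limit = UINT32_MAX - (UINT32_MAX % ceil);
	uint32_t r;

	do {
		r = rand32();
	} while (r >= limit);

	return r % ceil;
}

For ceil = 2274876 that loop would reject only about one draw in three
million, and the bias it removes is on the order of one part in ~1900,
which is why "good enough" may well be fine here.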

Jason


* Re: x86/mm: Randomize per-cpu entry area
  2022-10-07 14:44 ` Jason A. Donenfeld
@ 2022-10-07 15:14   ` Peter Zijlstra
From: Peter Zijlstra @ 2022-10-07 15:14 UTC
  To: Jason A. Donenfeld
  Cc: x86, Jann Horn, Natalie Silvanovich, Seth Jenkins, Kees Cook,
	linux-kernel

On Fri, Oct 07, 2022 at 08:44:52AM -0600, Jason A. Donenfeld wrote:
> On Fri, Oct 07, 2022 at 10:42:36AM +0200, Peter Zijlstra wrote:
> > +	max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE;
> > +
> > +	/* O(sodding terrible) */
> > +	for_each_possible_cpu(i) {
> > +		unsigned int cea;
> > +
> > +again:
> > +		cea = prandom_u32_max(max_cea);
> 
> Just FYI, max_cea is 2274876 (at least in my test rig), which means the

Yeah, that sounds about right, just over 21 bits.
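
Working backwards from that number (everything here is config dependent,
so the exact figures are only illustrative):

	CPU_ENTRY_AREA_MAP_SIZE = P4D_SIZE = 1UL << 39 = 512 GiB
	max_cea = (512 GiB - 4 KiB) / CPU_ENTRY_AREA_SIZE = 2274876 ~= 2^21.1
	=> CPU_ENTRY_AREA_SIZE = 241664 bytes = 59 pages = 236 KiB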

> values returned from prandom_u32_max() won't be uniformly distributed.
> Right now the kernel doesn't have a function that does rejection
> sampling to get uniform distribution, but I could add one if you need.

I figured it was better than '% max_cea'.

> Alternatively, maybe you don't actually *need* this to be perfectly
> distributed -- which seems to be a common perspective -- and so this is
> good enough. Your call, but just wanted to make you aware.

First iteration had an LFSR to 'uniformly' and uniquely distribute the
CEAs around, but someone took offence to that :-). Anyway, I think
anything non-obvious is good enough in this case.
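
For the record, the reason an LFSR gives unique offsets by construction
is that a maximal-length LFSR visits every nonzero state exactly once
per period. A generic illustration (not the dropped patch) using the
textbook 16-bit Galois LFSR:

#include <stdint.h>
#include <stdio.h>

/*
 * One step of a 16-bit Galois LFSR; 0xB400 is a maximal-length tap mask,
 * so from any nonzero seed this walks all 65535 nonzero states before
 * repeating.
 */
static uint16_t lfsr_next(uint16_t x)
{
	uint16_t lsb = x & 1;

	x >>= 1;
	if (lsb)
		x ^= 0xB400;
	return x;
}

int main(void)
{
	uint16_t state = 0xACE1;	/* arbitrary nonzero seed */
	int cpu;

	/* hand each "CPU" the next state as its (unique) slot number */
	for (cpu = 0; cpu < 8; cpu++) {
		printf("cpu %d: slot 0x%04x\n", cpu, state);
		state = lfsr_next(state);
	}
	return 0;
}

The obvious catch is that successive states are trivially related, so
leaking one CPU's offset would reveal the others, which is one plausible
objection to that approach.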

