This is an update to the kASLR series. The improved e820 walking code from v7 has been reorganized and now explicitly avoids unsafe memory regions, rather than raising the minimum load address. -Kees
This is an update to the kASLR series. The improved e820 walking code from v7 has been reorganized and now explicitly avoids unsafe memory regions, rather than raising the minimum load address. -Kees
Refactor the CPU flags handling out of the cpucheck routines so that they can be reused by the future ASLR routines (in order to detect CPU features like RDRAND and RDTSC). This reworks has_eflag() and has_fpu() to be used on both 32-bit and 64-bit, and refactors the calls to cpuid to make them PIC-safe on 32-bit. Signed-off-by: Kees Cook <keescook@chromium.org> --- v7: - renamed patch to "boot" instead of "kaslr"; HPA. v3: - do not constrain registers in cpuid call; HPA. v2: - clean up has_eflags and has_fpu to be 64-bit sane; HPA. --- arch/x86/boot/Makefile | 2 +- arch/x86/boot/boot.h | 10 +--- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/cpuflags.c | 12 ++++ arch/x86/boot/cpucheck.c | 86 ----------------------------- arch/x86/boot/cpuflags.c | 104 +++++++++++++++++++++++++++++++++++ arch/x86/boot/cpuflags.h | 19 +++++++ 7 files changed, 138 insertions(+), 97 deletions(-) create mode 100644 arch/x86/boot/compressed/cpuflags.c create mode 100644 arch/x86/boot/cpuflags.c create mode 100644 arch/x86/boot/cpuflags.h diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 379814b..0da2e37 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -20,7 +20,7 @@ targets := vmlinux.bin setup.bin setup.elf bzImage targets += fdimage fdimage144 fdimage288 image.iso mtools.conf subdir- := compressed -setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o +setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o setup-y += early_serial_console.o edd.o header.o main.o mca.o memory.o setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o setup-y += video-mode.o version.o diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index ef72bae..50f8c5e 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -26,9 +26,8 @@ #include <asm/boot.h> #include <asm/setup.h> #include "bitops.h" -#include <asm/cpufeature.h> -#include <asm/processor-flags.h> #include "ctype.h" +#include "cpuflags.h" /* Useful 
macros */ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) @@ -307,14 +306,7 @@ static inline int cmdline_find_option_bool(const char *option) return __cmdline_find_option_bool(cmd_line_ptr, option); } - /* cpu.c, cpucheck.c */ -struct cpu_features { - int level; /* Family, or 64 for x86-64 */ - int model; - u32 flags[NCAPINTS]; -}; -extern struct cpu_features cpu; int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); int validate_cpu(void); diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index dcd90df..3312f1b 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -27,7 +27,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \ - $(obj)/piggy.o + $(obj)/piggy.o $(obj)/cpuflags.o $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone diff --git a/arch/x86/boot/compressed/cpuflags.c b/arch/x86/boot/compressed/cpuflags.c new file mode 100644 index 0000000..931cba6 --- /dev/null +++ b/arch/x86/boot/compressed/cpuflags.c @@ -0,0 +1,12 @@ +#ifdef CONFIG_RANDOMIZE_BASE + +#include "../cpuflags.c" + +bool has_cpuflag(int flag) +{ + get_flags(); + + return test_bit(flag, cpu.flags); +} + +#endif diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 4d3ff03..e1f3c16 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -28,8 +28,6 @@ #include <asm/required-features.h> #include <asm/msr-index.h> -struct cpu_features cpu; -static u32 cpu_vendor[3]; static u32 err_flags[NCAPINTS]; static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY; @@ -69,90 +67,6 @@ static int is_transmeta(void) cpu_vendor[2] == A32('M', 'x', '8', '6'); } -static int has_fpu(void) -{ - u16 fcw = -1, fsw = -1; - u32 cr0; - - asm("movl %%cr0,%0" : "=r" (cr0)); - if (cr0 & (X86_CR0_EM|X86_CR0_TS)) { - cr0 &= 
~(X86_CR0_EM|X86_CR0_TS); - asm volatile("movl %0,%%cr0" : : "r" (cr0)); - } - - asm volatile("fninit ; fnstsw %0 ; fnstcw %1" - : "+m" (fsw), "+m" (fcw)); - - return fsw == 0 && (fcw & 0x103f) == 0x003f; -} - -static int has_eflag(u32 mask) -{ - u32 f0, f1; - - asm("pushfl ; " - "pushfl ; " - "popl %0 ; " - "movl %0,%1 ; " - "xorl %2,%1 ; " - "pushl %1 ; " - "popfl ; " - "pushfl ; " - "popl %1 ; " - "popfl" - : "=&r" (f0), "=&r" (f1) - : "ri" (mask)); - - return !!((f0^f1) & mask); -} - -static void get_flags(void) -{ - u32 max_intel_level, max_amd_level; - u32 tfms; - - if (has_fpu()) - set_bit(X86_FEATURE_FPU, cpu.flags); - - if (has_eflag(X86_EFLAGS_ID)) { - asm("cpuid" - : "=a" (max_intel_level), - "=b" (cpu_vendor[0]), - "=d" (cpu_vendor[1]), - "=c" (cpu_vendor[2]) - : "a" (0)); - - if (max_intel_level >= 0x00000001 && - max_intel_level <= 0x0000ffff) { - asm("cpuid" - : "=a" (tfms), - "=c" (cpu.flags[4]), - "=d" (cpu.flags[0]) - : "a" (0x00000001) - : "ebx"); - cpu.level = (tfms >> 8) & 15; - cpu.model = (tfms >> 4) & 15; - if (cpu.level >= 6) - cpu.model += ((tfms >> 16) & 0xf) << 4; - } - - asm("cpuid" - : "=a" (max_amd_level) - : "a" (0x80000000) - : "ebx", "ecx", "edx"); - - if (max_amd_level >= 0x80000001 && - max_amd_level <= 0x8000ffff) { - u32 eax = 0x80000001; - asm("cpuid" - : "+a" (eax), - "=c" (cpu.flags[6]), - "=d" (cpu.flags[1]) - : : "ebx"); - } - } -} - /* Returns a bitmask of which words we have error bits in */ static int check_flags(void) { diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c new file mode 100644 index 0000000..b02544a --- /dev/null +++ b/arch/x86/boot/cpuflags.c @@ -0,0 +1,104 @@ +#include <linux/types.h> +#include "bitops.h" + +#include <asm/processor-flags.h> +#include <asm/required-features.h> +#include <asm/msr-index.h> +#include "cpuflags.h" + +struct cpu_features cpu; +u32 cpu_vendor[3]; + +static bool loaded_flags; + +static int has_fpu(void) +{ + u16 fcw = -1, fsw = -1; + unsigned long cr0; + + asm 
volatile("mov %%cr0,%0" : "=r" (cr0)); + if (cr0 & (X86_CR0_EM|X86_CR0_TS)) { + cr0 &= ~(X86_CR0_EM|X86_CR0_TS); + asm volatile("mov %0,%%cr0" : : "r" (cr0)); + } + + asm volatile("fninit ; fnstsw %0 ; fnstcw %1" + : "+m" (fsw), "+m" (fcw)); + + return fsw == 0 && (fcw & 0x103f) == 0x003f; +} + +int has_eflag(unsigned long mask) +{ + unsigned long f0, f1; + + asm volatile("pushf \n\t" + "pushf \n\t" + "pop %0 \n\t" + "mov %0,%1 \n\t" + "xor %2,%1 \n\t" + "push %1 \n\t" + "popf \n\t" + "pushf \n\t" + "pop %1 \n\t" + "popf" + : "=&r" (f0), "=&r" (f1) + : "ri" (mask)); + + return !!((f0^f1) & mask); +} + +/* Handle x86_32 PIC using ebx. */ +#if defined(__i386__) && defined(__PIC__) +# define EBX_REG "=r" +#else +# define EBX_REG "=b" +#endif + +static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d) +{ + asm volatile(".ifnc %%ebx,%3 ; movl %%ebx,%3 ; .endif \n\t" + "cpuid \n\t" + ".ifnc %%ebx,%3 ; xchgl %%ebx,%3 ; .endif \n\t" + : "=a" (*a), "=c" (*c), "=d" (*d), EBX_REG (*b) + : "a" (id) + ); +} + +void get_flags(void) +{ + u32 max_intel_level, max_amd_level; + u32 tfms; + u32 ignored; + + if (loaded_flags) + return; + loaded_flags = true; + + if (has_fpu()) + set_bit(X86_FEATURE_FPU, cpu.flags); + + if (has_eflag(X86_EFLAGS_ID)) { + cpuid(0x0, &max_intel_level, &cpu_vendor[0], &cpu_vendor[2], + &cpu_vendor[1]); + + if (max_intel_level >= 0x00000001 && + max_intel_level <= 0x0000ffff) { + cpuid(0x1, &tfms, &ignored, &cpu.flags[4], + &cpu.flags[0]); + cpu.level = (tfms >> 8) & 15; + cpu.model = (tfms >> 4) & 15; + if (cpu.level >= 6) + cpu.model += ((tfms >> 16) & 0xf) << 4; + } + + cpuid(0x80000000, &max_amd_level, &ignored, &ignored, + &ignored); + + if (max_amd_level >= 0x80000001 && + max_amd_level <= 0x8000ffff) { + cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6], + &cpu.flags[1]); + } + } +} diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h new file mode 100644 index 0000000..9bb4e25 --- /dev/null +++ b/arch/x86/boot/cpuflags.h @@ 
-0,0 +1,19 @@ +#ifndef BOOT_CPUFLAGS_H +#define BOOT_CPUFLAGS_H + +#include <asm/cpufeature.h> +#include <asm/processor-flags.h> + +struct cpu_features { + int level; /* Family, or 64 for x86-64 */ + int model; + u32 flags[NCAPINTS]; +}; + +extern struct cpu_features cpu; +extern u32 cpu_vendor[3]; + +int has_eflag(unsigned long mask); +void get_flags(void); + +#endif -- 1.7.9.5
Refactor the CPU flags handling out of the cpucheck routines so that they can be reused by the future ASLR routines (in order to detect CPU features like RDRAND and RDTSC). This reworks has_eflag() and has_fpu() to be used on both 32-bit and 64-bit, and refactors the calls to cpuid to make them PIC-safe on 32-bit. Signed-off-by: Kees Cook <keescook@chromium.org> --- v7: - renamed patch to "boot" instead of "kaslr"; HPA. v3: - do not constrain registers in cpuid call; HPA. v2: - clean up has_eflags and has_fpu to be 64-bit sane; HPA. --- arch/x86/boot/Makefile | 2 +- arch/x86/boot/boot.h | 10 +--- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/cpuflags.c | 12 ++++ arch/x86/boot/cpucheck.c | 86 ----------------------------- arch/x86/boot/cpuflags.c | 104 +++++++++++++++++++++++++++++++++++ arch/x86/boot/cpuflags.h | 19 +++++++ 7 files changed, 138 insertions(+), 97 deletions(-) create mode 100644 arch/x86/boot/compressed/cpuflags.c create mode 100644 arch/x86/boot/cpuflags.c create mode 100644 arch/x86/boot/cpuflags.h diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 379814b..0da2e37 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -20,7 +20,7 @@ targets := vmlinux.bin setup.bin setup.elf bzImage targets += fdimage fdimage144 fdimage288 image.iso mtools.conf subdir- := compressed -setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o +setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o setup-y += early_serial_console.o edd.o header.o main.o mca.o memory.o setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o setup-y += video-mode.o version.o diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index ef72bae..50f8c5e 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -26,9 +26,8 @@ #include <asm/boot.h> #include <asm/setup.h> #include "bitops.h" -#include <asm/cpufeature.h> -#include <asm/processor-flags.h> #include "ctype.h" +#include "cpuflags.h" /* Useful 
macros */ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) @@ -307,14 +306,7 @@ static inline int cmdline_find_option_bool(const char *option) return __cmdline_find_option_bool(cmd_line_ptr, option); } - /* cpu.c, cpucheck.c */ -struct cpu_features { - int level; /* Family, or 64 for x86-64 */ - int model; - u32 flags[NCAPINTS]; -}; -extern struct cpu_features cpu; int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); int validate_cpu(void); diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index dcd90df..3312f1b 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -27,7 +27,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \ - $(obj)/piggy.o + $(obj)/piggy.o $(obj)/cpuflags.o $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone diff --git a/arch/x86/boot/compressed/cpuflags.c b/arch/x86/boot/compressed/cpuflags.c new file mode 100644 index 0000000..931cba6 --- /dev/null +++ b/arch/x86/boot/compressed/cpuflags.c @@ -0,0 +1,12 @@ +#ifdef CONFIG_RANDOMIZE_BASE + +#include "../cpuflags.c" + +bool has_cpuflag(int flag) +{ + get_flags(); + + return test_bit(flag, cpu.flags); +} + +#endif diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 4d3ff03..e1f3c16 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -28,8 +28,6 @@ #include <asm/required-features.h> #include <asm/msr-index.h> -struct cpu_features cpu; -static u32 cpu_vendor[3]; static u32 err_flags[NCAPINTS]; static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY; @@ -69,90 +67,6 @@ static int is_transmeta(void) cpu_vendor[2] == A32('M', 'x', '8', '6'); } -static int has_fpu(void) -{ - u16 fcw = -1, fsw = -1; - u32 cr0; - - asm("movl %%cr0,%0" : "=r" (cr0)); - if (cr0 & (X86_CR0_EM|X86_CR0_TS)) { - cr0 &= 
~(X86_CR0_EM|X86_CR0_TS); - asm volatile("movl %0,%%cr0" : : "r" (cr0)); - } - - asm volatile("fninit ; fnstsw %0 ; fnstcw %1" - : "+m" (fsw), "+m" (fcw)); - - return fsw == 0 && (fcw & 0x103f) == 0x003f; -} - -static int has_eflag(u32 mask) -{ - u32 f0, f1; - - asm("pushfl ; " - "pushfl ; " - "popl %0 ; " - "movl %0,%1 ; " - "xorl %2,%1 ; " - "pushl %1 ; " - "popfl ; " - "pushfl ; " - "popl %1 ; " - "popfl" - : "=&r" (f0), "=&r" (f1) - : "ri" (mask)); - - return !!((f0^f1) & mask); -} - -static void get_flags(void) -{ - u32 max_intel_level, max_amd_level; - u32 tfms; - - if (has_fpu()) - set_bit(X86_FEATURE_FPU, cpu.flags); - - if (has_eflag(X86_EFLAGS_ID)) { - asm("cpuid" - : "=a" (max_intel_level), - "=b" (cpu_vendor[0]), - "=d" (cpu_vendor[1]), - "=c" (cpu_vendor[2]) - : "a" (0)); - - if (max_intel_level >= 0x00000001 && - max_intel_level <= 0x0000ffff) { - asm("cpuid" - : "=a" (tfms), - "=c" (cpu.flags[4]), - "=d" (cpu.flags[0]) - : "a" (0x00000001) - : "ebx"); - cpu.level = (tfms >> 8) & 15; - cpu.model = (tfms >> 4) & 15; - if (cpu.level >= 6) - cpu.model += ((tfms >> 16) & 0xf) << 4; - } - - asm("cpuid" - : "=a" (max_amd_level) - : "a" (0x80000000) - : "ebx", "ecx", "edx"); - - if (max_amd_level >= 0x80000001 && - max_amd_level <= 0x8000ffff) { - u32 eax = 0x80000001; - asm("cpuid" - : "+a" (eax), - "=c" (cpu.flags[6]), - "=d" (cpu.flags[1]) - : : "ebx"); - } - } -} - /* Returns a bitmask of which words we have error bits in */ static int check_flags(void) { diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c new file mode 100644 index 0000000..b02544a --- /dev/null +++ b/arch/x86/boot/cpuflags.c @@ -0,0 +1,104 @@ +#include <linux/types.h> +#include "bitops.h" + +#include <asm/processor-flags.h> +#include <asm/required-features.h> +#include <asm/msr-index.h> +#include "cpuflags.h" + +struct cpu_features cpu; +u32 cpu_vendor[3]; + +static bool loaded_flags; + +static int has_fpu(void) +{ + u16 fcw = -1, fsw = -1; + unsigned long cr0; + + asm 
volatile("mov %%cr0,%0" : "=r" (cr0)); + if (cr0 & (X86_CR0_EM|X86_CR0_TS)) { + cr0 &= ~(X86_CR0_EM|X86_CR0_TS); + asm volatile("mov %0,%%cr0" : : "r" (cr0)); + } + + asm volatile("fninit ; fnstsw %0 ; fnstcw %1" + : "+m" (fsw), "+m" (fcw)); + + return fsw == 0 && (fcw & 0x103f) == 0x003f; +} + +int has_eflag(unsigned long mask) +{ + unsigned long f0, f1; + + asm volatile("pushf \n\t" + "pushf \n\t" + "pop %0 \n\t" + "mov %0,%1 \n\t" + "xor %2,%1 \n\t" + "push %1 \n\t" + "popf \n\t" + "pushf \n\t" + "pop %1 \n\t" + "popf" + : "=&r" (f0), "=&r" (f1) + : "ri" (mask)); + + return !!((f0^f1) & mask); +} + +/* Handle x86_32 PIC using ebx. */ +#if defined(__i386__) && defined(__PIC__) +# define EBX_REG "=r" +#else +# define EBX_REG "=b" +#endif + +static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d) +{ + asm volatile(".ifnc %%ebx,%3 ; movl %%ebx,%3 ; .endif \n\t" + "cpuid \n\t" + ".ifnc %%ebx,%3 ; xchgl %%ebx,%3 ; .endif \n\t" + : "=a" (*a), "=c" (*c), "=d" (*d), EBX_REG (*b) + : "a" (id) + ); +} + +void get_flags(void) +{ + u32 max_intel_level, max_amd_level; + u32 tfms; + u32 ignored; + + if (loaded_flags) + return; + loaded_flags = true; + + if (has_fpu()) + set_bit(X86_FEATURE_FPU, cpu.flags); + + if (has_eflag(X86_EFLAGS_ID)) { + cpuid(0x0, &max_intel_level, &cpu_vendor[0], &cpu_vendor[2], + &cpu_vendor[1]); + + if (max_intel_level >= 0x00000001 && + max_intel_level <= 0x0000ffff) { + cpuid(0x1, &tfms, &ignored, &cpu.flags[4], + &cpu.flags[0]); + cpu.level = (tfms >> 8) & 15; + cpu.model = (tfms >> 4) & 15; + if (cpu.level >= 6) + cpu.model += ((tfms >> 16) & 0xf) << 4; + } + + cpuid(0x80000000, &max_amd_level, &ignored, &ignored, + &ignored); + + if (max_amd_level >= 0x80000001 && + max_amd_level <= 0x8000ffff) { + cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6], + &cpu.flags[1]); + } + } +} diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h new file mode 100644 index 0000000..9bb4e25 --- /dev/null +++ b/arch/x86/boot/cpuflags.h @@ 
-0,0 +1,19 @@ +#ifndef BOOT_CPUFLAGS_H +#define BOOT_CPUFLAGS_H + +#include <asm/cpufeature.h> +#include <asm/processor-flags.h> + +struct cpu_features { + int level; /* Family, or 64 for x86-64 */ + int model; + u32 flags[NCAPINTS]; +}; + +extern struct cpu_features cpu; +extern u32 cpu_vendor[3]; + +int has_eflag(unsigned long mask); +void get_flags(void); + +#endif -- 1.7.9.5
This allows decompress_kernel to return a new location for the kernel to be relocated to. Additionally, enforces CONFIG_PHYSICAL_START as the minimum relocation position when building with CONFIG_RELOCATABLE. With CONFIG_RANDOMIZE_BASE set, the choose_kernel_location routine will select a new location to decompress the kernel, though here it is presently a no-op. The kernel command line option "nokaslr" is introduced to bypass these routines. Signed-off-by: Kees Cook <keescook@chromium.org> --- v3: - treat LOAD_PHYSICAL_ADDR as minimum. v2: - renamed "noaslr" to "nokaslr"; HPA. --- Documentation/kernel-parameters.txt | 4 ++++ arch/x86/Kconfig | 38 +++++++++++++++++++++++++++++++---- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/aslr.c | 23 +++++++++++++++++++++ arch/x86/boot/compressed/cmdline.c | 2 +- arch/x86/boot/compressed/head_32.S | 10 +++++---- arch/x86/boot/compressed/head_64.S | 16 +++++++++------ arch/x86/boot/compressed/misc.c | 8 ++++++-- arch/x86/boot/compressed/misc.h | 27 +++++++++++++++++++------ 9 files changed, 106 insertions(+), 24 deletions(-) create mode 100644 arch/x86/boot/compressed/aslr.c diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index fcbb736..773fc4c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1975,6 +1975,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. noapic [SMP,APIC] Tells the kernel to not make use of any IOAPICs that may be present in the system. + nokaslr [X86] + Disable kernel base offset ASLR (Address Space + Layout Randomization) if built into the kernel. + noautogroup Disable scheduler automatic task group creation. 
nobats [PPC] Do not use BATs for mapping kernel lowmem diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ee2fb9d..992701d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1722,16 +1722,46 @@ config RELOCATABLE Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address it has been loaded at and the compile time physical address - (CONFIG_PHYSICAL_START) is ignored. + (CONFIG_PHYSICAL_START) is used as the minimum location. -# Relocation on x86-32 needs some additional build support +config RANDOMIZE_BASE + bool "Randomize the address of the kernel image" + depends on RELOCATABLE + depends on !HIBERNATION + default n + ---help--- + Randomizes the physical and virtual address at which the + kernel image is decompressed, as a security feature that + deters exploit attempts relying on knowledge of the location + of kernel internals. + + Entropy is generated using the RDRAND instruction if it + is supported. If not, then RDTSC is used, if supported. If + neither RDRAND nor RDTSC are supported, then no randomness + is introduced. + + The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET, + and aligned according to PHYSICAL_ALIGN. + +config RANDOMIZE_BASE_MAX_OFFSET + hex "Maximum ASLR offset allowed" + depends on RANDOMIZE_BASE + default "0x10000000" + range 0x0 0x10000000 + ---help--- + Determines the maximal offset in bytes that will be applied to the + kernel when Address Space Layout Randomization (ASLR) is active. + Must be less than or equal to the actual physical memory on the + system. This must be a power of two. 
+ +# Relocation on x86 needs some additional build support config X86_NEED_RELOCS def_bool y - depends on X86_32 && RELOCATABLE + depends on RANDOMIZE_BASE || (X86_32 && RELOCATABLE) config PHYSICAL_ALIGN hex "Alignment value to which kernel should be aligned" - default "0x1000000" + default "0x200000" range 0x2000 0x1000000 if X86_32 range 0x200000 0x1000000 if X86_64 ---help--- diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 3312f1b..ae8b5db 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -27,7 +27,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \ - $(obj)/piggy.o $(obj)/cpuflags.o + $(obj)/piggy.o $(obj)/cpuflags.o $(obj)/aslr.o $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c new file mode 100644 index 0000000..b73cc66 --- /dev/null +++ b/arch/x86/boot/compressed/aslr.c @@ -0,0 +1,23 @@ +#include "misc.h" + +#ifdef CONFIG_RANDOMIZE_BASE + +unsigned char *choose_kernel_location(unsigned char *input, + unsigned long input_size, + unsigned char *output, + unsigned long output_size) +{ + unsigned long choice = (unsigned long)output; + + if (cmdline_find_option_bool("nokaslr")) { + debug_putstr("KASLR disabled...\n"); + goto out; + } + + /* XXX: choose random location. 
*/ + +out: + return (unsigned char *)choice; +} + +#endif /* CONFIG_RANDOMIZE_BASE */ diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c index bffd73b..b68e303 100644 --- a/arch/x86/boot/compressed/cmdline.c +++ b/arch/x86/boot/compressed/cmdline.c @@ -1,6 +1,6 @@ #include "misc.h" -#ifdef CONFIG_EARLY_PRINTK +#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE static unsigned long fs; static inline void set_fs(unsigned long seg) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 5d6f689..9116aac 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -117,9 +117,11 @@ preferred_addr: addl %eax, %ebx notl %eax andl %eax, %ebx -#else - movl $LOAD_PHYSICAL_ADDR, %ebx + cmpl $LOAD_PHYSICAL_ADDR, %ebx + jge 1f #endif + movl $LOAD_PHYSICAL_ADDR, %ebx +1: /* Target address to relocate to for decompression */ addl $z_extract_offset, %ebx @@ -191,14 +193,14 @@ relocated: leal boot_heap(%ebx), %eax pushl %eax /* heap area */ pushl %esi /* real mode pointer */ - call decompress_kernel + call decompress_kernel /* returns kernel location in %eax */ addl $24, %esp /* * Jump to the decompressed kernel. 
*/ xorl %ebx, %ebx - jmp *%ebp + jmp *%eax /* * Stack and heap for uncompression diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index c337422..c5c1ae0 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -94,9 +94,11 @@ ENTRY(startup_32) addl %eax, %ebx notl %eax andl %eax, %ebx -#else - movl $LOAD_PHYSICAL_ADDR, %ebx + cmpl $LOAD_PHYSICAL_ADDR, %ebx + jge 1f #endif + movl $LOAD_PHYSICAL_ADDR, %ebx +1: /* Target address to relocate to for decompression */ addl $z_extract_offset, %ebx @@ -269,9 +271,11 @@ preferred_addr: addq %rax, %rbp notq %rax andq %rax, %rbp -#else - movq $LOAD_PHYSICAL_ADDR, %rbp + cmpq $LOAD_PHYSICAL_ADDR, %rbp + jge 1f #endif + movq $LOAD_PHYSICAL_ADDR, %rbp +1: /* Target address to relocate to for decompression */ leaq z_extract_offset(%rbp), %rbx @@ -339,13 +343,13 @@ relocated: movl $z_input_len, %ecx /* input_len */ movq %rbp, %r8 /* output target address */ movq $z_output_len, %r9 /* decompressed length */ - call decompress_kernel + call decompress_kernel /* returns kernel location in %rax */ popq %rsi /* * Jump to the decompressed kernel. */ - jmp *%rbp + jmp *%rax .code32 no_longmode: diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 434f077..7138768 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -395,7 +395,7 @@ static void parse_elf(void *output) free(phdrs); } -asmlinkage void decompress_kernel(void *rmode, memptr heap, +asmlinkage void *decompress_kernel(void *rmode, memptr heap, unsigned char *input_data, unsigned long input_len, unsigned char *output, @@ -422,6 +422,10 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, free_mem_ptr = heap; /* Heap */ free_mem_end_ptr = heap + BOOT_HEAP_SIZE; + output = choose_kernel_location(input_data, input_len, + output, output_len); + + /* Validate memory location choices. 
*/ if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) error("Destination address inappropriately aligned"); #ifdef CONFIG_X86_64 @@ -441,5 +445,5 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, parse_elf(output); handle_relocations(output, output_len); debug_putstr("done.\nBooting the kernel.\n"); - return; + return output; } diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 674019d..9077af7 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -39,23 +39,38 @@ static inline void debug_putstr(const char *s) #endif -#ifdef CONFIG_EARLY_PRINTK - +#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE /* cmdline.c */ int cmdline_find_option(const char *option, char *buffer, int bufsize); int cmdline_find_option_bool(const char *option); +#endif -/* early_serial_console.c */ -extern int early_serial_base; -void console_init(void); +#if CONFIG_RANDOMIZE_BASE +/* aslr.c */ +unsigned char *choose_kernel_location(unsigned char *input, + unsigned long input_size, + unsigned char *output, + unsigned long output_size); #else +static inline +unsigned char *choose_kernel_location(unsigned char *input, + unsigned long input_size, + unsigned char *output, + unsigned long output_size) +{ + return output; +} +#endif +#ifdef CONFIG_EARLY_PRINTK /* early_serial_console.c */ +extern int early_serial_base; +void console_init(void); +#else static const int early_serial_base; static inline void console_init(void) { } - #endif #endif -- 1.7.9.5
This allows decompress_kernel to return a new location for the kernel to be relocated to. Additionally, enforces CONFIG_PHYSICAL_START as the minimum relocation position when building with CONFIG_RELOCATABLE. With CONFIG_RANDOMIZE_BASE set, the choose_kernel_location routine will select a new location to decompress the kernel, though here it is presently a no-op. The kernel command line option "nokaslr" is introduced to bypass these routines. Signed-off-by: Kees Cook <keescook@chromium.org> --- v3: - treat LOAD_PHYSICAL_ADDR as minimum. v2: - renamed "noaslr" to "nokaslr"; HPA. --- Documentation/kernel-parameters.txt | 4 ++++ arch/x86/Kconfig | 38 +++++++++++++++++++++++++++++++---- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/aslr.c | 23 +++++++++++++++++++++ arch/x86/boot/compressed/cmdline.c | 2 +- arch/x86/boot/compressed/head_32.S | 10 +++++---- arch/x86/boot/compressed/head_64.S | 16 +++++++++------ arch/x86/boot/compressed/misc.c | 8 ++++++-- arch/x86/boot/compressed/misc.h | 27 +++++++++++++++++++------ 9 files changed, 106 insertions(+), 24 deletions(-) create mode 100644 arch/x86/boot/compressed/aslr.c diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index fcbb736..773fc4c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1975,6 +1975,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. noapic [SMP,APIC] Tells the kernel to not make use of any IOAPICs that may be present in the system. + nokaslr [X86] + Disable kernel base offset ASLR (Address Space + Layout Randomization) if built into the kernel. + noautogroup Disable scheduler automatic task group creation. 
nobats [PPC] Do not use BATs for mapping kernel lowmem diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ee2fb9d..992701d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1722,16 +1722,46 @@ config RELOCATABLE Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address it has been loaded at and the compile time physical address - (CONFIG_PHYSICAL_START) is ignored. + (CONFIG_PHYSICAL_START) is used as the minimum location. -# Relocation on x86-32 needs some additional build support +config RANDOMIZE_BASE + bool "Randomize the address of the kernel image" + depends on RELOCATABLE + depends on !HIBERNATION + default n + ---help--- + Randomizes the physical and virtual address at which the + kernel image is decompressed, as a security feature that + deters exploit attempts relying on knowledge of the location + of kernel internals. + + Entropy is generated using the RDRAND instruction if it + is supported. If not, then RDTSC is used, if supported. If + neither RDRAND nor RDTSC are supported, then no randomness + is introduced. + + The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET, + and aligned according to PHYSICAL_ALIGN. + +config RANDOMIZE_BASE_MAX_OFFSET + hex "Maximum ASLR offset allowed" + depends on RANDOMIZE_BASE + default "0x10000000" + range 0x0 0x10000000 + ---help--- + Determines the maximal offset in bytes that will be applied to the + kernel when Address Space Layout Randomization (ASLR) is active. + Must be less than or equal to the actual physical memory on the + system. This must be a power of two. 
+ +# Relocation on x86 needs some additional build support config X86_NEED_RELOCS def_bool y - depends on X86_32 && RELOCATABLE + depends on RANDOMIZE_BASE || (X86_32 && RELOCATABLE) config PHYSICAL_ALIGN hex "Alignment value to which kernel should be aligned" - default "0x1000000" + default "0x200000" range 0x2000 0x1000000 if X86_32 range 0x200000 0x1000000 if X86_64 ---help--- diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 3312f1b..ae8b5db 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -27,7 +27,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \ - $(obj)/piggy.o $(obj)/cpuflags.o + $(obj)/piggy.o $(obj)/cpuflags.o $(obj)/aslr.o $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c new file mode 100644 index 0000000..b73cc66 --- /dev/null +++ b/arch/x86/boot/compressed/aslr.c @@ -0,0 +1,23 @@ +#include "misc.h" + +#ifdef CONFIG_RANDOMIZE_BASE + +unsigned char *choose_kernel_location(unsigned char *input, + unsigned long input_size, + unsigned char *output, + unsigned long output_size) +{ + unsigned long choice = (unsigned long)output; + + if (cmdline_find_option_bool("nokaslr")) { + debug_putstr("KASLR disabled...\n"); + goto out; + } + + /* XXX: choose random location. 
*/ + +out: + return (unsigned char *)choice; +} + +#endif /* CONFIG_RANDOMIZE_BASE */ diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c index bffd73b..b68e303 100644 --- a/arch/x86/boot/compressed/cmdline.c +++ b/arch/x86/boot/compressed/cmdline.c @@ -1,6 +1,6 @@ #include "misc.h" -#ifdef CONFIG_EARLY_PRINTK +#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE static unsigned long fs; static inline void set_fs(unsigned long seg) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 5d6f689..9116aac 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -117,9 +117,11 @@ preferred_addr: addl %eax, %ebx notl %eax andl %eax, %ebx -#else - movl $LOAD_PHYSICAL_ADDR, %ebx + cmpl $LOAD_PHYSICAL_ADDR, %ebx + jge 1f #endif + movl $LOAD_PHYSICAL_ADDR, %ebx +1: /* Target address to relocate to for decompression */ addl $z_extract_offset, %ebx @@ -191,14 +193,14 @@ relocated: leal boot_heap(%ebx), %eax pushl %eax /* heap area */ pushl %esi /* real mode pointer */ - call decompress_kernel + call decompress_kernel /* returns kernel location in %eax */ addl $24, %esp /* * Jump to the decompressed kernel. 
*/ xorl %ebx, %ebx - jmp *%ebp + jmp *%eax /* * Stack and heap for uncompression diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index c337422..c5c1ae0 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -94,9 +94,11 @@ ENTRY(startup_32) addl %eax, %ebx notl %eax andl %eax, %ebx -#else - movl $LOAD_PHYSICAL_ADDR, %ebx + cmpl $LOAD_PHYSICAL_ADDR, %ebx + jge 1f #endif + movl $LOAD_PHYSICAL_ADDR, %ebx +1: /* Target address to relocate to for decompression */ addl $z_extract_offset, %ebx @@ -269,9 +271,11 @@ preferred_addr: addq %rax, %rbp notq %rax andq %rax, %rbp -#else - movq $LOAD_PHYSICAL_ADDR, %rbp + cmpq $LOAD_PHYSICAL_ADDR, %rbp + jge 1f #endif + movq $LOAD_PHYSICAL_ADDR, %rbp +1: /* Target address to relocate to for decompression */ leaq z_extract_offset(%rbp), %rbx @@ -339,13 +343,13 @@ relocated: movl $z_input_len, %ecx /* input_len */ movq %rbp, %r8 /* output target address */ movq $z_output_len, %r9 /* decompressed length */ - call decompress_kernel + call decompress_kernel /* returns kernel location in %rax */ popq %rsi /* * Jump to the decompressed kernel. */ - jmp *%rbp + jmp *%rax .code32 no_longmode: diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 434f077..7138768 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -395,7 +395,7 @@ static void parse_elf(void *output) free(phdrs); } -asmlinkage void decompress_kernel(void *rmode, memptr heap, +asmlinkage void *decompress_kernel(void *rmode, memptr heap, unsigned char *input_data, unsigned long input_len, unsigned char *output, @@ -422,6 +422,10 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, free_mem_ptr = heap; /* Heap */ free_mem_end_ptr = heap + BOOT_HEAP_SIZE; + output = choose_kernel_location(input_data, input_len, + output, output_len); + + /* Validate memory location choices. 
*/ if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) error("Destination address inappropriately aligned"); #ifdef CONFIG_X86_64 @@ -441,5 +445,5 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, parse_elf(output); handle_relocations(output, output_len); debug_putstr("done.\nBooting the kernel.\n"); - return; + return output; } diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 674019d..9077af7 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -39,23 +39,38 @@ static inline void debug_putstr(const char *s) #endif -#ifdef CONFIG_EARLY_PRINTK - +#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE /* cmdline.c */ int cmdline_find_option(const char *option, char *buffer, int bufsize); int cmdline_find_option_bool(const char *option); +#endif -/* early_serial_console.c */ -extern int early_serial_base; -void console_init(void); +#if CONFIG_RANDOMIZE_BASE +/* aslr.c */ +unsigned char *choose_kernel_location(unsigned char *input, + unsigned long input_size, + unsigned char *output, + unsigned long output_size); #else +static inline +unsigned char *choose_kernel_location(unsigned char *input, + unsigned long input_size, + unsigned char *output, + unsigned long output_size) +{ + return output; +} +#endif +#ifdef CONFIG_EARLY_PRINTK /* early_serial_console.c */ +extern int early_serial_base; +void console_init(void); +#else static const int early_serial_base; static inline void console_init(void) { } - #endif #endif -- 1.7.9.5
Adds potential sources of randomness: RDRAND, RDTSC, or the i8254. This moves the pre-alternatives inline rdrand function into the header so both pieces of code can use it. Availability of RDRAND is then controlled by CONFIG_ARCH_RANDOM, if someone wants to disable it even for kASLR. Signed-off-by: Kees Cook <keescook@chromium.org> --- v7: - move rdrand_long into header to avoid code duplication; HPA. v3: - fall back to reading the i8254 when no TSC; HPA. v2: - use rdtscl from msr.h; Mathias Krause. --- arch/x86/boot/compressed/aslr.c | 53 +++++++++++++++++++++++++++++++++++++ arch/x86/boot/compressed/misc.h | 2 ++ arch/x86/include/asm/archrandom.h | 21 +++++++++++++++ arch/x86/kernel/cpu/rdrand.c | 14 ---------- 4 files changed, 76 insertions(+), 14 deletions(-) diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index b73cc66..14b24e0 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -1,6 +1,59 @@ #include "misc.h" #ifdef CONFIG_RANDOMIZE_BASE +#include <asm/msr.h> +#include <asm/archrandom.h> + +#define I8254_PORT_CONTROL 0x43 +#define I8254_PORT_COUNTER0 0x40 +#define I8254_CMD_READBACK 0xC0 +#define I8254_SELECT_COUNTER0 0x02 +#define I8254_STATUS_NOTREADY 0x40 +static inline u16 i8254(void) +{ + u16 status, timer; + + do { + outb(I8254_PORT_CONTROL, + I8254_CMD_READBACK | I8254_SELECT_COUNTER0); + status = inb(I8254_PORT_COUNTER0); + timer = inb(I8254_PORT_COUNTER0); + timer |= inb(I8254_PORT_COUNTER0) << 8; + } while (status & I8254_STATUS_NOTREADY); + + return timer; +} + +static unsigned long get_random_long(void) +{ + unsigned long random; + + if (has_cpuflag(X86_FEATURE_RDRAND)) { + debug_putstr("KASLR using RDRAND...\n"); + if (rdrand_long(&random)) + return random; + } + + if (has_cpuflag(X86_FEATURE_TSC)) { + uint32_t raw; + + debug_putstr("KASLR using RDTSC...\n"); + rdtscl(raw); + + /* Only use the low bits of rdtsc. 
*/ + random = raw & 0xffff; + } else { + debug_putstr("KASLR using i8254...\n"); + random = i8254(); + } + + /* Extend timer bits poorly... */ + random |= (random << 16); +#ifdef CONFIG_X86_64 + random |= (random << 32); +#endif + return random; +} unsigned char *choose_kernel_location(unsigned char *input, unsigned long input_size, diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 9077af7..0782eb0 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -52,6 +52,8 @@ unsigned char *choose_kernel_location(unsigned char *input, unsigned long input_size, unsigned char *output, unsigned long output_size); +/* cpuflags.c */ +bool has_cpuflag(int flag); #else static inline unsigned char *choose_kernel_location(unsigned char *input, diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index 0d9ec77..e6a9245 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -39,6 +39,20 @@ #ifdef CONFIG_ARCH_RANDOM +/* Instead of arch_get_random_long() when alternatives haven't run. 
*/ +static inline int rdrand_long(unsigned long *v) +{ + int ok; + asm volatile("1: " RDRAND_LONG "\n\t" + "jc 2f\n\t" + "decl %0\n\t" + "jnz 1b\n\t" + "2:" + : "=r" (ok), "=a" (*v) + : "0" (RDRAND_RETRY_LOOPS)); + return ok; +} + #define GET_RANDOM(name, type, rdrand, nop) \ static inline int name(type *v) \ { \ @@ -68,6 +82,13 @@ GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3); #endif /* CONFIG_X86_64 */ +#else + +static inline int rdrand_long(unsigned long *v) +{ + return 0; +} + #endif /* CONFIG_ARCH_RANDOM */ extern void x86_init_rdrand(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index 88db010..384df51 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -31,20 +31,6 @@ static int __init x86_rdrand_setup(char *s) } __setup("nordrand", x86_rdrand_setup); -/* We can't use arch_get_random_long() here since alternatives haven't run */ -static inline int rdrand_long(unsigned long *v) -{ - int ok; - asm volatile("1: " RDRAND_LONG "\n\t" - "jc 2f\n\t" - "decl %0\n\t" - "jnz 1b\n\t" - "2:" - : "=r" (ok), "=a" (*v) - : "0" (RDRAND_RETRY_LOOPS)); - return ok; -} - /* * Force a reseed cycle; we are architecturally guaranteed a reseed * after no more than 512 128-bit chunks of random data. This also -- 1.7.9.5
Adds potential sources of randomness: RDRAND, RDTSC, or the i8254. This moves the pre-alternatives inline rdrand function into the header so both pieces of code can use it. Availability of RDRAND is then controlled by CONFIG_ARCH_RANDOM, if someone wants to disable it even for kASLR. Signed-off-by: Kees Cook <keescook@chromium.org> --- v7: - move rdrand_long into header to avoid code duplication; HPA. v3: - fall back to reading the i8254 when no TSC; HPA. v2: - use rdtscl from msr.h; Mathias Krause. --- arch/x86/boot/compressed/aslr.c | 53 +++++++++++++++++++++++++++++++++++++ arch/x86/boot/compressed/misc.h | 2 ++ arch/x86/include/asm/archrandom.h | 21 +++++++++++++++ arch/x86/kernel/cpu/rdrand.c | 14 ---------- 4 files changed, 76 insertions(+), 14 deletions(-) diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index b73cc66..14b24e0 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -1,6 +1,59 @@ #include "misc.h" #ifdef CONFIG_RANDOMIZE_BASE +#include <asm/msr.h> +#include <asm/archrandom.h> + +#define I8254_PORT_CONTROL 0x43 +#define I8254_PORT_COUNTER0 0x40 +#define I8254_CMD_READBACK 0xC0 +#define I8254_SELECT_COUNTER0 0x02 +#define I8254_STATUS_NOTREADY 0x40 +static inline u16 i8254(void) +{ + u16 status, timer; + + do { + outb(I8254_PORT_CONTROL, + I8254_CMD_READBACK | I8254_SELECT_COUNTER0); + status = inb(I8254_PORT_COUNTER0); + timer = inb(I8254_PORT_COUNTER0); + timer |= inb(I8254_PORT_COUNTER0) << 8; + } while (status & I8254_STATUS_NOTREADY); + + return timer; +} + +static unsigned long get_random_long(void) +{ + unsigned long random; + + if (has_cpuflag(X86_FEATURE_RDRAND)) { + debug_putstr("KASLR using RDRAND...\n"); + if (rdrand_long(&random)) + return random; + } + + if (has_cpuflag(X86_FEATURE_TSC)) { + uint32_t raw; + + debug_putstr("KASLR using RDTSC...\n"); + rdtscl(raw); + + /* Only use the low bits of rdtsc. 
*/ + random = raw & 0xffff; + } else { + debug_putstr("KASLR using i8254...\n"); + random = i8254(); + } + + /* Extend timer bits poorly... */ + random |= (random << 16); +#ifdef CONFIG_X86_64 + random |= (random << 32); +#endif + return random; +} unsigned char *choose_kernel_location(unsigned char *input, unsigned long input_size, diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 9077af7..0782eb0 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -52,6 +52,8 @@ unsigned char *choose_kernel_location(unsigned char *input, unsigned long input_size, unsigned char *output, unsigned long output_size); +/* cpuflags.c */ +bool has_cpuflag(int flag); #else static inline unsigned char *choose_kernel_location(unsigned char *input, diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index 0d9ec77..e6a9245 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -39,6 +39,20 @@ #ifdef CONFIG_ARCH_RANDOM +/* Instead of arch_get_random_long() when alternatives haven't run. 
*/ +static inline int rdrand_long(unsigned long *v) +{ + int ok; + asm volatile("1: " RDRAND_LONG "\n\t" + "jc 2f\n\t" + "decl %0\n\t" + "jnz 1b\n\t" + "2:" + : "=r" (ok), "=a" (*v) + : "0" (RDRAND_RETRY_LOOPS)); + return ok; +} + #define GET_RANDOM(name, type, rdrand, nop) \ static inline int name(type *v) \ { \ @@ -68,6 +82,13 @@ GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3); #endif /* CONFIG_X86_64 */ +#else + +static inline int rdrand_long(unsigned long *v) +{ + return 0; +} + #endif /* CONFIG_ARCH_RANDOM */ extern void x86_init_rdrand(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index 88db010..384df51 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -31,20 +31,6 @@ static int __init x86_rdrand_setup(char *s) } __setup("nordrand", x86_rdrand_setup); -/* We can't use arch_get_random_long() here since alternatives haven't run */ -static inline int rdrand_long(unsigned long *v) -{ - int ok; - asm volatile("1: " RDRAND_LONG "\n\t" - "jc 2f\n\t" - "decl %0\n\t" - "jnz 1b\n\t" - "2:" - : "=r" (ok), "=a" (*v) - : "0" (RDRAND_RETRY_LOOPS)); - return ok; -} - /* * Force a reseed cycle; we are architecturally guaranteed a reseed * after no more than 512 128-bit chunks of random data. This also -- 1.7.9.5
Counts available alignment positions across all e820 maps, and chooses one randomly for the new kernel base address, making sure not to collide with unsafe memory areas. Signed-off-by: Kees Cook <keescook@chromium.org> --- v8: - explicitly avoid unsafe memory areas instead of raising minimum position. v2: - make sure to exclude e820 regions outside the 32-bit memory range. --- arch/x86/boot/compressed/aslr.c | 193 ++++++++++++++++++++++++++++++++++++++- arch/x86/boot/compressed/misc.c | 10 +- arch/x86/boot/compressed/misc.h | 8 ++ 3 files changed, 202 insertions(+), 9 deletions(-) diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index 14b24e0..0595798 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -3,6 +3,7 @@ #ifdef CONFIG_RANDOMIZE_BASE #include <asm/msr.h> #include <asm/archrandom.h> +#include <asm/e820.h> #define I8254_PORT_CONTROL 0x43 #define I8254_PORT_COUNTER0 0x40 @@ -55,20 +56,210 @@ static unsigned long get_random_long(void) return random; } +struct mem_vector { + unsigned long start; + unsigned long size; +}; + +#define MEM_AVOID_MAX 5 +struct mem_vector mem_avoid[MEM_AVOID_MAX]; + +static bool mem_contains(struct mem_vector *region, struct mem_vector *item) +{ + /* Item at least partially before region. */ + if (item->start < region->start) + return false; + /* Item at least partially after region. */ + if (item->start + item->size > region->start + region->size) + return false; + return true; +} + +static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) +{ + /* Item one is entirely before item two. */ + if (one->start + one->size <= two->start) + return false; + /* Item one is entirely after item two. 
*/ + if (one->start >= two->start + two->size) + return false; + return true; +} + +static void mem_avoid_init(unsigned long input, unsigned long input_size, + unsigned long output, unsigned long output_size) +{ + u64 initrd_start, initrd_size; + u64 cmd_line, cmd_line_size; + unsigned long unsafe, unsafe_len; + char *ptr; + + /* + * Avoid the region that is unsafe to overlap during + * decompression (see calculations at top of misc.c). + */ + unsafe_len = (output_size >> 12) + 32768 + 18; + unsafe = (unsigned long)input + input_size - unsafe_len; + mem_avoid[0].start = unsafe; + mem_avoid[0].size = unsafe_len; + + /* Avoid initrd. */ + initrd_start = (u64)real_mode->ext_ramdisk_image << 32; + initrd_start |= real_mode->hdr.ramdisk_image; + initrd_size = (u64)real_mode->ext_ramdisk_size << 32; + initrd_size |= real_mode->hdr.ramdisk_size; + mem_avoid[1].start = initrd_start; + mem_avoid[1].size = initrd_size; + + /* Avoid kernel command line. */ + cmd_line = (u64)real_mode->ext_cmd_line_ptr << 32; + cmd_line |= real_mode->hdr.cmd_line_ptr; + /* Calculate size of cmd_line. */ + ptr = (char *)(unsigned long)cmd_line; + for (cmd_line_size = 0; ptr[cmd_line_size++]; ) + ; + mem_avoid[2].start = cmd_line; + mem_avoid[2].size = cmd_line_size; + + /* Avoid heap memory. */ + mem_avoid[3].start = (unsigned long)free_mem_ptr; + mem_avoid[3].size = BOOT_HEAP_SIZE; + + /* Avoid stack memory. */ + mem_avoid[4].start = (unsigned long)free_mem_end_ptr; + mem_avoid[4].size = BOOT_STACK_SIZE; +} + +/* Does this memory vector overlap a known avoided area? */ +bool mem_avoid_overlap(struct mem_vector *img) +{ + int i; + + for (i = 0; i < MEM_AVOID_MAX; i++) { + if (mem_overlaps(img, &mem_avoid[i])) + return true; + } + + return false; +} + +unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET / CONFIG_PHYSICAL_ALIGN]; +unsigned long slot_max = 0; + +static void slots_append(unsigned long addr) +{ + /* Overflowing the slots list should be impossible. 
*/ + if (slot_max >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET / + CONFIG_PHYSICAL_ALIGN) + return; + + slots[slot_max++] = addr; +} + +static unsigned long slots_fetch_random(void) +{ + /* Handle case of no slots stored. */ + if (slot_max == 0) + return 0; + + return slots[get_random_long() % slot_max]; +} + +static void process_e820_entry(struct e820entry *entry, + unsigned long minimum, + unsigned long image_size) +{ + struct mem_vector region, img; + + /* Skip non-RAM entries. */ + if (entry->type != E820_RAM) + return; + + /* Ignore entries entirely above our maximum. */ + if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET) + return; + + /* Ignore entries entirely below our minimum. */ + if (entry->addr + entry->size < minimum) + return; + + region.start = entry->addr; + region.size = entry->size; + + /* Potentially raise address to minimum location. */ + if (region.start < minimum) + region.start = minimum; + + /* Potentially raise address to meet alignment requirements. */ + region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); + + /* Did we raise the address above the bounds of this e820 region? */ + if (region.start > entry->addr + entry->size) + return; + + /* Reduce size by any delta from the original address. */ + region.size -= region.start - entry->addr; + + /* Reduce maximum size to fit end of image within maximum limit. */ + if (region.start + region.size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET) + region.size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - region.start; + + /* Walk each aligned slot and check for avoided areas. */ + for (img.start = region.start, img.size = image_size ; + mem_contains(&region, &img) ; + img.start += CONFIG_PHYSICAL_ALIGN) { + if (mem_avoid_overlap(&img)) + continue; + slots_append(img.start); + } +} + +static unsigned long find_random_addr(unsigned long minimum, + unsigned long size) +{ + int i; + unsigned long addr; + + /* Make sure minimum is aligned. 
*/ + minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); + + /* Verify potential e820 positions, appending to slots list. */ + for (i = 0; i < real_mode->e820_entries; i++) { + process_e820_entry(&real_mode->e820_map[i], minimum, size); + } + + return slots_fetch_random(); +} + unsigned char *choose_kernel_location(unsigned char *input, unsigned long input_size, unsigned char *output, unsigned long output_size) { unsigned long choice = (unsigned long)output; + unsigned long random; if (cmdline_find_option_bool("nokaslr")) { debug_putstr("KASLR disabled...\n"); goto out; } - /* XXX: choose random location. */ + /* Record the various known unsafe memory ranges. */ + mem_avoid_init((unsigned long)input, input_size, + (unsigned long)output, output_size); + + /* Walk e820 and find a random address. */ + random = find_random_addr(choice, output_size); + if (!random) { + debug_putstr("KASLR could not find suitable E820 region...\n"); + goto out; + } + + /* Always enforce the minimum. */ + if (random < choice) + goto out; + choice = random; out: return (unsigned char *)choice; } diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 7138768..196eaf3 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -112,14 +112,8 @@ struct boot_params *real_mode; /* Pointer to real-mode data */ void *memset(void *s, int c, size_t n); void *memcpy(void *dest, const void *src, size_t n); -#ifdef CONFIG_X86_64 -#define memptr long -#else -#define memptr unsigned -#endif - -static memptr free_mem_ptr; -static memptr free_mem_end_ptr; +memptr free_mem_ptr; +memptr free_mem_end_ptr; static char *vidmem; static int vidport; diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 0782eb0..24e3e56 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -23,7 +23,15 @@ #define BOOT_BOOT_H #include "../ctype.h" +#ifdef CONFIG_X86_64 +#define memptr long +#else +#define memptr 
unsigned +#endif + /* misc.c */ +extern memptr free_mem_ptr; +extern memptr free_mem_end_ptr; extern struct boot_params *real_mode; /* Pointer to real-mode data */ void __putstr(const char *s); #define error_putstr(__x) __putstr(__x) -- 1.7.9.5
Counts available alignment positions across all e820 maps, and chooses one randomly for the new kernel base address, making sure not to collide with unsafe memory areas. Signed-off-by: Kees Cook <keescook@chromium.org> --- v8: - explicitly avoid unsafe memory areas instead of raising minimum position. v2: - make sure to exclude e820 regions outside the 32-bit memory range. --- arch/x86/boot/compressed/aslr.c | 193 ++++++++++++++++++++++++++++++++++++++- arch/x86/boot/compressed/misc.c | 10 +- arch/x86/boot/compressed/misc.h | 8 ++ 3 files changed, 202 insertions(+), 9 deletions(-) diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index 14b24e0..0595798 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -3,6 +3,7 @@ #ifdef CONFIG_RANDOMIZE_BASE #include <asm/msr.h> #include <asm/archrandom.h> +#include <asm/e820.h> #define I8254_PORT_CONTROL 0x43 #define I8254_PORT_COUNTER0 0x40 @@ -55,20 +56,210 @@ static unsigned long get_random_long(void) return random; } +struct mem_vector { + unsigned long start; + unsigned long size; +}; + +#define MEM_AVOID_MAX 5 +struct mem_vector mem_avoid[MEM_AVOID_MAX]; + +static bool mem_contains(struct mem_vector *region, struct mem_vector *item) +{ + /* Item at least partially before region. */ + if (item->start < region->start) + return false; + /* Item at least partially after region. */ + if (item->start + item->size > region->start + region->size) + return false; + return true; +} + +static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) +{ + /* Item one is entirely before item two. */ + if (one->start + one->size <= two->start) + return false; + /* Item one is entirely after item two. 
*/ + if (one->start >= two->start + two->size) + return false; + return true; +} + +static void mem_avoid_init(unsigned long input, unsigned long input_size, + unsigned long output, unsigned long output_size) +{ + u64 initrd_start, initrd_size; + u64 cmd_line, cmd_line_size; + unsigned long unsafe, unsafe_len; + char *ptr; + + /* + * Avoid the region that is unsafe to overlap during + * decompression (see calculations at top of misc.c). + */ + unsafe_len = (output_size >> 12) + 32768 + 18; + unsafe = (unsigned long)input + input_size - unsafe_len; + mem_avoid[0].start = unsafe; + mem_avoid[0].size = unsafe_len; + + /* Avoid initrd. */ + initrd_start = (u64)real_mode->ext_ramdisk_image << 32; + initrd_start |= real_mode->hdr.ramdisk_image; + initrd_size = (u64)real_mode->ext_ramdisk_size << 32; + initrd_size |= real_mode->hdr.ramdisk_size; + mem_avoid[1].start = initrd_start; + mem_avoid[1].size = initrd_size; + + /* Avoid kernel command line. */ + cmd_line = (u64)real_mode->ext_cmd_line_ptr << 32; + cmd_line |= real_mode->hdr.cmd_line_ptr; + /* Calculate size of cmd_line. */ + ptr = (char *)(unsigned long)cmd_line; + for (cmd_line_size = 0; ptr[cmd_line_size++]; ) + ; + mem_avoid[2].start = cmd_line; + mem_avoid[2].size = cmd_line_size; + + /* Avoid heap memory. */ + mem_avoid[3].start = (unsigned long)free_mem_ptr; + mem_avoid[3].size = BOOT_HEAP_SIZE; + + /* Avoid stack memory. */ + mem_avoid[4].start = (unsigned long)free_mem_end_ptr; + mem_avoid[4].size = BOOT_STACK_SIZE; +} + +/* Does this memory vector overlap a known avoided area? */ +bool mem_avoid_overlap(struct mem_vector *img) +{ + int i; + + for (i = 0; i < MEM_AVOID_MAX; i++) { + if (mem_overlaps(img, &mem_avoid[i])) + return true; + } + + return false; +} + +unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET / CONFIG_PHYSICAL_ALIGN]; +unsigned long slot_max = 0; + +static void slots_append(unsigned long addr) +{ + /* Overflowing the slots list should be impossible. 
*/ + if (slot_max >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET / + CONFIG_PHYSICAL_ALIGN) + return; + + slots[slot_max++] = addr; +} + +static unsigned long slots_fetch_random(void) +{ + /* Handle case of no slots stored. */ + if (slot_max == 0) + return 0; + + return slots[get_random_long() % slot_max]; +} + +static void process_e820_entry(struct e820entry *entry, + unsigned long minimum, + unsigned long image_size) +{ + struct mem_vector region, img; + + /* Skip non-RAM entries. */ + if (entry->type != E820_RAM) + return; + + /* Ignore entries entirely above our maximum. */ + if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET) + return; + + /* Ignore entries entirely below our minimum. */ + if (entry->addr + entry->size < minimum) + return; + + region.start = entry->addr; + region.size = entry->size; + + /* Potentially raise address to minimum location. */ + if (region.start < minimum) + region.start = minimum; + + /* Potentially raise address to meet alignment requirements. */ + region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); + + /* Did we raise the address above the bounds of this e820 region? */ + if (region.start > entry->addr + entry->size) + return; + + /* Reduce size by any delta from the original address. */ + region.size -= region.start - entry->addr; + + /* Reduce maximum size to fit end of image within maximum limit. */ + if (region.start + region.size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET) + region.size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - region.start; + + /* Walk each aligned slot and check for avoided areas. */ + for (img.start = region.start, img.size = image_size ; + mem_contains(&region, &img) ; + img.start += CONFIG_PHYSICAL_ALIGN) { + if (mem_avoid_overlap(&img)) + continue; + slots_append(img.start); + } +} + +static unsigned long find_random_addr(unsigned long minimum, + unsigned long size) +{ + int i; + unsigned long addr; + + /* Make sure minimum is aligned. 
*/ + minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); + + /* Verify potential e820 positions, appending to slots list. */ + for (i = 0; i < real_mode->e820_entries; i++) { + process_e820_entry(&real_mode->e820_map[i], minimum, size); + } + + return slots_fetch_random(); +} + unsigned char *choose_kernel_location(unsigned char *input, unsigned long input_size, unsigned char *output, unsigned long output_size) { unsigned long choice = (unsigned long)output; + unsigned long random; if (cmdline_find_option_bool("nokaslr")) { debug_putstr("KASLR disabled...\n"); goto out; } - /* XXX: choose random location. */ + /* Record the various known unsafe memory ranges. */ + mem_avoid_init((unsigned long)input, input_size, + (unsigned long)output, output_size); + + /* Walk e820 and find a random address. */ + random = find_random_addr(choice, output_size); + if (!random) { + debug_putstr("KASLR could not find suitable E820 region...\n"); + goto out; + } + + /* Always enforce the minimum. */ + if (random < choice) + goto out; + choice = random; out: return (unsigned char *)choice; } diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 7138768..196eaf3 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -112,14 +112,8 @@ struct boot_params *real_mode; /* Pointer to real-mode data */ void *memset(void *s, int c, size_t n); void *memcpy(void *dest, const void *src, size_t n); -#ifdef CONFIG_X86_64 -#define memptr long -#else -#define memptr unsigned -#endif - -static memptr free_mem_ptr; -static memptr free_mem_end_ptr; +memptr free_mem_ptr; +memptr free_mem_end_ptr; static char *vidmem; static int vidport; diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 0782eb0..24e3e56 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -23,7 +23,15 @@ #define BOOT_BOOT_H #include "../ctype.h" +#ifdef CONFIG_X86_64 +#define memptr long +#else +#define memptr 
unsigned +#endif + /* misc.c */ +extern memptr free_mem_ptr; +extern memptr free_mem_end_ptr; extern struct boot_params *real_mode; /* Pointer to real-mode data */ void __putstr(const char *s); #define error_putstr(__x) __putstr(__x) -- 1.7.9.5
When the system panics, include the kernel offset in the report to assist in debugging. Signed-off-by: Kees Cook <keescook@chromium.org> --- arch/x86/kernel/setup.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f0de629..1708862 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -824,6 +824,20 @@ static void __init trim_low_memory_range(void) } /* + * Dump out kernel offset information on panic. + */ +static int +dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) +{ + pr_emerg("Kernel Offset: 0x%lx from 0x%lx " + "(relocation range: 0x%lx-0x%lx)\n", + (unsigned long)&_text - __START_KERNEL, __START_KERNEL, + __START_KERNEL_map, MODULES_VADDR-1); + + return 0; +} + +/* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures * for initialization. Note, the efi init code path is determined by the @@ -1242,3 +1256,15 @@ void __init i386_reserve_resources(void) } #endif /* CONFIG_X86_32 */ + +static struct notifier_block kernel_offset_notifier = { + .notifier_call = dump_kernel_offset +}; + +static int __init register_kernel_offset_dumper(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &kernel_offset_notifier); + return 0; +} +__initcall(register_kernel_offset_dumper); -- 1.7.9.5
When the system panics, include the kernel offset in the report to assist in debugging. Signed-off-by: Kees Cook <keescook@chromium.org> --- arch/x86/kernel/setup.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f0de629..1708862 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -824,6 +824,20 @@ static void __init trim_low_memory_range(void) } /* + * Dump out kernel offset information on panic. + */ +static int +dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) +{ + pr_emerg("Kernel Offset: 0x%lx from 0x%lx " + "(relocation range: 0x%lx-0x%lx)\n", + (unsigned long)&_text - __START_KERNEL, __START_KERNEL, + __START_KERNEL_map, MODULES_VADDR-1); + + return 0; +} + +/* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures * for initialization. Note, the efi init code path is determined by the @@ -1242,3 +1256,15 @@ void __init i386_reserve_resources(void) } #endif /* CONFIG_X86_32 */ + +static struct notifier_block kernel_offset_notifier = { + .notifier_call = dump_kernel_offset +}; + +static int __init register_kernel_offset_dumper(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &kernel_offset_notifier); + return 0; +} +__initcall(register_kernel_offset_dumper); -- 1.7.9.5
On 64-bit, this raises the maximum location to 1GiB, the upper limit currently, since the kernel fixmap page mappings need to be moved to use the other 1GiB (which would be the theoretical limit when building with -mcmodel=kernel). Signed-off-by: Kees Cook <keescook@chromium.org> --- arch/x86/Kconfig | 16 +++++++++++++--- arch/x86/include/asm/page_64_types.h | 15 ++++++++++++--- arch/x86/include/asm/pgtable_64_types.h | 2 +- arch/x86/mm/init_32.c | 3 +++ 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 992701d..51f4399 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1746,13 +1746,23 @@ config RANDOMIZE_BASE config RANDOMIZE_BASE_MAX_OFFSET hex "Maximum ASLR offset allowed" depends on RANDOMIZE_BASE - default "0x10000000" - range 0x0 0x10000000 + range 0x0 0x20000000 if X86_32 + default "0x20000000" if X86_32 + range 0x0 0x40000000 if X86_64 + default "0x40000000" if X86_64 ---help--- Determines the maximal offset in bytes that will be applied to the kernel when Address Space Layout Randomization (ASLR) is active. Must be less than or equal to the actual physical memory on the - system. This must be a power of two. + system. This must be a multiple of CONFIG_PHYSICAL_ALIGN. + + On 32-bit this is limited to 512MiB. + + On 64-bit this is limited by how the kernel fixmap page table is + positioned, so this cannot be larger that 1GiB currently. Normally + there is a 512MiB to 1.5GiB split between kernel and modules. When + this is raised above the 512MiB default, the modules area will + shrink to compensate, up to the current maximum 1GiB to 1GiB split. 
# Relocation on x86 needs some additional build support config X86_NEED_RELOCS diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 43dcd80..8de6d9c 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -39,9 +39,18 @@ #define __VIRTUAL_MASK_SHIFT 47 /* - * Kernel image size is limited to 512 MB (see level2_kernel_pgt in - * arch/x86/kernel/head_64.S), and it is mapped here: + * Kernel image size is limited to 1GiB due to the fixmap living in the + * next 1GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). Use + * 512MiB by default, leaving 1.5GiB for modules once the page tables + * are fully set up. If kernel ASLR is configured, it can extend the + * kernel page table mapping, reducing the size of the modules area. */ -#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) +#define KERNEL_IMAGE_SIZE_DEFAULT (512 * 1024 * 1024) +#if defined(CONFIG_RANDOMIZE_BASE) && \ + CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE_DEFAULT +#define KERNEL_IMAGE_SIZE CONFIG_RANDOMIZE_BASE_MAX_OFFSET +#else +#define KERNEL_IMAGE_SIZE KERNEL_IMAGE_SIZE_DEFAULT +#endif #endif /* _ASM_X86_PAGE_64_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 2d88344..c883bf7 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -58,7 +58,7 @@ typedef struct { pteval_t pte; } pte_t; #define VMALLOC_START _AC(0xffffc90000000000, UL) #define VMALLOC_END _AC(0xffffe8ffffffffff, UL) #define VMEMMAP_START _AC(0xffffea0000000000, UL) -#define MODULES_VADDR _AC(0xffffffffa0000000, UL) +#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) #define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 4287f1f..5bdc543 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -806,6 +806,9 @@ void 
__init mem_init(void) BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END); #undef high_memory #undef __FIXADDR_TOP +#ifdef CONFIG_RANDOMIZE_BASE + BUILD_BUG_ON(CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE); +#endif #ifdef CONFIG_HIGHMEM BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); -- 1.7.9.5
On 64-bit, this raises the maximum location to 1GiB, the upper limit currently, since the kernel fixmap page mappings need to be moved to use the other 1GiB (which would be the theoretical limit when building with -mcmodel=kernel). Signed-off-by: Kees Cook <keescook@chromium.org> --- arch/x86/Kconfig | 16 +++++++++++++--- arch/x86/include/asm/page_64_types.h | 15 ++++++++++++--- arch/x86/include/asm/pgtable_64_types.h | 2 +- arch/x86/mm/init_32.c | 3 +++ 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 992701d..51f4399 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1746,13 +1746,23 @@ config RANDOMIZE_BASE config RANDOMIZE_BASE_MAX_OFFSET hex "Maximum ASLR offset allowed" depends on RANDOMIZE_BASE - default "0x10000000" - range 0x0 0x10000000 + range 0x0 0x20000000 if X86_32 + default "0x20000000" if X86_32 + range 0x0 0x40000000 if X86_64 + default "0x40000000" if X86_64 ---help--- Determines the maximal offset in bytes that will be applied to the kernel when Address Space Layout Randomization (ASLR) is active. Must be less than or equal to the actual physical memory on the - system. This must be a power of two. + system. This must be a multiple of CONFIG_PHYSICAL_ALIGN. + + On 32-bit this is limited to 512MiB. + + On 64-bit this is limited by how the kernel fixmap page table is + positioned, so this cannot be larger that 1GiB currently. Normally + there is a 512MiB to 1.5GiB split between kernel and modules. When + this is raised above the 512MiB default, the modules area will + shrink to compensate, up to the current maximum 1GiB to 1GiB split. 
# Relocation on x86 needs some additional build support config X86_NEED_RELOCS diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 43dcd80..8de6d9c 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -39,9 +39,18 @@ #define __VIRTUAL_MASK_SHIFT 47 /* - * Kernel image size is limited to 512 MB (see level2_kernel_pgt in - * arch/x86/kernel/head_64.S), and it is mapped here: + * Kernel image size is limited to 1GiB due to the fixmap living in the + * next 1GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). Use + * 512MiB by default, leaving 1.5GiB for modules once the page tables + * are fully set up. If kernel ASLR is configured, it can extend the + * kernel page table mapping, reducing the size of the modules area. */ -#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) +#define KERNEL_IMAGE_SIZE_DEFAULT (512 * 1024 * 1024) +#if defined(CONFIG_RANDOMIZE_BASE) && \ + CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE_DEFAULT +#define KERNEL_IMAGE_SIZE CONFIG_RANDOMIZE_BASE_MAX_OFFSET +#else +#define KERNEL_IMAGE_SIZE KERNEL_IMAGE_SIZE_DEFAULT +#endif #endif /* _ASM_X86_PAGE_64_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 2d88344..c883bf7 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -58,7 +58,7 @@ typedef struct { pteval_t pte; } pte_t; #define VMALLOC_START _AC(0xffffc90000000000, UL) #define VMALLOC_END _AC(0xffffe8ffffffffff, UL) #define VMEMMAP_START _AC(0xffffea0000000000, UL) -#define MODULES_VADDR _AC(0xffffffffa0000000, UL) +#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) #define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 4287f1f..5bdc543 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -806,6 +806,9 @@ void 
__init mem_init(void) BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END); #undef high_memory #undef __FIXADDR_TOP +#ifdef CONFIG_RANDOMIZE_BASE + BUILD_BUG_ON(CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE); +#endif #ifdef CONFIG_HIGHMEM BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); -- 1.7.9.5
Commit-ID: dd78b97367bd575918204cc89107c1479d3fc1a7 Gitweb: http://git.kernel.org/tip/dd78b97367bd575918204cc89107c1479d3fc1a7 Author: Kees Cook <keescook@chromium.org> AuthorDate: Thu, 10 Oct 2013 17:18:13 -0700 Committer: H. Peter Anvin <hpa@linux.intel.com> CommitDate: Sun, 13 Oct 2013 03:12:02 -0700 x86, boot: Move CPU flags out of cpucheck Refactor the CPU flags handling out of the cpucheck routines so that they can be reused by the future ASLR routines (in order to detect CPU features like RDRAND and RDTSC). This reworks has_eflag() and has_fpu() to be used on both 32-bit and 64-bit, and refactors the calls to cpuid to make them PIC-safe on 32-bit. Signed-off-by: Kees Cook <keescook@chromium.org> Link: http://lkml.kernel.org/r/1381450698-28710-2-git-send-email-keescook@chromium.org Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> --- arch/x86/boot/Makefile | 2 +- arch/x86/boot/boot.h | 10 +--- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/cpuflags.c | 12 +++++ arch/x86/boot/cpucheck.c | 86 ----------------------------- arch/x86/boot/cpuflags.c | 104 ++++++++++++++++++++++++++++++++++++ arch/x86/boot/cpuflags.h | 19 +++++++ 7 files changed, 138 insertions(+), 97 deletions(-) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 379814b..0da2e37 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -20,7 +20,7 @@ targets := vmlinux.bin setup.bin setup.elf bzImage targets += fdimage fdimage144 fdimage288 image.iso mtools.conf subdir- := compressed -setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o +setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o setup-y += early_serial_console.o edd.o header.o main.o mca.o memory.o setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o setup-y += video-mode.o version.o diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index ef72bae..50f8c5e 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -26,9 +26,8 @@ #include 
<asm/boot.h> #include <asm/setup.h> #include "bitops.h" -#include <asm/cpufeature.h> -#include <asm/processor-flags.h> #include "ctype.h" +#include "cpuflags.h" /* Useful macros */ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) @@ -307,14 +306,7 @@ static inline int cmdline_find_option_bool(const char *option) return __cmdline_find_option_bool(cmd_line_ptr, option); } - /* cpu.c, cpucheck.c */ -struct cpu_features { - int level; /* Family, or 64 for x86-64 */ - int model; - u32 flags[NCAPINTS]; -}; -extern struct cpu_features cpu; int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); int validate_cpu(void); diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index dcd90df..3312f1b 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -27,7 +27,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \ - $(obj)/piggy.o + $(obj)/piggy.o $(obj)/cpuflags.o $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone diff --git a/arch/x86/boot/compressed/cpuflags.c b/arch/x86/boot/compressed/cpuflags.c new file mode 100644 index 0000000..931cba6 --- /dev/null +++ b/arch/x86/boot/compressed/cpuflags.c @@ -0,0 +1,12 @@ +#ifdef CONFIG_RANDOMIZE_BASE + +#include "../cpuflags.c" + +bool has_cpuflag(int flag) +{ + get_flags(); + + return test_bit(flag, cpu.flags); +} + +#endif diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 4d3ff03..e1f3c16 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -28,8 +28,6 @@ #include <asm/required-features.h> #include <asm/msr-index.h> -struct cpu_features cpu; -static u32 cpu_vendor[3]; static u32 err_flags[NCAPINTS]; static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY; @@ -69,90 +67,6 @@ static int is_transmeta(void) cpu_vendor[2] == A32('M', 'x', '8', 
'6'); } -static int has_fpu(void) -{ - u16 fcw = -1, fsw = -1; - u32 cr0; - - asm("movl %%cr0,%0" : "=r" (cr0)); - if (cr0 & (X86_CR0_EM|X86_CR0_TS)) { - cr0 &= ~(X86_CR0_EM|X86_CR0_TS); - asm volatile("movl %0,%%cr0" : : "r" (cr0)); - } - - asm volatile("fninit ; fnstsw %0 ; fnstcw %1" - : "+m" (fsw), "+m" (fcw)); - - return fsw == 0 && (fcw & 0x103f) == 0x003f; -} - -static int has_eflag(u32 mask) -{ - u32 f0, f1; - - asm("pushfl ; " - "pushfl ; " - "popl %0 ; " - "movl %0,%1 ; " - "xorl %2,%1 ; " - "pushl %1 ; " - "popfl ; " - "pushfl ; " - "popl %1 ; " - "popfl" - : "=&r" (f0), "=&r" (f1) - : "ri" (mask)); - - return !!((f0^f1) & mask); -} - -static void get_flags(void) -{ - u32 max_intel_level, max_amd_level; - u32 tfms; - - if (has_fpu()) - set_bit(X86_FEATURE_FPU, cpu.flags); - - if (has_eflag(X86_EFLAGS_ID)) { - asm("cpuid" - : "=a" (max_intel_level), - "=b" (cpu_vendor[0]), - "=d" (cpu_vendor[1]), - "=c" (cpu_vendor[2]) - : "a" (0)); - - if (max_intel_level >= 0x00000001 && - max_intel_level <= 0x0000ffff) { - asm("cpuid" - : "=a" (tfms), - "=c" (cpu.flags[4]), - "=d" (cpu.flags[0]) - : "a" (0x00000001) - : "ebx"); - cpu.level = (tfms >> 8) & 15; - cpu.model = (tfms >> 4) & 15; - if (cpu.level >= 6) - cpu.model += ((tfms >> 16) & 0xf) << 4; - } - - asm("cpuid" - : "=a" (max_amd_level) - : "a" (0x80000000) - : "ebx", "ecx", "edx"); - - if (max_amd_level >= 0x80000001 && - max_amd_level <= 0x8000ffff) { - u32 eax = 0x80000001; - asm("cpuid" - : "+a" (eax), - "=c" (cpu.flags[6]), - "=d" (cpu.flags[1]) - : : "ebx"); - } - } -} - /* Returns a bitmask of which words we have error bits in */ static int check_flags(void) { diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c new file mode 100644 index 0000000..b02544a --- /dev/null +++ b/arch/x86/boot/cpuflags.c @@ -0,0 +1,104 @@ +#include <linux/types.h> +#include "bitops.h" + +#include <asm/processor-flags.h> +#include <asm/required-features.h> +#include <asm/msr-index.h> +#include "cpuflags.h" + 
+struct cpu_features cpu; +u32 cpu_vendor[3]; + +static bool loaded_flags; + +static int has_fpu(void) +{ + u16 fcw = -1, fsw = -1; + unsigned long cr0; + + asm volatile("mov %%cr0,%0" : "=r" (cr0)); + if (cr0 & (X86_CR0_EM|X86_CR0_TS)) { + cr0 &= ~(X86_CR0_EM|X86_CR0_TS); + asm volatile("mov %0,%%cr0" : : "r" (cr0)); + } + + asm volatile("fninit ; fnstsw %0 ; fnstcw %1" + : "+m" (fsw), "+m" (fcw)); + + return fsw == 0 && (fcw & 0x103f) == 0x003f; +} + +int has_eflag(unsigned long mask) +{ + unsigned long f0, f1; + + asm volatile("pushf \n\t" + "pushf \n\t" + "pop %0 \n\t" + "mov %0,%1 \n\t" + "xor %2,%1 \n\t" + "push %1 \n\t" + "popf \n\t" + "pushf \n\t" + "pop %1 \n\t" + "popf" + : "=&r" (f0), "=&r" (f1) + : "ri" (mask)); + + return !!((f0^f1) & mask); +} + +/* Handle x86_32 PIC using ebx. */ +#if defined(__i386__) && defined(__PIC__) +# define EBX_REG "=r" +#else +# define EBX_REG "=b" +#endif + +static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d) +{ + asm volatile(".ifnc %%ebx,%3 ; movl %%ebx,%3 ; .endif \n\t" + "cpuid \n\t" + ".ifnc %%ebx,%3 ; xchgl %%ebx,%3 ; .endif \n\t" + : "=a" (*a), "=c" (*c), "=d" (*d), EBX_REG (*b) + : "a" (id) + ); +} + +void get_flags(void) +{ + u32 max_intel_level, max_amd_level; + u32 tfms; + u32 ignored; + + if (loaded_flags) + return; + loaded_flags = true; + + if (has_fpu()) + set_bit(X86_FEATURE_FPU, cpu.flags); + + if (has_eflag(X86_EFLAGS_ID)) { + cpuid(0x0, &max_intel_level, &cpu_vendor[0], &cpu_vendor[2], + &cpu_vendor[1]); + + if (max_intel_level >= 0x00000001 && + max_intel_level <= 0x0000ffff) { + cpuid(0x1, &tfms, &ignored, &cpu.flags[4], + &cpu.flags[0]); + cpu.level = (tfms >> 8) & 15; + cpu.model = (tfms >> 4) & 15; + if (cpu.level >= 6) + cpu.model += ((tfms >> 16) & 0xf) << 4; + } + + cpuid(0x80000000, &max_amd_level, &ignored, &ignored, + &ignored); + + if (max_amd_level >= 0x80000001 && + max_amd_level <= 0x8000ffff) { + cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6], + &cpu.flags[1]); + } + } +} 
diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h new file mode 100644 index 0000000..9bb4e25 --- /dev/null +++ b/arch/x86/boot/cpuflags.h @@ -0,0 +1,19 @@ +#ifndef BOOT_CPUFLAGS_H +#define BOOT_CPUFLAGS_H + +#include <asm/cpufeature.h> +#include <asm/processor-flags.h> + +struct cpu_features { + int level; /* Family, or 64 for x86-64 */ + int model; + u32 flags[NCAPINTS]; +}; + +extern struct cpu_features cpu; +extern u32 cpu_vendor[3]; + +int has_eflag(unsigned long mask); +void get_flags(void); + +#endif
Commit-ID: 8ab3820fd5b2896d66da7bb2a906bc382e63e7bc Gitweb: http://git.kernel.org/tip/8ab3820fd5b2896d66da7bb2a906bc382e63e7bc Author: Kees Cook <keescook@chromium.org> AuthorDate: Thu, 10 Oct 2013 17:18:14 -0700 Committer: H. Peter Anvin <hpa@linux.intel.com> CommitDate: Sun, 13 Oct 2013 03:12:07 -0700 x86, kaslr: Return location from decompress_kernel This allows decompress_kernel to return a new location for the kernel to be relocated to. Additionally, enforces CONFIG_PHYSICAL_START as the minimum relocation position when building with CONFIG_RELOCATABLE. With CONFIG_RANDOMIZE_BASE set, the choose_kernel_location routine will select a new location to decompress the kernel, though here it is presently a no-op. The kernel command line option "nokaslr" is introduced to bypass these routines. Signed-off-by: Kees Cook <keescook@chromium.org> Link: http://lkml.kernel.org/r/1381450698-28710-3-git-send-email-keescook@chromium.org Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> --- Documentation/kernel-parameters.txt | 4 ++++ arch/x86/Kconfig | 38 +++++++++++++++++++++++++++++++++---- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/aslr.c | 23 ++++++++++++++++++++++ arch/x86/boot/compressed/cmdline.c | 2 +- arch/x86/boot/compressed/head_32.S | 10 ++++++---- arch/x86/boot/compressed/head_64.S | 16 ++++++++++------ arch/x86/boot/compressed/misc.c | 8 ++++++-- arch/x86/boot/compressed/misc.h | 27 ++++++++++++++++++++------ 9 files changed, 106 insertions(+), 24 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index fcbb736..773fc4c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1975,6 +1975,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. noapic [SMP,APIC] Tells the kernel to not make use of any IOAPICs that may be present in the system. 
+ nokaslr [X86] + Disable kernel base offset ASLR (Address Space + Layout Randomization) if built into the kernel. + noautogroup Disable scheduler automatic task group creation. nobats [PPC] Do not use BATs for mapping kernel lowmem diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ee2fb9d..992701d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1722,16 +1722,46 @@ config RELOCATABLE Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address it has been loaded at and the compile time physical address - (CONFIG_PHYSICAL_START) is ignored. + (CONFIG_PHYSICAL_START) is used as the minimum location. -# Relocation on x86-32 needs some additional build support +config RANDOMIZE_BASE + bool "Randomize the address of the kernel image" + depends on RELOCATABLE + depends on !HIBERNATION + default n + ---help--- + Randomizes the physical and virtual address at which the + kernel image is decompressed, as a security feature that + deters exploit attempts relying on knowledge of the location + of kernel internals. + + Entropy is generated using the RDRAND instruction if it + is supported. If not, then RDTSC is used, if supported. If + neither RDRAND nor RDTSC are supported, then no randomness + is introduced. + + The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET, + and aligned according to PHYSICAL_ALIGN. + +config RANDOMIZE_BASE_MAX_OFFSET + hex "Maximum ASLR offset allowed" + depends on RANDOMIZE_BASE + default "0x10000000" + range 0x0 0x10000000 + ---help--- + Determines the maximal offset in bytes that will be applied to the + kernel when Address Space Layout Randomization (ASLR) is active. + Must be less than or equal to the actual physical memory on the + system. This must be a power of two. 
+ +# Relocation on x86 needs some additional build support config X86_NEED_RELOCS def_bool y - depends on X86_32 && RELOCATABLE + depends on RANDOMIZE_BASE || (X86_32 && RELOCATABLE) config PHYSICAL_ALIGN hex "Alignment value to which kernel should be aligned" - default "0x1000000" + default "0x200000" range 0x2000 0x1000000 if X86_32 range 0x200000 0x1000000 if X86_64 ---help--- diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 3312f1b..ae8b5db 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -27,7 +27,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \ - $(obj)/piggy.o $(obj)/cpuflags.o + $(obj)/piggy.o $(obj)/cpuflags.o $(obj)/aslr.o $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c new file mode 100644 index 0000000..b73cc66 --- /dev/null +++ b/arch/x86/boot/compressed/aslr.c @@ -0,0 +1,23 @@ +#include "misc.h" + +#ifdef CONFIG_RANDOMIZE_BASE + +unsigned char *choose_kernel_location(unsigned char *input, + unsigned long input_size, + unsigned char *output, + unsigned long output_size) +{ + unsigned long choice = (unsigned long)output; + + if (cmdline_find_option_bool("nokaslr")) { + debug_putstr("KASLR disabled...\n"); + goto out; + } + + /* XXX: choose random location. 
*/ + +out: + return (unsigned char *)choice; +} + +#endif /* CONFIG_RANDOMIZE_BASE */ diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c index bffd73b..b68e303 100644 --- a/arch/x86/boot/compressed/cmdline.c +++ b/arch/x86/boot/compressed/cmdline.c @@ -1,6 +1,6 @@ #include "misc.h" -#ifdef CONFIG_EARLY_PRINTK +#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE static unsigned long fs; static inline void set_fs(unsigned long seg) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 5d6f689..9116aac 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -117,9 +117,11 @@ preferred_addr: addl %eax, %ebx notl %eax andl %eax, %ebx -#else - movl $LOAD_PHYSICAL_ADDR, %ebx + cmpl $LOAD_PHYSICAL_ADDR, %ebx + jge 1f #endif + movl $LOAD_PHYSICAL_ADDR, %ebx +1: /* Target address to relocate to for decompression */ addl $z_extract_offset, %ebx @@ -191,14 +193,14 @@ relocated: leal boot_heap(%ebx), %eax pushl %eax /* heap area */ pushl %esi /* real mode pointer */ - call decompress_kernel + call decompress_kernel /* returns kernel location in %eax */ addl $24, %esp /* * Jump to the decompressed kernel. 
*/ xorl %ebx, %ebx - jmp *%ebp + jmp *%eax /* * Stack and heap for uncompression diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index c337422..c5c1ae0 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -94,9 +94,11 @@ ENTRY(startup_32) addl %eax, %ebx notl %eax andl %eax, %ebx -#else - movl $LOAD_PHYSICAL_ADDR, %ebx + cmpl $LOAD_PHYSICAL_ADDR, %ebx + jge 1f #endif + movl $LOAD_PHYSICAL_ADDR, %ebx +1: /* Target address to relocate to for decompression */ addl $z_extract_offset, %ebx @@ -269,9 +271,11 @@ preferred_addr: addq %rax, %rbp notq %rax andq %rax, %rbp -#else - movq $LOAD_PHYSICAL_ADDR, %rbp + cmpq $LOAD_PHYSICAL_ADDR, %rbp + jge 1f #endif + movq $LOAD_PHYSICAL_ADDR, %rbp +1: /* Target address to relocate to for decompression */ leaq z_extract_offset(%rbp), %rbx @@ -339,13 +343,13 @@ relocated: movl $z_input_len, %ecx /* input_len */ movq %rbp, %r8 /* output target address */ movq $z_output_len, %r9 /* decompressed length */ - call decompress_kernel + call decompress_kernel /* returns kernel location in %rax */ popq %rsi /* * Jump to the decompressed kernel. */ - jmp *%rbp + jmp *%rax .code32 no_longmode: diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 434f077..7138768 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -395,7 +395,7 @@ static void parse_elf(void *output) free(phdrs); } -asmlinkage void decompress_kernel(void *rmode, memptr heap, +asmlinkage void *decompress_kernel(void *rmode, memptr heap, unsigned char *input_data, unsigned long input_len, unsigned char *output, @@ -422,6 +422,10 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, free_mem_ptr = heap; /* Heap */ free_mem_end_ptr = heap + BOOT_HEAP_SIZE; + output = choose_kernel_location(input_data, input_len, + output, output_len); + + /* Validate memory location choices. 
*/ if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) error("Destination address inappropriately aligned"); #ifdef CONFIG_X86_64 @@ -441,5 +445,5 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, parse_elf(output); handle_relocations(output, output_len); debug_putstr("done.\nBooting the kernel.\n"); - return; + return output; } diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 674019d..9077af7 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -39,23 +39,38 @@ static inline void debug_putstr(const char *s) #endif -#ifdef CONFIG_EARLY_PRINTK - +#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE /* cmdline.c */ int cmdline_find_option(const char *option, char *buffer, int bufsize); int cmdline_find_option_bool(const char *option); +#endif -/* early_serial_console.c */ -extern int early_serial_base; -void console_init(void); +#if CONFIG_RANDOMIZE_BASE +/* aslr.c */ +unsigned char *choose_kernel_location(unsigned char *input, + unsigned long input_size, + unsigned char *output, + unsigned long output_size); #else +static inline +unsigned char *choose_kernel_location(unsigned char *input, + unsigned long input_size, + unsigned char *output, + unsigned long output_size) +{ + return output; +} +#endif +#ifdef CONFIG_EARLY_PRINTK /* early_serial_console.c */ +extern int early_serial_base; +void console_init(void); +#else static const int early_serial_base; static inline void console_init(void) { } - #endif #endif
Commit-ID: 5bfce5ef55cbe78ee2ee6e97f2e26a8a582008f3 Gitweb: http://git.kernel.org/tip/5bfce5ef55cbe78ee2ee6e97f2e26a8a582008f3 Author: Kees Cook <keescook@chromium.org> AuthorDate: Thu, 10 Oct 2013 17:18:15 -0700 Committer: H. Peter Anvin <hpa@linux.intel.com> CommitDate: Sun, 13 Oct 2013 03:12:12 -0700 x86, kaslr: Provide randomness functions Adds potential sources of randomness: RDRAND, RDTSC, or the i8254. This moves the pre-alternatives inline rdrand function into the header so both pieces of code can use it. Availability of RDRAND is then controlled by CONFIG_ARCH_RANDOM, if someone wants to disable it even for kASLR. Signed-off-by: Kees Cook <keescook@chromium.org> Link: http://lkml.kernel.org/r/1381450698-28710-4-git-send-email-keescook@chromium.org Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> --- arch/x86/boot/compressed/aslr.c | 53 +++++++++++++++++++++++++++++++++++++++ arch/x86/boot/compressed/misc.h | 2 ++ arch/x86/include/asm/archrandom.h | 21 ++++++++++++++++ arch/x86/kernel/cpu/rdrand.c | 14 ----------- 4 files changed, 76 insertions(+), 14 deletions(-) diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index b73cc66..14b24e0 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -1,6 +1,59 @@ #include "misc.h" #ifdef CONFIG_RANDOMIZE_BASE +#include <asm/msr.h> +#include <asm/archrandom.h> + +#define I8254_PORT_CONTROL 0x43 +#define I8254_PORT_COUNTER0 0x40 +#define I8254_CMD_READBACK 0xC0 +#define I8254_SELECT_COUNTER0 0x02 +#define I8254_STATUS_NOTREADY 0x40 +static inline u16 i8254(void) +{ + u16 status, timer; + + do { + outb(I8254_PORT_CONTROL, + I8254_CMD_READBACK | I8254_SELECT_COUNTER0); + status = inb(I8254_PORT_COUNTER0); + timer = inb(I8254_PORT_COUNTER0); + timer |= inb(I8254_PORT_COUNTER0) << 8; + } while (status & I8254_STATUS_NOTREADY); + + return timer; +} + +static unsigned long get_random_long(void) +{ + unsigned long random; + + if (has_cpuflag(X86_FEATURE_RDRAND)) { 
+ debug_putstr("KASLR using RDRAND...\n"); + if (rdrand_long(&random)) + return random; + } + + if (has_cpuflag(X86_FEATURE_TSC)) { + uint32_t raw; + + debug_putstr("KASLR using RDTSC...\n"); + rdtscl(raw); + + /* Only use the low bits of rdtsc. */ + random = raw & 0xffff; + } else { + debug_putstr("KASLR using i8254...\n"); + random = i8254(); + } + + /* Extend timer bits poorly... */ + random |= (random << 16); +#ifdef CONFIG_X86_64 + random |= (random << 32); +#endif + return random; +} unsigned char *choose_kernel_location(unsigned char *input, unsigned long input_size, diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 9077af7..0782eb0 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -52,6 +52,8 @@ unsigned char *choose_kernel_location(unsigned char *input, unsigned long input_size, unsigned char *output, unsigned long output_size); +/* cpuflags.c */ +bool has_cpuflag(int flag); #else static inline unsigned char *choose_kernel_location(unsigned char *input, diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index 0d9ec77..e6a9245 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -39,6 +39,20 @@ #ifdef CONFIG_ARCH_RANDOM +/* Instead of arch_get_random_long() when alternatives haven't run. 
*/ +static inline int rdrand_long(unsigned long *v) +{ + int ok; + asm volatile("1: " RDRAND_LONG "\n\t" + "jc 2f\n\t" + "decl %0\n\t" + "jnz 1b\n\t" + "2:" + : "=r" (ok), "=a" (*v) + : "0" (RDRAND_RETRY_LOOPS)); + return ok; +} + #define GET_RANDOM(name, type, rdrand, nop) \ static inline int name(type *v) \ { \ @@ -68,6 +82,13 @@ GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3); #endif /* CONFIG_X86_64 */ +#else + +static inline int rdrand_long(unsigned long *v) +{ + return 0; +} + #endif /* CONFIG_ARCH_RANDOM */ extern void x86_init_rdrand(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index 88db010..384df51 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -31,20 +31,6 @@ static int __init x86_rdrand_setup(char *s) } __setup("nordrand", x86_rdrand_setup); -/* We can't use arch_get_random_long() here since alternatives haven't run */ -static inline int rdrand_long(unsigned long *v) -{ - int ok; - asm volatile("1: " RDRAND_LONG "\n\t" - "jc 2f\n\t" - "decl %0\n\t" - "jnz 1b\n\t" - "2:" - : "=r" (ok), "=a" (*v) - : "0" (RDRAND_RETRY_LOOPS)); - return ok; -} - /* * Force a reseed cycle; we are architecturally guaranteed a reseed * after no more than 512 128-bit chunks of random data. This also
Commit-ID: 82fa9637a2ba285bcc7c5050c73010b2c1b3d803 Gitweb: http://git.kernel.org/tip/82fa9637a2ba285bcc7c5050c73010b2c1b3d803 Author: Kees Cook <keescook@chromium.org> AuthorDate: Thu, 10 Oct 2013 17:18:16 -0700 Committer: H. Peter Anvin <hpa@linux.intel.com> CommitDate: Sun, 13 Oct 2013 03:12:19 -0700 x86, kaslr: Select random position from e820 maps Counts available alignment positions across all e820 maps, and chooses one randomly for the new kernel base address, making sure not to collide with unsafe memory areas. Signed-off-by: Kees Cook <keescook@chromium.org> Link: http://lkml.kernel.org/r/1381450698-28710-5-git-send-email-keescook@chromium.org Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> --- arch/x86/boot/compressed/aslr.c | 193 +++++++++++++++++++++++++++++++++++++++- arch/x86/boot/compressed/misc.c | 10 +-- arch/x86/boot/compressed/misc.h | 8 ++ 3 files changed, 202 insertions(+), 9 deletions(-) diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index 14b24e0..0595798 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -3,6 +3,7 @@ #ifdef CONFIG_RANDOMIZE_BASE #include <asm/msr.h> #include <asm/archrandom.h> +#include <asm/e820.h> #define I8254_PORT_CONTROL 0x43 #define I8254_PORT_COUNTER0 0x40 @@ -55,20 +56,210 @@ static unsigned long get_random_long(void) return random; } +struct mem_vector { + unsigned long start; + unsigned long size; +}; + +#define MEM_AVOID_MAX 5 +struct mem_vector mem_avoid[MEM_AVOID_MAX]; + +static bool mem_contains(struct mem_vector *region, struct mem_vector *item) +{ + /* Item at least partially before region. */ + if (item->start < region->start) + return false; + /* Item at least partially after region. */ + if (item->start + item->size > region->start + region->size) + return false; + return true; +} + +static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) +{ + /* Item one is entirely before item two. 
*/ + if (one->start + one->size <= two->start) + return false; + /* Item one is entirely after item two. */ + if (one->start >= two->start + two->size) + return false; + return true; +} + +static void mem_avoid_init(unsigned long input, unsigned long input_size, + unsigned long output, unsigned long output_size) +{ + u64 initrd_start, initrd_size; + u64 cmd_line, cmd_line_size; + unsigned long unsafe, unsafe_len; + char *ptr; + + /* + * Avoid the region that is unsafe to overlap during + * decompression (see calculations at top of misc.c). + */ + unsafe_len = (output_size >> 12) + 32768 + 18; + unsafe = (unsigned long)input + input_size - unsafe_len; + mem_avoid[0].start = unsafe; + mem_avoid[0].size = unsafe_len; + + /* Avoid initrd. */ + initrd_start = (u64)real_mode->ext_ramdisk_image << 32; + initrd_start |= real_mode->hdr.ramdisk_image; + initrd_size = (u64)real_mode->ext_ramdisk_size << 32; + initrd_size |= real_mode->hdr.ramdisk_size; + mem_avoid[1].start = initrd_start; + mem_avoid[1].size = initrd_size; + + /* Avoid kernel command line. */ + cmd_line = (u64)real_mode->ext_cmd_line_ptr << 32; + cmd_line |= real_mode->hdr.cmd_line_ptr; + /* Calculate size of cmd_line. */ + ptr = (char *)(unsigned long)cmd_line; + for (cmd_line_size = 0; ptr[cmd_line_size++]; ) + ; + mem_avoid[2].start = cmd_line; + mem_avoid[2].size = cmd_line_size; + + /* Avoid heap memory. */ + mem_avoid[3].start = (unsigned long)free_mem_ptr; + mem_avoid[3].size = BOOT_HEAP_SIZE; + + /* Avoid stack memory. */ + mem_avoid[4].start = (unsigned long)free_mem_end_ptr; + mem_avoid[4].size = BOOT_STACK_SIZE; +} + +/* Does this memory vector overlap a known avoided area? 
*/ +bool mem_avoid_overlap(struct mem_vector *img) +{ + int i; + + for (i = 0; i < MEM_AVOID_MAX; i++) { + if (mem_overlaps(img, &mem_avoid[i])) + return true; + } + + return false; +} + +unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET / CONFIG_PHYSICAL_ALIGN]; +unsigned long slot_max = 0; + +static void slots_append(unsigned long addr) +{ + /* Overflowing the slots list should be impossible. */ + if (slot_max >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET / + CONFIG_PHYSICAL_ALIGN) + return; + + slots[slot_max++] = addr; +} + +static unsigned long slots_fetch_random(void) +{ + /* Handle case of no slots stored. */ + if (slot_max == 0) + return 0; + + return slots[get_random_long() % slot_max]; +} + +static void process_e820_entry(struct e820entry *entry, + unsigned long minimum, + unsigned long image_size) +{ + struct mem_vector region, img; + + /* Skip non-RAM entries. */ + if (entry->type != E820_RAM) + return; + + /* Ignore entries entirely above our maximum. */ + if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET) + return; + + /* Ignore entries entirely below our minimum. */ + if (entry->addr + entry->size < minimum) + return; + + region.start = entry->addr; + region.size = entry->size; + + /* Potentially raise address to minimum location. */ + if (region.start < minimum) + region.start = minimum; + + /* Potentially raise address to meet alignment requirements. */ + region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); + + /* Did we raise the address above the bounds of this e820 region? */ + if (region.start > entry->addr + entry->size) + return; + + /* Reduce size by any delta from the original address. */ + region.size -= region.start - entry->addr; + + /* Reduce maximum size to fit end of image within maximum limit. */ + if (region.start + region.size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET) + region.size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - region.start; + + /* Walk each aligned slot and check for avoided areas. 
*/ + for (img.start = region.start, img.size = image_size ; + mem_contains(&region, &img) ; + img.start += CONFIG_PHYSICAL_ALIGN) { + if (mem_avoid_overlap(&img)) + continue; + slots_append(img.start); + } +} + +static unsigned long find_random_addr(unsigned long minimum, + unsigned long size) +{ + int i; + unsigned long addr; + + /* Make sure minimum is aligned. */ + minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); + + /* Verify potential e820 positions, appending to slots list. */ + for (i = 0; i < real_mode->e820_entries; i++) { + process_e820_entry(&real_mode->e820_map[i], minimum, size); + } + + return slots_fetch_random(); +} + unsigned char *choose_kernel_location(unsigned char *input, unsigned long input_size, unsigned char *output, unsigned long output_size) { unsigned long choice = (unsigned long)output; + unsigned long random; if (cmdline_find_option_bool("nokaslr")) { debug_putstr("KASLR disabled...\n"); goto out; } - /* XXX: choose random location. */ + /* Record the various known unsafe memory ranges. */ + mem_avoid_init((unsigned long)input, input_size, + (unsigned long)output, output_size); + + /* Walk e820 and find a random address. */ + random = find_random_addr(choice, output_size); + if (!random) { + debug_putstr("KASLR could not find suitable E820 region...\n"); + goto out; + } + + /* Always enforce the minimum. 
*/ + if (random < choice) + goto out; + choice = random; out: return (unsigned char *)choice; } diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 7138768..196eaf3 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -112,14 +112,8 @@ struct boot_params *real_mode; /* Pointer to real-mode data */ void *memset(void *s, int c, size_t n); void *memcpy(void *dest, const void *src, size_t n); -#ifdef CONFIG_X86_64 -#define memptr long -#else -#define memptr unsigned -#endif - -static memptr free_mem_ptr; -static memptr free_mem_end_ptr; +memptr free_mem_ptr; +memptr free_mem_end_ptr; static char *vidmem; static int vidport; diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 0782eb0..24e3e56 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -23,7 +23,15 @@ #define BOOT_BOOT_H #include "../ctype.h" +#ifdef CONFIG_X86_64 +#define memptr long +#else +#define memptr unsigned +#endif + /* misc.c */ +extern memptr free_mem_ptr; +extern memptr free_mem_end_ptr; extern struct boot_params *real_mode; /* Pointer to real-mode data */ void __putstr(const char *s); #define error_putstr(__x) __putstr(__x)
Commit-ID: f32360ef6608434a032dc7ad262d45e9693c27f3 Gitweb: http://git.kernel.org/tip/f32360ef6608434a032dc7ad262d45e9693c27f3 Author: Kees Cook <keescook@chromium.org> AuthorDate: Thu, 10 Oct 2013 17:18:17 -0700 Committer: H. Peter Anvin <hpa@linux.intel.com> CommitDate: Sun, 13 Oct 2013 03:12:24 -0700 x86, kaslr: Report kernel offset on panic When the system panics, include the kernel offset in the report to assist in debugging. Signed-off-by: Kees Cook <keescook@chromium.org> Link: http://lkml.kernel.org/r/1381450698-28710-6-git-send-email-keescook@chromium.org Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> --- arch/x86/kernel/setup.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f0de629..1708862 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -824,6 +824,20 @@ static void __init trim_low_memory_range(void) } /* + * Dump out kernel offset information on panic. + */ +static int +dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) +{ + pr_emerg("Kernel Offset: 0x%lx from 0x%lx " + "(relocation range: 0x%lx-0x%lx)\n", + (unsigned long)&_text - __START_KERNEL, __START_KERNEL, + __START_KERNEL_map, MODULES_VADDR-1); + + return 0; +} + +/* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures * for initialization. Note, the efi init code path is determined by the @@ -1242,3 +1256,15 @@ void __init i386_reserve_resources(void) } #endif /* CONFIG_X86_32 */ + +static struct notifier_block kernel_offset_notifier = { + .notifier_call = dump_kernel_offset +}; + +static int __init register_kernel_offset_dumper(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &kernel_offset_notifier); + return 0; +} +__initcall(register_kernel_offset_dumper);
Commit-ID: 6145cfe394a7f138f6b64491c5663f97dba12450 Gitweb: http://git.kernel.org/tip/6145cfe394a7f138f6b64491c5663f97dba12450 Author: Kees Cook <keescook@chromium.org> AuthorDate: Thu, 10 Oct 2013 17:18:18 -0700 Committer: H. Peter Anvin <hpa@linux.intel.com> CommitDate: Sun, 13 Oct 2013 03:13:13 -0700 x86, kaslr: Raise the maximum virtual address to -1 GiB on x86_64 On 64-bit, this raises the maximum location to -1 GiB (from -1.5 GiB), the upper limit currently, since the kernel fixmap page mappings need to be moved to use the other 1 GiB (which would be the theoretical limit when building with -mcmodel=kernel). Signed-off-by: Kees Cook <keescook@chromium.org> Link: http://lkml.kernel.org/r/1381450698-28710-7-git-send-email-keescook@chromium.org Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> --- arch/x86/Kconfig | 16 +++++++++++++--- arch/x86/include/asm/page_64_types.h | 15 ++++++++++++--- arch/x86/include/asm/pgtable_64_types.h | 2 +- arch/x86/mm/init_32.c | 3 +++ 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 992701d..51f4399 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1746,13 +1746,23 @@ config RANDOMIZE_BASE config RANDOMIZE_BASE_MAX_OFFSET hex "Maximum ASLR offset allowed" depends on RANDOMIZE_BASE - default "0x10000000" - range 0x0 0x10000000 + range 0x0 0x20000000 if X86_32 + default "0x20000000" if X86_32 + range 0x0 0x40000000 if X86_64 + default "0x40000000" if X86_64 ---help--- Determines the maximal offset in bytes that will be applied to the kernel when Address Space Layout Randomization (ASLR) is active. Must be less than or equal to the actual physical memory on the - system. This must be a power of two. + system. This must be a multiple of CONFIG_PHYSICAL_ALIGN. + + On 32-bit this is limited to 512MiB. + + On 64-bit this is limited by how the kernel fixmap page table is + positioned, so this cannot be larger that 1GiB currently. 
Normally + there is a 512MiB to 1.5GiB split between kernel and modules. When + this is raised above the 512MiB default, the modules area will + shrink to compensate, up to the current maximum 1GiB to 1GiB split. # Relocation on x86 needs some additional build support config X86_NEED_RELOCS diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 43dcd80..8de6d9c 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -39,9 +39,18 @@ #define __VIRTUAL_MASK_SHIFT 47 /* - * Kernel image size is limited to 512 MB (see level2_kernel_pgt in - * arch/x86/kernel/head_64.S), and it is mapped here: + * Kernel image size is limited to 1GiB due to the fixmap living in the + * next 1GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). Use + * 512MiB by default, leaving 1.5GiB for modules once the page tables + * are fully set up. If kernel ASLR is configured, it can extend the + * kernel page table mapping, reducing the size of the modules area. 
*/ -#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) +#define KERNEL_IMAGE_SIZE_DEFAULT (512 * 1024 * 1024) +#if defined(CONFIG_RANDOMIZE_BASE) && \ + CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE_DEFAULT +#define KERNEL_IMAGE_SIZE CONFIG_RANDOMIZE_BASE_MAX_OFFSET +#else +#define KERNEL_IMAGE_SIZE KERNEL_IMAGE_SIZE_DEFAULT +#endif #endif /* _ASM_X86_PAGE_64_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 2d88344..c883bf7 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -58,7 +58,7 @@ typedef struct { pteval_t pte; } pte_t; #define VMALLOC_START _AC(0xffffc90000000000, UL) #define VMALLOC_END _AC(0xffffe8ffffffffff, UL) #define VMEMMAP_START _AC(0xffffea0000000000, UL) -#define MODULES_VADDR _AC(0xffffffffa0000000, UL) +#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) #define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 4287f1f..5bdc543 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -806,6 +806,9 @@ void __init mem_init(void) BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END); #undef high_memory #undef __FIXADDR_TOP +#ifdef CONFIG_RANDOMIZE_BASE + BUILD_BUG_ON(CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE); +#endif #ifdef CONFIG_HIGHMEM BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
Commit-ID: 6e6a4932b0f569b1a5bb4fcbf5dde1b1a42f01bb Gitweb: http://git.kernel.org/tip/6e6a4932b0f569b1a5bb4fcbf5dde1b1a42f01bb Author: H. Peter Anvin <hpa@linux.intel.com> AuthorDate: Sun, 13 Oct 2013 04:08:56 -0700 Committer: H. Peter Anvin <hpa@linux.intel.com> CommitDate: Sun, 13 Oct 2013 04:08:56 -0700 x86, boot: Rename get_flags() and check_flags() to *_cpuflags() When a function is used in more than one file it may not be possible to immediately tell from context what the intended meaning is. As such, it is more important that the naming be self-evident. Thus, change get_flags() to get_cpuflags(). For consistency, change check_flags() to check_cpuflags() even though it is only used in cpucheck.c. Link: http://lkml.kernel.org/r/1381450698-28710-2-git-send-email-keescook@chromium.org Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> --- arch/x86/boot/compressed/cpuflags.c | 2 +- arch/x86/boot/cpucheck.c | 14 +++++++------- arch/x86/boot/cpuflags.c | 2 +- arch/x86/boot/cpuflags.h | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/boot/compressed/cpuflags.c b/arch/x86/boot/compressed/cpuflags.c index 931cba6..aa31346 100644 --- a/arch/x86/boot/compressed/cpuflags.c +++ b/arch/x86/boot/compressed/cpuflags.c @@ -4,7 +4,7 @@ bool has_cpuflag(int flag) { - get_flags(); + get_cpuflags(); return test_bit(flag, cpu.flags); } diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index e1f3c16..100a9a1 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -68,7 +68,7 @@ static int is_transmeta(void) } /* Returns a bitmask of which words we have error bits in */ -static int check_flags(void) +static int check_cpuflags(void) { u32 err; int i; @@ -101,8 +101,8 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) if (has_eflag(X86_EFLAGS_AC)) cpu.level = 4; - get_flags(); - err = check_flags(); + get_cpuflags(); + err = check_cpuflags(); if (test_bit(X86_FEATURE_LM, cpu.flags)) cpu.level = 
64; @@ -121,8 +121,8 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) eax &= ~(1 << 15); asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); - get_flags(); /* Make sure it really did something */ - err = check_flags(); + get_cpuflags(); /* Make sure it really did something */ + err = check_cpuflags(); } else if (err == 0x01 && !(err_flags[0] & ~(1 << X86_FEATURE_CX8)) && is_centaur() && cpu.model >= 6) { @@ -137,7 +137,7 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); set_bit(X86_FEATURE_CX8, cpu.flags); - err = check_flags(); + err = check_cpuflags(); } else if (err == 0x01 && is_transmeta()) { /* Transmeta might have masked feature bits in word 0 */ @@ -152,7 +152,7 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) : : "ecx", "ebx"); asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); - err = check_flags(); + err = check_cpuflags(); } if (err_flags_ptr) diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c index b02544a..a9fcb7c 100644 --- a/arch/x86/boot/cpuflags.c +++ b/arch/x86/boot/cpuflags.c @@ -65,7 +65,7 @@ static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d) ); } -void get_flags(void) +void get_cpuflags(void) { u32 max_intel_level, max_amd_level; u32 tfms; diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h index 9bb4e25..ea97697 100644 --- a/arch/x86/boot/cpuflags.h +++ b/arch/x86/boot/cpuflags.h @@ -14,6 +14,6 @@ extern struct cpu_features cpu; extern u32 cpu_vendor[3]; int has_eflag(unsigned long mask); -void get_flags(void); +void get_cpuflags(void); #endif
* tip-bot for Kees Cook <tipbot@zytor.com> wrote: > Commit-ID: 5bfce5ef55cbe78ee2ee6e97f2e26a8a582008f3 > Gitweb: http://git.kernel.org/tip/5bfce5ef55cbe78ee2ee6e97f2e26a8a582008f3 > Author: Kees Cook <keescook@chromium.org> > AuthorDate: Thu, 10 Oct 2013 17:18:15 -0700 > Committer: H. Peter Anvin <hpa@linux.intel.com> > CommitDate: Sun, 13 Oct 2013 03:12:12 -0700 > > x86, kaslr: Provide randomness functions > > Adds potential sources of randomness: RDRAND, RDTSC, or the i8254. > > This moves the pre-alternatives inline rdrand function into the header so > both pieces of code can use it. Availability of RDRAND is then controlled > by CONFIG_ARCH_RANDOM, if someone wants to disable it even for kASLR. While reviewing this as a pre-pull-request, I noticed the following detail: > +static unsigned long get_random_long(void) > +{ > + unsigned long random; > + > + if (has_cpuflag(X86_FEATURE_RDRAND)) { > + debug_putstr("KASLR using RDRAND...\n"); > + if (rdrand_long(&random)) > + return random; > + } > + > + if (has_cpuflag(X86_FEATURE_TSC)) { > + uint32_t raw; > + > + debug_putstr("KASLR using RDTSC...\n"); > + rdtscl(raw); > + > + /* Only use the low bits of rdtsc. */ > + random = raw & 0xffff; > + } else { > + debug_putstr("KASLR using i8254...\n"); > + random = i8254(); > + } > + > + /* Extend timer bits poorly... */ > + random |= (random << 16); > +#ifdef CONFIG_X86_64 > + random |= (random << 32); > +#endif > + return random; > +} Why aren't the 3 sources of entropy XOR-ed together? Also, we talked about also adding system dependent entropy sources, such as memory layout or the DMI table - none of that seems to have happened. It's not like this function should be performance critical, it's run once per bootup, right? There's just no excuse for not maximizing available entropy in such a situation ... Thanks, Ingo
* Ingo Molnar <mingo@kernel.org> wrote:
>
> * tip-bot for Kees Cook <tipbot@zytor.com> wrote:
>
> > Commit-ID: 5bfce5ef55cbe78ee2ee6e97f2e26a8a582008f3
> > Gitweb: http://git.kernel.org/tip/5bfce5ef55cbe78ee2ee6e97f2e26a8a582008f3
> > Author: Kees Cook <keescook@chromium.org>
> > AuthorDate: Thu, 10 Oct 2013 17:18:15 -0700
> > Committer: H. Peter Anvin <hpa@linux.intel.com>
> > CommitDate: Sun, 13 Oct 2013 03:12:12 -0700
> >
> > x86, kaslr: Provide randomness functions
> >
> > Adds potential sources of randomness: RDRAND, RDTSC, or the i8254.
> >
> > This moves the pre-alternatives inline rdrand function into the header so
> > both pieces of code can use it. Availability of RDRAND is then controlled
> > by CONFIG_ARCH_RANDOM, if someone wants to disable it even for kASLR.
>
> While reviewing this as a pre-pull-request, I noticed the following
> detail:
>
> > +static unsigned long get_random_long(void)
> > +{
> > + unsigned long random;
> > +
> > + if (has_cpuflag(X86_FEATURE_RDRAND)) {
> > + debug_putstr("KASLR using RDRAND...\n");
> > + if (rdrand_long(&random))
> > + return random;
> > + }
> > +
> > + if (has_cpuflag(X86_FEATURE_TSC)) {
> > + uint32_t raw;
> > +
> > + debug_putstr("KASLR using RDTSC...\n");
> > + rdtscl(raw);
> > +
> > + /* Only use the low bits of rdtsc. */
> > + random = raw & 0xffff;
> > + } else {
> > + debug_putstr("KASLR using i8254...\n");
> > + random = i8254();
> > + }
> > +
> > + /* Extend timer bits poorly... */
> > + random |= (random << 16);
> > +#ifdef CONFIG_X86_64
> > + random |= (random << 32);
> > +#endif
> > + return random;
> > +}
>
> Why aren't the 3 sources of entropy XOR-ed together?
>
> Also, we talked about also adding system dependent entropy sources, such
> as memory layout or the DMI table - none of that seems to have happened.
>
> It's not like this function should be performance critical, it's run once
> per bootup, right? There's just no excuse for not maximizing available
> entropy in such a situation ...
Another problem I noticed is that the RANDOMIZE_BASE Kconfig text does not
match the actual sources of entropy:
Entropy is generated using the RDRAND instruction if it
is supported. If not, then RDTSC is used, if supported. If
neither RDRAND nor RDTSC are supported, then no randomness
is introduced.
(i8254 is missing.)
Nor does the help text explain an important detail: what granularity does
the randomization have and roughly how many bits of the address are
randomized if people use the default values?
Thanks,
Ingo
On 11/11/2013 10:20 AM, Ingo Molnar wrote: > > Why aren't the 3 sources of entropy XOR-ed together? > Note we don't want to poke i8254 if we have any other sources, as the i8254 may not be present on newer systems and in some other cases not safe to touch. We can XOR in the TSC with RDRAND although it seems a bit odd. HOWEVER... + /* Only use the low bits of rdtsc. */ + random = raw & 0xffff; Why?! All that can do is throw away randomness... + /* Extend timer bits poorly... */ + random |= (random << 16); +#ifdef CONFIG_X86_64 + random |= (random << 32); +#endif For smearing out bits, a better way is usually to multiply with a large prime number (preferably a circular multiply.) > Also, we talked about also adding system dependent entropy sources, such > as memory layout or the DMI table - none of that seems to have happened. > > It's not like this function should be performance critical, it's run once > per bootup, right? There's just no excuse for not maximizing available > entropy in such a situation ... -hpa
On Mon, Nov 11, 2013 at 10:31 AM, Ingo Molnar <mingo@kernel.org> wrote: > > * Ingo Molnar <mingo@kernel.org> wrote: > >> >> * tip-bot for Kees Cook <tipbot@zytor.com> wrote: >> >> > Commit-ID: 5bfce5ef55cbe78ee2ee6e97f2e26a8a582008f3 >> > Gitweb: http://git.kernel.org/tip/5bfce5ef55cbe78ee2ee6e97f2e26a8a582008f3 >> > Author: Kees Cook <keescook@chromium.org> >> > AuthorDate: Thu, 10 Oct 2013 17:18:15 -0700 >> > Committer: H. Peter Anvin <hpa@linux.intel.com> >> > CommitDate: Sun, 13 Oct 2013 03:12:12 -0700 >> > >> > x86, kaslr: Provide randomness functions >> > >> > Adds potential sources of randomness: RDRAND, RDTSC, or the i8254. >> > >> > This moves the pre-alternatives inline rdrand function into the header so >> > both pieces of code can use it. Availability of RDRAND is then controlled >> > by CONFIG_ARCH_RANDOM, if someone wants to disable it even for kASLR. >> >> While reviewing this as a pre-pull-request, I noticed the following >> detail: >> >> > +static unsigned long get_random_long(void) >> > +{ >> > + unsigned long random; >> > + >> > + if (has_cpuflag(X86_FEATURE_RDRAND)) { >> > + debug_putstr("KASLR using RDRAND...\n"); >> > + if (rdrand_long(&random)) >> > + return random; >> > + } >> > + >> > + if (has_cpuflag(X86_FEATURE_TSC)) { >> > + uint32_t raw; >> > + >> > + debug_putstr("KASLR using RDTSC...\n"); >> > + rdtscl(raw); >> > + >> > + /* Only use the low bits of rdtsc. */ >> > + random = raw & 0xffff; >> > + } else { >> > + debug_putstr("KASLR using i8254...\n"); >> > + random = i8254(); >> > + } >> > + >> > + /* Extend timer bits poorly... */ >> > + random |= (random << 16); >> > +#ifdef CONFIG_X86_64 >> > + random |= (random << 32); >> > +#endif >> > + return random; >> > +} >> >> Why aren't the 3 sources of entropy XOR-ed together? Ah, excellent suggestion. There's no reason they couldn't be. I can rework that function to do that. 
>> Also, we talked about also adding system dependent entropy sources, such >> as memory layout or the DMI table - none of that seems to have happened. It seemed like those things didn't contribute as much entropy as the 3 already in use, but I could investigate how to distill those things down into entropy. Perhaps just XORing the start and length of every e820 area? DMI I'll need to dig into... >> It's not like this function should be performance critical, it's run once >> per bootup, right? There's just no excuse for not maximizing available >> entropy in such a situation ... Fair point. Is memory layout and DMI used for system entropy later in boot? > Another problem I noticed is that the RANDOMIZE_BASE Kconfig text does not > match the actual sources of entropy: > > Entropy is generated using the RDRAND instruction if it > is supported. If not, then RDTSC is used, if supported. If > neither RDRAND nor RDTSC are supported, then no randomness > is introduced. > > (i8254 is missing.) Ah! Yes, thanks for catching that. I will fix that. > Nor does the help text explain an important detail: what granularity does > the randomization have and roughly how many bits of the address are > randomized if people use the default values? Yeah, true -- that seems like a good place to describe the limits. Would you like the series updated, or patches on top? -- Kees Cook Chrome OS Security
On Mon, Nov 11, 2013 at 11:27 AM, H. Peter Anvin <hpa@zytor.com> wrote: > On 11/11/2013 10:20 AM, Ingo Molnar wrote: >> >> Why aren't the 3 sources of entropy XOR-ed together? >> > > Note we don't want to poke i8254 if we have any other sources, as the > i8254 may not be present on newer systems and in some other cases not > safe to touch. We can XOR in the TSC with RDRAND although it seems a > bit odd. I await the final decision! :) > HOWEVER... > > + /* Only use the low bits of rdtsc. */ > + random = raw & 0xffff; > > Why?! All that can do is throw away randomness... Due to the 2GiB addressing and 2MiB alignment, we can never use more than 10 bits of entropy at maximum. The existing patches only use 9 bits (due to page table layouts) on 64-bit and 8 bits on 32-bit. > + /* Extend timer bits poorly... */ > + random |= (random << 16); > +#ifdef CONFIG_X86_64 > + random |= (random << 32); > +#endif I did this so that any portion of the returned value could be used for the 10 bit mask. > For smearing out bits, a better way is usually to multiply with a large > prime number (preferably a circular multiply.) This shouldn't be needed since we're only using the low 10 bits. How would you like to see this function updated to make that more clear? More comments? -Kees -- Kees Cook Chrome OS Security
On 11/11/2013 11:32 AM, Kees Cook wrote:
>>>
>>> Why aren't the 3 sources of entropy XOR-ed together?
>
> Ah, excellent suggestion. There's no reason they couldn't be. I can
> rework that function to do that.
>
>>> Also, we talked about also adding system dependent entropy sources, such
>>> as memory layout or the DMI table - none of that seems to have happened.
>
> It seemed like those things didn't contribute as much entropy as the 3
> already in use, but I could investigate how to distill those things
> down into entropy. Perhaps just XORing the start and length of every
> e820 area? DMI I'll need to dig into...
>
DMI and ACPI are relatively straightforward: they are really just blocks
of memory with magic pointers. Since you don't actually need to parse
them, it is mostly just a matter of finding those blocks of memory and
hashing them.
You could also hash the BDA - the first 0x500 bytes of memory - although
that starts to get into the "what about weird nontraditional platforms"
space.
A *MAJOR* issue with DMI and ACPI is that the way they are located is
firmware dependent: there is a search algorithm defined for BIOS
platforms, whereas on EFI I believe it requires doing runtime calls to
find things.
Of course, perhaps we could just hash struct boot_params as a starting
point?
-hpa
On 11/11/2013 11:37 AM, Kees Cook wrote: > On Mon, Nov 11, 2013 at 11:27 AM, H. Peter Anvin <hpa@zytor.com> wrote: >> On 11/11/2013 10:20 AM, Ingo Molnar wrote: >>> >>> Why aren't the 3 sources of entropy XOR-ed together? >> >> Note we don't want to poke i8254 if we have any other sources, as the >> i8254 may not be present on newer systems and in some other cases not >> safe to touch. We can XOR in the TSC with RDRAND although it seems a >> bit odd. > > I await the final decision! :) > Use the i8254 if and only if nothing else is available, it isn't safe to touch on some newer systems. >> HOWEVER... >> >> + /* Only use the low bits of rdtsc. */ >> + random = raw & 0xffff; >> >> Why?! All that can do is throw away randomness... > > Due to the 2GiB addressing and 2MiB alignment, we can never use more > than 10 bits of entropy at maximum. The existing patches only use 9 > bits (due to page table layouts) on 64-bit and 8 bits on 32-bit. The point is that you are already doing that masking somewhere else, it is just odd. >> + /* Extend timer bits poorly... */ >> + random |= (random << 16); >> +#ifdef CONFIG_X86_64 >> + random |= (random << 32); >> +#endif > > I did this so that any portion of the returned value could be used for > the 10 bit mask. Why? It is standard practice to only use the low bits, and in light of the above it is even more bizarre. However, doing a circular multiply might still spread in some additional entropy at least in the case of a kexec boot on a non-RDRAND system. >> For smearing out bits, a better way is usually to multiply with a large >> prime number (preferably a circular multiply.) > > This shouldn't be needed since we're only using the low 10 bits. None of the above is needed in that case, although a spreading multiply will make it less likely that you are throwing away the highest entropy bits. > How would you like to see this function updated to make that more > clear? More comments? -hpa
* H. Peter Anvin <hpa@zytor.com> wrote:
> On 11/11/2013 11:37 AM, Kees Cook wrote:
> > On Mon, Nov 11, 2013 at 11:27 AM, H. Peter Anvin <hpa@zytor.com> wrote:
> >> On 11/11/2013 10:20 AM, Ingo Molnar wrote:
> >>>
> >>> Why aren't the 3 sources of entropy XOR-ed together?
> >>
> >> Note we don't want to poke i8254 if we have any other sources, as the
> >> i8254 may not be present on newer systems and in some other cases not
> >> safe to touch. We can XOR in the TSC with RDRAND although it seems a
> >> bit odd.
> >
> > I await the final decision! :)
> >
>
> Use the i8254 if and only if nothing else is available, it isn't safe to
> touch on some newer systems.
I concur - the i8254 is probably also a low entropy source, given that this
all runs during early bootup.
In any case there's no excuse for not mixing RDRAND and RDTSC entropy.
Thanks,
Ingo
On 11/11/2013 11:58 AM, Ingo Molnar wrote: >> >> Use the i8254 if and only if nothing else is available, it isn't safe to >> touch on some newer systems. > > I concur - the i8254 is probably also a low entry source, given that this > all runs early bootup. > Well, it is a 16-bit 1.19318 MHz counter so at least on older systems it is likely to provide at least a modicum of entropy. On systems with a TSC it is likely to be redundant with the TSC. > In any case there's no excuse for not mixing RDRAND and RDTSC entropy. -hpa
* H. Peter Anvin <hpa@zytor.com> wrote:
> Of course, perhaps we could just hash struct boot_params as a starting
> point?
Yeah, that would be a good first step, before trying to parse DMI or ACPI.
Anything that is reasonably system dependent and gives at least some
amount of unpredictability.
Thanks,
Ingo
* H. Peter Anvin <hpa@zytor.com> wrote:
> On 11/11/2013 11:58 AM, Ingo Molnar wrote:
> >>
> >> Use the i8254 if and only if nothing else is available, it isn't safe to
> >> touch on some newer systems.
> >
> > I concur - the i8254 is probably also a low entry source, given that this
> > all runs early bootup.
> >
>
> Well, it is a 16-bit 1.19318 MHz counter so at least on older systems it
> is likely to provide at least a modicum on entropy. On systems with a
> TSC it is likely to be redundant with the TSC.
Yeah.
Thanks,
Ingo
On Mon, Nov 11, 2013 at 12:07 PM, Ingo Molnar <mingo@kernel.org> wrote:
>
> * H. Peter Anvin <hpa@zytor.com> wrote:
>
>> Of course, perhaps we could just hash struct boot_params as a starting
>> point?
>
> Yeah, that would be a good first step, before trying to parse DMI or ACPI.
> Anything that is reasonably system dependent and gives at least some
> amount of unpredicability.
Given the very limited nature of the early boot environment, what
would you recommend for the hashing routine itself? There's nothing in
boot/ that does any kind of "real" crypto hashing. Should I just XOR
the entire contents of boot_params together?
-Kees
--
Kees Cook
Chrome OS Security
XOR and rotate is probably a good start.
Kees Cook <keescook@chromium.org> wrote:
>On Mon, Nov 11, 2013 at 12:07 PM, Ingo Molnar <mingo@kernel.org> wrote:
>>
>> * H. Peter Anvin <hpa@zytor.com> wrote:
>>
>>> Of course, perhaps we could just hash struct boot_params as a
>starting
>>> point?
>>
>> Yeah, that would be a good first step, before trying to parse DMI or
>ACPI.
>> Anything that is reasonably system dependent and gives at least some
>> amount of unpredicability.
>
>Given the very limited nature of the early boot environment, what
>would you recommend for the hashing routine itself? There's nothing in
>boot/ that does any kind of "real" crypto hashing. Should I just XOR
>the entire contents of boot_params together?
>
>-Kees
--
Sent from my mobile phone. Please pardon brevity and lack of formatting.
Hi! > > x86, kaslr: Provide randomness functions > > > > Adds potential sources of randomness: RDRAND, RDTSC, or the i8254. > > > > This moves the pre-alternatives inline rdrand function into the header so > > both pieces of code can use it. Availability of RDRAND is then controlled > > by CONFIG_ARCH_RANDOM, if someone wants to disable it even for kASLR. > > While reviewing this as a pre-pull-request, I noticed the following > detail: > > > +static unsigned long get_random_long(void) > > +{ > > + unsigned long random; > > + > > + if (has_cpuflag(X86_FEATURE_RDRAND)) { > > + debug_putstr("KASLR using RDRAND...\n"); > > + if (rdrand_long(&random)) > > + return random; > > + } > > + > > + if (has_cpuflag(X86_FEATURE_TSC)) { > > + uint32_t raw; > > + > > + debug_putstr("KASLR using RDTSC...\n"); > > + rdtscl(raw); > > + > > + /* Only use the low bits of rdtsc. */ > > + random = raw & 0xffff; > > + } else { > > + debug_putstr("KASLR using i8254...\n"); > > + random = i8254(); > > + } > > + > > + /* Extend timer bits poorly... */ > > + random |= (random << 16); > > +#ifdef CONFIG_X86_64 > > + random |= (random << 32); > > +#endif > > + return random; > > +} > > Why aren't the 3 sources of entropy XOR-ed together? > > Also, we talked about also adding system dependent entropy sources, such > as memory layout or the DMI table - none of that seems to have > happened. > It's not like this function should be performance critical, it's run once > per bootup, right? There's just no excuse for not maximizing available > entropy in such a situation ... If it is not performance critical, why not use "/dev/random" infrastructure? Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On 11/13/2013 10:16 AM, Pavel Machek wrote:
>
> If it is not performance critical, why not use "/dev/random"
> infrastructure?
>
Because it doesn't exist yet?
-hpa
On Wed 2013-11-13 10:40:40, H. Peter Anvin wrote: > On 11/13/2013 10:16 AM, Pavel Machek wrote: > > > > If it is not performance critical, why not use "/dev/random" > > infrastructure? > > > > Because it doesn't exist yet? Would it be feasible to initialize /dev/random earlier instead of reinventing it? Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
No, it literally doesn't exist yet. As in it isn't in memory at the point this is run.
Pavel Machek <pavel@ucw.cz> wrote:
>On Wed 2013-11-13 10:40:40, H. Peter Anvin wrote:
>> On 11/13/2013 10:16 AM, Pavel Machek wrote:
>> >
>> > If it is not performance critical, why not use "/dev/random"
>> > infrastructure?
>> >
>>
>> Because it doesn't exist yet?
>
>Would it be feasible to initialize /dev/random earlier instead of
>reinventing it?
> Pavel
--
Sent from my mobile phone. Please pardon brevity and lack of formatting.