All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yinghai Lu <yinghai@kernel.org>
To: Matt Fleming <matt.fleming@intel.com>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Bjorn Helgaas <bhelgaas@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Jiri Kosina <jkosina@suse.cz>,
	Borislav Petkov <bp@suse.de>, Baoquan He <bhe@redhat.com>,
	linux-kernel@vger.kernel.org, linux-efi@vger.kernel.org,
	linux-pci@vger.kernel.org, Yinghai Lu <yinghai@kernel.org>,
	Kees Cook <keescook@chromium.org>
Subject: [PATCH v2 07/15] x86, kaslr, 64bit: set new or extra ident_mapping
Date: Wed,  4 Mar 2015 00:00:40 -0800	[thread overview]
Message-ID: <1425456048-16236-8-git-send-email-yinghai@kernel.org> (raw)
In-Reply-To: <1425456048-16236-1-git-send-email-yinghai@kernel.org>

KASLR will support placing the randomized VO above 4G, so we need to set
up an identity mapping for that range even when we come from the
startup_32 path.

At the same time, when booting from a 64-bit bootloader, the bootloader
sets up the identity mapping and boots via the ZO startup_64 entry.
The pages used for that page table then need to be avoided when selecting
the new random VO base; otherwise the decompressor will overwrite the
page table.

One solution: walk the page table and find every page it uses, so that
those pages can be taken into account in every mem_avoid check.
But kexec could put those pages anywhere, and we would need extra code.

Other solution: create a new identity mapping instead, so that the pages
for the page table sit in the _pgtable area of ZO, which is already
covered by the mem_avoid array.

So _pgtable will be shared by the 32-bit and 64-bit paths to reduce init_size.

The buffer size needs to be increased. We need to cover the old VO, params,
cmdline and the new VO; in the extreme case all of them could cross a 512G
boundary, so we will need 1+(2+2)*4 pages with 2M mappings.

Cc: Kees Cook <keescook@chromium.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Borislav Petkov <bp@suse.de>
Cc: Matt Fleming <matt.fleming@intel.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 arch/x86/boot/compressed/aslr.c     | 28 +++++++++++
 arch/x86/boot/compressed/head_64.S  |  4 +-
 arch/x86/boot/compressed/misc_pgt.c | 96 +++++++++++++++++++++++++++++++++++++
 arch/x86/include/asm/boot.h         | 13 +++++
 4 files changed, 139 insertions(+), 2 deletions(-)
 create mode 100644 arch/x86/boot/compressed/misc_pgt.c

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index e8486a5..10ed3c7 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -1,3 +1,8 @@
+#ifdef CONFIG_X86_64
+#define __pa(x)  ((unsigned long)(x))
+#define __va(x)  ((void *)((unsigned long)(x)))
+#endif
+
 #include "misc.h"
 
 #include <asm/msr.h>
@@ -21,6 +26,8 @@ struct kaslr_setup_data {
 	__u8 data[1];
 } kaslr_setup_data;
 
+#include "misc_pgt.c"
+
 #define I8254_PORT_CONTROL	0x43
 #define I8254_PORT_COUNTER0	0x40
 #define I8254_CMD_READBACK	0xC0
@@ -160,6 +167,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
 	unsafe = (unsigned long)input + input_size;
 	mem_avoid[0].start = unsafe;
 	mem_avoid[0].size = unsafe_len;
+	fill_linux64_pagetable(output, init_size);
 
 	/* Avoid initrd. */
 	initrd_start  = (u64)real_mode->ext_ramdisk_image << 32;
@@ -168,6 +176,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
 	initrd_size |= real_mode->hdr.ramdisk_size;
 	mem_avoid[1].start = initrd_start;
 	mem_avoid[1].size = initrd_size;
+	/* don't need to set mapping for initrd */
 
 	/* Avoid kernel command line. */
 	cmd_line  = (u64)real_mode->ext_cmd_line_ptr << 32;
@@ -178,10 +187,25 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
 		;
 	mem_avoid[2].start = cmd_line;
 	mem_avoid[2].size = cmd_line_size;
+	fill_linux64_pagetable(cmd_line, cmd_line_size);
 
 	/* Avoid params */
 	mem_avoid[3].start = (unsigned long)real_mode;
 	mem_avoid[3].size = sizeof(*real_mode);
+	fill_linux64_pagetable((unsigned long)real_mode, sizeof(*real_mode));
+}
+
+static void init_linux64_pagetable(void)
+{
+	struct setup_data *ptr;
+
+	ptr = (struct setup_data *)(unsigned long)real_mode->hdr.setup_data;
+	while (ptr) {
+		fill_linux64_pagetable((unsigned long)ptr,
+				       sizeof(*ptr) + ptr->len);
+
+		ptr = (struct setup_data *)(unsigned long)ptr->next;
+	}
 }
 
 /* Does this memory vector overlap a known avoided area? */
@@ -346,6 +370,7 @@ unsigned char *choose_kernel_location(struct boot_params *params,
 #endif
 	add_kaslr_setup_data(params, 1);
 
+	init_linux64_pagetable();
 	/* Record the various known unsafe memory ranges. */
 	mem_avoid_init((unsigned long)input, input_size,
 		       (unsigned long)output, init_size);
@@ -362,6 +387,9 @@ unsigned char *choose_kernel_location(struct boot_params *params,
 		goto out;
 
 	choice = random;
+
+	fill_linux64_pagetable(choice, init_size);
+	switch_linux64_pagetable();
 out:
 	return (unsigned char *)choice;
 }
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 69015b5..1b6e34a 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -125,7 +125,7 @@ ENTRY(startup_32)
 	/* Initialize Page tables to 0 */
 	leal	pgtable(%ebx), %edi
 	xorl	%eax, %eax
-	movl	$((4096*6)/4), %ecx
+	movl	$(BOOT_INIT_PGT_SIZE/4), %ecx
 	rep	stosl
 
 	/* Build Level 4 */
@@ -477,4 +477,4 @@ boot_stack_end:
 	.section ".pgtable","a",@nobits
 	.balign 4096
 pgtable:
-	.fill 6*4096, 1, 0
+	.fill BOOT_PGT_SIZE, 1, 0
diff --git a/arch/x86/boot/compressed/misc_pgt.c b/arch/x86/boot/compressed/misc_pgt.c
new file mode 100644
index 0000000..afc73bf
--- /dev/null
+++ b/arch/x86/boot/compressed/misc_pgt.c
@@ -0,0 +1,96 @@
+
+#ifdef CONFIG_X86_64
+#include <asm/init.h>
+#include <asm/pgtable.h>
+
+#include "../../mm/ident_map.c"
+
+struct alloc_pgt_data {
+	unsigned char *pgt_buf;
+	unsigned long pgt_buf_size;
+	unsigned long pgt_buf_offset;
+};
+
+static void *alloc_pgt_page(void *context)
+{
+	struct alloc_pgt_data *d = (struct alloc_pgt_data *)context;
+	unsigned char *p = (unsigned char *)d->pgt_buf;
+
+	if (d->pgt_buf_offset >= d->pgt_buf_size) {
+		debug_putstr("out of pgt_buf in misc.c\n");
+		return NULL;
+	}
+
+	p += d->pgt_buf_offset;
+	d->pgt_buf_offset += PAGE_SIZE;
+
+	return p;
+}
+
+/*
+ * Use a normal definition of memset() from string.c. There are already
+ * included header files which expect a definition of memset() and by
+ * the time we define memset macro, it is too late.
+ */
+#undef memset
+#define memzero(s, n)   memset((s), 0, (n))
+
+unsigned long __force_order;
+static struct alloc_pgt_data pgt_data;
+static struct x86_mapping_info mapping_info;
+static pgd_t *level4p;
+
+extern unsigned char _pgtable[];
+static void fill_linux64_pagetable(unsigned long start, unsigned long size)
+{
+	unsigned long end = start + size;
+
+	if (!level4p) {
+		pgt_data.pgt_buf_offset = 0;
+		mapping_info.alloc_pgt_page = alloc_pgt_page;
+		mapping_info.context = &pgt_data;
+		mapping_info.pmd_flag = __PAGE_KERNEL_LARGE_EXEC;
+
+		/*
+		 * come from startup_32 ?
+		 * then cr3 is _pgtable, we can reuse it.
+		 */
+		level4p = (pgd_t *)read_cr3();
+		if ((unsigned long)level4p == (unsigned long)_pgtable) {
+			pgt_data.pgt_buf = (unsigned char *)_pgtable +
+						 BOOT_INIT_PGT_SIZE;
+			pgt_data.pgt_buf_size = BOOT_PGT_SIZE -
+						 BOOT_INIT_PGT_SIZE;
+
+			debug_putstr("boot via startup_32\n");
+		} else {
+			pgt_data.pgt_buf = (unsigned char *)_pgtable;
+			pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
+
+			debug_putstr("boot via startup_64\n");
+			level4p = (pgd_t *)alloc_pgt_page(&pgt_data);
+		}
+		memset((unsigned char *)pgt_data.pgt_buf, 0,
+			 pgt_data.pgt_buf_size);
+	}
+
+	/* align boundary to 2M */
+	start = round_down(start, PMD_SIZE);
+	end = round_up(end, PMD_SIZE);
+	if (start < end)
+		kernel_ident_mapping_init(&mapping_info, level4p, start, end);
+}
+
+static void switch_linux64_pagetable(void)
+{
+	write_cr3((unsigned long)level4p);
+}
+
+#else
+static void fill_linux64_pagetable(unsigned long start, unsigned long size)
+{
+}
+static void switch_linux64_pagetable(void)
+{
+}
+#endif
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 4fa687a..3795a77 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -32,7 +32,20 @@
 #endif /* !CONFIG_KERNEL_BZIP2 */
 
 #ifdef CONFIG_X86_64
+
 #define BOOT_STACK_SIZE	0x4000
+
+#define BOOT_INIT_PGT_SIZE (6*4096)
+#ifdef CONFIG_RANDOMIZE_BASE
+/*
+ * 17 pages to cover for kernel, param, cmd_line, random kernel
+ * if all cross 512G boundary.
+ */
+#define BOOT_PGT_SIZE (BOOT_INIT_PGT_SIZE + (11*4096))
+#else
+#define BOOT_PGT_SIZE BOOT_INIT_PGT_SIZE
+#endif
+
 #else
 #define BOOT_STACK_SIZE	0x1000
 #endif
-- 
1.8.4.5


  parent reply	other threads:[~2015-03-04  8:04 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-03-04  8:00 [PATCH v2 00/15] x86, boot: clean up kasl and setup_data handling Yinghai Lu
2015-03-04  8:00 ` [PATCH v2 01/15] x86, kaslr: Use init_size instead of run_size Yinghai Lu
2015-03-06 13:55   ` Borislav Petkov
2015-03-06 18:44     ` Yinghai Lu
2015-03-06 18:55       ` Kees Cook
2015-03-06 18:55         ` Kees Cook
2015-03-06 19:28         ` Yinghai Lu
2015-03-06 19:56           ` Kees Cook
2015-03-06 19:56             ` Kees Cook
2015-03-07  0:52             ` Yinghai Lu
2015-03-07  0:52               ` Yinghai Lu
2015-03-04  8:00 ` [PATCH v2 02/15] x86, boot: move ZO to end of buffer Yinghai Lu
2015-03-06 13:58   ` Borislav Petkov
2015-03-04  8:00 ` [PATCH v2 03/15] x86, boot: keep data from ZO boot stage to VO kernel stage Yinghai Lu
2015-03-04  8:00 ` [PATCH v2 04/15] x86, kaslr: get kaslr_enabled back correctly Yinghai Lu
2015-03-04  8:00   ` Yinghai Lu
2015-03-04 10:16   ` Borislav Petkov
2015-03-04 15:54     ` Jiri Kosina
2015-03-04 18:12       ` Yinghai Lu
2015-03-04 18:12         ` Yinghai Lu
2015-03-04 19:41         ` Ingo Molnar
2015-03-04 19:41           ` Ingo Molnar
2015-03-05  2:58         ` joeyli
2015-03-05  3:20           ` Yinghai Lu
2015-03-04 18:06     ` Yinghai Lu
2015-03-04 18:56       ` Yinghai Lu
2015-03-04 20:00       ` Ingo Molnar
2015-03-04 20:00         ` Ingo Molnar
2015-03-04 21:32         ` Yinghai Lu
2015-03-06 13:33           ` Borislav Petkov
2015-03-06 17:49             ` Yinghai Lu
2015-03-06 17:49               ` Yinghai Lu
2015-03-07 20:50               ` Borislav Petkov
2015-03-06 19:50             ` Yinghai Lu
2015-03-06 19:50               ` Yinghai Lu
2015-03-06 19:53               ` Yinghai Lu
2015-03-06 19:53                 ` Yinghai Lu
2015-03-07 21:05                 ` Borislav Petkov
2015-03-07 21:11                   ` Yinghai Lu
2015-03-07 20:56               ` Borislav Petkov
2015-03-04  8:00 ` [PATCH v2 05/15] x86, kaslr: consolidate the mem_avoid filling Yinghai Lu
2015-03-04  8:00   ` Yinghai Lu
2015-03-04  8:00 ` [PATCH v2 06/15] x86, boot: split kernel_ident_mapping_init into another file Yinghai Lu
2015-03-04  8:00 ` Yinghai Lu [this message]
2015-03-04  8:00 ` [PATCH v2 08/15] x86: Kill E820_RESERVED_KERN Yinghai Lu
2015-03-04  8:00   ` Yinghai Lu
2015-03-04  8:00 ` [PATCH v2 09/15] x86, efi: copy SETUP_EFI data and access directly Yinghai Lu
2015-03-04  8:00   ` Yinghai Lu
2015-03-04  8:00 ` [PATCH v2 10/15] x86, of: let add_dtb reserve by itself Yinghai Lu
2015-03-04  8:00 ` [PATCH v2 11/15] x86, boot: Add add_pci handler for SETUP_PCI Yinghai Lu
2015-03-04  8:00 ` [PATCH v2 12/15] x86: kill not used setup_data handling code Yinghai Lu
2015-03-04  8:00   ` Yinghai Lu
2015-03-04  8:00 ` [PATCH v2 13/15] x86, pci: convert SETUP_PCI data to list Yinghai Lu
2015-03-04  8:00 ` [PATCH v2 14/15] x86, boot: copy rom to kernel space Yinghai Lu
2015-03-04  8:00 ` [PATCH v2 15/15] x86, pci: export SETUP_PCI data via sysfs Yinghai Lu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1425456048-16236-8-git-send-email-yinghai@kernel.org \
    --to=yinghai@kernel.org \
    --cc=bhe@redhat.com \
    --cc=bhelgaas@google.com \
    --cc=bp@suse.de \
    --cc=hpa@zytor.com \
    --cc=jkosina@suse.cz \
    --cc=keescook@chromium.org \
    --cc=linux-efi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=matt.fleming@intel.com \
    --cc=mingo@redhat.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.