linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] kaslr related bug fix
@ 2017-04-26 10:39 Baoquan He
  2017-04-26 10:39 ` [PATCH 1/2] x86/efi: Correct ident mapping of efi old_map when kalsr enabled Baoquan He
  2017-04-26 10:39 ` [PATCH 2/2] x86/KASLR: Use old ident map page table if physical randomization failed Baoquan He
  0 siblings, 2 replies; 10+ messages in thread
From: Baoquan He @ 2017-04-26 10:39 UTC (permalink / raw)
  To: linux-kernel; +Cc: mingo, keescook, thgarnie, dyoung, xlpang, Baoquan He

And the patch 2/2 is an urgent fix, should be sent to stable tree.
Patch 1/2 can be worked around by adding 'nokaslr' to kdump kernel
cmdline.

Baoquan He (2):
  x86/efi: Correct ident mapping of efi old_map when kalsr enabled
  x86/KASLR: Use old ident map page table if physical randomization
    failed

 arch/x86/boot/compressed/kaslr.c | 10 ++++++++--
 arch/x86/platform/efi/efi_64.c   | 35 +++++++++++++++++++++++++++--------
 2 files changed, 35 insertions(+), 10 deletions(-)

-- 
2.5.5

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/2] x86/efi: Correct ident mapping of efi old_map when kalsr enabled
  2017-04-26 10:39 [PATCH 0/2] kaslr related bug fix Baoquan He
@ 2017-04-26 10:39 ` Baoquan He
  2017-04-26 10:43   ` Baoquan He
  2017-04-26 10:39 ` [PATCH 2/2] x86/KASLR: Use old ident map page table if physical randomization failed Baoquan He
  1 sibling, 1 reply; 10+ messages in thread
From: Baoquan He @ 2017-04-26 10:39 UTC (permalink / raw)
  To: linux-kernel
  Cc: mingo, keescook, thgarnie, dyoung, xlpang, Baoquan He,
	Matt Fleming, Ard Biesheuvel, Thomas Gleixner, H. Peter Anvin,
	x86, linux-efi

For EFI with old_map enabled, Kernel will panic when kaslr is enabled.

The root cause is the ident mapping is not built correctly in this case.

For nokaslr kernel, PAGE_OFFSET is 0xffff880000000000 which is PGDIR_SIZE
aligned. We can borrow the pud table from direct mapping safely. Given a
physical address X, we have pud_index(X) == pud_index(__va(X)). However,
for kaslr kernel, PAGE_OFFSET is PUD_SIZE aligned. For a given physical
address X, pud_index(X) != pud_index(__va(X)). We can't only copy pgd entry
from direct mapping to build ident mapping, instead need copy pud entry
one by one from direct mapping.

So fix it in this patch.

The panic message is like below, an emty PUD or a wrong PUD.

[    0.233007] BUG: unable to handle kernel paging request at 000000007febd57e
[    0.233899] IP: 0x7febd57e
[    0.234000] PGD 1025a067
[    0.234000] PUD 0
[    0.234000]
[    0.234000] Oops: 0010 [#1] SMP
[    0.234000] Modules linked in:
[    0.234000] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.11.0-rc8+ #125
[    0.234000] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
[    0.234000] task: ffffffffafe104c0 task.stack: ffffffffafe00000
[    0.234000] RIP: 0010:0x7febd57e
[    0.234000] RSP: 0000:ffffffffafe03d98 EFLAGS: 00010086
[    0.234000] RAX: ffff8c9e3fff9540 RBX: 000000007c4b6000 RCX: 0000000000000480
[    0.234000] RDX: 0000000000000030 RSI: 0000000000000480 RDI: 000000007febd57e
[    0.234000] RBP: ffffffffafe03e40 R08: 0000000000000001 R09: 000000007c4b6000
[    0.234000] R10: ffffffffafa71a40 R11: 20786c6c2478303d R12: 0000000000000030
[    0.234000] R13: 0000000000000246 R14: ffff8c9e3c4198d8 R15: 0000000000000480
[    0.234000] FS:  0000000000000000(0000) GS:ffff8c9e3fa00000(0000) knlGS:0000000000000000
[    0.234000] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    0.234000] CR2: 000000007febd57e CR3: 000000000fe09000 CR4: 00000000000406b0
[    0.234000] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[    0.234000] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[    0.234000] Call Trace:
[    0.234000]  ? efi_call+0x58/0x90
[    0.234000]  ? printk+0x58/0x6f
[    0.234000]  efi_enter_virtual_mode+0x3c5/0x50d
[    0.234000]  start_kernel+0x40f/0x4b8
[    0.234000]  ? set_init_arg+0x55/0x55
[    0.234000]  ? early_idt_handler_array+0x120/0x120
[    0.234000]  x86_64_start_reservations+0x24/0x26
[    0.234000]  x86_64_start_kernel+0x14c/0x16f
[    0.234000]  start_cpu+0x14/0x14
[    0.234000] Code:  Bad RIP value.
[    0.234000] RIP: 0x7febd57e RSP: ffffffffafe03d98
[    0.234000] CR2: 000000007febd57e
[    0.234000] ---[ end trace d4ded46ab8ab8ba9 ]---
[    0.234000] Kernel panic - not syncing: Attempted to kill the idle task!
[    0.234000] ---[ end Kernel panic - not syncing: Attempted to kill the idle task!

Signed-off-by: Baoquan He <bhe@redhat.com>
Signed-off-by: Dave Young <dyoung@redhat.com>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: linux-efi@vger.kernel.org
---
 arch/x86/platform/efi/efi_64.c | 35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 2ee7694..2e7baff 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -71,11 +71,12 @@ static void __init early_code_mapping_set_exec(int executable)
 
 pgd_t * __init efi_call_phys_prolog(void)
 {
-	unsigned long vaddress;
+	unsigned long vaddr, left_vaddr;
+	unsigned int num_entries;
 	pgd_t *save_pgd;
-
-	int pgd;
+	pud_t *pud, *pud_k;
 	int n_pgds;
+	int i;
 
 	if (!efi_enabled(EFI_OLD_MEMMAP)) {
 		save_pgd = (pgd_t *)read_cr3();
@@ -88,10 +89,22 @@ pgd_t * __init efi_call_phys_prolog(void)
 	n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
 	save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
 
-	for (pgd = 0; pgd < n_pgds; pgd++) {
-		save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE);
-		vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
-		set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
+	for (i = 0; i < n_pgds; i++) {
+		save_pgd[i] = *pgd_offset_k(i * PGDIR_SIZE);
+
+		vaddr = (unsigned long)__va(i * PGDIR_SIZE);
+		pud = pud_alloc_one(NULL, 0);
+
+		num_entries = PTRS_PER_PUD - pud_index(vaddr);
+		pud_k = pud_offset(pgd_offset_k(vaddr), vaddr);
+		memcpy(pud, pud_k, num_entries);
+		if (pud_index(vaddr) > 0) {
+			left_vaddr = vaddr + (num_entries * PUD_SIZE);
+			pud_k = pud_offset(pgd_offset_k(left_vaddr),
+					   left_vaddr);
+			memcpy(pud + num_entries, pud_k, pud_index(vaddr));
+		}
+		pgd_populate(NULL, pgd_offset_k(i * PGDIR_SIZE), pud);
 	}
 out:
 	__flush_tlb_all();
@@ -106,6 +119,8 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 	 */
 	int pgd_idx;
 	int nr_pgds;
+	pud_t *pud;
+	pgd_t *pgd;
 
 	if (!efi_enabled(EFI_OLD_MEMMAP)) {
 		write_cr3((unsigned long)save_pgd);
@@ -115,8 +130,12 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 
 	nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
 
-	for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++)
+	for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
+		pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
+		pud = (pud_t *)pgd_page_vaddr(*pgd);
+		pud_free(NULL, pud);
 		set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
+	}
 
 	kfree(save_pgd);
 
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/2] x86/KASLR: Use old ident map page table if physical randomization failed
  2017-04-26 10:39 [PATCH 0/2] kaslr related bug fix Baoquan He
  2017-04-26 10:39 ` [PATCH 1/2] x86/efi: Correct ident mapping of efi old_map when kalsr enabled Baoquan He
@ 2017-04-26 10:39 ` Baoquan He
  2017-04-26 10:49   ` Baoquan He
  2017-04-26 19:12   ` Kees Cook
  1 sibling, 2 replies; 10+ messages in thread
From: Baoquan He @ 2017-04-26 10:39 UTC (permalink / raw)
  To: linux-kernel
  Cc: mingo, keescook, thgarnie, dyoung, xlpang, Baoquan He,
	H. Peter Anvin, Thomas Gleixner, x86, Yinghai Lu,
	Borislav Petkov, Dave Jiang

Dave found when kdump kernel will reset to bios immediately if kaslr
is enabled and physical randomization failed to faind a new position
for kernel. But nokaslr works in this case.

The reason is kaslr will install a new page table for ident mapping,
while it missed to consider building ident mapping for original area
of kernel if kaslr failed on physical randomization.

In fact bootloaders including kexec/kdump have built ident mapping
for original place of kernel. We can only install new ident mapping
page table when physical kaslr succeeds. Otherwise we just keep the
old page table unchanged just like nokaslr does.

Signed-off-by: Baoquan He <bhe@redhat.com>
Signed-off-by: Dave Young <dyoung@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: x86@kernel.org
Cc: Kees Cook <keescook@chromium.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Thomas Garnier <thgarnie@google.com>
---
 arch/x86/boot/compressed/kaslr.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index e5eb0c3..7a8b443 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -650,10 +650,16 @@ void choose_random_location(unsigned long input,
 			add_identity_map(random_addr, output_size);
 			*output = random_addr;
 		}
+
+		/*
+		 * This actually loads the identity pagetable on x86_64.
+		 * And this should only be done only if a new position
+		 * is found. Otherwise we should keep the old page table
+		 * to make it be like nokaslr case.
+		 */
+		finalize_identity_maps();
 	}
 
-	/* This actually loads the identity pagetable on x86_64. */
-	finalize_identity_maps();
 
 	/* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */
 	if (IS_ENABLED(CONFIG_X86_64))
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] x86/efi: Correct ident mapping of efi old_map when kalsr enabled
  2017-04-26 10:39 ` [PATCH 1/2] x86/efi: Correct ident mapping of efi old_map when kalsr enabled Baoquan He
@ 2017-04-26 10:43   ` Baoquan He
  2017-04-26 14:49     ` Thomas Garnier
  0 siblings, 1 reply; 10+ messages in thread
From: Baoquan He @ 2017-04-26 10:43 UTC (permalink / raw)
  To: linux-kernel
  Cc: mingo, keescook, thgarnie, dyoung, xlpang, Matt Fleming,
	Ard Biesheuvel, Thomas Gleixner, H. Peter Anvin, x86, linux-efi

This bug will cause SGI uv 100 boot failure since SGI uv 100 can only
use efi old_map because of hardware. On rhel it failed all SGI uv series
since we haven't back ported fix for SGI uv 200/300.

On 04/26/17 at 06:39pm, Baoquan He wrote:
> For EFI with old_map enabled, Kernel will panic when kaslr is enabled.
> 
> The root cause is the ident mapping is not built correctly in this case.
> 
> For nokaslr kernel, PAGE_OFFSET is 0xffff880000000000 which is PGDIR_SIZE
> aligned. We can borrow the pud table from direct mapping safely. Given a
> physical address X, we have pud_index(X) == pud_index(__va(X)). However,
> for kaslr kernel, PAGE_OFFSET is PUD_SIZE aligned. For a given physical
> address X, pud_index(X) != pud_index(__va(X)). We can't only copy pgd entry
> from direct mapping to build ident mapping, instead need copy pud entry
> one by one from direct mapping.
> 
> So fix it in this patch.
> 
> The panic message is like below, an emty PUD or a wrong PUD.
> 
> [    0.233007] BUG: unable to handle kernel paging request at 000000007febd57e
> [    0.233899] IP: 0x7febd57e
> [    0.234000] PGD 1025a067
> [    0.234000] PUD 0
> [    0.234000]
> [    0.234000] Oops: 0010 [#1] SMP
> [    0.234000] Modules linked in:
> [    0.234000] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.11.0-rc8+ #125
> [    0.234000] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
> [    0.234000] task: ffffffffafe104c0 task.stack: ffffffffafe00000
> [    0.234000] RIP: 0010:0x7febd57e
> [    0.234000] RSP: 0000:ffffffffafe03d98 EFLAGS: 00010086
> [    0.234000] RAX: ffff8c9e3fff9540 RBX: 000000007c4b6000 RCX: 0000000000000480
> [    0.234000] RDX: 0000000000000030 RSI: 0000000000000480 RDI: 000000007febd57e
> [    0.234000] RBP: ffffffffafe03e40 R08: 0000000000000001 R09: 000000007c4b6000
> [    0.234000] R10: ffffffffafa71a40 R11: 20786c6c2478303d R12: 0000000000000030
> [    0.234000] R13: 0000000000000246 R14: ffff8c9e3c4198d8 R15: 0000000000000480
> [    0.234000] FS:  0000000000000000(0000) GS:ffff8c9e3fa00000(0000) knlGS:0000000000000000
> [    0.234000] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [    0.234000] CR2: 000000007febd57e CR3: 000000000fe09000 CR4: 00000000000406b0
> [    0.234000] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [    0.234000] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [    0.234000] Call Trace:
> [    0.234000]  ? efi_call+0x58/0x90
> [    0.234000]  ? printk+0x58/0x6f
> [    0.234000]  efi_enter_virtual_mode+0x3c5/0x50d
> [    0.234000]  start_kernel+0x40f/0x4b8
> [    0.234000]  ? set_init_arg+0x55/0x55
> [    0.234000]  ? early_idt_handler_array+0x120/0x120
> [    0.234000]  x86_64_start_reservations+0x24/0x26
> [    0.234000]  x86_64_start_kernel+0x14c/0x16f
> [    0.234000]  start_cpu+0x14/0x14
> [    0.234000] Code:  Bad RIP value.
> [    0.234000] RIP: 0x7febd57e RSP: ffffffffafe03d98
> [    0.234000] CR2: 000000007febd57e
> [    0.234000] ---[ end trace d4ded46ab8ab8ba9 ]---
> [    0.234000] Kernel panic - not syncing: Attempted to kill the idle task!
> [    0.234000] ---[ end Kernel panic - not syncing: Attempted to kill the idle task!
> 
> Signed-off-by: Baoquan He <bhe@redhat.com>
> Signed-off-by: Dave Young <dyoung@redhat.com>
> Cc: Matt Fleming <matt@codeblueprint.co.uk>
> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: "H. Peter Anvin" <hpa@zytor.com>
> Cc: x86@kernel.org
> Cc: linux-efi@vger.kernel.org
> ---
>  arch/x86/platform/efi/efi_64.c | 35 +++++++++++++++++++++++++++--------
>  1 file changed, 27 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
> index 2ee7694..2e7baff 100644
> --- a/arch/x86/platform/efi/efi_64.c
> +++ b/arch/x86/platform/efi/efi_64.c
> @@ -71,11 +71,12 @@ static void __init early_code_mapping_set_exec(int executable)
>  
>  pgd_t * __init efi_call_phys_prolog(void)
>  {
> -	unsigned long vaddress;
> +	unsigned long vaddr, left_vaddr;
> +	unsigned int num_entries;
>  	pgd_t *save_pgd;
> -
> -	int pgd;
> +	pud_t *pud, *pud_k;
>  	int n_pgds;
> +	int i;
>  
>  	if (!efi_enabled(EFI_OLD_MEMMAP)) {
>  		save_pgd = (pgd_t *)read_cr3();
> @@ -88,10 +89,22 @@ pgd_t * __init efi_call_phys_prolog(void)
>  	n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
>  	save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
>  
> -	for (pgd = 0; pgd < n_pgds; pgd++) {
> -		save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE);
> -		vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
> -		set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
> +	for (i = 0; i < n_pgds; i++) {
> +		save_pgd[i] = *pgd_offset_k(i * PGDIR_SIZE);
> +
> +		vaddr = (unsigned long)__va(i * PGDIR_SIZE);
> +		pud = pud_alloc_one(NULL, 0);
> +
> +		num_entries = PTRS_PER_PUD - pud_index(vaddr);
> +		pud_k = pud_offset(pgd_offset_k(vaddr), vaddr);
> +		memcpy(pud, pud_k, num_entries);
> +		if (pud_index(vaddr) > 0) {
> +			left_vaddr = vaddr + (num_entries * PUD_SIZE);
> +			pud_k = pud_offset(pgd_offset_k(left_vaddr),
> +					   left_vaddr);
> +			memcpy(pud + num_entries, pud_k, pud_index(vaddr));
> +		}
> +		pgd_populate(NULL, pgd_offset_k(i * PGDIR_SIZE), pud);
>  	}
>  out:
>  	__flush_tlb_all();
> @@ -106,6 +119,8 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
>  	 */
>  	int pgd_idx;
>  	int nr_pgds;
> +	pud_t *pud;
> +	pgd_t *pgd;
>  
>  	if (!efi_enabled(EFI_OLD_MEMMAP)) {
>  		write_cr3((unsigned long)save_pgd);
> @@ -115,8 +130,12 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
>  
>  	nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
>  
> -	for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++)
> +	for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
> +		pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
> +		pud = (pud_t *)pgd_page_vaddr(*pgd);
> +		pud_free(NULL, pud);
>  		set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
> +	}
>  
>  	kfree(save_pgd);
>  
> -- 
> 2.5.5
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] x86/KASLR: Use old ident map page table if physical randomization failed
  2017-04-26 10:39 ` [PATCH 2/2] x86/KASLR: Use old ident map page table if physical randomization failed Baoquan He
@ 2017-04-26 10:49   ` Baoquan He
  2017-04-26 19:12   ` Kees Cook
  1 sibling, 0 replies; 10+ messages in thread
From: Baoquan He @ 2017-04-26 10:49 UTC (permalink / raw)
  To: linux-kernel
  Cc: mingo, keescook, thgarnie, dyoung, xlpang, H. Peter Anvin,
	Thomas Gleixner, x86, Yinghai Lu, Borislav Petkov, Dave Jiang

On 04/26/17 at 06:39pm, Baoquan He wrote:
> Dave found when kdump kernel will reset to bios immediately if kaslr
	      ^~ this 'when' is redundent, sorry 
> is enabled and physical randomization failed to faind a new position
> for kernel. But nokaslr works in this case.
> 
> The reason is kaslr will install a new page table for ident mapping,
> while it missed to consider building ident mapping for original area
> of kernel if kaslr failed on physical randomization.
> 
> In fact bootloaders including kexec/kdump have built ident mapping
> for original place of kernel. We can only install new ident mapping
> page table when physical kaslr succeeds. Otherwise we just keep the
> old page table unchanged just like nokaslr does.
> 
> Signed-off-by: Baoquan He <bhe@redhat.com>
> Signed-off-by: Dave Young <dyoung@redhat.com>
> Cc: "H. Peter Anvin" <hpa@zytor.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: x86@kernel.org
> Cc: Kees Cook <keescook@chromium.org>
> Cc: Yinghai Lu <yinghai@kernel.org>
> Cc: Borislav Petkov <bp@suse.de>
> Cc: Dave Jiang <dave.jiang@intel.com>
> Cc: Thomas Garnier <thgarnie@google.com>
> ---
>  arch/x86/boot/compressed/kaslr.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> index e5eb0c3..7a8b443 100644
> --- a/arch/x86/boot/compressed/kaslr.c
> +++ b/arch/x86/boot/compressed/kaslr.c
> @@ -650,10 +650,16 @@ void choose_random_location(unsigned long input,
>  			add_identity_map(random_addr, output_size);
>  			*output = random_addr;
>  		}
> +
> +		/*
> +		 * This actually loads the identity pagetable on x86_64.
> +		 * And this should only be done only if a new position
> +		 * is found. Otherwise we should keep the old page table
> +		 * to make it be like nokaslr case.
> +		 */
> +		finalize_identity_maps();
>  	}
>  
> -	/* This actually loads the identity pagetable on x86_64. */
> -	finalize_identity_maps();
>  
>  	/* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */
>  	if (IS_ENABLED(CONFIG_X86_64))
> -- 
> 2.5.5
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] x86/efi: Correct ident mapping of efi old_map when kalsr enabled
  2017-04-26 10:43   ` Baoquan He
@ 2017-04-26 14:49     ` Thomas Garnier
  2017-04-27 10:31       ` Baoquan He
  0 siblings, 1 reply; 10+ messages in thread
From: Thomas Garnier @ 2017-04-26 14:49 UTC (permalink / raw)
  To: Baoquan He
  Cc: LKML, Ingo Molnar, Kees Cook, Dave Young, Xunlei Pang,
	Matt Fleming, Ard Biesheuvel, Thomas Gleixner, H. Peter Anvin,
	the arch/x86 maintainers, linux-efi

On Wed, Apr 26, 2017 at 3:43 AM, Baoquan He <bhe@redhat.com> wrote:
>
> This bug will cause SGI uv 100 boot failure since SGI uv 100 can only
> use efi old_map because of hardware. On rhel it failed all SGI uv series
> since we haven't back ported fix for SGI uv 200/300.
>
> On 04/26/17 at 06:39pm, Baoquan He wrote:
> > For EFI with old_map enabled, Kernel will panic when kaslr is enabled.
> >
> > The root cause is the ident mapping is not built correctly in this case.
> >
> > For nokaslr kernel, PAGE_OFFSET is 0xffff880000000000 which is PGDIR_SIZE
> > aligned. We can borrow the pud table from direct mapping safely. Given a
> > physical address X, we have pud_index(X) == pud_index(__va(X)). However,
> > for kaslr kernel, PAGE_OFFSET is PUD_SIZE aligned. For a given physical
> > address X, pud_index(X) != pud_index(__va(X)). We can't only copy pgd entry
> > from direct mapping to build ident mapping, instead need copy pud entry
> > one by one from direct mapping.
> >
> > So fix it in this patch.

Thanks for looking into this!

> >
> > The panic message is like below, an emty PUD or a wrong PUD.
> >
> > [    0.233007] BUG: unable to handle kernel paging request at 000000007febd57e
> > [    0.233899] IP: 0x7febd57e
> > [    0.234000] PGD 1025a067
> > [    0.234000] PUD 0
> > [    0.234000]
> > [    0.234000] Oops: 0010 [#1] SMP
> > [    0.234000] Modules linked in:
> > [    0.234000] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.11.0-rc8+ #125
> > [    0.234000] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
> > [    0.234000] task: ffffffffafe104c0 task.stack: ffffffffafe00000
> > [    0.234000] RIP: 0010:0x7febd57e
> > [    0.234000] RSP: 0000:ffffffffafe03d98 EFLAGS: 00010086
> > [    0.234000] RAX: ffff8c9e3fff9540 RBX: 000000007c4b6000 RCX: 0000000000000480
> > [    0.234000] RDX: 0000000000000030 RSI: 0000000000000480 RDI: 000000007febd57e
> > [    0.234000] RBP: ffffffffafe03e40 R08: 0000000000000001 R09: 000000007c4b6000
> > [    0.234000] R10: ffffffffafa71a40 R11: 20786c6c2478303d R12: 0000000000000030
> > [    0.234000] R13: 0000000000000246 R14: ffff8c9e3c4198d8 R15: 0000000000000480
> > [    0.234000] FS:  0000000000000000(0000) GS:ffff8c9e3fa00000(0000) knlGS:0000000000000000
> > [    0.234000] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [    0.234000] CR2: 000000007febd57e CR3: 000000000fe09000 CR4: 00000000000406b0
> > [    0.234000] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > [    0.234000] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> > [    0.234000] Call Trace:
> > [    0.234000]  ? efi_call+0x58/0x90
> > [    0.234000]  ? printk+0x58/0x6f
> > [    0.234000]  efi_enter_virtual_mode+0x3c5/0x50d
> > [    0.234000]  start_kernel+0x40f/0x4b8
> > [    0.234000]  ? set_init_arg+0x55/0x55
> > [    0.234000]  ? early_idt_handler_array+0x120/0x120
> > [    0.234000]  x86_64_start_reservations+0x24/0x26
> > [    0.234000]  x86_64_start_kernel+0x14c/0x16f
> > [    0.234000]  start_cpu+0x14/0x14
> > [    0.234000] Code:  Bad RIP value.
> > [    0.234000] RIP: 0x7febd57e RSP: ffffffffafe03d98
> > [    0.234000] CR2: 000000007febd57e
> > [    0.234000] ---[ end trace d4ded46ab8ab8ba9 ]---
> > [    0.234000] Kernel panic - not syncing: Attempted to kill the idle task!
> > [    0.234000] ---[ end Kernel panic - not syncing: Attempted to kill the idle task!
> >
> > Signed-off-by: Baoquan He <bhe@redhat.com>
> > Signed-off-by: Dave Young <dyoung@redhat.com>
> > Cc: Matt Fleming <matt@codeblueprint.co.uk>
> > Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> > Cc: Thomas Gleixner <tglx@linutronix.de>
> > Cc: Ingo Molnar <mingo@redhat.com>
> > Cc: "H. Peter Anvin" <hpa@zytor.com>
> > Cc: x86@kernel.org
> > Cc: linux-efi@vger.kernel.org
> > ---
> >  arch/x86/platform/efi/efi_64.c | 35 +++++++++++++++++++++++++++--------
> >  1 file changed, 27 insertions(+), 8 deletions(-)
> >
> > diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
> > index 2ee7694..2e7baff 100644
> > --- a/arch/x86/platform/efi/efi_64.c
> > +++ b/arch/x86/platform/efi/efi_64.c
> > @@ -71,11 +71,12 @@ static void __init early_code_mapping_set_exec(int executable)
> >
> >  pgd_t * __init efi_call_phys_prolog(void)
> >  {
> > -     unsigned long vaddress;
> > +     unsigned long vaddr, left_vaddr;
> > +     unsigned int num_entries;
> >       pgd_t *save_pgd;
> > -
> > -     int pgd;
> > +     pud_t *pud, *pud_k;
> >       int n_pgds;
> > +     int i;
> >
> >       if (!efi_enabled(EFI_OLD_MEMMAP)) {
> >               save_pgd = (pgd_t *)read_cr3();
> > @@ -88,10 +89,22 @@ pgd_t * __init efi_call_phys_prolog(void)
> >       n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
> >       save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
> >
> > -     for (pgd = 0; pgd < n_pgds; pgd++) {
> > -             save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE);
> > -             vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
> > -             set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
> > +     for (i = 0; i < n_pgds; i++) {
> > +             save_pgd[i] = *pgd_offset_k(i * PGDIR_SIZE);
> > +
> > +             vaddr = (unsigned long)__va(i * PGDIR_SIZE);
> > +             pud = pud_alloc_one(NULL, 0);

Please check if pud is NULL.

> > +
> > +             num_entries = PTRS_PER_PUD - pud_index(vaddr);
> > +             pud_k = pud_offset(pgd_offset_k(vaddr), vaddr);
> > +             memcpy(pud, pud_k, num_entries);
> > +             if (pud_index(vaddr) > 0) {

You are using pud_index(vaddr) 3 times, might be worth using a local variable.

> > +                     left_vaddr = vaddr + (num_entries * PUD_SIZE);
> > +                     pud_k = pud_offset(pgd_offset_k(left_vaddr),
> > +                                        left_vaddr);
> > +                     memcpy(pud + num_entries, pud_k, pud_index(vaddr));

I think this section (or the overall for loop) would benefit with a
comment explaining explaining why you are shifting the new PUD like
this.

> > +             }
> > +             pgd_populate(NULL, pgd_offset_k(i * PGDIR_SIZE), pud);
> >       }
> >  out:
> >       __flush_tlb_all();
> > @@ -106,6 +119,8 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
> >        */
> >       int pgd_idx;
> >       int nr_pgds;
> > +     pud_t *pud;
> > +     pgd_t *pgd;
> >
> >       if (!efi_enabled(EFI_OLD_MEMMAP)) {
> >               write_cr3((unsigned long)save_pgd);
> > @@ -115,8 +130,12 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
> >
> >       nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
> >
> > -     for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++)
> > +     for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
> > +             pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
> > +             pud = (pud_t *)pgd_page_vaddr(*pgd);
> > +             pud_free(NULL, pud);
> >               set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
> > +     }
> >
> >       kfree(save_pgd);
> >
> > --
> > 2.5.5
> >




-- 
Thomas

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] x86/KASLR: Use old ident map page table if physical randomization failed
  2017-04-26 10:39 ` [PATCH 2/2] x86/KASLR: Use old ident map page table if physical randomization failed Baoquan He
  2017-04-26 10:49   ` Baoquan He
@ 2017-04-26 19:12   ` Kees Cook
  2017-04-27  7:18     ` Baoquan He
  1 sibling, 1 reply; 10+ messages in thread
From: Kees Cook @ 2017-04-26 19:12 UTC (permalink / raw)
  To: Baoquan He
  Cc: LKML, Ingo Molnar, Thomas Garnier, Dave Young, Xunlei Pang,
	H. Peter Anvin, Thomas Gleixner, x86, Yinghai Lu,
	Borislav Petkov, Dave Jiang

On Wed, Apr 26, 2017 at 3:39 AM, Baoquan He <bhe@redhat.com> wrote:
> Dave found when kdump kernel will reset to bios immediately if kaslr
> is enabled and physical randomization failed to faind a new position
> for kernel. But nokaslr works in this case.
>
> The reason is kaslr will install a new page table for ident mapping,
> while it missed to consider building ident mapping for original area
> of kernel if kaslr failed on physical randomization.
>
> In fact bootloaders including kexec/kdump have built ident mapping
> for original place of kernel. We can only install new ident mapping
> page table when physical kaslr succeeds. Otherwise we just keep the
> old page table unchanged just like nokaslr does.
>
> Signed-off-by: Baoquan He <bhe@redhat.com>
> Signed-off-by: Dave Young <dyoung@redhat.com>
> Cc: "H. Peter Anvin" <hpa@zytor.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: x86@kernel.org
> Cc: Kees Cook <keescook@chromium.org>
> Cc: Yinghai Lu <yinghai@kernel.org>
> Cc: Borislav Petkov <bp@suse.de>
> Cc: Dave Jiang <dave.jiang@intel.com>
> Cc: Thomas Garnier <thgarnie@google.com>

Nice catch!

Acked-by: Kees Cook <keescook@chromium.org>

-Kees

> ---
>  arch/x86/boot/compressed/kaslr.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> index e5eb0c3..7a8b443 100644
> --- a/arch/x86/boot/compressed/kaslr.c
> +++ b/arch/x86/boot/compressed/kaslr.c
> @@ -650,10 +650,16 @@ void choose_random_location(unsigned long input,
>                         add_identity_map(random_addr, output_size);
>                         *output = random_addr;
>                 }
> +
> +               /*
> +                * This actually loads the identity pagetable on x86_64.
> +                * And this should only be done only if a new position
> +                * is found. Otherwise we should keep the old page table
> +                * to make it be like nokaslr case.
> +                */
> +               finalize_identity_maps();
>         }
>
> -       /* This actually loads the identity pagetable on x86_64. */
> -       finalize_identity_maps();
>
>         /* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */
>         if (IS_ENABLED(CONFIG_X86_64))
> --
> 2.5.5
>



-- 
Kees Cook
Pixel Security

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] x86/KASLR: Use old ident map page table if physical randomization failed
  2017-04-26 19:12   ` Kees Cook
@ 2017-04-27  7:18     ` Baoquan He
  0 siblings, 0 replies; 10+ messages in thread
From: Baoquan He @ 2017-04-27  7:18 UTC (permalink / raw)
  To: Kees Cook
  Cc: LKML, Ingo Molnar, Thomas Garnier, Dave Young, Xunlei Pang,
	H. Peter Anvin, Thomas Gleixner, x86, Yinghai Lu,
	Borislav Petkov, Dave Jiang

On 04/26/17 at 12:12pm, Kees Cook wrote:
> On Wed, Apr 26, 2017 at 3:39 AM, Baoquan He <bhe@redhat.com> wrote:
> > Dave found when kdump kernel will reset to bios immediately if kaslr
> > is enabled and physical randomization failed to faind a new position
> > for kernel. But nokaslr works in this case.
> >
> > The reason is kaslr will install a new page table for ident mapping,
> > while it missed to consider building ident mapping for original area
> > of kernel if kaslr failed on physical randomization.
> >
> > In fact bootloaders including kexec/kdump have built ident mapping
> > for original place of kernel. We can only install new ident mapping
> > page table when physical kaslr succeeds. Otherwise we just keep the
> > old page table unchanged just like nokaslr does.
> >
> > Signed-off-by: Baoquan He <bhe@redhat.com>
> > Signed-off-by: Dave Young <dyoung@redhat.com>
> > Cc: "H. Peter Anvin" <hpa@zytor.com>
> > Cc: Thomas Gleixner <tglx@linutronix.de>
> > Cc: Ingo Molnar <mingo@redhat.com>
> > Cc: x86@kernel.org
> > Cc: Kees Cook <keescook@chromium.org>
> > Cc: Yinghai Lu <yinghai@kernel.org>
> > Cc: Borislav Petkov <bp@suse.de>
> > Cc: Dave Jiang <dave.jiang@intel.com>
> > Cc: Thomas Garnier <thgarnie@google.com>
> 
> Nice catch!
> 
> Acked-by: Kees Cook <keescook@chromium.org>

Thanks, Kees.

Seems I forget telling this only happens in kexec/kdump kernel. Since
the ident mapping has been built for kexec/kdump in 1st kernel for the
whole memory by calling init_pgtable(). Here if physical randomizaiton
failed, it won't build ident mapping for the original area of kernel but
change to new page table '_pgtable'. Then kernel will reset to bios
immediately caused by no ident mapping.

While normal kernel won't be impacted because it comes here via
startup_32() and cr3 will be _pgtable already. In startup_32() ident
mapping is built for 0~4G area. In kaslr We just append to the existing
area instead of entirely overwriting it for on-demand ident mapping
building. So ident mapping for the original area of kernel is still
there.

I will post v2 with a improved patch log, and with your Acked-by.

> 
> -Kees
> 
> > ---
> >  arch/x86/boot/compressed/kaslr.c | 10 ++++++++--
> >  1 file changed, 8 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> > index e5eb0c3..7a8b443 100644
> > --- a/arch/x86/boot/compressed/kaslr.c
> > +++ b/arch/x86/boot/compressed/kaslr.c
> > @@ -650,10 +650,16 @@ void choose_random_location(unsigned long input,
> >                         add_identity_map(random_addr, output_size);
> >                         *output = random_addr;
> >                 }
> > +
> > +               /*
> > +                * This actually loads the identity pagetable on x86_64.
> > +                * And this should only be done only if a new position
> > +                * is found. Otherwise we should keep the old page table
> > +                * to make it be like nokaslr case.
> > +                */
> > +               finalize_identity_maps();
> >         }
> >
> > -       /* This actually loads the identity pagetable on x86_64. */
> > -       finalize_identity_maps();
> >
> >         /* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */
> >         if (IS_ENABLED(CONFIG_X86_64))
> > --
> > 2.5.5
> >
> 
> 
> 
> -- 
> Kees Cook
> Pixel Security

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] x86/efi: Correct ident mapping of efi old_map when kalsr enabled
  2017-04-26 14:49     ` Thomas Garnier
@ 2017-04-27 10:31       ` Baoquan He
  2017-04-27 10:47         ` Baoquan He
  0 siblings, 1 reply; 10+ messages in thread
From: Baoquan He @ 2017-04-27 10:31 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: LKML, Ingo Molnar, Kees Cook, Dave Young, Xunlei Pang,
	Matt Fleming, Ard Biesheuvel, Thomas Gleixner, H. Peter Anvin,
	the arch/x86 maintainers, linux-efi

Hi Thomas,

Thanks for reviewing!

On 04/26/17 at 07:49am, Thomas Garnier wrote:
> On Wed, Apr 26, 2017 at 3:43 AM, Baoquan He <bhe@redhat.com> wrote:
> > >  arch/x86/platform/efi/efi_64.c | 35 +++++++++++++++++++++++++++--------
> > >  1 file changed, 27 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
> > > index 2ee7694..2e7baff 100644
> > > --- a/arch/x86/platform/efi/efi_64.c
> > > +++ b/arch/x86/platform/efi/efi_64.c
> > > @@ -71,11 +71,12 @@ static void __init early_code_mapping_set_exec(int executable)
> > >
> > >  pgd_t * __init efi_call_phys_prolog(void)
> > >  {
> > > -     unsigned long vaddress;
> > > +     unsigned long vaddr, left_vaddr;
> > > +     unsigned int num_entries;
> > >       pgd_t *save_pgd;
> > > -
> > > -     int pgd;
> > > +     pud_t *pud, *pud_k;
> > >       int n_pgds;
> > > +     int i;
> > >
> > >       if (!efi_enabled(EFI_OLD_MEMMAP)) {
> > >               save_pgd = (pgd_t *)read_cr3();
> > > @@ -88,10 +89,22 @@ pgd_t * __init efi_call_phys_prolog(void)
> > >       n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
> > >       save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
> > >
> > > -     for (pgd = 0; pgd < n_pgds; pgd++) {
> > > -             save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE);
> > > -             vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
> > > -             set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
> > > +     for (i = 0; i < n_pgds; i++) {
> > > +             save_pgd[i] = *pgd_offset_k(i * PGDIR_SIZE);
> > > +
> > > +             vaddr = (unsigned long)__va(i * PGDIR_SIZE);
> > > +             pud = pud_alloc_one(NULL, 0);
> 
> Please check if pud is NULL.

I considered it a while. I didn't check because I thought it's still in
kernel init stage,  and at most 128 page frames are cost for 64TB,
namely 512KB. If kernel can't give 512KB at this time, it will die soon.
I would like to hear what people are suggesting. Since you have pointed
it out, I will add checking here.

However I think we can keep those allocated page and try our best to
build as much ident mapping as possible. E.g if we have 10TB memory, but
failed to allocate page for 11th pud table, we can break the for loop,
leave those built ident mapping there since efi region could be located
inside those 0~5TB region.

Then inefi_call_phys_epilog() only free these allocated pud tables in
efi_call_phys_prolog, check and avoid freeing those pud tables from
direct mapping which still existed because of allocation failure in
efi_call_phys_prolog.

diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 2e7baff..67920d4 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -94,6 +94,11 @@ pgd_t * __init efi_call_phys_prolog(void)
 
 		vaddr = (unsigned long)__va(i * PGDIR_SIZE);
 		pud = pud_alloc_one(NULL, 0);
+		if (!pud) {
+			pr_err("Failed to allocate page for %d-th pud table "
+				"to build 1:1 mapping!\n", i);
+			break;
+		}
 
 		num_entries = PTRS_PER_PUD - pud_index(vaddr);
 		pud_k = pud_offset(pgd_offset_k(vaddr), vaddr);
@@ -132,8 +137,10 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 
 	for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
 		pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
-		pud = (pud_t *)pgd_page_vaddr(*pgd);
-		pud_free(NULL, pud);
+		if (*pgd != save_pgd[pgd_idx]) {
+			pud = (pud_t *)pgd_page_vaddr(*pgd);
+			pud_free(NULL, pud);
+		}
 		set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
 	}
 

> 
> > > +
> > > +             num_entries = PTRS_PER_PUD - pud_index(vaddr);
> > > +             pud_k = pud_offset(pgd_offset_k(vaddr), vaddr);
> > > +             memcpy(pud, pud_k, num_entries);
> > > +             if (pud_index(vaddr) > 0) {
> 
> You are using pud_index(vaddr) 3 times, might be worth using a local variable.

Sure, will do, thanks.

> 
> > > +                     left_vaddr = vaddr + (num_entries * PUD_SIZE);
> > > +                     pud_k = pud_offset(pgd_offset_k(left_vaddr),
> > > +                                        left_vaddr);
> > > +                     memcpy(pud + num_entries, pud_k, pud_index(vaddr));
> 
> I think this section (or the overall for loop) would benefit with a
> comment explaining explaining why you are shifting the new PUD like
> this.

Will write a paragraph.
> 
> > > +             }
> > > +             pgd_populate(NULL, pgd_offset_k(i * PGDIR_SIZE), pud);
> > >       }
> > >  out:
> > >       __flush_tlb_all();
> > > @@ -106,6 +119,8 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
> > >        */
> > >       int pgd_idx;
> > >       int nr_pgds;
> > > +     pud_t *pud;
> > > +     pgd_t *pgd;
> > >
> > >       if (!efi_enabled(EFI_OLD_MEMMAP)) {
> > >               write_cr3((unsigned long)save_pgd);
> > > @@ -115,8 +130,12 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
> > >
> > >       nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
> > >
> > > -     for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++)
> > > +     for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
> > > +             pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
> > > +             pud = (pud_t *)pgd_page_vaddr(*pgd);
> > > +             pud_free(NULL, pud);
> > >               set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
> > > +     }
> > >
> > >       kfree(save_pgd);
> > >
> > > --
> > > 2.5.5
> > >
> 
> 
> 
> 
> -- 
> Thomas

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] x86/efi: Correct ident mapping of efi old_map when kalsr enabled
  2017-04-27 10:31       ` Baoquan He
@ 2017-04-27 10:47         ` Baoquan He
  0 siblings, 0 replies; 10+ messages in thread
From: Baoquan He @ 2017-04-27 10:47 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: LKML, Ingo Molnar, Kees Cook, Dave Young, Xunlei Pang,
	Matt Fleming, Ard Biesheuvel, Thomas Gleixner, H. Peter Anvin,
	the arch/x86 maintainers, linux-efi

On 04/27/17 at 06:31pm, Baoquan He wrote:
> Hi Thomas,
> 
> Thanks for reviewing!
> 
> On 04/26/17 at 07:49am, Thomas Garnier wrote:
> > On Wed, Apr 26, 2017 at 3:43 AM, Baoquan He <bhe@redhat.com> wrote:
> > > >  arch/x86/platform/efi/efi_64.c | 35 +++++++++++++++++++++++++++--------
> > > >  1 file changed, 27 insertions(+), 8 deletions(-)
> > > >
> > > > diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
> > > > index 2ee7694..2e7baff 100644
> > > > --- a/arch/x86/platform/efi/efi_64.c
> > > > +++ b/arch/x86/platform/efi/efi_64.c
> > > > @@ -71,11 +71,12 @@ static void __init early_code_mapping_set_exec(int executable)
> > > >
> > > >  pgd_t * __init efi_call_phys_prolog(void)
> > > >  {
> > > > -     unsigned long vaddress;
> > > > +     unsigned long vaddr, left_vaddr;
> > > > +     unsigned int num_entries;
> > > >       pgd_t *save_pgd;
> > > > -
> > > > +     pud_t *pud, *pud_k;
> > > >       int n_pgds;
> > > > +     int i;
> > > >
> > > >       if (!efi_enabled(EFI_OLD_MEMMAP)) {
> > > >               save_pgd = (pgd_t *)read_cr3();
> > > > @@ -88,10 +89,22 @@ pgd_t * __init efi_call_phys_prolog(void)
> > > >       n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
> > > >       save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
> > > >
> > > > -     for (pgd = 0; pgd < n_pgds; pgd++) {
> > > > -             save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE);
> > > > -             vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
> > > > -             set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
> > > > +     for (i = 0; i < n_pgds; i++) {
> > > > +             save_pgd[i] = *pgd_offset_k(i * PGDIR_SIZE);
> > > > +
> > > > +             vaddr = (unsigned long)__va(i * PGDIR_SIZE);
> > > > +             pud = pud_alloc_one(NULL, 0);
> > 
> > Please check if pud is NULL.

Or panic if pud_alloc_one failed since kernel won't function well
anyway.
> 
> I considered it a while. I didn't check because I thought it's still in
> kernel init stage,  and at most 128 page frames are cost for 64TB,
> namely 512KB. If kernel can't give 512KB at this time, it will die soon.
> I would like to hear what people are suggesting. Since you have pointed
> it out, I will add checking here.
> 
> However I think we can keep those allocated page and try our best to
> build as much ident mapping as possible. E.g if we have 10TB memory, but
> failed to allocate page for 11th pud table, we can break the for loop,
> leave those built ident mapping there since efi region could be located
> inside those 0~5TB region.
> 
> Then inefi_call_phys_epilog() only free these allocated pud tables in
> efi_call_phys_prolog, check and avoid freeing those pud tables from
> direct mapping which still existed because of allocation failure in
> efi_call_phys_prolog.
> 
> diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
> index 2e7baff..67920d4 100644
> --- a/arch/x86/platform/efi/efi_64.c
> +++ b/arch/x86/platform/efi/efi_64.c
> @@ -94,6 +94,11 @@ pgd_t * __init efi_call_phys_prolog(void)
>  
>  		vaddr = (unsigned long)__va(i * PGDIR_SIZE);
>  		pud = pud_alloc_one(NULL, 0);
> +		if (!pud) {
> +			pr_err("Failed to allocate page for %d-th pud table "
> +				"to build 1:1 mapping!\n", i);
> +			break;
> +		}
>  
>  		num_entries = PTRS_PER_PUD - pud_index(vaddr);
>  		pud_k = pud_offset(pgd_offset_k(vaddr), vaddr);
> @@ -132,8 +137,10 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
>  
>  	for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
>  		pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
> -		pud = (pud_t *)pgd_page_vaddr(*pgd);
> -		pud_free(NULL, pud);
> +		if (*pgd != save_pgd[pgd_idx]) {
> +			pud = (pud_t *)pgd_page_vaddr(*pgd);
> +			pud_free(NULL, pud);
> +		}
>  		set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
>  	}
>  
> 
> > 
> > > > +
> > > > +             num_entries = PTRS_PER_PUD - pud_index(vaddr);
> > > > +             pud_k = pud_offset(pgd_offset_k(vaddr), vaddr);
> > > > +             memcpy(pud, pud_k, num_entries);
> > > > +             if (pud_index(vaddr) > 0) {
> > 
> > You are using pud_index(vaddr) 3 times, might be worth using a local variable.
> 
> Sure, will do, thanks.
> 
> > 
> > > > +                     left_vaddr = vaddr + (num_entries * PUD_SIZE);
> > > > +                     pud_k = pud_offset(pgd_offset_k(left_vaddr),
> > > > +                                        left_vaddr);
> > > > +                     memcpy(pud + num_entries, pud_k, pud_index(vaddr));
> > 
> > I think this section (or the overall for loop) would benefit with a
> > comment explaining explaining why you are shifting the new PUD like
> > this.
> 
> Will write a paragraph.
> > 
> > > > +             }
> > > > +             pgd_populate(NULL, pgd_offset_k(i * PGDIR_SIZE), pud);
> > > >       }
> > > >  out:
> > > >       __flush_tlb_all();
> > > > @@ -106,6 +119,8 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
> > > >        */
> > > >       int pgd_idx;
> > > >       int nr_pgds;
> > > > +     pud_t *pud;
> > > > +     pgd_t *pgd;
> > > >
> > > >       if (!efi_enabled(EFI_OLD_MEMMAP)) {
> > > >               write_cr3((unsigned long)save_pgd);
> > > > @@ -115,8 +130,12 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
> > > >
> > > >       nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
> > > >
> > > > -     for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++)
> > > > +     for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
> > > > +             pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
> > > > +             pud = (pud_t *)pgd_page_vaddr(*pgd);
> > > > +             pud_free(NULL, pud);
> > > >               set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
> > > > +     }
> > > >
> > > >       kfree(save_pgd);
> > > >
> > > > --
> > > > 2.5.5
> > > >
> > 
> > 
> > 
> > 
> > -- 
> > Thomas

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2017-04-27 10:47 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-26 10:39 [PATCH 0/2] kaslr related bug fix Baoquan He
2017-04-26 10:39 ` [PATCH 1/2] x86/efi: Correct ident mapping of efi old_map when kalsr enabled Baoquan He
2017-04-26 10:43   ` Baoquan He
2017-04-26 14:49     ` Thomas Garnier
2017-04-27 10:31       ` Baoquan He
2017-04-27 10:47         ` Baoquan He
2017-04-26 10:39 ` [PATCH 2/2] x86/KASLR: Use old ident map page table if physical randomization failed Baoquan He
2017-04-26 10:49   ` Baoquan He
2017-04-26 19:12   ` Kees Cook
2017-04-27  7:18     ` Baoquan He

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).