LKML Archive on lore.kernel.org
 help / Atom feed
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
To: Will Deacon <will.deacon@arm.com>
Cc: "linux-arm-kernel@lists.infradead.org" 
	<linux-arm-kernel@lists.infradead.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Mark Rutland <mark.rutland@arm.com>,
	Stephen Boyd <sboyd@codeaurora.org>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Kees Cook <keescook@chromium.org>
Subject: Re: [PATCH 00/18] arm64: Unmap the kernel whilst running in userspace (KAISER)
Date: Sat, 18 Nov 2017 15:25:06 +0000
Message-ID: <CAKv+Gu8YB_qXgWEbd0ZZ6UycxNLXxTZCno87vJ9HH7d6+dKeLQ@mail.gmail.com> (raw)
In-Reply-To: <1510942921-12564-1-git-send-email-will.deacon@arm.com>

On 17 November 2017 at 18:21, Will Deacon <will.deacon@arm.com> wrote:
> Hi all,
>
> This patch series implements something along the lines of KAISER for arm64:
>
>   https://gruss.cc/files/kaiser.pdf
>
> although I wrote this from scratch because the paper has some funny
> assumptions about how the architecture works. There is a patch series
> in review for x86, which follows a similar approach:
>
>   http://lkml.kernel.org/r/<20171110193058.BECA7D88@viggo.jf.intel.com>
>
> and the topic was recently covered by LWN (currently subscriber-only):
>
>   https://lwn.net/Articles/738975/
>
> The basic idea is that transitions to and from userspace are proxied
> through a trampoline page which is mapped into a separate page table and
> can switch the full kernel mapping in and out on exception entry and
> exit respectively. This is a valuable defence against various KASLR and
> timing attacks, particularly as the trampoline page is at a fixed virtual
> address and therefore the kernel text can be randomized independently.
>
> The major consequences of the trampoline are:
>
>   * We can no longer make use of global mappings for kernel space, so
>     each task is assigned two ASIDs: one for user mappings and one for
>     kernel mappings
>
>   * Our ASID moves into TTBR1 so that we can quickly switch between the
>     trampoline and kernel page tables
>
>   * Switching TTBR0 always requires use of the zero page, so we can
>     dispense with some of our errata workaround code.
>
>   * entry.S gets more complicated to read
>
> The performance hit from this series isn't as bad as I feared: things
> like cyclictest and kernbench seem to be largely unaffected, although
> syscall micro-benchmarks appear to show that syscall overhead is roughly
> doubled, and this has an impact on things like hackbench which exhibits
> a ~10% hit due to its heavy context-switching.
>
> Patches based on 4.14 and also pushed here:
>
>   git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git kaiser
>
> Feedback welcome,
>
> Will
>

Very nice! I am quite pleased, because this makes KASLR much more
useful than it is now.

My main question is why we need a separate trampoline vector table: it
seems to me that with some minor surgery (as proposed below), we can
make the kernel_ventry macro instantiations tolerant of being loaded
somewhere in the fixmap (which I think is a better place for this than
at the base of the VMALLOC space), removing the need to change
vbar_el1 back and forth. The only downside is that exceptions taken
from EL1 will also use absolute addressing, but I don't think that is
a huge price to pay.

-------------->8------------------
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index f8ce4cdd3bb5..7f89ebc690b1 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -71,6 +71,20 @@

  .macro kernel_ventry, el, label, regsize = 64
  .align 7
+alternative_if_not ARM64_MAP_KERNEL_AT_EL0
+ .if \regsize == 64
+ msr tpidrro_el0, x30 // preserve x30
+ .endif
+ .if \el == 0
+ mrs x30, ttbr1_el1
+ sub x30, x30, #(SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
+ bic x30, x30, #USER_ASID_FLAG
+ msr ttbr1_el1, x30
+ isb
+ .endif
+ ldr x30, =el\()\el\()_\label
+alternative_else_nop_endif
+
  sub sp, sp, #S_FRAME_SIZE
 #ifdef CONFIG_VMAP_STACK
  /*
@@ -82,7 +96,11 @@
  tbnz x0, #THREAD_SHIFT, 0f
  sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0
  sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp
+alternative_if_not ARM64_MAP_KERNEL_AT_EL0
+ br x30
+alternative_else
  b el\()\el\()_\label
+alternative_endif

 0:
  /*
@@ -91,6 +109,10 @@
  * userspace, and can clobber EL0 registers to free up GPRs.
  */

+alternative_if_not ARM64_MAP_KERNEL_AT_EL0
+ mrs x30, tpidrro_el0 // restore x30
+alternative_else_nop_endif
+
  /* Stash the original SP (minus S_FRAME_SIZE) in tpidr_el0. */
  msr tpidr_el0, x0

@@ -98,8 +120,11 @@
  sub x0, sp, x0
  msr tpidrro_el0, x0

- /* Switch to the overflow stack */
- adr_this_cpu sp, overflow_stack + OVERFLOW_STACK_SIZE, x0
+ /* Switch to the overflow stack of this CPU */
+ ldr x0, =overflow_stack + OVERFLOW_STACK_SIZE
+ mov sp, x0
+ mrs x0, tpidr_el1
+ add sp, sp, x0

  /*
  * Check whether we were already on the overflow stack. This may happen
@@ -108,19 +133,30 @@
  mrs x0, tpidr_el0 // sp of interrupted context
  sub x0, sp, x0 // delta with top of overflow stack
  tst x0, #~(OVERFLOW_STACK_SIZE - 1) // within range?
- b.ne __bad_stack // no? -> bad stack pointer
+ b.eq 1f
+ ldr x0, =__bad_stack // no? -> bad stack pointer
+ br x0

  /* We were already on the overflow stack. Restore sp/x0 and carry on. */
- sub sp, sp, x0
+1: sub sp, sp, x0
  mrs x0, tpidrro_el0
 #endif
+alternative_if_not ARM64_MAP_KERNEL_AT_EL0
+ br x30
+alternative_else
  b el\()\el\()_\label
+alternative_endif
  .endm

- .macro kernel_entry, el, regsize = 64
+ .macro kernel_entry, el, regsize = 64, restore_x30 = 1
  .if \regsize == 32
  mov w0, w0 // zero upper 32 bits of x0
  .endif
+ .if \restore_x30
+alternative_if_not ARM64_MAP_KERNEL_AT_EL0
+ mrs x30, tpidrro_el0 // restore x30
+alternative_else_nop_endif
+ .endif
  stp x0, x1, [sp, #16 * 0]
  stp x2, x3, [sp, #16 * 1]
  stp x4, x5, [sp, #16 * 2]
@@ -363,7 +399,7 @@ tsk .req x28 // current thread_info
  */
  .pushsection ".entry.text", "ax"

- .align 11
+ .align PAGE_SHIFT
 ENTRY(vectors)
  kernel_ventry 1, sync_invalid // Synchronous EL1t
  kernel_ventry 1, irq_invalid // IRQ EL1t
@@ -391,6 +427,8 @@ ENTRY(vectors)
  kernel_ventry 0, fiq_invalid, 32 // FIQ 32-bit EL0
  kernel_ventry 0, error_invalid, 32 // Error 32-bit EL0
 #endif
+ .ltorg
+ .align PAGE_SHIFT
 END(vectors)

 #ifdef CONFIG_VMAP_STACK
@@ -408,7 +446,7 @@ __bad_stack:
  * S_FRAME_SIZE) was stashed in tpidr_el0 by kernel_ventry.
  */
  sub sp, sp, #S_FRAME_SIZE
- kernel_entry 1
+ kernel_entry 1, restore_x30=0
  mrs x0, tpidr_el0
  add x0, x0, #S_FRAME_SIZE
  str x0, [sp, #S_SP]

  parent reply index

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-11-17 18:21 Will Deacon
2017-11-17 18:21 ` [PATCH 01/18] arm64: mm: Use non-global mappings for kernel space Will Deacon
2017-11-17 18:21 ` [PATCH 02/18] arm64: mm: Temporarily disable ARM64_SW_TTBR0_PAN Will Deacon
2017-11-17 18:21 ` [PATCH 03/18] arm64: mm: Move ASID from TTBR0 to TTBR1 Will Deacon
2017-11-17 18:21 ` [PATCH 04/18] arm64: mm: Remove pre_ttbr0_update_workaround for Falkor erratum #E1003 Will Deacon
2017-11-17 18:21 ` [PATCH 05/18] arm64: mm: Rename post_ttbr0_update_workaround Will Deacon
2017-11-17 18:21 ` [PATCH 06/18] arm64: mm: Fix and re-enable ARM64_SW_TTBR0_PAN Will Deacon
2017-11-17 18:21 ` [PATCH 07/18] arm64: mm: Allocate ASIDs in pairs Will Deacon
2017-11-17 18:21 ` [PATCH 08/18] arm64: mm: Add arm64_kernel_mapped_at_el0 helper using static key Will Deacon
2017-11-17 18:21 ` [PATCH 09/18] arm64: mm: Invalidate both kernel and user ASIDs when performing TLBI Will Deacon
2017-11-17 18:21 ` [PATCH 10/18] arm64: entry: Add exception trampoline page for exceptions from EL0 Will Deacon
2017-11-17 18:21 ` [PATCH 11/18] arm64: mm: Map entry trampoline into trampoline and kernel page tables Will Deacon
2017-11-17 18:21 ` [PATCH 12/18] arm64: entry: Explicitly pass exception level to kernel_ventry macro Will Deacon
2017-11-17 18:21 ` [PATCH 13/18] arm64: entry: Hook up entry trampoline to exception vectors Will Deacon
2017-11-17 18:21 ` [PATCH 14/18] arm64: erratum: Work around Falkor erratum #E1003 in trampoline code Will Deacon
2017-11-18  0:27   ` Stephen Boyd
2017-11-20 18:05     ` Will Deacon
2017-11-17 18:21 ` [PATCH 15/18] arm64: tls: Avoid unconditional zeroing of tpidrro_el0 for native tasks Will Deacon
2017-11-17 18:21 ` [PATCH 16/18] arm64: entry: Add fake CPU feature for mapping the kernel at EL0 Will Deacon
2017-11-17 18:22 ` [PATCH 17/18] arm64: makefile: Ensure TEXT_OFFSET doesn't overlap with trampoline Will Deacon
2017-11-17 18:22 ` [PATCH 18/18] arm64: Kconfig: Add CONFIG_UNMAP_KERNEL_AT_EL0 Will Deacon
2017-11-22 16:52   ` Marc Zyngier
2017-11-22 19:36     ` Will Deacon
2017-11-18  0:19 ` [PATCH 00/18] arm64: Unmap the kernel whilst running in userspace (KAISER) Stephen Boyd
2017-11-20 18:03   ` Will Deacon
2017-11-18 15:25 ` Ard Biesheuvel [this message]
2017-11-20 18:06   ` Will Deacon
2017-11-20 18:20     ` Ard Biesheuvel
2017-11-22 19:37       ` Will Deacon
2017-11-20 22:50 ` Laura Abbott
2017-11-22 19:37   ` Will Deacon
2017-11-22 16:19 ` Pavel Machek
2017-11-22 19:37   ` Will Deacon
2017-11-22 22:36     ` Pavel Machek
2017-11-22 21:19   ` Ard Biesheuvel
2017-11-22 22:33     ` Pavel Machek
2017-11-22 23:19       ` Ard Biesheuvel
2017-11-22 23:37         ` Pavel Machek
2017-11-23  6:51           ` Ard Biesheuvel
2017-11-23  9:07             ` Pavel Machek
2017-11-23  9:23               ` Ard Biesheuvel
2017-11-23 10:46                 ` Pavel Machek
2017-11-23 11:38                   ` Ard Biesheuvel
2017-11-23 17:54                     ` Pavel Machek
2017-11-23 18:17                       ` Ard Biesheuvel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAKv+Gu8YB_qXgWEbd0ZZ6UycxNLXxTZCno87vJ9HH7d6+dKeLQ@mail.gmail.com \
    --to=ard.biesheuvel@linaro.org \
    --cc=catalin.marinas@arm.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=keescook@chromium.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=sboyd@codeaurora.org \
    --cc=will.deacon@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org linux-kernel@archiver.kernel.org
	public-inbox-index lkml


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/ public-inbox