All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexandre Chartre <alexandre.chartre@oracle.com>
To: pbonzini@redhat.com, rkrcmar@redhat.com, tglx@linutronix.de,
	mingo@redhat.com, bp@alien8.de, hpa@zytor.com,
	dave.hansen@linux.intel.com, luto@kernel.org,
	peterz@infradead.org, kvm@vger.kernel.org, x86@kernel.org,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: konrad.wilk@oracle.com, jan.setjeeilers@oracle.com,
	liran.alon@oracle.com, jwadams@google.com, graf@amazon.de,
	rppt@linux.vnet.ibm.com, alexandre.chartre@oracle.com
Subject: [RFC v2 01/26] mm/x86: Introduce kernel address space isolation
Date: Thu, 11 Jul 2019 16:25:13 +0200	[thread overview]
Message-ID: <1562855138-19507-2-git-send-email-alexandre.chartre@oracle.com> (raw)
In-Reply-To: <1562855138-19507-1-git-send-email-alexandre.chartre@oracle.com>

Introduce core functions and structures for implementing Address Space
Isolation (ASI). Kernel address space isolation provides the ability to
run some kernel code with a reduced kernel address space.

An address space isolation is defined with a struct asi structure which
has its own page-table. While, for now, this page-table is empty, it
will eventually be possible to populate it so that it is much smaller
than the full kernel page-table.

Isolation is entered by calling asi_enter() which switches the kernel
page-table to the address space isolation page-table. Isolation is then
exited by calling asi_exit() which switches the page-table back to the
kernel page-table.

Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
---
 arch/x86/include/asm/asi.h |   41 ++++++++++++
 arch/x86/mm/Makefile       |    2 +
 arch/x86/mm/asi.c          |  152 ++++++++++++++++++++++++++++++++++++++++++++
 security/Kconfig           |   10 +++
 4 files changed, 205 insertions(+), 0 deletions(-)
 create mode 100644 arch/x86/include/asm/asi.h
 create mode 100644 arch/x86/mm/asi.c

diff --git a/arch/x86/include/asm/asi.h b/arch/x86/include/asm/asi.h
new file mode 100644
index 0000000..8a13f73
--- /dev/null
+++ b/arch/x86/include/asm/asi.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_X86_MM_ASI_H
+#define ARCH_X86_MM_ASI_H
+
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+
+#include <linux/spinlock.h>
+#include <asm/pgtable.h>
+
+struct asi {
+	spinlock_t		lock;		/* protect all attributes */
+	pgd_t			*pgd;		/* ASI page-table */
+};
+
+/*
+ * An ASI session maintains the state of address state isolation on a
+ * cpu. There is one ASI session per cpu. There is no lock to protect
+ * members of the asi_session structure as each cpu is managing its
+ * own ASI session.
+ */
+
+enum asi_session_state {
+	ASI_SESSION_STATE_INACTIVE,	/* no address space isolation */
+	ASI_SESSION_STATE_ACTIVE,	/* address space isolation is active */
+};
+
+struct asi_session {
+	struct asi		*asi;		/* ASI for this session */
+	enum asi_session_state	state;		/* state of ASI session */
+	unsigned long		original_cr3;	/* cr3 before entering ASI */
+	struct task_struct	*task;		/* task during isolation */
+} __aligned(PAGE_SIZE);
+
+extern struct asi *asi_create(void);
+extern void asi_destroy(struct asi *asi);
+extern int asi_enter(struct asi *asi);
+extern void asi_exit(struct asi *asi);
+
+#endif	/* CONFIG_ADDRESS_SPACE_ISOLATION */
+
+#endif
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 84373dc..dae5c8a 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -49,7 +49,9 @@ obj-$(CONFIG_X86_INTEL_MPX)			+= mpx.o
 obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS)	+= pkeys.o
 obj-$(CONFIG_RANDOMIZE_MEMORY)			+= kaslr.o
 obj-$(CONFIG_PAGE_TABLE_ISOLATION)		+= pti.o
+obj-$(CONFIG_ADDRESS_SPACE_ISOLATION)		+= asi.o
 
 obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt_identity.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt_boot.o
+
diff --git a/arch/x86/mm/asi.c b/arch/x86/mm/asi.c
new file mode 100644
index 0000000..c3993b7
--- /dev/null
+++ b/arch/x86/mm/asi.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Kernel Address Space Isolation (ASI)
+ */
+
+#include <linux/export.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+
+#include <asm/asi.h>
+#include <asm/bug.h>
+#include <asm/mmu_context.h>
+
+/* ASI sessions, one per cpu */
+DEFINE_PER_CPU_PAGE_ALIGNED(struct asi_session, cpu_asi_session);
+
+static int asi_init_mapping(struct asi *asi)
+{
+	/*
+	 * TODO: Populate the ASI page-table with minimal mappings so
+	 * that we can at least enter isolation and abort.
+	 */
+	return 0;
+}
+
+struct asi *asi_create(void)
+{
+	struct page *page;
+	struct asi *asi;
+	int err;
+
+	asi = kzalloc(sizeof(*asi), GFP_KERNEL);
+	if (!asi)
+		return NULL;
+
+	page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+	if (!page)
+		goto error;
+
+	asi->pgd = page_address(page);
+	spin_lock_init(&asi->lock);
+
+	err = asi_init_mapping(asi);
+	if (err)
+		goto error;
+
+	return asi;
+
+error:
+	asi_destroy(asi);
+	return NULL;
+}
+EXPORT_SYMBOL(asi_create);
+
+void asi_destroy(struct asi *asi)
+{
+	if (!asi)
+		return;
+
+	if (asi->pgd)
+		free_page((unsigned long)asi->pgd);
+
+	kfree(asi);
+}
+EXPORT_SYMBOL(asi_destroy);
+
+
+/*
+ * When isolation is active, the address space doesn't necessarily map
+ * the percpu offset value (this_cpu_off) which is used to get pointers
+ * to percpu variables. So functions which can be invoked while isolation
+ * is active shouldn't be getting pointers to percpu variables (i.e. with
+ * get_cpu_var() or this_cpu_ptr()). Instead percpu variable should be
+ * directly read or written to (i.e. with this_cpu_read() or
+ * this_cpu_write()).
+ */
+
+int asi_enter(struct asi *asi)
+{
+	enum asi_session_state state;
+	struct asi *current_asi;
+	struct asi_session *asi_session;
+
+	state = this_cpu_read(cpu_asi_session.state);
+	/*
+	 * We can re-enter isolation, but only with the same ASI (we don't
+	 * support nesting isolation). Also, if isolation is still active,
+	 * then we should be re-entering with the same task.
+	 */
+	if (state == ASI_SESSION_STATE_ACTIVE) {
+		current_asi = this_cpu_read(cpu_asi_session.asi);
+		if (current_asi != asi) {
+			WARN_ON(1);
+			return -EBUSY;
+		}
+		WARN_ON(this_cpu_read(cpu_asi_session.task) != current);
+		return 0;
+	}
+
+	/* isolation is not active so we can safely access the percpu pointer */
+	asi_session = &get_cpu_var(cpu_asi_session);
+	asi_session->asi = asi;
+	asi_session->task = current;
+	asi_session->original_cr3 = __get_current_cr3_fast();
+	if (!asi_session->original_cr3) {
+		WARN_ON(1);
+		err = -EINVAL;
+		goto err_clear_asi;
+	}
+	asi_session->state = ASI_SESSION_STATE_ACTIVE;
+
+	load_cr3(asi->pgd);
+
+	return 0;
+
+err_clear_asi:
+	asi_session->asi = NULL;
+	asi_session->task = NULL;
+
+	return err;
+
+}
+EXPORT_SYMBOL(asi_enter);
+
+void asi_exit(struct asi *asi)
+{
+	struct asi_session *asi_session;
+	enum asi_session_state asi_state;
+	unsigned long original_cr3;
+
+	asi_state = this_cpu_read(cpu_asi_session.state);
+	if (asi_state == ASI_SESSION_STATE_INACTIVE)
+		return;
+
+	/* TODO: Kick sibling hyperthread before switching to kernel cr3 */
+	original_cr3 = this_cpu_read(cpu_asi_session.original_cr3);
+	if (original_cr3)
+		write_cr3(original_cr3);
+
+	/* page-table was switched, we can now access the percpu pointer */
+	asi_session = &get_cpu_var(cpu_asi_session);
+	WARN_ON(asi_session->task != current);
+	asi_session->state = ASI_SESSION_STATE_INACTIVE;
+	asi_session->asi = NULL;
+	asi_session->task = NULL;
+	asi_session->original_cr3 = 0;
+}
+EXPORT_SYMBOL(asi_exit);
diff --git a/security/Kconfig b/security/Kconfig
index 466cc1f..241b9a7 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -65,6 +65,16 @@ config PAGE_TABLE_ISOLATION
 
 	  See Documentation/x86/pti.txt for more details.
 
+config ADDRESS_SPACE_ISOLATION
+	bool "Allow code to run with a reduced kernel address space"
+	default y
+	depends on (X86_64 || X86_PAE) && !UML
+	help
+	   This feature provides the ability to run some kernel code
+	   with a reduced kernel address space. This can be used to
+	   mitigate speculative execution attacks which are able to
+	   leak data between sibling CPU hyper-threads.
+
 config SECURITY_INFINIBAND
 	bool "Infiniband Security Hooks"
 	depends on SECURITY && INFINIBAND
-- 
1.7.1


  reply	other threads:[~2019-07-11 14:27 UTC|newest]

Thread overview: 79+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-11 14:25 [RFC v2 00/27] Kernel Address Space Isolation Alexandre Chartre
2019-07-11 14:25 ` Alexandre Chartre [this message]
2019-07-11 21:33   ` [RFC v2 01/26] mm/x86: Introduce kernel address space isolation Thomas Gleixner
2019-07-11 21:33     ` Thomas Gleixner
2019-07-12  7:43     ` Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 02/26] mm/asi: Abort isolation on interrupt, exception and context switch Alexandre Chartre
2019-07-11 20:11   ` Andi Kleen
2019-07-11 20:11     ` Andi Kleen
2019-07-11 20:17     ` Mike Rapoport
2019-07-11 20:41       ` Alexandre Chartre
2019-07-12  0:05   ` Andy Lutomirski
2019-07-12  7:50     ` Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 03/26] mm/asi: Handle page fault due to address space isolation Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 04/26] mm/asi: Functions to track buffers allocated for an ASI page-table Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 05/26] mm/asi: Add ASI page-table entry offset functions Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 06/26] mm/asi: Add ASI page-table entry allocation functions Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 07/26] mm/asi: Add ASI page-table entry set functions Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 08/26] mm/asi: Functions to populate an ASI page-table from a VA range Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 09/26] mm/asi: Helper functions to map module into ASI Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 10/26] mm/asi: Keep track of VA ranges mapped in ASI page-table Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 11/26] mm/asi: Functions to clear ASI page-table entries for a VA range Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 12/26] mm/asi: Function to copy page-table entries for percpu buffer Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 13/26] mm/asi: Add asi_remap() function Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 14/26] mm/asi: Handle ASI mapped range leaks and overlaps Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 15/26] mm/asi: Initialize the ASI page-table with core mappings Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 16/26] mm/asi: Option to map current task into ASI Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 17/26] rcu: Move tree.h static forward declarations to tree.c Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 18/26] rcu: Make percpu rcu_data non-static Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 19/26] mm/asi: Add option to map RCU data Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 20/26] mm/asi: Add option to map cpu_hw_events Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 21/26] mm/asi: Make functions to read cr3/cr4 ASI aware Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 22/26] KVM: x86/asi: Introduce address_space_isolation module parameter Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 23/26] KVM: x86/asi: Introduce KVM address space isolation Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 24/26] KVM: x86/asi: Populate the KVM ASI page-table Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 25/26] KVM: x86/asi: Switch to KVM address space on entry to guest Alexandre Chartre
2019-07-11 14:25 ` [RFC v2 26/26] KVM: x86/asi: Map KVM memslots and IO buses into KVM ASI Alexandre Chartre
2019-07-11 14:40 ` [RFC v2 00/27] Kernel Address Space Isolation Alexandre Chartre
2019-07-11 22:38 ` Dave Hansen
2019-07-12  8:09   ` Alexandre Chartre
2019-07-12 13:51     ` Dave Hansen
2019-07-12 14:06       ` Alexandre Chartre
2019-07-12 15:23         ` Thomas Gleixner
2019-07-12 15:23           ` Thomas Gleixner
2019-07-12 10:44   ` Thomas Gleixner
2019-07-12 10:44     ` Thomas Gleixner
2019-07-12 11:56     ` Alexandre Chartre
2019-07-12 12:50       ` Peter Zijlstra
2019-07-12 13:43         ` Alexandre Chartre
2019-07-12 13:58           ` Dave Hansen
2019-07-12 14:36           ` Andy Lutomirski
2019-07-12 14:36             ` Andy Lutomirski
2019-07-14 18:17             ` Alexander Graf
2019-07-12 13:54         ` Dave Hansen
2019-07-12 15:20           ` Peter Zijlstra
2019-07-12 15:16         ` Thomas Gleixner
2019-07-12 15:16           ` Thomas Gleixner
2019-07-12 16:37           ` Alexandre Chartre
2019-07-12 16:45             ` Andy Lutomirski
2019-07-14 17:11               ` Mike Rapoport
2019-07-12 19:06             ` Peter Zijlstra
2019-07-14 15:06               ` Andy Lutomirski
2019-07-14 15:06                 ` Andy Lutomirski
2019-07-15 10:33                 ` Peter Zijlstra
2019-07-12 19:48             ` Thomas Gleixner
2019-07-12 19:48               ` Thomas Gleixner
2019-07-15  8:23               ` Alexandre Chartre
2019-07-15  8:28                 ` Thomas Gleixner
2019-07-15  8:28                   ` Thomas Gleixner
2019-07-12 16:00       ` Thomas Gleixner
2019-07-12 16:00         ` Thomas Gleixner
2019-07-12 11:44 ` Peter Zijlstra
2019-07-12 12:17   ` Alexandre Chartre
2019-07-12 12:36     ` Peter Zijlstra
2019-07-12 12:47       ` Alexandre Chartre
2019-07-12 13:07         ` Peter Zijlstra
2019-07-12 13:46           ` Alexandre Chartre
2019-07-31 16:31             ` Dario Faggioli
2019-07-31 16:31               ` Dario Faggioli
2019-08-22 12:31               ` Alexandre Chartre

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1562855138-19507-2-git-send-email-alexandre.chartre@oracle.com \
    --to=alexandre.chartre@oracle.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=graf@amazon.de \
    --cc=hpa@zytor.com \
    --cc=jan.setjeeilers@oracle.com \
    --cc=jwadams@google.com \
    --cc=konrad.wilk@oracle.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=liran.alon@oracle.com \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rkrcmar@redhat.com \
    --cc=rppt@linux.vnet.ibm.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.