linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Junaid Shahid <junaids@google.com>
To: linux-kernel@vger.kernel.org
Cc: Ofir Weisse <oweisse@google.com>,
	kvm@vger.kernel.org, pbonzini@redhat.com, jmattson@google.com,
	pjt@google.com, alexandre.chartre@oracle.com, rppt@linux.ibm.com,
	dave.hansen@linux.intel.com, peterz@infradead.org,
	tglx@linutronix.de, luto@kernel.org, linux-mm@kvack.org
Subject: [RFC PATCH 45/47] mm: asi: Mapping global nonsensitive areas in asi_global_init
Date: Tue, 22 Feb 2022 21:22:21 -0800	[thread overview]
Message-ID: <20220223052223.1202152-46-junaids@google.com> (raw)
In-Reply-To: <20220223052223.1202152-1-junaids@google.com>

From: Ofir Weisse <oweisse@google.com>

There are several areas in memory which we consider non-sensitive.
These areas should be mapped in every ASI domain. We map these areas
in asi_global_init(). We modified some of the linker scripts to
ensure these areas start and end on page boundaries.

The areas:
 - _stext             --> _etext
 - __init_begin       --> __init_end
 - __start_rodata     --> __end_rodata
 - __start_once       --> __end_once
 - __start___ex_table --> __stop___ex_table
 - __start_asi_nonsensitive            --> __end_asi_nonsensitive
 - __start_asi_nonsensitive_readmostly -->
     __end_asi_nonsensitive_readmostly
 - __vvar_page --> + PAGE_SIZE
 - APIC_BASE   --> + PAGE_SIZE
 - phys_base   --> + PAGE_SIZE
 - __start___tracepoints_ptrs --> __stop___tracepoints_ptrs
 - __start___tracepoint_str   --> __stop___tracepoint_str
 - __per_cpu_asi_start        --> __per_cpu_asi_end (percpu)
 - irq_stack_backing_store    --> + sizeof(irq_stack_backing_store)
   (percpu)

The pgd's of the following addresses are cloned, modeled after KPTI:
 - CPU_ENTRY_AREA_BASE
 - ESPFIX_BASE_ADDR

Signed-off-by: Ofir Weisse <oweisse@google.com>


---
 arch/x86/kernel/head_64.S         | 12 +++++
 arch/x86/kernel/vmlinux.lds.S     |  2 +-
 arch/x86/mm/asi.c                 | 82 +++++++++++++++++++++++++++++++
 include/asm-generic/vmlinux.lds.h | 13 +++--
 4 files changed, 105 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d8b3ebd2bb85..3d3874661895 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -574,9 +574,21 @@ SYM_DATA_LOCAL(early_gdt_descr_base,	.quad INIT_PER_CPU_VAR(gdt_page))
 
 	.align 16
 /* This must match the first entry in level2_kernel_pgt */
+
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+/* TODO: Find a way to mark .section for phys_base */
+/* Ideally, we want to map phys_base in .data..asi_non_sensitive. That doesn't
+ * seem to work properly. For now, we just make sure phys_base is in its own
+ * page. */
+	.align PAGE_SIZE
+#endif
 SYM_DATA(phys_base, .quad 0x0)
 EXPORT_SYMBOL(phys_base)
 
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+	.align PAGE_SIZE
+#endif
+
 #include "../../x86/xen/xen-head.S"
 
 	__PAGE_ALIGNED_BSS
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 3d6dc12d198f..2b3668291785 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -148,8 +148,8 @@ SECTIONS
 	} :text =0xcccc
 
 	/* End of text section, which should occupy whole number of pages */
-	_etext = .;
 	. = ALIGN(PAGE_SIZE);
+	_etext = .;
 
 	X86_ALIGN_RODATA_BEGIN
 	RO_DATA(PAGE_SIZE)
diff --git a/arch/x86/mm/asi.c b/arch/x86/mm/asi.c
index 04628949e89d..7f2aa1823736 100644
--- a/arch/x86/mm/asi.c
+++ b/arch/x86/mm/asi.c
@@ -9,6 +9,7 @@
 
 #include <asm/asi.h>
 #include <asm/pgalloc.h>
+#include <asm/processor.h> /* struct irq_stack */
 #include <asm/mmu_context.h>
 
 #include "mm_internal.h"
@@ -17,6 +18,24 @@
 #undef pr_fmt
 #define pr_fmt(fmt)     "ASI: " fmt
 
+#include <linux/extable.h>
+#include <asm-generic/sections.h>
+
+extern struct exception_table_entry __start___ex_table[];
+extern struct exception_table_entry __stop___ex_table[];
+
+extern const char __start_asi_nonsensitive[], __end_asi_nonsensitive[];
+extern const char __start_asi_nonsensitive_readmostly[],
+            __end_asi_nonsensitive_readmostly[];
+extern const char __per_cpu_asi_start[], __per_cpu_asi_end[];
+extern const char *__start___tracepoint_str[];
+extern const char *__stop___tracepoint_str[];
+extern const char *__start___tracepoints_ptrs[];
+extern const char *__stop___tracepoints_ptrs[];
+extern const char __vvar_page[];
+
+DECLARE_PER_CPU_PAGE_ALIGNED(struct irq_stack, irq_stack_backing_store);
+
 static struct asi_class asi_class[ASI_MAX_NUM] __asi_not_sensitive;
 static DEFINE_SPINLOCK(asi_class_lock __asi_not_sensitive);
 
@@ -412,6 +431,7 @@ void asi_unload_module(struct module* module)
 static int __init asi_global_init(void)
 {
 	uint i, n;
+        int err = 0;
 
 	if (!boot_cpu_has(X86_FEATURE_ASI))
 		return 0;
@@ -436,6 +456,68 @@ static int __init asi_global_init(void)
 
         pcpu_map_asi_reserved_chunk();
 
+
+	/*
+	 * TODO: We need to ensure that all the sections mapped below are
+	 * actually page-aligned by the linker. For now, we temporarily just
+	 * align the start/end addresses here, but that is incorrect as the
+	 * rest of the page could potentially contain sensitive data.
+	 */
+#define MAP_SECTION(start, end)                                   \
+        pr_err("%s:%d mapping 0x%lx --> 0x%lx",                   \
+               __FUNCTION__, __LINE__, start, end);               \
+       err = asi_map(ASI_GLOBAL_NONSENSITIVE,                    \
+                     (void*)((unsigned long)(start) & PAGE_MASK),\
+                      PAGE_ALIGN((unsigned long)(end)) -          \
+                     ((unsigned long)(start) & PAGE_MASK));      \
+       BUG_ON(err);
+
+#define MAP_SECTION_PERCPU(start, size)                                  \
+        pr_err("%s:%d mapping PERCPU 0x%lx --> 0x%lx",                   \
+               __FUNCTION__, __LINE__, start, (unsigned long)start+size); \
+       err = asi_map_percpu(ASI_GLOBAL_NONSENSITIVE,                     \
+                     (void*)((unsigned long)(start) & PAGE_MASK),        \
+                      PAGE_ALIGN((unsigned long)(size)));                \
+       BUG_ON(err);
+
+        MAP_SECTION(_stext, _etext);
+        MAP_SECTION(__init_begin, __init_end);
+        MAP_SECTION(__start_rodata, __end_rodata);
+        MAP_SECTION(__start_once, __end_once);
+        MAP_SECTION(__start___ex_table, __stop___ex_table);
+        MAP_SECTION(__start_asi_nonsensitive, __end_asi_nonsensitive);
+        MAP_SECTION(__start_asi_nonsensitive_readmostly,
+                    __end_asi_nonsensitive_readmostly);
+        MAP_SECTION(__vvar_page, __vvar_page + PAGE_SIZE);
+        MAP_SECTION(APIC_BASE, APIC_BASE + PAGE_SIZE);
+        MAP_SECTION(&phys_base, &phys_base + PAGE_SIZE);
+
+       /* TODO: add a build flag to enable/disable mapping only when
+        * instrumentation is used */
+        MAP_SECTION(__start___tracepoints_ptrs, __stop___tracepoints_ptrs);
+        MAP_SECTION(__start___tracepoint_str, __stop___tracepoint_str);
+
+	MAP_SECTION_PERCPU((void*)__per_cpu_asi_start,
+	 		   __per_cpu_asi_end - __per_cpu_asi_start);
+
+	MAP_SECTION_PERCPU(&irq_stack_backing_store,
+			   sizeof(irq_stack_backing_store));
+
+	/* We have to map the stack canary into ASI. This is far from ideal, as
+	* attackers can use L1TF to steal the canary value, and then perhaps
+	* mount some other attack including a buffer overflow. This is a price
+	* we must pay to use ASI.
+	*/
+	MAP_SECTION_PERCPU(&fixed_percpu_data, PAGE_SIZE);
+
+#define CLONE_INIT_PGD(addr) \
+        asi_clone_pgd(asi_global_nonsensitive_pgd, init_mm.pgd, addr);
+
+        CLONE_INIT_PGD(CPU_ENTRY_AREA_BASE);
+#ifdef CONFIG_X86_ESPFIX64
+        CLONE_INIT_PGD(ESPFIX_BASE_ADDR);
+#endif
+
 	return 0;
 }
 subsys_initcall(asi_global_init)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 0a931aedc285..7152ce3613f5 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -235,8 +235,10 @@
 #define TRACE_PRINTKS()	 __start___trace_bprintk_fmt = .;      \
 			 KEEP(*(__trace_printk_fmt)) /* Trace_printk fmt' pointer */ \
 			 __stop___trace_bprintk_fmt = .;
-#define TRACEPOINT_STR() __start___tracepoint_str = .;	\
+#define TRACEPOINT_STR() . = ALIGN(PAGE_SIZE);          \
+                         __start___tracepoint_str = .;	\
 			 KEEP(*(__tracepoint_str)) /* Trace_printk fmt' pointer */ \
+                         . = ALIGN(PAGE_SIZE);          \
 			 __stop___tracepoint_str = .;
 #else
 #define TRACE_PRINTKS()
@@ -348,8 +350,10 @@
 	MEM_KEEP(init.data*)						\
 	MEM_KEEP(exit.data*)						\
 	*(.data.unlikely)						\
+	. = ALIGN(PAGE_SIZE);						\
 	__start_once = .;						\
 	*(.data.once)							\
+	. = ALIGN(PAGE_SIZE);						\
 	__end_once = .;							\
 	STRUCT_ALIGN();							\
 	*(__tracepoints)						\
@@ -453,9 +457,10 @@
 		*(.rodata) *(.rodata.*)					\
 		SCHED_DATA						\
 		RO_AFTER_INIT_DATA	/* Read only after init */	\
-		. = ALIGN(8);						\
+                . = ALIGN(PAGE_SIZE);	        			\
 		__start___tracepoints_ptrs = .;				\
 		KEEP(*(__tracepoints_ptrs)) /* Tracepoints: pointer array */ \
+                . = ALIGN(PAGE_SIZE);	        			\
 		__stop___tracepoints_ptrs = .;				\
 		*(__tracepoints_strings)/* Tracepoints: strings */	\
 	}								\
@@ -671,11 +676,13 @@
  */
 #define EXCEPTION_TABLE(align)						\
 	. = ALIGN(align);						\
+        . = ALIGN(PAGE_SIZE);                                           \
 	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {		\
 		__start___ex_table = .;					\
 		KEEP(*(__ex_table))					\
+                . = ALIGN(PAGE_SIZE);                                   \
 		__stop___ex_table = .;					\
-	}
+	}                                                               \
 
 /*
  * .BTF
-- 
2.35.1.473.g83b2b277ed-goog


  parent reply	other threads:[~2022-02-23  5:28 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-23  5:21 [RFC PATCH 00/47] Address Space Isolation for KVM Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 01/47] mm: asi: Introduce ASI core API Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 02/47] mm: asi: Add command-line parameter to enable/disable ASI Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 03/47] mm: asi: Switch to unrestricted address space when entering scheduler Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 04/47] mm: asi: ASI support in interrupts/exceptions Junaid Shahid
2022-03-14 15:50   ` Thomas Gleixner
2022-03-15  2:01     ` Junaid Shahid
2022-03-15 12:55       ` Thomas Gleixner
2022-03-15 22:41         ` Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 05/47] mm: asi: Make __get_current_cr3_fast() ASI-aware Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 06/47] mm: asi: ASI page table allocation and free functions Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 07/47] mm: asi: Functions to map/unmap a memory range into ASI page tables Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 08/47] mm: asi: Add basic infrastructure for global non-sensitive mappings Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 09/47] mm: Add __PAGEFLAG_FALSE Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 10/47] mm: asi: Support for global non-sensitive direct map allocations Junaid Shahid
2022-03-23 21:06   ` Matthew Wilcox
2022-03-23 23:48     ` Junaid Shahid
2022-03-24  1:54       ` Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 11/47] mm: asi: Global non-sensitive vmalloc/vmap support Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 12/47] mm: asi: Support for global non-sensitive slab caches Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 13/47] asi: Added ASI memory cgroup flag Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 14/47] mm: asi: Disable ASI API when ASI is not enabled for a process Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 15/47] kvm: asi: Restricted address space for VM execution Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 16/47] mm: asi: Support for mapping non-sensitive pcpu chunks Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 17/47] mm: asi: Aliased direct map for local non-sensitive allocations Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 18/47] mm: asi: Support for pre-ASI-init " Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 19/47] mm: asi: Support for locally nonsensitive page allocations Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 20/47] mm: asi: Support for locally non-sensitive vmalloc allocations Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 21/47] mm: asi: Add support for locally non-sensitive VM_USERMAP pages Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 22/47] mm: asi: Added refcounting when initilizing an asi Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 23/47] mm: asi: Add support for mapping all userspace memory into ASI Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 24/47] mm: asi: Support for local non-sensitive slab caches Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 25/47] mm: asi: Avoid warning from NMI userspace accesses in ASI context Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 26/47] mm: asi: Use separate PCIDs for restricted address spaces Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 27/47] mm: asi: Avoid TLB flushes during ASI CR3 switches when possible Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 28/47] mm: asi: Avoid TLB flush IPIs to CPUs not in ASI context Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 29/47] mm: asi: Reduce TLB flushes when freeing pages asynchronously Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 30/47] mm: asi: Add API for mapping userspace address ranges Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 31/47] mm: asi: Support for non-sensitive SLUB caches Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 32/47] x86: asi: Allocate FPU state separately when ASI is enabled Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 33/47] kvm: asi: Map guest memory into restricted ASI address space Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 34/47] kvm: asi: Unmap guest memory from ASI address space when using nested virt Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 35/47] mm: asi: asi_exit() on PF, skip handling if address is accessible Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 36/47] mm: asi: Adding support for dynamic percpu ASI allocations Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 37/47] mm: asi: ASI annotation support for static variables Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 38/47] mm: asi: ASI annotation support for dynamic modules Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 39/47] mm: asi: Skip conventional L1TF/MDS mitigations Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 40/47] mm: asi: support for static percpu DEFINE_PER_CPU*_ASI Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 41/47] mm: asi: Annotation of static variables to be nonsensitive Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 42/47] mm: asi: Annotation of PERCPU " Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 43/47] mm: asi: Annotation of dynamic " Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 44/47] kvm: asi: Splitting kvm_vcpu_arch into non/sensitive parts Junaid Shahid
2022-02-23  5:22 ` Junaid Shahid [this message]
2022-02-23  5:22 ` [RFC PATCH 46/47] kvm: asi: Do asi_exit() in vcpu_run loop before returning to userspace Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 47/47] mm: asi: Properly un/mapping task stack from ASI + tlb flush Junaid Shahid
2022-03-05  3:39 ` [RFC PATCH 00/47] Address Space Isolation for KVM Hyeonggon Yoo
2022-03-16 21:34 ` Alexandre Chartre
2022-03-17 23:25   ` Junaid Shahid
2022-03-22  9:46     ` Alexandre Chartre
2022-03-23 19:35       ` Junaid Shahid
2022-04-08  8:52         ` Alexandre Chartre
2022-04-11  3:26           ` junaid_shahid
2022-03-16 22:49 ` Thomas Gleixner
2022-03-17 21:24   ` Junaid Shahid

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220223052223.1202152-46-junaids@google.com \
    --to=junaids@google.com \
    --cc=alexandre.chartre@oracle.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=jmattson@google.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=oweisse@google.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=rppt@linux.ibm.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).