linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Junaid Shahid <junaids@google.com>
To: linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org, pbonzini@redhat.com, jmattson@google.com,
	pjt@google.com, oweisse@google.com, alexandre.chartre@oracle.com,
	rppt@linux.ibm.com, dave.hansen@linux.intel.com,
	peterz@infradead.org, tglx@linutronix.de, luto@kernel.org,
	linux-mm@kvack.org
Subject: [RFC PATCH 10/47] mm: asi: Support for global non-sensitive direct map allocations
Date: Tue, 22 Feb 2022 21:21:46 -0800	[thread overview]
Message-ID: <20220223052223.1202152-11-junaids@google.com> (raw)
In-Reply-To: <20220223052223.1202152-1-junaids@google.com>

A new GFP flag is added to specify that an allocation should be
considered globally non-sensitive. The pages will be mapped into the
ASI global non-sensitive pseudo-PGD, which is shared between all
standard ASI instances. A new page flag is also added so that when
these pages are freed, they can also be unmapped from the ASI page
tables.

Signed-off-by: Junaid Shahid <junaids@google.com>


---
 include/linux/gfp.h            |  10 ++-
 include/linux/mm_types.h       |   5 ++
 include/linux/page-flags.h     |   9 ++
 include/trace/events/mmflags.h |  12 ++-
 mm/page_alloc.c                | 145 ++++++++++++++++++++++++++++++++-
 tools/perf/builtin-kmem.c      |   1 +
 6 files changed, 178 insertions(+), 4 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 8fcc38467af6..07a99a463a34 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -60,6 +60,11 @@ struct vm_area_struct;
 #else
 #define ___GFP_NOLOCKDEP	0
 #endif
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+#define ___GFP_GLOBAL_NONSENSITIVE 0x4000000u
+#else
+#define ___GFP_GLOBAL_NONSENSITIVE 0
+#endif
 /* If the above are modified, __GFP_BITS_SHIFT may need updating */
 
 /*
@@ -248,8 +253,11 @@ struct vm_area_struct;
 /* Disable lockdep for GFP context tracking */
 #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
 
+/* Allocate non-sensitive memory */
+#define __GFP_GLOBAL_NONSENSITIVE ((__force gfp_t)___GFP_GLOBAL_NONSENSITIVE)
+
 /* Room for N __GFP_FOO bits */
-#define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP))
+#define __GFP_BITS_SHIFT 27
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /**
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 3de1afa57289..5b8028fcfe67 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -191,6 +191,11 @@ struct page {
 
 		/** @rcu_head: You can use this to free a page by RCU. */
 		struct rcu_head rcu_head;
+
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+		/* Links the pages_to_free_async list */
+		struct llist_node async_free_node;
+#endif
 	};
 
 	union {		/* This union is 4 bytes in size. */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index b90a17e9796d..a07434cc679c 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -140,6 +140,9 @@ enum pageflags {
 #endif
 #ifdef CONFIG_KASAN_HW_TAGS
 	PG_skip_kasan_poison,
+#endif
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+	PG_global_nonsensitive,
 #endif
 	__NR_PAGEFLAGS,
 
@@ -542,6 +545,12 @@ TESTCLEARFLAG(Young, young, PF_ANY)
 PAGEFLAG(Idle, idle, PF_ANY)
 #endif
 
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+__PAGEFLAG(GlobalNonSensitive, global_nonsensitive, PF_ANY);
+#else
+__PAGEFLAG_FALSE(GlobalNonSensitive, global_nonsensitive);
+#endif
+
 #ifdef CONFIG_KASAN_HW_TAGS
 PAGEFLAG(SkipKASanPoison, skip_kasan_poison, PF_HEAD)
 #else
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 116ed4d5d0f8..73a49197ef54 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -50,7 +50,8 @@
 	{(unsigned long)__GFP_DIRECT_RECLAIM,	"__GFP_DIRECT_RECLAIM"},\
 	{(unsigned long)__GFP_KSWAPD_RECLAIM,	"__GFP_KSWAPD_RECLAIM"},\
 	{(unsigned long)__GFP_ZEROTAGS,		"__GFP_ZEROTAGS"},	\
-	{(unsigned long)__GFP_SKIP_KASAN_POISON,"__GFP_SKIP_KASAN_POISON"}\
+	{(unsigned long)__GFP_SKIP_KASAN_POISON,"__GFP_SKIP_KASAN_POISON"},\
+	{(unsigned long)__GFP_GLOBAL_NONSENSITIVE, "__GFP_GLOBAL_NONSENSITIVE"}\
 
 #define show_gfp_flags(flags)						\
 	(flags) ? __print_flags(flags, "|",				\
@@ -93,6 +94,12 @@
 #define IF_HAVE_PG_SKIP_KASAN_POISON(flag,string)
 #endif
 
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+#define IF_HAVE_ASI(flag, string) ,{1UL << flag, string}
+#else
+#define IF_HAVE_ASI(flag, string)
+#endif
+
 #define __def_pageflag_names						\
 	{1UL << PG_locked,		"locked"	},		\
 	{1UL << PG_waiters,		"waiters"	},		\
@@ -121,7 +128,8 @@ IF_HAVE_PG_HWPOISON(PG_hwpoison,	"hwpoison"	)		\
 IF_HAVE_PG_IDLE(PG_young,		"young"		)		\
 IF_HAVE_PG_IDLE(PG_idle,		"idle"		)		\
 IF_HAVE_PG_ARCH_2(PG_arch_2,		"arch_2"	)		\
-IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison")
+IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison")	\
+IF_HAVE_ASI(PG_global_nonsensitive,	"global_nonsensitive")
 
 #define show_page_flags(flags)						\
 	(flags) ? __print_flags(flags, "|",				\
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c5952749ad40..a4048fa1868a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -697,7 +697,7 @@ static inline bool pcp_allowed_order(unsigned int order)
 	return false;
 }
 
-static inline void free_the_page(struct page *page, unsigned int order)
+static inline void __free_the_page(struct page *page, unsigned int order)
 {
 	if (pcp_allowed_order(order))		/* Via pcp? */
 		free_unref_page(page, order);
@@ -705,6 +705,14 @@ static inline void free_the_page(struct page *page, unsigned int order)
 		__free_pages_ok(page, order, FPI_NONE);
 }
 
+static bool asi_unmap_freed_pages(struct page *page, unsigned int order);
+
+static inline void free_the_page(struct page *page, unsigned int order)
+{
+	if (asi_unmap_freed_pages(page, order))
+		__free_the_page(page, order);
+}
+
 /*
  * Higher-order pages are called "compound pages".  They are structured thusly:
  *
@@ -5162,6 +5170,129 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
 	return true;
 }
 
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+
+static DEFINE_PER_CPU(struct work_struct, async_free_work);
+static DEFINE_PER_CPU(struct llist_head, pages_to_free_async);
+static bool async_free_work_initialized;
+
+static void __free_the_page(struct page *page, unsigned int order);
+
+static void async_free_work_fn(struct work_struct *work)
+{
+	struct page *page, *tmp;
+	struct llist_node *pages_to_free;
+	void *va;
+	size_t len;
+	uint order;
+
+	pages_to_free = llist_del_all(this_cpu_ptr(&pages_to_free_async));
+
+	/* A later patch will do a more optimized TLB flush. */
+
+	llist_for_each_entry_safe(page, tmp, pages_to_free, async_free_node) {
+		va = page_to_virt(page);
+		order = page->private;
+		len = PAGE_SIZE * (1 << order);
+
+		asi_flush_tlb_range(ASI_GLOBAL_NONSENSITIVE, va, len);
+		__free_the_page(page, order);
+	}
+}
+
+static int __init asi_page_alloc_init(void)
+{
+	int cpu;
+
+	if (!static_asi_enabled())
+		return 0;
+
+	for_each_possible_cpu(cpu)
+		INIT_WORK(per_cpu_ptr(&async_free_work, cpu),
+			  async_free_work_fn);
+
+	/*
+	 * This function is called before SMP is initialized, so we can assume
+	 * that this is the only running CPU at this point.
+	 */
+
+	barrier();
+	async_free_work_initialized = true;
+	barrier();
+
+	if (!llist_empty(this_cpu_ptr(&pages_to_free_async)))
+		queue_work_on(smp_processor_id(), mm_percpu_wq,
+			      this_cpu_ptr(&async_free_work));
+
+	return 0;
+}
+early_initcall(asi_page_alloc_init);
+
+static int asi_map_alloced_pages(struct page *page, uint order, gfp_t gfp_mask)
+{
+	uint i;
+
+	if (!static_asi_enabled())
+		return 0;
+
+	if (gfp_mask & __GFP_GLOBAL_NONSENSITIVE) {
+		for (i = 0; i < (1 << order); i++)
+			__SetPageGlobalNonSensitive(page + i);
+
+		return asi_map_gfp(ASI_GLOBAL_NONSENSITIVE, page_to_virt(page),
+				   PAGE_SIZE * (1 << order), gfp_mask);
+	}
+
+	return 0;
+}
+
+static bool asi_unmap_freed_pages(struct page *page, unsigned int order)
+{
+	void *va;
+	size_t len;
+	bool async_flush_needed;
+
+	if (!static_asi_enabled())
+		return true;
+
+	if (!PageGlobalNonSensitive(page))
+		return true;
+
+	va = page_to_virt(page);
+	len = PAGE_SIZE * (1 << order);
+	async_flush_needed = irqs_disabled() || in_interrupt();
+
+	asi_unmap(ASI_GLOBAL_NONSENSITIVE, va, len, !async_flush_needed);
+
+	if (!async_flush_needed)
+		return true;
+
+	page->private = order;
+	llist_add(&page->async_free_node, this_cpu_ptr(&pages_to_free_async));
+
+	if (async_free_work_initialized)
+		queue_work_on(smp_processor_id(), mm_percpu_wq,
+			      this_cpu_ptr(&async_free_work));
+
+	return false;
+}
+
+#else /* CONFIG_ADDRESS_SPACE_ISOLATION */
+
+static inline
+int asi_map_alloced_pages(struct page *pages, uint order, gfp_t gfp_mask)
+{
+	return 0;
+}
+
+static inline
+bool asi_unmap_freed_pages(struct page *page, unsigned int order)
+{
+	return true;
+}
+
+#endif
+
 /*
  * __alloc_pages_bulk - Allocate a number of order-0 pages to a list or array
  * @gfp: GFP flags for the allocation
@@ -5345,6 +5476,9 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
 		return NULL;
 	}
 
+	if (static_asi_enabled() && (gfp & __GFP_GLOBAL_NONSENSITIVE))
+		gfp |= __GFP_ZERO;
+
 	gfp &= gfp_allowed_mask;
 	/*
 	 * Apply scoped allocation constraints. This is mainly about GFP_NOFS
@@ -5388,6 +5522,15 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
 		page = NULL;
 	}
 
+	if (page) {
+		int err = asi_map_alloced_pages(page, order, gfp);
+
+		if (unlikely(err)) {
+			__free_pages(page, order);
+			page = NULL;
+		}
+	}
+
 	trace_mm_page_alloc(page, order, alloc_gfp, ac.migratetype);
 
 	return page;
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index da03a341c63c..5857953cd5c1 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -660,6 +660,7 @@ static const struct {
 	{ "__GFP_RECLAIM",		"R" },
 	{ "__GFP_DIRECT_RECLAIM",	"DR" },
 	{ "__GFP_KSWAPD_RECLAIM",	"KR" },
+	{ "__GFP_GLOBAL_NONSENSITIVE",	"GNS" },
 };
 
 static size_t max_gfp_len;
-- 
2.35.1.473.g83b2b277ed-goog


  parent reply	other threads:[~2022-02-23  5:24 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-23  5:21 [RFC PATCH 00/47] Address Space Isolation for KVM Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 01/47] mm: asi: Introduce ASI core API Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 02/47] mm: asi: Add command-line parameter to enable/disable ASI Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 03/47] mm: asi: Switch to unrestricted address space when entering scheduler Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 04/47] mm: asi: ASI support in interrupts/exceptions Junaid Shahid
2022-03-14 15:50   ` Thomas Gleixner
2022-03-15  2:01     ` Junaid Shahid
2022-03-15 12:55       ` Thomas Gleixner
2022-03-15 22:41         ` Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 05/47] mm: asi: Make __get_current_cr3_fast() ASI-aware Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 06/47] mm: asi: ASI page table allocation and free functions Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 07/47] mm: asi: Functions to map/unmap a memory range into ASI page tables Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 08/47] mm: asi: Add basic infrastructure for global non-sensitive mappings Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 09/47] mm: Add __PAGEFLAG_FALSE Junaid Shahid
2022-02-23  5:21 ` Junaid Shahid [this message]
2022-03-23 21:06   ` [RFC PATCH 10/47] mm: asi: Support for global non-sensitive direct map allocations Matthew Wilcox
2022-03-23 23:48     ` Junaid Shahid
2022-03-24  1:54       ` Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 11/47] mm: asi: Global non-sensitive vmalloc/vmap support Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 12/47] mm: asi: Support for global non-sensitive slab caches Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 13/47] asi: Added ASI memory cgroup flag Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 14/47] mm: asi: Disable ASI API when ASI is not enabled for a process Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 15/47] kvm: asi: Restricted address space for VM execution Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 16/47] mm: asi: Support for mapping non-sensitive pcpu chunks Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 17/47] mm: asi: Aliased direct map for local non-sensitive allocations Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 18/47] mm: asi: Support for pre-ASI-init " Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 19/47] mm: asi: Support for locally nonsensitive page allocations Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 20/47] mm: asi: Support for locally non-sensitive vmalloc allocations Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 21/47] mm: asi: Add support for locally non-sensitive VM_USERMAP pages Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 22/47] mm: asi: Added refcounting when initilizing an asi Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 23/47] mm: asi: Add support for mapping all userspace memory into ASI Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 24/47] mm: asi: Support for local non-sensitive slab caches Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 25/47] mm: asi: Avoid warning from NMI userspace accesses in ASI context Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 26/47] mm: asi: Use separate PCIDs for restricted address spaces Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 27/47] mm: asi: Avoid TLB flushes during ASI CR3 switches when possible Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 28/47] mm: asi: Avoid TLB flush IPIs to CPUs not in ASI context Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 29/47] mm: asi: Reduce TLB flushes when freeing pages asynchronously Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 30/47] mm: asi: Add API for mapping userspace address ranges Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 31/47] mm: asi: Support for non-sensitive SLUB caches Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 32/47] x86: asi: Allocate FPU state separately when ASI is enabled Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 33/47] kvm: asi: Map guest memory into restricted ASI address space Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 34/47] kvm: asi: Unmap guest memory from ASI address space when using nested virt Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 35/47] mm: asi: asi_exit() on PF, skip handling if address is accessible Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 36/47] mm: asi: Adding support for dynamic percpu ASI allocations Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 37/47] mm: asi: ASI annotation support for static variables Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 38/47] mm: asi: ASI annotation support for dynamic modules Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 39/47] mm: asi: Skip conventional L1TF/MDS mitigations Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 40/47] mm: asi: support for static percpu DEFINE_PER_CPU*_ASI Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 41/47] mm: asi: Annotation of static variables to be nonsensitive Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 42/47] mm: asi: Annotation of PERCPU " Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 43/47] mm: asi: Annotation of dynamic " Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 44/47] kvm: asi: Splitting kvm_vcpu_arch into non/sensitive parts Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 45/47] mm: asi: Mapping global nonsensitive areas in asi_global_init Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 46/47] kvm: asi: Do asi_exit() in vcpu_run loop before returning to userspace Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 47/47] mm: asi: Properly un/mapping task stack from ASI + tlb flush Junaid Shahid
2022-03-05  3:39 ` [RFC PATCH 00/47] Address Space Isolation for KVM Hyeonggon Yoo
2022-03-16 21:34 ` Alexandre Chartre
2022-03-17 23:25   ` Junaid Shahid
2022-03-22  9:46     ` Alexandre Chartre
2022-03-23 19:35       ` Junaid Shahid
2022-04-08  8:52         ` Alexandre Chartre
2022-04-11  3:26           ` junaid_shahid
2022-03-16 22:49 ` Thomas Gleixner
2022-03-17 21:24   ` Junaid Shahid

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220223052223.1202152-11-junaids@google.com \
    --to=junaids@google.com \
    --cc=alexandre.chartre@oracle.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=jmattson@google.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=oweisse@google.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=rppt@linux.ibm.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).