From: Junaid Shahid <junaids@google.com>
To: linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org, pbonzini@redhat.com, jmattson@google.com,
	pjt@google.com, oweisse@google.com, alexandre.chartre@oracle.com,
	rppt@linux.ibm.com, dave.hansen@linux.intel.com,
	peterz@infradead.org, tglx@linutronix.de, luto@kernel.org,
	linux-mm@kvack.org
Subject: [RFC PATCH 31/47] mm: asi: Support for non-sensitive SLUB caches
Date: Tue, 22 Feb 2022 21:22:07 -0800
Message-ID: <20220223052223.1202152-32-junaids@google.com>
In-Reply-To: <20220223052223.1202152-1-junaids@google.com>

This adds support for allocating global and local non-sensitive objects
using the SLUB allocator. As with the SLAB support, per-process child
caches are created for locally non-sensitive allocations, using a
mechanism based on a modified form of the earlier per-memcg cache
implementation. Writes to the slab sysfs attributes of a root cache are
propagated to its child caches, and shrinking a root cache also shrinks
all of its children.
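
A minimal usage sketch (assuming the SLAB_GLOBAL_NONSENSITIVE and
SLAB_LOCAL_NONSENSITIVE cache flags introduced by the earlier slab
cache patches in this series; the cache name and object size below are
purely illustrative):

	struct kmem_cache *cache;
	void *obj;

	/*
	 * Objects from this cache are mapped into the global
	 * non-sensitive ASI address space. With
	 * SLAB_LOCAL_NONSENSITIVE instead, allocations would be
	 * transparently served from a per-process child cache.
	 */
	cache = kmem_cache_create("foo_cache", 128, 0,
				  SLAB_GLOBAL_NONSENSITIVE, NULL);
	if (!cache)
		return -ENOMEM;

	obj = kmem_cache_alloc(cache, GFP_KERNEL);
	/* ... use obj ... */
	kmem_cache_free(cache, obj);
	kmem_cache_destroy(cache);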

Signed-off-by: Junaid Shahid <junaids@google.com>


---
 include/linux/slub_def.h |   6 ++
 mm/slab.h                |   5 ++
 mm/slab_common.c         |  33 +++++++--
 mm/slub.c                | 140 ++++++++++++++++++++++++++++++++++++++-
 security/Kconfig         |   3 +-
 5 files changed, 179 insertions(+), 8 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 0fa751b946fa..6e185b61582c 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -137,6 +137,12 @@ struct kmem_cache {
 	struct kasan_cache kasan_info;
 #endif
 
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+	struct kmem_local_cache_info local_cache_info;
+	/* For propagation, maximum size of a stored attr */
+	unsigned int max_attr_size;
+#endif
+
 	unsigned int useroffset;	/* Usercopy region offset */
 	unsigned int usersize;		/* Usercopy region size */
 
diff --git a/mm/slab.h b/mm/slab.h
index b9e11038be27..8799bcdd2fff 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -216,6 +216,7 @@ int __kmem_cache_shutdown(struct kmem_cache *);
 void __kmem_cache_release(struct kmem_cache *);
 int __kmem_cache_shrink(struct kmem_cache *);
 void slab_kmem_cache_release(struct kmem_cache *);
+void kmem_cache_shrink_all(struct kmem_cache *s);
 
 struct seq_file;
 struct file;
@@ -344,6 +345,7 @@ void restore_page_nonsensitive_metadata(struct page *page,
 }
 
 void set_nonsensitive_cache_params(struct kmem_cache *s);
+void init_local_cache_info(struct kmem_cache *s, struct kmem_cache *root);
 
 #else /* CONFIG_ADDRESS_SPACE_ISOLATION */
 
@@ -380,6 +382,9 @@ static inline void restore_page_nonsensitive_metadata(struct page *page,
 
 static inline void set_nonsensitive_cache_params(struct kmem_cache *s) { }
 
+static inline
+void init_local_cache_info(struct kmem_cache *s, struct kmem_cache *root) { }
+
 #endif /* CONFIG_ADDRESS_SPACE_ISOLATION */
 
 #ifdef CONFIG_MEMCG_KMEM
diff --git a/mm/slab_common.c b/mm/slab_common.c
index b486b72d6344..efa61b97902a 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -142,7 +142,7 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
 
 LIST_HEAD(slab_root_caches);
 
-static void init_local_cache_info(struct kmem_cache *s, struct kmem_cache *root)
+void init_local_cache_info(struct kmem_cache *s, struct kmem_cache *root)
 {
 	if (root) {
 		s->local_cache_info.root_cache = root;
@@ -194,9 +194,6 @@ void set_nonsensitive_cache_params(struct kmem_cache *s)
 
 #else
 
-static inline
-void init_local_cache_info(struct kmem_cache *s, struct kmem_cache *root) { }
-
 static inline void cleanup_local_cache_info(struct kmem_cache *s) { }
 
 #endif /* CONFIG_ADDRESS_SPACE_ISOLATION */
@@ -644,6 +641,34 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
 }
 EXPORT_SYMBOL(kmem_cache_shrink);
 
+/**
+ * kmem_cache_shrink_all - shrink a cache and, for a root cache, all its child caches
+ * @s: The cache pointer
+ */
+void kmem_cache_shrink_all(struct kmem_cache *s)
+{
+	struct kmem_cache *c;
+
+	if (!static_asi_enabled() || !is_root_cache(s)) {
+		kmem_cache_shrink(s);
+		return;
+	}
+
+	kasan_cache_shrink(s);
+	__kmem_cache_shrink(s);
+
+	/*
+	 * We have to take the slab_mutex to protect against concurrent
+	 * modification of the child cache list.
+	 */
+	mutex_lock(&slab_mutex);
+	for_each_child_cache(c, s) {
+		kasan_cache_shrink(c);
+		__kmem_cache_shrink(c);
+	}
+	mutex_unlock(&slab_mutex);
+}
+
 bool slab_is_available(void)
 {
 	return slab_state >= UP;
diff --git a/mm/slub.c b/mm/slub.c
index abe7db581d68..df0191f8b0e2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -289,6 +289,21 @@ static void debugfs_slab_add(struct kmem_cache *);
 static inline void debugfs_slab_add(struct kmem_cache *s) { }
 #endif
 
+#if defined(CONFIG_SYSFS) && defined(CONFIG_ADDRESS_SPACE_ISOLATION)
+static void propagate_slab_attrs_from_parent(struct kmem_cache *s);
+static void propagate_slab_attr_to_children(struct kmem_cache *s,
+					    struct attribute *attr,
+					    const char *buf, size_t len);
+#else
+static inline void propagate_slab_attrs_from_parent(struct kmem_cache *s) { }
+
+static inline
+void propagate_slab_attr_to_children(struct kmem_cache *s,
+				     struct attribute *attr,
+				     const char *buf, size_t len)
+{ }
+#endif
+
 static inline void stat(const struct kmem_cache *s, enum stat_item si)
 {
 #ifdef CONFIG_SLUB_STATS
@@ -2015,6 +2030,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += pages;
 	unaccount_slab_page(page, order, s);
+	restore_page_nonsensitive_metadata(page, s);
 	__free_pages(page, order);
 }
 
@@ -4204,6 +4220,8 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
 		}
 	}
 
+	set_nonsensitive_cache_params(s);
+
 #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
     defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
 	if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
@@ -4797,6 +4815,10 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
 #endif
 	}
 	list_add(&s->list, &slab_caches);
+	init_local_cache_info(s, NULL);
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+	list_del(&static_cache->root_caches_node);
+#endif
 	return s;
 }
 
@@ -4863,7 +4885,7 @@ struct kmem_cache *
 __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
 		   slab_flags_t flags, void (*ctor)(void *))
 {
-	struct kmem_cache *s;
+	struct kmem_cache *s, *c;
 
 	s = find_mergeable(size, align, flags, name, ctor);
 	if (s) {
@@ -4876,6 +4898,11 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
 		s->object_size = max(s->object_size, size);
 		s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
 
+		for_each_child_cache(c, s) {
+			c->object_size = s->object_size;
+			c->inuse = max(c->inuse, ALIGN(size, sizeof(void *)));
+		}
+
 		if (sysfs_slab_alias(s, name)) {
 			s->refcount--;
 			s = NULL;
@@ -4889,6 +4916,9 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
 {
 	int err;
 
+	if (!static_asi_enabled())
+		flags &= ~SLAB_NONSENSITIVE;
+
 	err = kmem_cache_open(s, flags);
 	if (err)
 		return err;
@@ -4897,6 +4927,8 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
 	if (slab_state <= UP)
 		return 0;
 
+	propagate_slab_attrs_from_parent(s);
+
 	err = sysfs_slab_add(s);
 	if (err) {
 		__kmem_cache_release(s);
@@ -5619,7 +5651,7 @@ static ssize_t shrink_store(struct kmem_cache *s,
 			const char *buf, size_t length)
 {
 	if (buf[0] == '1')
-		kmem_cache_shrink(s);
+		kmem_cache_shrink_all(s);
 	else
 		return -EINVAL;
 	return length;
@@ -5829,6 +5861,87 @@ static ssize_t slab_attr_show(struct kobject *kobj,
 	return err;
 }
 
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+
+static void propagate_slab_attrs_from_parent(struct kmem_cache *s)
+{
+	int i;
+	char *buffer = NULL;
+	struct kmem_cache *root_cache;
+
+	if (is_root_cache(s))
+		return;
+
+	root_cache = s->local_cache_info.root_cache;
+
+	/*
+	 * This means this cache had no attribute written. Therefore, there is
+	 * no point in copying default values around.
+	 */
+	if (!root_cache->max_attr_size)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
+		char mbuf[64];
+		char *buf;
+		struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
+		ssize_t len;
+
+		if (!attr || !attr->store || !attr->show)
+			continue;
+
+		/*
+		 * It is really bad that we have to allocate here, so we will
+		 * do it only as a fallback. If we actually allocate, though,
+		 * we can just use the allocated buffer until the end.
+		 *
+		 * Most of the slub attributes will tend to be very small in
+	 * size, but sysfs allows buffers up to a page, so page-sized
+	 * buffers can theoretically be needed.
+		 */
+		if (buffer) {
+			buf = buffer;
+		} else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf) &&
+			 !IS_ENABLED(CONFIG_SLUB_STATS)) {
+			buf = mbuf;
+		} else {
+			buffer = (char *)get_zeroed_page(GFP_KERNEL);
+			if (WARN_ON(!buffer))
+				continue;
+			buf = buffer;
+		}
+
+		len = attr->show(root_cache, buf);
+		if (len > 0)
+			attr->store(s, buf, len);
+	}
+
+	if (buffer)
+		free_page((unsigned long)buffer);
+}
+
+static void propagate_slab_attr_to_children(struct kmem_cache *s,
+					    struct attribute *attr,
+					    const char *buf, size_t len)
+{
+	struct kmem_cache *c;
+	struct slab_attribute *attribute = to_slab_attr(attr);
+
+	if (static_asi_enabled()) {
+		mutex_lock(&slab_mutex);
+
+		if (s->max_attr_size < len)
+			s->max_attr_size = len;
+
+		for_each_child_cache(c, s)
+			attribute->store(c, buf, len);
+
+		mutex_unlock(&slab_mutex);
+	}
+}
+
+#endif
+
 static ssize_t slab_attr_store(struct kobject *kobj,
 				struct attribute *attr,
 				const char *buf, size_t len)
@@ -5844,6 +5957,27 @@ static ssize_t slab_attr_store(struct kobject *kobj,
 		return -EIO;
 
 	err = attribute->store(s, buf, len);
+
+	/*
+	 * This is a best effort propagation, so this function's return
+	 * value will be determined by the parent cache only. This is
+	 * basically because not all attributes will have well defined
+	 * semantics for rollbacks - most of the actions will
+	 * have permanent effects.
+	 *
+	 * Returning the error value of any of the children that fail
+	 * is not 100% defined, in the sense that users seeing the
+	 * error code won't be able to know anything about the state of
+	 * the cache.
+	 *
+	 * Only returning the error code for the parent cache at least
+	 * has well defined semantics. The cache being written to
+	 * directly either failed or succeeded; if it succeeded, we
+	 * loop through the descendants with best-effort propagation.
+	 */
+	if (slab_state >= FULL && err >= 0 && is_root_cache(s))
+		propagate_slab_attr_to_children(s, attr, buf, len);
+
 	return err;
 }
 
@@ -5866,7 +6000,7 @@ static struct kset *slab_kset;
 
 static inline struct kset *cache_kset(struct kmem_cache *s)
 {
-	return slab_kset;
+	return is_root_cache(s) ? slab_kset : NULL;
 }
 
 #define ID_STR_LENGTH 64
diff --git a/security/Kconfig b/security/Kconfig
index 070a948b5266..a5cfb09352b0 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -68,7 +68,8 @@ config PAGE_TABLE_ISOLATION
 config ADDRESS_SPACE_ISOLATION
 	bool "Allow code to run with a reduced kernel address space"
 	default n
-	depends on X86_64 && !UML && SLAB && !NEED_PER_CPU_KM
+	depends on X86_64 && !UML && !NEED_PER_CPU_KM
+	depends on SLAB || SLUB
 	depends on !PARAVIRT
 	depends on !MEMORY_HOTPLUG
 	help
-- 
2.35.1.473.g83b2b277ed-goog

