From: Kees Cook <keescook@chromium.org>
To: Vlastimil Babka <vbabka@suse.cz>
Cc: Kees Cook <keescook@chromium.org>,
	Christoph Lameter <cl@linux.com>,
	Pekka Enberg <penberg@kernel.org>,
	David Rientjes <rientjes@google.com>,
	Joonsoo Kim <iamjoonsoo.kim@lge.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Roman Gushchin <roman.gushchin@linux.dev>,
	Hyeonggon Yoo <42.hyeyoo@gmail.com>,
	linux-mm@kvack.org, "GONG, Ruiqi" <gongruiqi@huaweicloud.com>,
	Xiu Jianfeng <xiujianfeng@huawei.com>,
	Suren Baghdasaryan <surenb@google.com>,
	Kent Overstreet <kent.overstreet@linux.dev>,
	Christian Brauner <brauner@kernel.org>,
	Al Viro <viro@zeniv.linux.org.uk>, Jan Kara <jack@suse.cz>,
	linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-hardening@vger.kernel.org
Subject: [PATCH 1/4] slab: Introduce dedicated bucket allocator
Date: Mon,  4 Mar 2024 10:49:29 -0800	[thread overview]
Message-ID: <20240304184933.3672759-1-keescook@chromium.org> (raw)
In-Reply-To: <20240304184252.work.496-kees@kernel.org>

Dedicated caches are available for fixed-size allocations via
kmem_cache_alloc(), but for dynamically sized allocations there is only
the global kmalloc API's set of buckets available. This means it isn't
possible to separate specific sets of dynamically sized allocations into
a separate collection of caches.

This leads to a use-after-free exploitation weakness in the Linux
kernel since many heap memory spraying/grooming attacks depend on using
userspace-controllable dynamically sized allocations to collide with
fixed-size allocations that end up in the same cache.

While CONFIG_RANDOM_KMALLOC_CACHES provides a probabilistic defense
against these kinds of "type confusion" attacks, including for fixed
same-size heap objects, we can create a complementary deterministic
defense for dynamically sized allocations.

In order to isolate allocations with user-controllable sizes from other
system allocations, introduce kmem_buckets_create() and
kmem_buckets_alloc(), which behave like kmem_cache_create() and
kmem_cache_alloc() respectively, but confine allocations to a dedicated
set of sized caches (which have the same layout as the kmalloc caches).
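
As an illustration, a caller might use the API as in the sketch below
(the "foo" subsystem, init hook, and wrapper names are hypothetical and
not part of this patch; creation parameters mirror
kmem_cache_create_usercopy(), minus the object size):

	static struct kmem_buckets *foo_buckets __ro_after_init;

	static int __init foo_buckets_init(void)
	{
		/* One dedicated set of kmalloc-style caches for "foo". */
		foo_buckets = kmem_buckets_create("foo", 0, SLAB_ACCOUNT,
						  0, UINT_MAX, NULL);
		return foo_buckets ? 0 : -ENOMEM;
	}

	/* Dynamically sized allocations land in foo's caches only. */
	static void *foo_alloc(size_t len)
	{
		return kmem_buckets_alloc(foo_buckets, len, GFP_KERNEL);
	}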

This can also be used in the future, once codetag allocation annotations
exist, to implement per-caller allocation cache isolation[1] even for
dynamic allocations.

Link: https://lore.kernel.org/lkml/202402211449.401382D2AF@keescook [1]
Signed-off-by: Kees Cook <keescook@chromium.org>
---
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: linux-mm@kvack.org
---
 include/linux/slab.h | 26 ++++++++++++++++++
 mm/slab_common.c     | 64 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index b5f5ee8308d0..4a4ff84534be 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -492,6 +492,16 @@ void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
 			   gfp_t gfpflags) __assume_slab_alignment __malloc;
 void kmem_cache_free(struct kmem_cache *s, void *objp);
 
+struct kmem_buckets {
+	struct kmem_cache *caches[ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL])];
+};
+
+struct kmem_buckets *
+kmem_buckets_create(const char *name, unsigned int align, slab_flags_t flags,
+		    unsigned int useroffset, unsigned int usersize,
+		    void (*ctor)(void *));
+
+
 /*
  * Bulk allocation and freeing operations. These are accelerated in an
  * allocator specific way to avoid taking locks repeatedly or building
@@ -594,6 +604,22 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
 	return __kmalloc(size, flags);
 }
 
+static __always_inline __alloc_size(2)
+void *kmem_buckets_alloc(struct kmem_buckets *b, size_t size, gfp_t flags)
+{
+	unsigned int index;
+
+	if (size > KMALLOC_MAX_CACHE_SIZE)
+		return kmalloc_large(size, flags);
+	if (WARN_ON_ONCE(!b))
+		return NULL;
+	index = kmalloc_index(size);
+	if (WARN_ONCE(!b->caches[index],
+		      "missing cache for size %zu (index %d)\n", size, index))
+		return kmalloc(size, flags);
+	return kmalloc_trace(b->caches[index], flags, size);
+}
+
 static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	if (__builtin_constant_p(size) && size) {
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 238293b1dbe1..6002a182d014 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -392,6 +392,66 @@ kmem_cache_create(const char *name, unsigned int size, unsigned int align,
 }
 EXPORT_SYMBOL(kmem_cache_create);
 
+static struct kmem_cache *kmem_buckets_cache __ro_after_init;
+
+struct kmem_buckets *
+kmem_buckets_create(const char *name, unsigned int align,
+		  slab_flags_t flags,
+		  unsigned int useroffset, unsigned int usersize,
+		  void (*ctor)(void *))
+{
+	struct kmem_buckets *b;
+	int idx;
+
+	if (WARN_ON(!kmem_buckets_cache))
+		return NULL;
+
+	b = kmem_cache_alloc(kmem_buckets_cache, GFP_KERNEL|__GFP_ZERO);
+	if (WARN_ON(!b))
+		return NULL;
+
+	for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++) {
+		char *short_size, *cache_name;
+		unsigned int size;
+
+		if (!kmalloc_caches[KMALLOC_NORMAL][idx])
+			continue;
+
+		size = kmalloc_caches[KMALLOC_NORMAL][idx]->object_size;
+		if (!size)
+			continue;
+
+		short_size = strchr(kmalloc_caches[KMALLOC_NORMAL][idx]->name, '-');
+		if (WARN_ON(!short_size))
+			goto fail;
+
+		cache_name = kasprintf(GFP_KERNEL, "%s-%s", name, short_size + 1);
+		if (WARN_ON(!cache_name))
+			goto fail;
+
+		b->caches[idx] = kmem_cache_create_usercopy(cache_name, size,
+					align, flags, useroffset,
+					min(size - useroffset, usersize), ctor);
+		kfree(cache_name);
+		if (WARN_ON(!b->caches[idx]))
+			goto fail;
+	}
+
+	return b;
+
+fail:
+	for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++) {
+		if (b->caches[idx]) {
+			kfree(b->caches[idx]->name);
+			kmem_cache_destroy(b->caches[idx]);
+		}
+	}
+	kfree(b);
+
+	return NULL;
+}
+EXPORT_SYMBOL(kmem_buckets_create);
+
 #ifdef SLAB_SUPPORTS_SYSFS
 /*
  * For a given kmem_cache, kmem_cache_destroy() should only be called
@@ -934,6 +994,10 @@ void __init create_kmalloc_caches(slab_flags_t flags)
 
 	/* Kmalloc array is now usable */
 	slab_state = UP;
+
+	kmem_buckets_cache = kmem_cache_create("kmalloc_buckets",
+				sizeof(struct kmem_buckets) * ARRAY_SIZE(kmalloc_info),
+				0, 0, NULL);
 }
 
 /**
-- 
2.34.1



Thread overview: 9+ messages
2024-03-04 18:49 [PATCH 0/4] slab: Introduce dedicated bucket allocator Kees Cook
2024-03-04 18:49 ` Kees Cook [this message]
2024-03-04 18:49 ` [PATCH 2/4] ipc, msg: Use dedicated slab buckets for alloc_msg() Kees Cook
2024-03-04 18:49 ` [PATCH 3/4] xattr: Use dedicated slab buckets for setxattr() Kees Cook
2024-03-04 21:16   ` Dave Chinner
2024-03-04 21:32     ` Kees Cook
2024-03-04 22:16   ` Eric Biggers
2024-03-04 23:03     ` Kees Cook
2024-03-04 18:49 ` [PATCH 4/4] mm/util: Use dedicated slab buckets for memdup_user() Kees Cook
