From: Hyeonggon Yoo <42.hyeyoo@gmail.com>
To: Vlastimil Babka <vbabka@suse.cz>
Cc: Marco Elver <elver@google.com>,
	Matthew Wilcox <willy@infradead.org>,
	Christoph Lameter <cl@linux.com>,
	Pekka Enberg <penberg@kernel.org>,
	David Rientjes <rientjes@google.com>,
	Joonsoo Kim <iamjoonsoo.kim@lge.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Hyeonggon Yoo <42.hyeyoo@gmail.com>,
	Roman Gushchin <roman.gushchin@linux.dev>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: [PATCH v2 13/23] mm/slab: kmalloc: pass requests larger than order-1 page to page allocator
Date: Thu, 14 Apr 2022 17:57:17 +0900
Message-ID: <20220414085727.643099-14-42.hyeyoo@gmail.com>
In-Reply-To: <20220414085727.643099-1-42.hyeyoo@gmail.com>

There is not much benefit to serving large objects from the kmalloc
caches in SLAB. Pass requests larger than an order-1 page to the page
allocator, as SLUB already does, so the common kmalloc code is easier
to maintain.

Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
---

Changes from previous series (Thanks to Vlastimil):
- Disable/Enable irqs around free_large_kmalloc()
- Do not lose NUMA locality in __do_kmalloc
- Some style fixes (use slab->slab_cache instead of virt_to_cache)
- Remove unsupported sizes in __kmalloc_index

Changes from v1:
- Instead of defining a variable x, just cast to (void *) when calling
  free_large_kmalloc().
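
For reference, here is a minimal userspace sketch of the size-based
dispatch this patch introduces on the allocation side. This is not
kernel code: the PAGE_SHIFT value, the get_order_of() helper and the
demo sizes are assumptions made purely for illustration; the real logic
lives in __do_kmalloc_node() and kmalloc_large_node().

#include <stdio.h>

#define PAGE_SHIFT		12	/* assume 4K pages for the demo */
#define PAGE_SIZE		(1UL << PAGE_SHIFT)
/* Requests above an order-1 page (2 * PAGE_SIZE) bypass the kmalloc caches. */
#define KMALLOC_MAX_CACHE_SIZE	(PAGE_SIZE * 2)

/* Smallest page order whose allocation covers 'size' bytes. */
static unsigned int get_order_of(unsigned long size)
{
	unsigned int order = 0;

	while ((PAGE_SIZE << order) < size)
		order++;
	return order;
}

int main(void)
{
	unsigned long sizes[] = { 64, 4096, 8192, 16384, 1UL << 20 };

	for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		if (sizes[i] > KMALLOC_MAX_CACHE_SIZE)
			printf("%8lu bytes -> page allocator, order %u\n",
			       sizes[i], get_order_of(sizes[i]));
		else
			printf("%8lu bytes -> kmalloc cache\n", sizes[i]);
	}
	return 0;
}

On the free side, kfree(), kmem_cache_free_bulk() and __ksize() tell the
two cases apart by checking folio_test_slab() on the folio backing the
pointer, as the hunks below show.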


 include/linux/slab.h | 23 +++++------------------
 mm/slab.c            | 44 ++++++++++++++++++++++++++++++--------------
 mm/slab.h            |  3 +++
 mm/slab_common.c     | 25 ++++++++++++++++++-------
 mm/slub.c            | 19 -------------------
 5 files changed, 56 insertions(+), 58 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index ea168f8a248d..c8c82087c3f9 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -231,27 +231,17 @@ void kmem_dump_obj(void *object);
 
 #ifdef CONFIG_SLAB
 /*
- * The largest kmalloc size supported by the SLAB allocators is
- * 32 megabyte (2^25) or the maximum allocatable page order if that is
- * less than 32 MB.
- *
- * WARNING: Its not easy to increase this value since the allocators have
- * to do various tricks to work around compiler limitations in order to
- * ensure proper constant folding.
+ * SLAB and SLUB directly allocate requests fitting into an order-1 page
+ * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
  */
-#define KMALLOC_SHIFT_HIGH	((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
-				(MAX_ORDER + PAGE_SHIFT - 1) : 25)
-#define KMALLOC_SHIFT_MAX	KMALLOC_SHIFT_HIGH
+#define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
+#define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
 #ifndef KMALLOC_SHIFT_LOW
 #define KMALLOC_SHIFT_LOW	5
 #endif
 #endif
 
 #ifdef CONFIG_SLUB
-/*
- * SLUB directly allocates requests fitting in to an order-1 page
- * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
- */
 #define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
 #define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
 #ifndef KMALLOC_SHIFT_LOW
@@ -403,10 +393,6 @@ static __always_inline unsigned int __kmalloc_index(size_t size,
 	if (size <= 512 * 1024) return 19;
 	if (size <= 1024 * 1024) return 20;
 	if (size <=  2 * 1024 * 1024) return 21;
-	if (size <=  4 * 1024 * 1024) return 22;
-	if (size <=  8 * 1024 * 1024) return 23;
-	if (size <=  16 * 1024 * 1024) return 24;
-	if (size <=  32 * 1024 * 1024) return 25;
 
 	if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant)
 		BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()");
@@ -416,6 +402,7 @@ static __always_inline unsigned int __kmalloc_index(size_t size,
 	/* Will never be reached. Needed because the compiler may complain */
 	return -1;
 }
+static_assert(PAGE_SHIFT <= 20);
 #define kmalloc_index(s) __kmalloc_index(s, true)
 #endif /* !CONFIG_SLOB */
 
diff --git a/mm/slab.c b/mm/slab.c
index b0aaca017f42..1dfe0f9d5882 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3533,7 +3533,7 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
 	void *ret;
 
 	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
-		return NULL;
+		return kmalloc_large_node(size, flags, node);
 	cachep = kmalloc_slab(size, flags);
 	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
 		return cachep;
@@ -3607,15 +3607,25 @@ void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
 {
 	struct kmem_cache *s;
 	size_t i;
+	struct folio *folio;
 
 	local_irq_disable();
 	for (i = 0; i < size; i++) {
 		void *objp = p[i];
 
-		if (!orig_s) /* called via kfree_bulk */
-			s = virt_to_cache(objp);
-		else
+		if (!orig_s) {
+			folio = virt_to_folio(objp);
+			/* called via kfree_bulk */
+			if (!folio_test_slab(folio)) {
+				local_irq_enable();
+				free_large_kmalloc(folio, objp);
+				local_irq_disable();
+				continue;
+			}
+			s = folio_slab(folio)->slab_cache;
+		} else
 			s = cache_from_obj(orig_s, objp);
+
 		if (!s)
 			continue;
 
@@ -3644,20 +3654,24 @@ void kfree(const void *objp)
 {
 	struct kmem_cache *c;
 	unsigned long flags;
+	struct folio *folio;
 
 	trace_kfree(_RET_IP_, objp);
 
 	if (unlikely(ZERO_OR_NULL_PTR(objp)))
 		return;
-	local_irq_save(flags);
-	kfree_debugcheck(objp);
-	c = virt_to_cache(objp);
-	if (!c) {
-		local_irq_restore(flags);
+
+	folio = virt_to_folio(objp);
+	if (!folio_test_slab(folio)) {
+		free_large_kmalloc(folio, (void *)objp);
 		return;
 	}
-	debug_check_no_locks_freed(objp, c->object_size);
 
+	c = folio_slab(folio)->slab_cache;
+
+	local_irq_save(flags);
+	kfree_debugcheck(objp);
+	debug_check_no_locks_freed(objp, c->object_size);
 	debug_check_no_obj_freed(objp, c->object_size);
 	__cache_free(c, (void *)objp, _RET_IP_);
 	local_irq_restore(flags);
@@ -4079,15 +4093,17 @@ void __check_heap_object(const void *ptr, unsigned long n,
 size_t __ksize(const void *objp)
 {
 	struct kmem_cache *c;
-	size_t size;
+	struct folio *folio;
 
 	BUG_ON(!objp);
 	if (unlikely(objp == ZERO_SIZE_PTR))
 		return 0;
 
-	c = virt_to_cache(objp);
-	size = c ? c->object_size : 0;
+	folio = virt_to_folio(objp);
+	if (!folio_test_slab(folio))
+		return folio_size(folio);
 
-	return size;
+	c = folio_slab(folio)->slab_cache;
+	return c->object_size;
 }
 EXPORT_SYMBOL(__ksize);
diff --git a/mm/slab.h b/mm/slab.h
index f7d018100994..b864c5bc4c25 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -681,6 +681,9 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
 		print_tracking(cachep, x);
 	return cachep;
 }
+
+void free_large_kmalloc(struct folio *folio, void *object);
+
 #endif /* CONFIG_SLOB */
 
 static inline size_t slab_ksize(const struct kmem_cache *s)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 30684efc89d7..960cc07c3a91 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -764,8 +764,8 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
 
 /*
  * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
- * kmalloc_index() supports up to 2^25=32MB, so the final entry of the table is
- * kmalloc-32M.
+ * kmalloc_index() supports up to 2^21=2MB, so the final entry of the table is
+ * kmalloc-2M.
  */
 const struct kmalloc_info_struct kmalloc_info[] __initconst = {
 	INIT_KMALLOC_INFO(0, 0),
@@ -789,11 +789,7 @@ const struct kmalloc_info_struct kmalloc_info[] __initconst = {
 	INIT_KMALLOC_INFO(262144, 256k),
 	INIT_KMALLOC_INFO(524288, 512k),
 	INIT_KMALLOC_INFO(1048576, 1M),
-	INIT_KMALLOC_INFO(2097152, 2M),
-	INIT_KMALLOC_INFO(4194304, 4M),
-	INIT_KMALLOC_INFO(8388608, 8M),
-	INIT_KMALLOC_INFO(16777216, 16M),
-	INIT_KMALLOC_INFO(33554432, 32M)
+	INIT_KMALLOC_INFO(2097152, 2M)
 };
 
 /*
@@ -906,6 +902,21 @@ void __init create_kmalloc_caches(slab_flags_t flags)
 	/* Kmalloc array is now usable */
 	slab_state = UP;
 }
+
+void free_large_kmalloc(struct folio *folio, void *object)
+{
+	unsigned int order = folio_order(folio);
+
+	if (WARN_ON_ONCE(order == 0))
+		pr_warn_once("object pointer: 0x%p\n", object);
+
+	kmemleak_free(object);
+	kasan_kfree_large(object);
+
+	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
+			      -(PAGE_SIZE << order));
+	__free_pages(folio_page(folio, 0), order);
+}
 #endif /* !CONFIG_SLOB */
 
 gfp_t kmalloc_fix_flags(gfp_t flags)
diff --git a/mm/slub.c b/mm/slub.c
index 892988990da7..1dc9e8eebb62 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1679,12 +1679,6 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
  * Hooks for other subsystems that check memory allocations. In a typical
  * production configuration these hooks all should produce no code at all.
  */
-static __always_inline void kfree_hook(void *x)
-{
-	kmemleak_free(x);
-	kasan_kfree_large(x);
-}
-
 static __always_inline bool slab_free_hook(struct kmem_cache *s,
 						void *x, bool init)
 {
@@ -3490,19 +3484,6 @@ struct detached_freelist {
 	struct kmem_cache *s;
 };
 
-static inline void free_large_kmalloc(struct folio *folio, void *object)
-{
-	unsigned int order = folio_order(folio);
-
-	if (WARN_ON_ONCE(order == 0))
-		pr_warn_once("object pointer: 0x%p\n", object);
-
-	kfree_hook(object);
-	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
-			      -(PAGE_SIZE << order));
-	__free_pages(folio_page(folio, 0), order);
-}
-
 /*
  * This function progressively scans the array with free objects (with
  * a limited look ahead) and extract objects belonging to the same
-- 
2.32.0

