* [PATCH v2 0/5] slab cleanups
@ 2022-03-04  6:34 Hyeonggon Yoo
  2022-03-04  6:34 ` [PATCH v2 1/5] mm/slab: kmalloc: pass requests larger than order-1 page to page allocator Hyeonggon Yoo
                   ` (5 more replies)
  0 siblings, 6 replies; 21+ messages in thread
From: Hyeonggon Yoo @ 2022-03-04  6:34 UTC (permalink / raw)
  To: linux-mm
  Cc: Christoph Lameter, Pekka Enberg, David Rientjes, Joonsoo Kim,
	Andrew Morton, Vlastimil Babka, Marco Elver, Matthew WilCox,
	Roman Gushchin, linux-kernel, 42.hyeyoo

Changes from v1:
	Now SLAB passes requests larger than an order-1 page
	to the page allocator.

	Adjusted based on comments from Matthew, Vlastimil and Rientjes.
	Thank you for the feedback!

	BTW, I have no idea what __ksize() should return when an object
	that is not allocated from slab is passed. Both 0 and folio_size()
	seem wrong to me.

Hello, these are cleanup patches for slab.
Please consider them for slab-next :)

Any comments will be appreciated.
Thanks.

Hyeonggon Yoo (5):
  mm/slab: kmalloc: pass requests larger than order-1 page to page
    allocator
  mm/sl[au]b: unify __ksize()
  mm/sl[auo]b: move definition of __ksize() to mm/slab.h
  mm/slub: limit number of node partial slabs only in cache creation
  mm/slub: refactor deactivate_slab()

 include/linux/slab.h |  36 ++++++------
 mm/slab.c            |  51 ++++++++---------
 mm/slab.h            |  21 +++++++
 mm/slab_common.c     |  20 +++++++
 mm/slob.c            |   1 -
 mm/slub.c            | 130 ++++++++++++-------------------------------
 6 files changed, 114 insertions(+), 145 deletions(-)

-- 
2.33.1



* [PATCH v2 1/5] mm/slab: kmalloc: pass requests larger than order-1 page to page allocator
  2022-03-04  6:34 [PATCH v2 0/5] slab cleanups Hyeonggon Yoo
@ 2022-03-04  6:34 ` Hyeonggon Yoo
  2022-03-04 12:45   ` Vlastimil Babka
  2022-03-04  6:34 ` [PATCH v2 2/5] mm/sl[au]b: unify __ksize() Hyeonggon Yoo
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 21+ messages in thread
From: Hyeonggon Yoo @ 2022-03-04  6:34 UTC (permalink / raw)
  To: linux-mm
  Cc: Christoph Lameter, Pekka Enberg, David Rientjes, Joonsoo Kim,
	Andrew Morton, Vlastimil Babka, Marco Elver, Matthew WilCox,
	Roman Gushchin, linux-kernel, 42.hyeyoo

There is not much benefit in serving large objects from kmalloc()
caches. Let's pass large requests to the page allocator, as SLUB
already does, for easier maintenance of the common code.

Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
---
 include/linux/slab.h | 35 ++++++++++++++++-------------------
 mm/slab.c            | 31 +++++++++++++++++++++++++++----
 mm/slab.h            | 19 +++++++++++++++++++
 mm/slub.c            | 19 -------------------
 4 files changed, 62 insertions(+), 42 deletions(-)
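
For illustration only (not part of the diff below): after this change a
large kmalloc() behaves the same with CONFIG_SLAB as it already does
with CONFIG_SLUB, e.g.:

	/* KMALLOC_MAX_CACHE_SIZE is now 2 * PAGE_SIZE for SLAB as well */
	void *buf = kmalloc(4 * PAGE_SIZE, GFP_KERNEL); /* -> kmalloc_large() -> page allocator */
	...
	kfree(buf); /* folio is not a slab, so kfree() uses free_large_kmalloc() */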

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 37bde99b74af..e7b3330db4f3 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -224,29 +224,19 @@ void kmem_dump_obj(void *object);
  * Kmalloc array related definitions
  */
 
-#ifdef CONFIG_SLAB
 /*
- * The largest kmalloc size supported by the SLAB allocators is
- * 32 megabyte (2^25) or the maximum allocatable page order if that is
- * less than 32 MB.
- *
- * WARNING: Its not easy to increase this value since the allocators have
- * to do various tricks to work around compiler limitations in order to
- * ensure proper constant folding.
+ * SLAB and SLUB directly allocate requests fitting into an order-1 page
+ * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
  */
-#define KMALLOC_SHIFT_HIGH	((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
-				(MAX_ORDER + PAGE_SHIFT - 1) : 25)
-#define KMALLOC_SHIFT_MAX	KMALLOC_SHIFT_HIGH
+#ifdef CONFIG_SLAB
+#define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
+#define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
 #ifndef KMALLOC_SHIFT_LOW
 #define KMALLOC_SHIFT_LOW	5
 #endif
 #endif
 
 #ifdef CONFIG_SLUB
-/*
- * SLUB directly allocates requests fitting in to an order-1 page
- * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
- */
 #define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
 #define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
 #ifndef KMALLOC_SHIFT_LOW
@@ -564,15 +554,15 @@ static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t fl
  *	Try really hard to succeed the allocation but fail
  *	eventually.
  */
+#ifndef CONFIG_SLOB
 static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
 {
 	if (__builtin_constant_p(size)) {
-#ifndef CONFIG_SLOB
 		unsigned int index;
-#endif
+
 		if (size > KMALLOC_MAX_CACHE_SIZE)
 			return kmalloc_large(size, flags);
-#ifndef CONFIG_SLOB
+
 		index = kmalloc_index(size);
 
 		if (!index)
@@ -581,10 +571,17 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
 		return kmem_cache_alloc_trace(
 				kmalloc_caches[kmalloc_type(flags)][index],
 				flags, size);
-#endif
 	}
 	return __kmalloc(size, flags);
 }
+#else
+static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
+{
+	if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE)
+		return kmalloc_large(size, flags);
+	return __kmalloc(size, flags);
+}
+#endif
 
 static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
diff --git a/mm/slab.c b/mm/slab.c
index ddf5737c63d9..570af6dc3478 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3624,7 +3624,8 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
 	void *ret;
 
 	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
-		return NULL;
+		return kmalloc_large(size, flags);
+
 	cachep = kmalloc_slab(size, flags);
 	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
 		return cachep;
@@ -3685,7 +3686,8 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
 	void *ret;
 
 	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
-		return NULL;
+		return kmalloc_large(size, flags);
+
 	cachep = kmalloc_slab(size, flags);
 	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
 		return cachep;
@@ -3739,14 +3741,21 @@ void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
 {
 	struct kmem_cache *s;
 	size_t i;
+	struct folio *folio;
 
 	local_irq_disable();
 	for (i = 0; i < size; i++) {
 		void *objp = p[i];
 
-		if (!orig_s) /* called via kfree_bulk */
+		if (!orig_s) {
+			/* called via kfree_bulk */
+			folio = virt_to_folio(objp);
+			if (unlikely(!folio_test_slab(folio))) {
+				free_large_kmalloc(folio, objp);
+				continue;
+			}
 			s = virt_to_cache(objp);
-		else
+		} else
 			s = cache_from_obj(orig_s, objp);
 		if (!s)
 			continue;
@@ -3776,11 +3785,20 @@ void kfree(const void *objp)
 {
 	struct kmem_cache *c;
 	unsigned long flags;
+	struct folio *folio;
+	void *object = (void *) objp;
 
 	trace_kfree(_RET_IP_, objp);
 
 	if (unlikely(ZERO_OR_NULL_PTR(objp)))
 		return;
+
+	folio = virt_to_folio(objp);
+	if (unlikely(!folio_test_slab(folio))) {
+		free_large_kmalloc(folio, object);
+		return;
+	}
+
 	local_irq_save(flags);
 	kfree_debugcheck(objp);
 	c = virt_to_cache(objp);
@@ -4211,12 +4229,17 @@ void __check_heap_object(const void *ptr, unsigned long n,
 size_t __ksize(const void *objp)
 {
 	struct kmem_cache *c;
+	struct folio *folio;
 	size_t size;
 
 	BUG_ON(!objp);
 	if (unlikely(objp == ZERO_SIZE_PTR))
 		return 0;
 
+	folio = virt_to_folio(objp);
+	if (!folio_test_slab(folio))
+		return folio_size(folio);
+
 	c = virt_to_cache(objp);
 	size = c ? c->object_size : 0;
 
diff --git a/mm/slab.h b/mm/slab.h
index c7f2abc2b154..31e98beb47a3 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -664,6 +664,25 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
 		print_tracking(cachep, x);
 	return cachep;
 }
+
+static __always_inline void kfree_hook(void *x)
+{
+	kmemleak_free(x);
+	kasan_kfree_large(x);
+}
+
+static inline void free_large_kmalloc(struct folio *folio, void *object)
+{
+	unsigned int order = folio_order(folio);
+
+	if (WARN_ON_ONCE(order == 0))
+		pr_warn_once("object pointer: 0x%p\n", object);
+
+	kfree_hook(object);
+	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
+			      -(PAGE_SIZE << order));
+	__free_pages(folio_page(folio, 0), order);
+}
 #endif /* CONFIG_SLOB */
 
 static inline size_t slab_ksize(const struct kmem_cache *s)
diff --git a/mm/slub.c b/mm/slub.c
index 261474092e43..04fd084f4709 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1686,12 +1686,6 @@ static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
 	return ptr;
 }
 
-static __always_inline void kfree_hook(void *x)
-{
-	kmemleak_free(x);
-	kasan_kfree_large(x);
-}
-
 static __always_inline bool slab_free_hook(struct kmem_cache *s,
 						void *x, bool init)
 {
@@ -3535,19 +3529,6 @@ struct detached_freelist {
 	struct kmem_cache *s;
 };
 
-static inline void free_large_kmalloc(struct folio *folio, void *object)
-{
-	unsigned int order = folio_order(folio);
-
-	if (WARN_ON_ONCE(order == 0))
-		pr_warn_once("object pointer: 0x%p\n", object);
-
-	kfree_hook(object);
-	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
-			      -(PAGE_SIZE << order));
-	__free_pages(folio_page(folio, 0), order);
-}
-
 /*
  * This function progressively scans the array with free objects (with
  * a limited look ahead) and extract objects belonging to the same
-- 
2.33.1



* [PATCH v2 2/5] mm/sl[au]b: unify __ksize()
  2022-03-04  6:34 [PATCH v2 0/5] slab cleanups Hyeonggon Yoo
  2022-03-04  6:34 ` [PATCH v2 1/5] mm/slab: kmalloc: pass requests larger than order-1 page to page allocator Hyeonggon Yoo
@ 2022-03-04  6:34 ` Hyeonggon Yoo
  2022-03-04 18:25   ` Vlastimil Babka
  2022-03-04  6:34 ` [PATCH v2 3/5] mm/sl[auo]b: move definition of __ksize() to mm/slab.h Hyeonggon Yoo
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 21+ messages in thread
From: Hyeonggon Yoo @ 2022-03-04  6:34 UTC (permalink / raw)
  To: linux-mm
  Cc: Christoph Lameter, Pekka Enberg, David Rientjes, Joonsoo Kim,
	Andrew Morton, Vlastimil Babka, Marco Elver, Matthew WilCox,
	Roman Gushchin, linux-kernel, 42.hyeyoo

Now that SLAB passes large requests to the page allocator like SLUB,
unify __ksize(). Only SLOB needs to implement its own version of
__ksize(), because it stores the size in the object header for kmalloc
objects.

Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
---
 mm/slab.c        | 30 ------------------------------
 mm/slab_common.c | 27 +++++++++++++++++++++++++++
 mm/slub.c        | 16 ----------------
 3 files changed, 27 insertions(+), 46 deletions(-)
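
For reference (not part of this patch): SLOB records the size of a
kmalloc object in a small header placed just before the object, which is
why its __ksize() in mm/slob.c has to stay separate; it ends with
roughly:

	align = max_t(unsigned int, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
	m = (unsigned int *)(block - align);
	return SLOB_UNITS(*m) * SLOB_UNIT;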

diff --git a/mm/slab.c b/mm/slab.c
index 570af6dc3478..3ddf2181d8e4 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4216,33 +4216,3 @@ void __check_heap_object(const void *ptr, unsigned long n,
 	usercopy_abort("SLAB object", cachep->name, to_user, offset, n);
 }
 #endif /* CONFIG_HARDENED_USERCOPY */
-
-/**
- * __ksize -- Uninstrumented ksize.
- * @objp: pointer to the object
- *
- * Unlike ksize(), __ksize() is uninstrumented, and does not provide the same
- * safety checks as ksize() with KASAN instrumentation enabled.
- *
- * Return: size of the actual memory used by @objp in bytes
- */
-size_t __ksize(const void *objp)
-{
-	struct kmem_cache *c;
-	struct folio *folio;
-	size_t size;
-
-	BUG_ON(!objp);
-	if (unlikely(objp == ZERO_SIZE_PTR))
-		return 0;
-
-	folio = virt_to_folio(objp);
-	if (!folio_test_slab(folio))
-		return folio_size(folio);
-
-	c = virt_to_cache(objp);
-	size = c ? c->object_size : 0;
-
-	return size;
-}
-EXPORT_SYMBOL(__ksize);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 23f2ab0713b7..1d2f92e871d2 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1245,6 +1245,33 @@ void kfree_sensitive(const void *p)
 }
 EXPORT_SYMBOL(kfree_sensitive);
 
+#ifndef CONFIG_SLOB
+/**
+ * __ksize -- Uninstrumented ksize.
+ * @objp: pointer to the object
+ *
+ * Unlike ksize(), __ksize() is uninstrumented, and does not provide the same
+ * safety checks as ksize() with KASAN instrumentation enabled.
+ *
+ * Return: size of the actual memory used by @objp in bytes
+ */
+size_t __ksize(const void *object)
+{
+	struct folio *folio;
+
+	if (unlikely(object == ZERO_SIZE_PTR))
+		return 0;
+
+	folio = virt_to_folio(object);
+
+	if (unlikely(!folio_test_slab(folio)))
+		return folio_size(folio);
+
+	return slab_ksize(folio_slab(folio)->slab_cache);
+}
+EXPORT_SYMBOL(__ksize);
+#endif
+
 /**
  * ksize - get the actual amount of memory allocated for a given object
  * @objp: Pointer to the object
diff --git a/mm/slub.c b/mm/slub.c
index 04fd084f4709..6f0ebadd8f30 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4507,22 +4507,6 @@ void __check_heap_object(const void *ptr, unsigned long n,
 }
 #endif /* CONFIG_HARDENED_USERCOPY */
 
-size_t __ksize(const void *object)
-{
-	struct folio *folio;
-
-	if (unlikely(object == ZERO_SIZE_PTR))
-		return 0;
-
-	folio = virt_to_folio(object);
-
-	if (unlikely(!folio_test_slab(folio)))
-		return folio_size(folio);
-
-	return slab_ksize(folio_slab(folio)->slab_cache);
-}
-EXPORT_SYMBOL(__ksize);
-
 void kfree(const void *x)
 {
 	struct folio *folio;
-- 
2.33.1



* [PATCH v2 3/5] mm/sl[auo]b: move definition of __ksize() to mm/slab.h
  2022-03-04  6:34 [PATCH v2 0/5] slab cleanups Hyeonggon Yoo
  2022-03-04  6:34 ` [PATCH v2 1/5] mm/slab: kmalloc: pass requests larger than order-1 page to page allocator Hyeonggon Yoo
  2022-03-04  6:34 ` [PATCH v2 2/5] mm/sl[au]b: unify __ksize() Hyeonggon Yoo
@ 2022-03-04  6:34 ` Hyeonggon Yoo
  2022-03-04 18:29   ` Vlastimil Babka
  2022-03-04  6:34 ` [PATCH v2 4/5] mm/slub: limit number of node partial slabs only in cache creation Hyeonggon Yoo
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 21+ messages in thread
From: Hyeonggon Yoo @ 2022-03-04  6:34 UTC (permalink / raw)
  To: linux-mm
  Cc: Christoph Lameter, Pekka Enberg, David Rientjes, Joonsoo Kim,
	Andrew Morton, Vlastimil Babka, Marco Elver, Matthew WilCox,
	Roman Gushchin, linux-kernel, 42.hyeyoo

__ksize() is only called by KASAN. Remove the EXPORT_SYMBOL and move
the declaration to mm/slab.h, as we don't want to grow its callers.

[ willy@infradead.org: Move definition to mm/slab.h and reduce comments ]

Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
---
 include/linux/slab.h | 1 -
 mm/slab.h            | 2 ++
 mm/slab_common.c     | 9 +--------
 mm/slob.c            | 1 -
 4 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index e7b3330db4f3..d2b896553315 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -182,7 +182,6 @@ int kmem_cache_shrink(struct kmem_cache *s);
 void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) __alloc_size(2);
 void kfree(const void *objp);
 void kfree_sensitive(const void *objp);
-size_t __ksize(const void *objp);
 size_t ksize(const void *objp);
 #ifdef CONFIG_PRINTK
 bool kmem_valid_obj(void *object);
diff --git a/mm/slab.h b/mm/slab.h
index 31e98beb47a3..79b319d58504 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -685,6 +685,8 @@ static inline void free_large_kmalloc(struct folio *folio, void *object)
 }
 #endif /* CONFIG_SLOB */
 
+size_t __ksize(const void *objp);
+
 static inline size_t slab_ksize(const struct kmem_cache *s)
 {
 #ifndef CONFIG_SLUB
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 1d2f92e871d2..b126fc7247b9 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1247,13 +1247,7 @@ EXPORT_SYMBOL(kfree_sensitive);
 
 #ifndef CONFIG_SLOB
 /**
- * __ksize -- Uninstrumented ksize.
- * @objp: pointer to the object
- *
- * Unlike ksize(), __ksize() is uninstrumented, and does not provide the same
- * safety checks as ksize() with KASAN instrumentation enabled.
- *
- * Return: size of the actual memory used by @objp in bytes
+ * __ksize -- Uninstrumented ksize. Only called by KASAN.
  */
 size_t __ksize(const void *object)
 {
@@ -1269,7 +1263,6 @@ size_t __ksize(const void *object)
 
 	return slab_ksize(folio_slab(folio)->slab_cache);
 }
-EXPORT_SYMBOL(__ksize);
 #endif
 
 /**
diff --git a/mm/slob.c b/mm/slob.c
index 60c5842215f1..d8af6c54f133 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -588,7 +588,6 @@ size_t __ksize(const void *block)
 	m = (unsigned int *)(block - align);
 	return SLOB_UNITS(*m) * SLOB_UNIT;
 }
-EXPORT_SYMBOL(__ksize);
 
 int __kmem_cache_create(struct kmem_cache *c, slab_flags_t flags)
 {
-- 
2.33.1



* [PATCH v2 4/5] mm/slub: limit number of node partial slabs only in cache creation
  2022-03-04  6:34 [PATCH v2 0/5] slab cleanups Hyeonggon Yoo
                   ` (2 preceding siblings ...)
  2022-03-04  6:34 ` [PATCH v2 3/5] mm/sl[auo]b: move definition of __ksize() to mm/slab.h Hyeonggon Yoo
@ 2022-03-04  6:34 ` Hyeonggon Yoo
  2022-03-04 18:33   ` Vlastimil Babka
  2022-03-04  6:34 ` [PATCH v2 5/5] mm/slub: refactor deactivate_slab() Hyeonggon Yoo
  2022-03-04 11:50 ` [PATCH v2 0/5] slab cleanups Marco Elver
  5 siblings, 1 reply; 21+ messages in thread
From: Hyeonggon Yoo @ 2022-03-04  6:34 UTC (permalink / raw)
  To: linux-mm
  Cc: Christoph Lameter, Pekka Enberg, David Rientjes, Joonsoo Kim,
	Andrew Morton, Vlastimil Babka, Marco Elver, Matthew WilCox,
	Roman Gushchin, linux-kernel, 42.hyeyoo

SLUB sets the minimum number of partial slabs to keep per node
(min_partial) using set_min_partial(). SLUB holds at least min_partial
slabs even if they are empty, to avoid excessive use of the page
allocator.

set_min_partial() clamps the value of min_partial between MIN_PARTIAL
and MAX_PARTIAL. As set_min_partial() can also be called from
min_partial_store(), only clamp the value of min_partial in
kmem_cache_open() so that it can later be changed to any value a user
wants.

[ rientjes@google.com: Fold set_min_partial() into its callers ]

Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
---
 mm/slub.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)
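
For illustration (assuming MIN_PARTIAL == 5 and MAX_PARTIAL == 10 as
currently defined in mm/slub.c), the clamping in kmem_cache_open() works
out as:

	s->size =   64: ilog2(64)/2   =  3 -> raised to MIN_PARTIAL (5)
	s->size = 4096: ilog2(4096)/2 =  6 -> kept as 6
	s->size = 4 MB: ilog2(4MB)/2  = 11 -> capped at MAX_PARTIAL (10)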

diff --git a/mm/slub.c b/mm/slub.c
index 6f0ebadd8f30..f9ae983a3dc6 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3981,15 +3981,6 @@ static int init_kmem_cache_nodes(struct kmem_cache *s)
 	return 1;
 }
 
-static void set_min_partial(struct kmem_cache *s, unsigned long min)
-{
-	if (min < MIN_PARTIAL)
-		min = MIN_PARTIAL;
-	else if (min > MAX_PARTIAL)
-		min = MAX_PARTIAL;
-	s->min_partial = min;
-}
-
 static void set_cpu_partial(struct kmem_cache *s)
 {
 #ifdef CONFIG_SLUB_CPU_PARTIAL
@@ -4196,7 +4187,8 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
 	 * The larger the object size is, the more slabs we want on the partial
 	 * list to avoid pounding the page allocator excessively.
 	 */
-	set_min_partial(s, ilog2(s->size) / 2);
+	s->min_partial = min_t(unsigned long, MAX_PARTIAL, ilog2(s->size) / 2);
+	s->min_partial = max_t(unsigned long, MIN_PARTIAL, s->min_partial);
 
 	set_cpu_partial(s);
 
@@ -5361,7 +5353,7 @@ static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
 	if (err)
 		return err;
 
-	set_min_partial(s, min);
+	s->min_partial = min;
 	return length;
 }
 SLAB_ATTR(min_partial);
-- 
2.33.1



* [PATCH v2 5/5] mm/slub: refactor deactivate_slab()
  2022-03-04  6:34 [PATCH v2 0/5] slab cleanups Hyeonggon Yoo
                   ` (3 preceding siblings ...)
  2022-03-04  6:34 ` [PATCH v2 4/5] mm/slub: limit number of node partial slabs only in cache creation Hyeonggon Yoo
@ 2022-03-04  6:34 ` Hyeonggon Yoo
  2022-03-04 19:01   ` Vlastimil Babka
  2022-03-04 11:50 ` [PATCH v2 0/5] slab cleanups Marco Elver
  5 siblings, 1 reply; 21+ messages in thread
From: Hyeonggon Yoo @ 2022-03-04  6:34 UTC (permalink / raw)
  To: linux-mm
  Cc: Christoph Lameter, Pekka Enberg, David Rientjes, Joonsoo Kim,
	Andrew Morton, Vlastimil Babka, Marco Elver, Matthew WilCox,
	Roman Gushchin, linux-kernel, 42.hyeyoo

Simplify deactivate_slab() by unlocking n->list_lock and retrying
cmpxchg_double() when cmpxchg_double() fails, and perform
add_{partial,full} only when it succeeds.

Releasing and taking n->list_lock again here is not harmful as SLUB
avoids deactivating slabs as much as possible.

[ vbabka@suse.cz: perform add_{partial,full} only when cmpxchg_double()
  succeeds ]

Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
---
 mm/slub.c | 81 ++++++++++++++++++++++---------------------------------
 1 file changed, 32 insertions(+), 49 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index f9ae983a3dc6..c1a693ec5874 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2344,8 +2344,8 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
 {
 	enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
 	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
-	int lock = 0, free_delta = 0;
-	enum slab_modes l = M_NONE, m = M_NONE;
+	int free_delta = 0;
+	enum slab_modes mode = M_NONE;
 	void *nextfree, *freelist_iter, *freelist_tail;
 	int tail = DEACTIVATE_TO_HEAD;
 	unsigned long flags = 0;
@@ -2387,14 +2387,10 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
 	 * Ensure that the slab is unfrozen while the list presence
 	 * reflects the actual number of objects during unfreeze.
 	 *
-	 * We setup the list membership and then perform a cmpxchg
-	 * with the count. If there is a mismatch then the slab
-	 * is not unfrozen but the slab is on the wrong list.
-	 *
-	 * Then we restart the process which may have to remove
-	 * the slab from the list that we just put it on again
-	 * because the number of objects in the slab may have
-	 * changed.
+	 * We first perform cmpxchg holding the lock and insert to the list
+	 * only when it succeeds. If there is a mismatch then the slab is
+	 * not unfrozen and the number of objects in it may have changed.
+	 * Then release the lock and retry cmpxchg again.
 	 */
 redo:
 
@@ -2414,57 +2410,44 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
 	new.frozen = 0;
 
 	if (!new.inuse && n->nr_partial >= s->min_partial)
-		m = M_FREE;
+		mode = M_FREE;
 	else if (new.freelist) {
-		m = M_PARTIAL;
-		if (!lock) {
-			lock = 1;
-			/*
-			 * Taking the spinlock removes the possibility that
-			 * acquire_slab() will see a slab that is frozen
-			 */
-			spin_lock_irqsave(&n->list_lock, flags);
-		}
-	} else {
-		m = M_FULL;
-		if (kmem_cache_debug_flags(s, SLAB_STORE_USER) && !lock) {
-			lock = 1;
-			/*
-			 * This also ensures that the scanning of full
-			 * slabs from diagnostic functions will not see
-			 * any frozen slabs.
-			 */
-			spin_lock_irqsave(&n->list_lock, flags);
-		}
+		mode = M_PARTIAL;
+		/*
+		 * Taking the spinlock removes the possibility that
+		 * acquire_slab() will see a slab that is frozen
+		 */
+		spin_lock_irqsave(&n->list_lock, flags);
+	} else if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) {
+		mode = M_FULL;
+		/*
+		 * This also ensures that the scanning of full
+		 * slabs from diagnostic functions will not see
+		 * any frozen slabs.
+		 */
+		spin_lock_irqsave(&n->list_lock, flags);
 	}
 
-	if (l != m) {
-		if (l == M_PARTIAL)
-			remove_partial(n, slab);
-		else if (l == M_FULL)
-			remove_full(s, n, slab);
-
-		if (m == M_PARTIAL)
-			add_partial(n, slab, tail);
-		else if (m == M_FULL)
-			add_full(s, n, slab);
-	}
 
-	l = m;
 	if (!cmpxchg_double_slab(s, slab,
 				old.freelist, old.counters,
 				new.freelist, new.counters,
-				"unfreezing slab"))
+				"unfreezing slab")) {
+		if (mode == M_PARTIAL || mode == M_FULL)
+			spin_unlock_irqrestore(&n->list_lock, flags);
 		goto redo;
+	}
 
-	if (lock)
-		spin_unlock_irqrestore(&n->list_lock, flags);
 
-	if (m == M_PARTIAL)
+	if (mode == M_PARTIAL) {
+		add_partial(n, slab, tail);
+		spin_unlock_irqrestore(&n->list_lock, flags);
 		stat(s, tail);
-	else if (m == M_FULL)
+	} else if (mode == M_FULL) {
+		add_full(s, n, slab);
+		spin_unlock_irqrestore(&n->list_lock, flags);
 		stat(s, DEACTIVATE_FULL);
-	else if (m == M_FREE) {
+	} else if (mode == M_FREE) {
 		stat(s, DEACTIVATE_EMPTY);
 		discard_slab(s, slab);
 		stat(s, FREE_SLAB);
-- 
2.33.1



* Re: [PATCH v2 0/5] slab cleanups
  2022-03-04  6:34 [PATCH v2 0/5] slab cleanups Hyeonggon Yoo
                   ` (4 preceding siblings ...)
  2022-03-04  6:34 ` [PATCH v2 5/5] mm/slub: refactor deactivate_slab() Hyeonggon Yoo
@ 2022-03-04 11:50 ` Marco Elver
  2022-03-04 12:02   ` Hyeonggon Yoo
  5 siblings, 1 reply; 21+ messages in thread
From: Marco Elver @ 2022-03-04 11:50 UTC (permalink / raw)
  To: Hyeonggon Yoo
  Cc: linux-mm, Christoph Lameter, Pekka Enberg, David Rientjes,
	Joonsoo Kim, Andrew Morton, Vlastimil Babka, Matthew WilCox,
	Roman Gushchin, linux-kernel

On Fri, 4 Mar 2022 at 07:34, Hyeonggon Yoo <42.hyeyoo@gmail.com> wrote:
>
> Changes from v1:
>         Now SLAB passes requests larger than order-1 page
>         to page allocator.
>
>         Adjusted comments from Matthew, Vlastimil, Rientjes.
>         Thank you for feedback!
>
>         BTW, I have no idea what __ksize() should return when an object that
>         is not allocated from slab is passed. both 0 and folio_size()
>         seems wrong to me.

Didn't we say 0 would be the safer of the two options?
https://lkml.kernel.org/r/0e02416f-ef43-dc8a-9e8e-50ff63dd3c61@suse.cz

> Hello, these are cleanup patches for slab.
> Please consider them for slab-next :)
>
> Any comments will be appreciated.
> Thanks.
>
> Hyeonggon Yoo (5):
>   mm/slab: kmalloc: pass requests larger than order-1 page to page
>     allocator
>   mm/sl[au]b: unify __ksize()
>   mm/sl[auo]b: move definition of __ksize() to mm/slab.h
>   mm/slub: limit number of node partial slabs only in cache creation
>   mm/slub: refactor deactivate_slab()
>
>  include/linux/slab.h |  36 ++++++------
>  mm/slab.c            |  51 ++++++++---------
>  mm/slab.h            |  21 +++++++
>  mm/slab_common.c     |  20 +++++++
>  mm/slob.c            |   1 -
>  mm/slub.c            | 130 ++++++++++++-------------------------------
>  6 files changed, 114 insertions(+), 145 deletions(-)
>
> --
> 2.33.1
>


* Re: [PATCH v2 0/5] slab cleanups
  2022-03-04 11:50 ` [PATCH v2 0/5] slab cleanups Marco Elver
@ 2022-03-04 12:02   ` Hyeonggon Yoo
  2022-03-04 13:11     ` Marco Elver
  0 siblings, 1 reply; 21+ messages in thread
From: Hyeonggon Yoo @ 2022-03-04 12:02 UTC (permalink / raw)
  To: Marco Elver
  Cc: linux-mm, Christoph Lameter, Pekka Enberg, David Rientjes,
	Joonsoo Kim, Andrew Morton, Vlastimil Babka, Matthew WilCox,
	Roman Gushchin, linux-kernel

On Fri, Mar 04, 2022 at 12:50:21PM +0100, Marco Elver wrote:
> On Fri, 4 Mar 2022 at 07:34, Hyeonggon Yoo <42.hyeyoo@gmail.com> wrote:
> >
> > Changes from v1:
> >         Now SLAB passes requests larger than order-1 page
> >         to page allocator.
> >
> >         Adjusted comments from Matthew, Vlastimil, Rientjes.
> >         Thank you for feedback!
> >
> >         BTW, I have no idea what __ksize() should return when an object that
> >         is not allocated from slab is passed. both 0 and folio_size()
> >         seems wrong to me.
> 
> Didn't we say 0 would be the safer of the two options?
> https://lkml.kernel.org/r/0e02416f-ef43-dc8a-9e8e-50ff63dd3c61@suse.cz
>

Oh sorry, I didn't understand why 0 was safer when I was reading it.

Reading it again, 0 is safer because KASAN does not unpoison a
wrongly passed object, right?

> > Hello, these are cleanup patches for slab.
> > Please consider them for slab-next :)
> >
> > Any comments will be appreciated.
> > Thanks.
> >
> > Hyeonggon Yoo (5):
> >   mm/slab: kmalloc: pass requests larger than order-1 page to page
> >     allocator
> >   mm/sl[au]b: unify __ksize()
> >   mm/sl[auo]b: move definition of __ksize() to mm/slab.h
> >   mm/slub: limit number of node partial slabs only in cache creation
> >   mm/slub: refactor deactivate_slab()
> >
> >  include/linux/slab.h |  36 ++++++------
> >  mm/slab.c            |  51 ++++++++---------
> >  mm/slab.h            |  21 +++++++
> >  mm/slab_common.c     |  20 +++++++
> >  mm/slob.c            |   1 -
> >  mm/slub.c            | 130 ++++++++++++-------------------------------
> >  6 files changed, 114 insertions(+), 145 deletions(-)
> >
> > --
> > 2.33.1
> >

-- 
Thank you, You are awesome!
Hyeonggon :-)


* Re: [PATCH v2 1/5] mm/slab: kmalloc: pass requests larger than order-1 page to page allocator
  2022-03-04  6:34 ` [PATCH v2 1/5] mm/slab: kmalloc: pass requests larger than order-1 page to page allocator Hyeonggon Yoo
@ 2022-03-04 12:45   ` Vlastimil Babka
  2022-03-05  5:10     ` Hyeonggon Yoo
  0 siblings, 1 reply; 21+ messages in thread
From: Vlastimil Babka @ 2022-03-04 12:45 UTC (permalink / raw)
  To: Hyeonggon Yoo, linux-mm
  Cc: Christoph Lameter, Pekka Enberg, David Rientjes, Joonsoo Kim,
	Andrew Morton, Marco Elver, Matthew WilCox, Roman Gushchin,
	linux-kernel

On 3/4/22 07:34, Hyeonggon Yoo wrote:
> There is not much benefit for serving large objects in kmalloc().
> Let's pass large requests to page allocator like SLUB for better
> maintenance of common code.
> 
> Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>

Looks like it's on the right path! Some suggestions follow.

> ---
>  include/linux/slab.h | 35 ++++++++++++++++-------------------
>  mm/slab.c            | 31 +++++++++++++++++++++++++++----
>  mm/slab.h            | 19 +++++++++++++++++++
>  mm/slub.c            | 19 -------------------
>  4 files changed, 62 insertions(+), 42 deletions(-)
> 
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 37bde99b74af..e7b3330db4f3 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -224,29 +224,19 @@ void kmem_dump_obj(void *object);
>   * Kmalloc array related definitions
>   */
>  
> -#ifdef CONFIG_SLAB
>  /*
> - * The largest kmalloc size supported by the SLAB allocators is
> - * 32 megabyte (2^25) or the maximum allocatable page order if that is
> - * less than 32 MB.
> - *
> - * WARNING: Its not easy to increase this value since the allocators have
> - * to do various tricks to work around compiler limitations in order to
> - * ensure proper constant folding.
> + * SLAB and SLUB directly allocates requests fitting in to an order-1 page
> + * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
>   */
> -#define KMALLOC_SHIFT_HIGH	((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
> -				(MAX_ORDER + PAGE_SHIFT - 1) : 25)
> -#define KMALLOC_SHIFT_MAX	KMALLOC_SHIFT_HIGH
> +#ifdef CONFIG_SLAB
> +#define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
> +#define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)

I think we should be able to also remove the larger sizes from
__kmalloc_index() later in this file.
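
E.g. (illustrative, assuming 4K pages so KMALLOC_SHIFT_HIGH == 13), the
size ladder in __kmalloc_index() could then stop at something like:

	if (size <=  8 * 1024) return 13;
	/* the 16 KB and larger cases below this point would no longer
	 * be reachable for either SLAB or SLUB */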

>  #ifndef KMALLOC_SHIFT_LOW
>  #define KMALLOC_SHIFT_LOW	5
>  #endif
>  #endif
>  
>  #ifdef CONFIG_SLUB
> -/*
> - * SLUB directly allocates requests fitting in to an order-1 page
> - * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
> - */
>  #define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
>  #define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
>  #ifndef KMALLOC_SHIFT_LOW
> @@ -564,15 +554,15 @@ static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t fl
>   *	Try really hard to succeed the allocation but fail
>   *	eventually.
>   */
> +#ifndef CONFIG_SLOB
>  static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
>  {
>  	if (__builtin_constant_p(size)) {
> -#ifndef CONFIG_SLOB
>  		unsigned int index;
> -#endif
> +
>  		if (size > KMALLOC_MAX_CACHE_SIZE)
>  			return kmalloc_large(size, flags);
> -#ifndef CONFIG_SLOB
> +
>  		index = kmalloc_index(size);
>  
>  		if (!index)
> @@ -581,10 +571,17 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
>  		return kmem_cache_alloc_trace(
>  				kmalloc_caches[kmalloc_type(flags)][index],
>  				flags, size);
> -#endif
>  	}
>  	return __kmalloc(size, flags);
>  }
> +#else
> +static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
> +{
> +	if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE)
> +		return kmalloc_large(size, flags);
> +	return __kmalloc(size, flags);
> +}
> +#endif
>  
>  static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
>  {
> diff --git a/mm/slab.c b/mm/slab.c
> index ddf5737c63d9..570af6dc3478 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -3624,7 +3624,8 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
>  	void *ret;
>  
>  	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
> -		return NULL;
> +		return kmalloc_large(size, flags);

This loses the node information and NUMA locality. It would be better to
generalize kmalloc_large_node() from mm/slub.c instead.
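
E.g. (sketch only, assuming kmalloc_large_node() is made available to
common code, e.g. via mm/slab_common.c):

	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
		return kmalloc_large_node(size, flags, node);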

> +
>  	cachep = kmalloc_slab(size, flags);
>  	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
>  		return cachep;
> @@ -3685,7 +3686,8 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
>  	void *ret;
>  
>  	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
> -		return NULL;
> +		return kmalloc_large(size, flags);
> +
>  	cachep = kmalloc_slab(size, flags);
>  	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
>  		return cachep;
> @@ -3739,14 +3741,21 @@ void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
>  {
>  	struct kmem_cache *s;
>  	size_t i;
> +	struct folio *folio;
>  
>  	local_irq_disable();
>  	for (i = 0; i < size; i++) {
>  		void *objp = p[i];
>  
> -		if (!orig_s) /* called via kfree_bulk */
> +		if (!orig_s) {
> +			/* called via kfree_bulk */
> +			folio = virt_to_folio(objp);
> +			if (unlikely(!folio_test_slab(folio))) {
> +				free_large_kmalloc(folio, objp);

Hmm I'm not sure it's a good idea to call this and the page allocator with
disabled irq's. Maybe simply re-enable around it?
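
E.g. something like this (untested sketch):

			if (unlikely(!folio_test_slab(folio))) {
				local_irq_enable();
				free_large_kmalloc(folio, objp);
				local_irq_disable();
				continue;
			}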

> +				continue;
> +			}
>  			s = virt_to_cache(objp);

Since we already have the folio, we should instead use folio_slab() and
slab->slab_cache (see how mm/slub.c does it).
The same applies to functions below as well.
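
i.e. roughly (sketch):

			s = folio_slab(folio)->slab_cache;

which also avoids the extra lookup hidden inside virt_to_cache().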

> -		else
> +		} else
>  			s = cache_from_obj(orig_s, objp);
>  		if (!s)
>  			continue;
> @@ -3776,11 +3785,20 @@ void kfree(const void *objp)
>  {
>  	struct kmem_cache *c;
>  	unsigned long flags;
> +	struct folio *folio;
> +	void *object = (void *) objp;
>  
>  	trace_kfree(_RET_IP_, objp);
>  
>  	if (unlikely(ZERO_OR_NULL_PTR(objp)))
>  		return;
> +
> +	folio = virt_to_folio(objp);
> +	if (unlikely(!folio_test_slab(folio))) {
> +		free_large_kmalloc(folio, object);
> +		return;
> +	}
> +
>  	local_irq_save(flags);
>  	kfree_debugcheck(objp);
>  	c = virt_to_cache(objp);
> @@ -4211,12 +4229,17 @@ void __check_heap_object(const void *ptr, unsigned long n,
>  size_t __ksize(const void *objp)
>  {
>  	struct kmem_cache *c;
> +	struct folio *folio;
>  	size_t size;
>  
>  	BUG_ON(!objp);
>  	if (unlikely(objp == ZERO_SIZE_PTR))
>  		return 0;
>  
> +	folio = virt_to_folio(objp);
> +	if (!folio_test_slab(folio))
> +		return folio_size(folio);
> +
>  	c = virt_to_cache(objp);
>  	size = c ? c->object_size : 0;
>  
> diff --git a/mm/slab.h b/mm/slab.h
> index c7f2abc2b154..31e98beb47a3 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -664,6 +664,25 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
>  		print_tracking(cachep, x);
>  	return cachep;
>  }
> +
> +static __always_inline void kfree_hook(void *x)
> +{
> +	kmemleak_free(x);
> +	kasan_kfree_large(x);

Looks like there's only one caller of kfree_hook() so we could do that
directly there.

> +}
> +
> +static inline void free_large_kmalloc(struct folio *folio, void *object)
> +{
> +	unsigned int order = folio_order(folio);
> +
> +	if (WARN_ON_ONCE(order == 0))
> +		pr_warn_once("object pointer: 0x%p\n", object);
> +
> +	kfree_hook(object);
> +	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
> +			      -(PAGE_SIZE << order));
> +	__free_pages(folio_page(folio, 0), order);
> +}
>  #endif /* CONFIG_SLOB */
>  
>  static inline size_t slab_ksize(const struct kmem_cache *s)
> diff --git a/mm/slub.c b/mm/slub.c
> index 261474092e43..04fd084f4709 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1686,12 +1686,6 @@ static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
>  	return ptr;
>  }
>  
> -static __always_inline void kfree_hook(void *x)
> -{
> -	kmemleak_free(x);
> -	kasan_kfree_large(x);
> -}
> -
>  static __always_inline bool slab_free_hook(struct kmem_cache *s,
>  						void *x, bool init)
>  {
> @@ -3535,19 +3529,6 @@ struct detached_freelist {
>  	struct kmem_cache *s;
>  };
>  
> -static inline void free_large_kmalloc(struct folio *folio, void *object)
> -{
> -	unsigned int order = folio_order(folio);
> -
> -	if (WARN_ON_ONCE(order == 0))
> -		pr_warn_once("object pointer: 0x%p\n", object);
> -
> -	kfree_hook(object);
> -	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
> -			      -(PAGE_SIZE << order));
> -	__free_pages(folio_page(folio, 0), order);
> -}
> -
>  /*
>   * This function progressively scans the array with free objects (with
>   * a limited look ahead) and extract objects belonging to the same



* Re: [PATCH v2 0/5] slab cleanups
  2022-03-04 12:02   ` Hyeonggon Yoo
@ 2022-03-04 13:11     ` Marco Elver
  2022-03-04 16:42       ` Vlastimil Babka
  2022-03-05  4:00       ` Hyeonggon Yoo
  0 siblings, 2 replies; 21+ messages in thread
From: Marco Elver @ 2022-03-04 13:11 UTC (permalink / raw)
  To: Hyeonggon Yoo
  Cc: linux-mm, Christoph Lameter, Pekka Enberg, David Rientjes,
	Joonsoo Kim, Andrew Morton, Vlastimil Babka, Matthew WilCox,
	Roman Gushchin, linux-kernel

On Fri, 4 Mar 2022 at 13:02, Hyeonggon Yoo <42.hyeyoo@gmail.com> wrote:
>
> On Fri, Mar 04, 2022 at 12:50:21PM +0100, Marco Elver wrote:
> > On Fri, 4 Mar 2022 at 07:34, Hyeonggon Yoo <42.hyeyoo@gmail.com> wrote:
> > >
> > > Changes from v1:
> > >         Now SLAB passes requests larger than order-1 page
> > >         to page allocator.
> > >
> > >         Adjusted comments from Matthew, Vlastimil, Rientjes.
> > >         Thank you for feedback!
> > >
> > >         BTW, I have no idea what __ksize() should return when an object that
> > >         is not allocated from slab is passed. both 0 and folio_size()
> > >         seems wrong to me.
> >
> > Didn't we say 0 would be the safer of the two options?
> > https://lkml.kernel.org/r/0e02416f-ef43-dc8a-9e8e-50ff63dd3c61@suse.cz
> >
>
> Oh sorry, I didn't understand why 0 was safer when I was reading it.
>
> Reading again, 0 is safer because kasan does not unpoison for
> wrongly passed object, right?

Not quite. KASAN can tell if something is wrong, i.e. invalid object.
Similarly, if you are able to tell if the passed pointer is not a
valid object some other way, you can do something better - namely,
return 0. The intuition here is that the caller has a pointer to an
invalid object, and wants to use ksize() to determine its size, and
most likely access all those bytes. Arguably, at that point the kernel
is already in a degrading state. But we can try to not let things get
worse by having ksize() return 0, in the hopes that it will stop
corrupting more memory. It won't work in all cases, but should avoid
things like "s = ksize(obj); touch_all_bytes(obj, s)" where the size
bounds the memory accessed corrupting random memory.

The other reason is that a caller could actually check the size, and
if 0, do something else. Few callers will do so, because nobody
expects that their code has a bug. :-)
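
To make that concrete (hypothetical caller, not from the tree):

	size_t s = ksize(obj);	/* obj is bogus */
	if (s)
		memset(obj, 0, s);	/* s == 0 stops this from scribbling
					 * over a whole folio's worth of memory */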


* Re: [PATCH v2 0/5] slab cleanups
  2022-03-04 13:11     ` Marco Elver
@ 2022-03-04 16:42       ` Vlastimil Babka
  2022-03-04 16:45         ` Marco Elver
  2022-03-05  4:00       ` Hyeonggon Yoo
  1 sibling, 1 reply; 21+ messages in thread
From: Vlastimil Babka @ 2022-03-04 16:42 UTC (permalink / raw)
  To: Marco Elver, Hyeonggon Yoo
  Cc: linux-mm, Christoph Lameter, Pekka Enberg, David Rientjes,
	Joonsoo Kim, Andrew Morton, Matthew WilCox, Roman Gushchin,
	linux-kernel

On 3/4/22 14:11, Marco Elver wrote:
> On Fri, 4 Mar 2022 at 13:02, Hyeonggon Yoo <42.hyeyoo@gmail.com> wrote:
>>
>> On Fri, Mar 04, 2022 at 12:50:21PM +0100, Marco Elver wrote:
>> > On Fri, 4 Mar 2022 at 07:34, Hyeonggon Yoo <42.hyeyoo@gmail.com> wrote:
>> > >
>> > > Changes from v1:
>> > >         Now SLAB passes requests larger than order-1 page
>> > >         to page allocator.
>> > >
>> > >         Adjusted comments from Matthew, Vlastimil, Rientjes.
>> > >         Thank you for feedback!
>> > >
>> > >         BTW, I have no idea what __ksize() should return when an object that
>> > >         is not allocated from slab is passed. both 0 and folio_size()
>> > >         seems wrong to me.
>> >
>> > Didn't we say 0 would be the safer of the two options?
>> > https://lkml.kernel.org/r/0e02416f-ef43-dc8a-9e8e-50ff63dd3c61@suse.cz
>> >
>>
>> Oh sorry, I didn't understand why 0 was safer when I was reading it.
>>
>> Reading again, 0 is safer because kasan does not unpoison for
>> wrongly passed object, right?
> 
> Not quite. KASAN can tell if something is wrong, i.e. invalid object.
> Similarly, if you are able to tell if the passed pointer is not a
> valid object some other way, you can do something better - namely,
> return 0.

Hmm, but how paranoid do we have to be? Patch 1 converts SLAB to use
kmalloc_large(). So it's now legitimate to have objects allocated by SLAB's
kmalloc() that don't have a slab folio flag set, and their size is
folio_size(). It would be more common than getting a bogus pointer, so
should we return 0 just because a bogus pointer is possible? If we do that,
then KASAN will fail to unpoison legitimate kmalloc_large() objects, no?
What I suggested earlier is we could make the checks more precise - if
folio_size() is smaller or equal order-1 page, then it's bogus because we
only do kmalloc_large() for >order-1. If the object pointer is not to the
beginning of the folio, then it's bogus, because kmalloc_large() returns the
beginning of the folio. Then in these case we return 0, but otherwise we
should return folio_size()?
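
Roughly something like this (untested sketch of the above, not part of
the series):

	folio = virt_to_folio(object);

	if (unlikely(!folio_test_slab(folio))) {
		/* kmalloc_large() only creates folios larger than order-1 ... */
		if (WARN_ON(folio_size(folio) <= PAGE_SIZE * 2))
			return 0;
		/* ... and returns a pointer to the start of the folio */
		if (WARN_ON(object != folio_address(folio)))
			return 0;
		return folio_size(folio);
	}

	return slab_ksize(folio_slab(folio)->slab_cache);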

> The intuition here is that the caller has a pointer to an
> invalid object, and wants to use ksize() to determine its size, and
> most likely access all those bytes. Arguably, at that point the kernel
> is already in a degrading state. But we can try to not let things get
> worse by having ksize() return 0, in the hopes that it will stop
> corrupting more memory. It won't work in all cases, but should avoid
> things like "s = ksize(obj); touch_all_bytes(obj, s)" where the size
> bounds the memory accessed corrupting random memory.
> 
> The other reason is that a caller could actually check the size, and
> if 0, do something else. Few callers will do so, because nobody
> expects that their code has a bug. :-)



* Re: [PATCH v2 0/5] slab cleanups
  2022-03-04 16:42       ` Vlastimil Babka
@ 2022-03-04 16:45         ` Marco Elver
  0 siblings, 0 replies; 21+ messages in thread
From: Marco Elver @ 2022-03-04 16:45 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Hyeonggon Yoo, linux-mm, Christoph Lameter, Pekka Enberg,
	David Rientjes, Joonsoo Kim, Andrew Morton, Matthew WilCox,
	Roman Gushchin, linux-kernel

On Fri, 4 Mar 2022 at 17:42, Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 3/4/22 14:11, Marco Elver wrote:
> > On Fri, 4 Mar 2022 at 13:02, Hyeonggon Yoo <42.hyeyoo@gmail.com> wrote:
> >>
> >> On Fri, Mar 04, 2022 at 12:50:21PM +0100, Marco Elver wrote:
> >> > On Fri, 4 Mar 2022 at 07:34, Hyeonggon Yoo <42.hyeyoo@gmail.com> wrote:
> >> > >
> >> > > Changes from v1:
> >> > >         Now SLAB passes requests larger than order-1 page
> >> > >         to page allocator.
> >> > >
> >> > >         Adjusted comments from Matthew, Vlastimil, Rientjes.
> >> > >         Thank you for feedback!
> >> > >
> >> > >         BTW, I have no idea what __ksize() should return when an object that
> >> > >         is not allocated from slab is passed. both 0 and folio_size()
> >> > >         seems wrong to me.
> >> >
> >> > Didn't we say 0 would be the safer of the two options?
> >> > https://lkml.kernel.org/r/0e02416f-ef43-dc8a-9e8e-50ff63dd3c61@suse.cz
> >> >
> >>
> >> Oh sorry, I didn't understand why 0 was safer when I was reading it.
> >>
> >> Reading again, 0 is safer because kasan does not unpoison for
> >> wrongly passed object, right?
> >
> > Not quite. KASAN can tell if something is wrong, i.e. invalid object.
> > Similarly, if you are able to tell if the passed pointer is not a
> > valid object some other way, you can do something better - namely,
> > return 0.
>
> Hmm, but how paranoid do we have to be? Patch 1 converts SLAB to use
> kmalloc_large(). So it's now legitimate to have objects allocated by SLAB's
> kmalloc() that don't have a slab folio flag set, and their size is
> folio_size(). It would be more common than getting a bogus pointer, so
> should we return 0 just because a bogus pointer is possible?

No of course not, which is why I asked in the earlier email if it's a
"definitive failure case".

> If we do that,
> then KASAN will fail to unpoison legitimate kmalloc_large() objects, no?
> What I suggested earlier is we could make the checks more precise - if
> folio_size() is smaller or equal order-1 page, then it's bogus because we
> only do kmalloc_large() for >order-1. If the object pointer is not to the
> beginning of the folio, then it's bogus, because kmalloc_large() returns the
> beginning of the folio. Then in these case we return 0, but otherwise we
> should return folio_size()?
>
> > The intuition here is that the caller has a pointer to an
> > invalid object, and wants to use ksize() to determine its size, and
> > most likely access all those bytes. Arguably, at that point the kernel
> > is already in a degrading state. But we can try to not let things get
> > worse by having ksize() return 0, in the hopes that it will stop
> > corrupting more memory. It won't work in all cases, but should avoid
> > things like "s = ksize(obj); touch_all_bytes(obj, s)" where the size
> > bounds the memory accessed corrupting random memory.
> >
> > The other reason is that a caller could actually check the size, and
> > if 0, do something else. Few callers will do so, because nobody
> > expects that their code has a bug. :-)
>


* Re: [PATCH v2 2/5] mm/sl[au]b: unify __ksize()
  2022-03-04  6:34 ` [PATCH v2 2/5] mm/sl[au]b: unify __ksize() Hyeonggon Yoo
@ 2022-03-04 18:25   ` Vlastimil Babka
  2022-03-05  4:02     ` Hyeonggon Yoo
  0 siblings, 1 reply; 21+ messages in thread
From: Vlastimil Babka @ 2022-03-04 18:25 UTC (permalink / raw)
  To: Hyeonggon Yoo, linux-mm
  Cc: Christoph Lameter, Pekka Enberg, David Rientjes, Joonsoo Kim,
	Andrew Morton, Marco Elver, Matthew WilCox, Roman Gushchin,
	linux-kernel

On 3/4/22 07:34, Hyeonggon Yoo wrote:
> Now that SLAB passes large requests to page allocator like SLUB,
> Unify __ksize(). Only SLOB need to implement own version of __ksize()
> because it stores size in object header for kmalloc objects.
> 
> Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>

Reviewed-by: Vlastimil Babka <vbabka@suse.cz>

As discussed, we can be more specific about the !folio_test_slab() case, but
that can be done on top.


* Re: [PATCH v2 3/5] mm/sl[auo]b: move definition of __ksize() to mm/slab.h
  2022-03-04  6:34 ` [PATCH v2 3/5] mm/sl[auo]b: move definition of __ksize() to mm/slab.h Hyeonggon Yoo
@ 2022-03-04 18:29   ` Vlastimil Babka
  2022-03-05  4:03     ` Hyeonggon Yoo
  0 siblings, 1 reply; 21+ messages in thread
From: Vlastimil Babka @ 2022-03-04 18:29 UTC (permalink / raw)
  To: Hyeonggon Yoo, linux-mm
  Cc: Christoph Lameter, Pekka Enberg, David Rientjes, Joonsoo Kim,
	Andrew Morton, Marco Elver, Matthew WilCox, Roman Gushchin,
	linux-kernel

On 3/4/22 07:34, Hyeonggon Yoo wrote:
> __ksize() is only called by KASAN. Remove export symbol and move
> definition to mm/slab.h as we don't want to grow its callers.
> 
> [ willy@infradead.org: Move definition to mm/slab.h and reduce comments ]
> 
> Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>

Reviewed-by: Vlastimil Babka <vbabka@suse.cz>

> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -685,6 +685,8 @@ static inline void free_large_kmalloc(struct folio *folio, void *object)
>  }
>  #endif /* CONFIG_SLOB */
>  
> +size_t __ksize(const void *objp);
> +
>  static inline size_t slab_ksize(const struct kmem_cache *s)
>  {
>  #ifndef CONFIG_SLUB
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 1d2f92e871d2..b126fc7247b9 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -1247,13 +1247,7 @@ EXPORT_SYMBOL(kfree_sensitive);
>  
>  #ifndef CONFIG_SLOB
>  /**

Maybe just /* so it's not even parsed as a kernel-doc anymore?

> - * __ksize -- Uninstrumented ksize.
> - * @objp: pointer to the object
> - *
> - * Unlike ksize(), __ksize() is uninstrumented, and does not provide the same
> - * safety checks as ksize() with KASAN instrumentation enabled.
> - *
> - * Return: size of the actual memory used by @objp in bytes
> + * __ksize -- Uninstrumented ksize. Only called by KASAN.
>   */
>  size_t __ksize(const void *object)
>  {
> @@ -1269,7 +1263,6 @@ size_t __ksize(const void *object)
>  
>  	return slab_ksize(folio_slab(folio)->slab_cache);
>  }
> -EXPORT_SYMBOL(__ksize);
>  #endif
>  


* Re: [PATCH v2 4/5] mm/slub: limit number of node partial slabs only in cache creation
  2022-03-04  6:34 ` [PATCH v2 4/5] mm/slub: limit number of node partial slabs only in cache creation Hyeonggon Yoo
@ 2022-03-04 18:33   ` Vlastimil Babka
  0 siblings, 0 replies; 21+ messages in thread
From: Vlastimil Babka @ 2022-03-04 18:33 UTC (permalink / raw)
  To: Hyeonggon Yoo, linux-mm
  Cc: Christoph Lameter, Pekka Enberg, David Rientjes, Joonsoo Kim,
	Andrew Morton, Marco Elver, Matthew WilCox, Roman Gushchin,
	linux-kernel

On 3/4/22 07:34, Hyeonggon Yoo wrote:
> SLUB sets number of minimum partial slabs for node (min_partial)
> using set_min_partial(). SLUB holds at least min_partial slabs even if
> they're empty to avoid excessive use of page allocator.
> 
> set_min_partial() clamps the value of min_partial between MIN_PARTIAL
> and MAX_PARTIAL. As set_min_partial() can also be called from
> min_partial_store(), only clamp the value of min_partial in
> kmem_cache_open() so that it can later be changed to any value a user
> wants.
> 
> [ rientjes@google.com: Fold set_min_partial() into its callers ]
> 
> Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>

Reviewed-by: Vlastimil Babka <vbabka@suse.cz>

> ---
>  mm/slub.c | 14 +++-----------
>  1 file changed, 3 insertions(+), 11 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index 6f0ebadd8f30..f9ae983a3dc6 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -3981,15 +3981,6 @@ static int init_kmem_cache_nodes(struct kmem_cache *s)
>  	return 1;
>  }
>  
> -static void set_min_partial(struct kmem_cache *s, unsigned long min)
> -{
> -	if (min < MIN_PARTIAL)
> -		min = MIN_PARTIAL;
> -	else if (min > MAX_PARTIAL)
> -		min = MAX_PARTIAL;
> -	s->min_partial = min;
> -}
> -
>  static void set_cpu_partial(struct kmem_cache *s)
>  {
>  #ifdef CONFIG_SLUB_CPU_PARTIAL
> @@ -4196,7 +4187,8 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
>  	 * The larger the object size is, the more slabs we want on the partial
>  	 * list to avoid pounding the page allocator excessively.
>  	 */
> -	set_min_partial(s, ilog2(s->size) / 2);
> +	s->min_partial = min_t(unsigned long, MAX_PARTIAL, ilog2(s->size) / 2);
> +	s->min_partial = max_t(unsigned long, MIN_PARTIAL, s->min_partial);
>  
>  	set_cpu_partial(s);
>  
> @@ -5361,7 +5353,7 @@ static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
>  	if (err)
>  		return err;
>  
> -	set_min_partial(s, min);
> +	s->min_partial = min;
>  	return length;
>  }
>  SLAB_ATTR(min_partial);



* Re: [PATCH v2 5/5] mm/slub: refactor deactivate_slab()
  2022-03-04  6:34 ` [PATCH v2 5/5] mm/slub: refactor deactivate_slab() Hyeonggon Yoo
@ 2022-03-04 19:01   ` Vlastimil Babka
  2022-03-05  4:21     ` Hyeonggon Yoo
  0 siblings, 1 reply; 21+ messages in thread
From: Vlastimil Babka @ 2022-03-04 19:01 UTC (permalink / raw)
  To: Hyeonggon Yoo, linux-mm
  Cc: Christoph Lameter, Pekka Enberg, David Rientjes, Joonsoo Kim,
	Andrew Morton, Marco Elver, Matthew WilCox, Roman Gushchin,
	linux-kernel

On 3/4/22 07:34, Hyeonggon Yoo wrote:
> Simplify deactivate_slab() by unlocking n->list_lock and retrying
> cmpxchg_double() when cmpxchg_double() fails, and perform
> add_{partial,full} only when it succeed.
> 
> Releasing and taking n->list_lock again here is not harmful as SLUB
> avoids deactivating slabs as much as possible.
> 
> [ vbabka@suse.cz: perform add_{partial,full} when cmpxchg_double()
>   succeed. ]
> 
> Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>

Looks good, just noticed a tiny issue.

> ---
>  mm/slub.c | 81 ++++++++++++++++++++++---------------------------------
>  1 file changed, 32 insertions(+), 49 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index f9ae983a3dc6..c1a693ec5874 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -2344,8 +2344,8 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
>  {
>  	enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
>  	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
> -	int lock = 0, free_delta = 0;
> -	enum slab_modes l = M_NONE, m = M_NONE;
> +	int free_delta = 0;
> +	enum slab_modes mode = M_NONE;
>  	void *nextfree, *freelist_iter, *freelist_tail;
>  	int tail = DEACTIVATE_TO_HEAD;
>  	unsigned long flags = 0;
> @@ -2387,14 +2387,10 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
>  	 * Ensure that the slab is unfrozen while the list presence
>  	 * reflects the actual number of objects during unfreeze.
>  	 *
> -	 * We setup the list membership and then perform a cmpxchg
> -	 * with the count. If there is a mismatch then the slab
> -	 * is not unfrozen but the slab is on the wrong list.
> -	 *
> -	 * Then we restart the process which may have to remove
> -	 * the slab from the list that we just put it on again
> -	 * because the number of objects in the slab may have
> -	 * changed.
> +	 * We first perform cmpxchg holding lock and insert to list
> +	 * when it succeed. If there is mismatch then slub is not
> +	 * unfrozen and number of objects in the slab may have changed.
> +	 * Then release lock and retry cmpxchg again.
>  	 */
>  redo:
>  
> @@ -2414,57 +2410,44 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
>  	new.frozen = 0;
>  
>  	if (!new.inuse && n->nr_partial >= s->min_partial)
> -		m = M_FREE;
> +		mode = M_FREE;
>  	else if (new.freelist) {
> -		m = M_PARTIAL;
> -		if (!lock) {
> -			lock = 1;
> -			/*
> -			 * Taking the spinlock removes the possibility that
> -			 * acquire_slab() will see a slab that is frozen
> -			 */
> -			spin_lock_irqsave(&n->list_lock, flags);
> -		}
> -	} else {
> -		m = M_FULL;
> -		if (kmem_cache_debug_flags(s, SLAB_STORE_USER) && !lock) {

This used to set m = M_FULL; always.

> -			lock = 1;
> -			/*
> -			 * This also ensures that the scanning of full
> -			 * slabs from diagnostic functions will not see
> -			 * any frozen slabs.
> -			 */
> -			spin_lock_irqsave(&n->list_lock, flags);
> -		}
> +		mode = M_PARTIAL;
> +		/*
> +		 * Taking the spinlock removes the possibility that
> +		 * acquire_slab() will see a slab that is frozen
> +		 */
> +		spin_lock_irqsave(&n->list_lock, flags);
> +	} else if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) {
> +		mode = M_FULL;

Now you only set it for SLAB_STORE_USER caches.

> +		/*
> +		 * This also ensures that the scanning of full
> +		 * slabs from diagnostic functions will not see
> +		 * any frozen slabs.
> +		 */
> +		spin_lock_irqsave(&n->list_lock, flags);
>  	}
>  
> -	if (l != m) {
> -		if (l == M_PARTIAL)
> -			remove_partial(n, slab);
> -		else if (l == M_FULL)
> -			remove_full(s, n, slab);
> -
> -		if (m == M_PARTIAL)
> -			add_partial(n, slab, tail);
> -		else if (m == M_FULL)
> -			add_full(s, n, slab);
> -	}
>  
> -	l = m;
>  	if (!cmpxchg_double_slab(s, slab,
>  				old.freelist, old.counters,
>  				new.freelist, new.counters,
> -				"unfreezing slab"))
> +				"unfreezing slab")) {
> +		if (mode == M_PARTIAL || mode == M_FULL)
> +			spin_unlock_irqrestore(&n->list_lock, flags);
>  		goto redo;
> +	}
>  
> -	if (lock)
> -		spin_unlock_irqrestore(&n->list_lock, flags);
>  
> -	if (m == M_PARTIAL)
> +	if (mode == M_PARTIAL) {
> +		add_partial(n, slab, tail);
> +		spin_unlock_irqrestore(&n->list_lock, flags);
>  		stat(s, tail);
> -	else if (m == M_FULL)
> +	} else if (mode == M_FULL) {
> +		add_full(s, n, slab);
> +		spin_unlock_irqrestore(&n->list_lock, flags);
>  		stat(s, DEACTIVATE_FULL);

As a result, full slabs without SLAB_STORE_USER will not count
DEACTIVATE_FULL anymore.
I guess the easiest way to solve it is to e.g. add a M_FULL_NOLIST mode that
only does the DEACTIVATE_FULL counting.
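
Something like this, perhaps (a rough, untested sketch just to illustrate the
idea; the M_FULL_NOLIST name and exact form are of course up to you):

	enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE, M_FULL_NOLIST };
	...
	} else if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) {
		mode = M_FULL;
		/* full list is only maintained for SLAB_STORE_USER caches */
		spin_lock_irqsave(&n->list_lock, flags);
	} else {
		/* no full list to update, only count the statistic */
		mode = M_FULL_NOLIST;
	}
	...
	} else if (mode == M_FULL) {
		add_full(s, n, slab);
		spin_unlock_irqrestore(&n->list_lock, flags);
		stat(s, DEACTIVATE_FULL);
	} else if (mode == M_FULL_NOLIST) {
		stat(s, DEACTIVATE_FULL);
	}

The cmpxchg failure path can stay as it is, since M_FULL_NOLIST takes no lock.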

> -	else if (m == M_FREE) {
> +	} else if (mode == M_FREE) {
>  		stat(s, DEACTIVATE_EMPTY);
>  		discard_slab(s, slab);
>  		stat(s, FREE_SLAB);


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 0/5] slab cleanups
  2022-03-04 13:11     ` Marco Elver
  2022-03-04 16:42       ` Vlastimil Babka
@ 2022-03-05  4:00       ` Hyeonggon Yoo
  1 sibling, 0 replies; 21+ messages in thread
From: Hyeonggon Yoo @ 2022-03-05  4:00 UTC (permalink / raw)
  To: Marco Elver
  Cc: linux-mm, Christoph Lameter, Pekka Enberg, David Rientjes,
	Joonsoo Kim, Andrew Morton, Vlastimil Babka, Matthew WilCox,
	Roman Gushchin, linux-kernel

On Fri, Mar 04, 2022 at 02:11:50PM +0100, Marco Elver wrote:
> On Fri, 4 Mar 2022 at 13:02, Hyeonggon Yoo <42.hyeyoo@gmail.com> wrote:
> >
> > On Fri, Mar 04, 2022 at 12:50:21PM +0100, Marco Elver wrote:
> > > On Fri, 4 Mar 2022 at 07:34, Hyeonggon Yoo <42.hyeyoo@gmail.com> wrote:
> > > >
> > > > Changes from v1:
> > > >         Now SLAB passes requests larger than order-1 page
> > > >         to page allocator.
> > > >
> > > >         Adjusted comments from Matthew, Vlastimil, Rientjes.
> > > >         Thank you for feedback!
> > > >
> > > >         BTW, I have no idea what __ksize() should return when an object that
> > > >         is not allocated from slab is passed. both 0 and folio_size()
> > > >         seems wrong to me.
> > >
> > > Didn't we say 0 would be the safer of the two options?
> > > https://lkml.kernel.org/r/0e02416f-ef43-dc8a-9e8e-50ff63dd3c61@suse.cz
> > >
> >
> > Oh sorry, I didn't understand why 0 was safer when I was reading it.
> >
> > Reading again, 0 is safer because kasan does not unpoison for
> > wrongly passed object, right?
> 
> Not quite. KASAN can tell if something is wrong, i.e. invalid object.
> Similarly, if you are able to tell if the passed pointer is not a
> valid object some other way, you can do something better - namely,
> return 0.
>
> The intuition here is that the caller has a pointer to an
> invalid object, and wants to use ksize() to determine its size, and
> most likely access all those bytes. Arguably, at that point the kernel
> is already in a degrading state. But we can try to not let things get
> worse by having ksize() return 0, in the hopes that it will stop
> corrupting more memory. It won't work in all cases, but should avoid
> things like "s = ksize(obj); touch_all_bytes(obj, s)" where the size
> bounds the memory accessed corrupting random memory.

Oh, so it's to prevent corrupting memory further in the failure case,
like memset(obj, 0, s);

> The other reason is that a caller could actually check the size, and
> if 0, do something else. Few callers will do so, because nobody
> expects that their code has a bug. :-)

and to let the caller check for errors.
Thank you so much for the kind explanation.
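
IOW, roughly (hypothetical caller code, just to check my understanding):

	/* careless caller: with ksize() returning 0, nothing gets touched */
	s = ksize(obj);
	memset(obj, 0, s);

	/* careful caller: can treat 0 as an error */
	s = ksize(obj);
	if (!s)
		return -EINVAL;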

I'll add what Vlastimil suggested in the next series. Thanks!

-- 
Thank you, You are awesome!
Hyeonggon :-)

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 2/5] mm/sl[au]b: unify __ksize()
  2022-03-04 18:25   ` Vlastimil Babka
@ 2022-03-05  4:02     ` Hyeonggon Yoo
  0 siblings, 0 replies; 21+ messages in thread
From: Hyeonggon Yoo @ 2022-03-05  4:02 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: linux-mm, Christoph Lameter, Pekka Enberg, David Rientjes,
	Joonsoo Kim, Andrew Morton, Marco Elver, Matthew WilCox,
	Roman Gushchin, linux-kernel

On Fri, Mar 04, 2022 at 07:25:47PM +0100, Vlastimil Babka wrote:
> On 3/4/22 07:34, Hyeonggon Yoo wrote:
> > Now that SLAB passes large requests to page allocator like SLUB,
> > Unify __ksize(). Only SLOB need to implement own version of __ksize()
> > because it stores size in object header for kmalloc objects.
> > 
> > Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> 
> Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
> 
> As discussed, we can be more specific about the !folio_test_slab() case, but
> that can be done on top.

I'll add that in v3. Thanks!
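
Probably something along these lines (a rough sketch; the exact checks for
the !folio_test_slab() case are still to be decided):

	size_t __ksize(const void *object)
	{
		struct folio *folio;

		if (unlikely(object == ZERO_SIZE_PTR))
			return 0;

		folio = virt_to_folio(object);

		if (unlikely(!folio_test_slab(folio))) {
			/*
			 * Hypothetical: a non-slab folio too small to have
			 * come from a large kmalloc is a bogus pointer, so
			 * return 0 as discussed instead of folio_size().
			 */
			if (WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE))
				return 0;
			return folio_size(folio);
		}

		return slab_ksize(folio_slab(folio)->slab_cache);
	}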

-- 
Thank you, You are awesome!
Hyeonggon :-)

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 3/5] mm/sl[auo]b: move definition of __ksize() to mm/slab.h
  2022-03-04 18:29   ` Vlastimil Babka
@ 2022-03-05  4:03     ` Hyeonggon Yoo
  0 siblings, 0 replies; 21+ messages in thread
From: Hyeonggon Yoo @ 2022-03-05  4:03 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: linux-mm, Christoph Lameter, Pekka Enberg, David Rientjes,
	Joonsoo Kim, Andrew Morton, Marco Elver, Matthew WilCox,
	Roman Gushchin, linux-kernel

On Fri, Mar 04, 2022 at 07:29:21PM +0100, Vlastimil Babka wrote:
> On 3/4/22 07:34, Hyeonggon Yoo wrote:
> > __ksize() is only called by KASAN. Remove export symbol and move
> > definition to mm/slab.h as we don't want to grow its callers.
> > 
> > [ willy@infradead.org: Move definition to mm/slab.h and reduce comments ]
> > 
> > Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> 
> Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
>

Thanks!

> > --- a/mm/slab.h
> > +++ b/mm/slab.h
> > @@ -685,6 +685,8 @@ static inline void free_large_kmalloc(struct folio *folio, void *object)
> >  }
> >  #endif /* CONFIG_SLOB */
> >  
> > +size_t __ksize(const void *objp);
> > +
> >  static inline size_t slab_ksize(const struct kmem_cache *s)
> >  {
> >  #ifndef CONFIG_SLUB
> > diff --git a/mm/slab_common.c b/mm/slab_common.c
> > index 1d2f92e871d2..b126fc7247b9 100644
> > --- a/mm/slab_common.c
> > +++ b/mm/slab_common.c
> > @@ -1247,13 +1247,7 @@ EXPORT_SYMBOL(kfree_sensitive);
> >  
> >  #ifndef CONFIG_SLOB
> >  /**
> 
> Maybe just /* so it's not even parsed as a kernel-doc anymore?
>

Oh yes, that would be better.

> > - * __ksize -- Uninstrumented ksize.
> > - * @objp: pointer to the object
> > - *
> > - * Unlike ksize(), __ksize() is uninstrumented, and does not provide the same
> > - * safety checks as ksize() with KASAN instrumentation enabled.
> > - *
> > - * Return: size of the actual memory used by @objp in bytes
> > + * __ksize -- Uninstrumented ksize. Only called by KASAN.
> >   */
> >  size_t __ksize(const void *object)
> >  {
> > @@ -1269,7 +1263,6 @@ size_t __ksize(const void *object)
> >  
> >  	return slab_ksize(folio_slab(folio)->slab_cache);
> >  }
> > -EXPORT_SYMBOL(__ksize);
> >  #endif
> >  

-- 
Thank you, You are awesome!
Hyeonggon :-)

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 5/5] mm/slub: refactor deactivate_slab()
  2022-03-04 19:01   ` Vlastimil Babka
@ 2022-03-05  4:21     ` Hyeonggon Yoo
  0 siblings, 0 replies; 21+ messages in thread
From: Hyeonggon Yoo @ 2022-03-05  4:21 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: linux-mm, Christoph Lameter, Pekka Enberg, David Rientjes,
	Joonsoo Kim, Andrew Morton, Marco Elver, Matthew WilCox,
	Roman Gushchin, linux-kernel

On Fri, Mar 04, 2022 at 08:01:20PM +0100, Vlastimil Babka wrote:
> On 3/4/22 07:34, Hyeonggon Yoo wrote:
> > Simplify deactivate_slab() by unlocking n->list_lock and retrying
> > cmpxchg_double() when cmpxchg_double() fails, and perform
> > add_{partial,full} only when it succeed.
> > 
> > Releasing and taking n->list_lock again here is not harmful as SLUB
> > avoids deactivating slabs as much as possible.
> > 
> > [ vbabka@suse.cz: perform add_{partial,full} when cmpxchg_double()
> >   succeed. ]
> > 
> > Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> 
> Looks good, just noticed a tiny issue.
> 
> > ---
> >  mm/slub.c | 81 ++++++++++++++++++++++---------------------------------
> >  1 file changed, 32 insertions(+), 49 deletions(-)
> > 
> > diff --git a/mm/slub.c b/mm/slub.c
> > index f9ae983a3dc6..c1a693ec5874 100644
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
> > @@ -2344,8 +2344,8 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
> >  {
> >  	enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
> >  	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
> > -	int lock = 0, free_delta = 0;
> > -	enum slab_modes l = M_NONE, m = M_NONE;
> > +	int free_delta = 0;
> > +	enum slab_modes mode = M_NONE;
> >  	void *nextfree, *freelist_iter, *freelist_tail;
> >  	int tail = DEACTIVATE_TO_HEAD;
> >  	unsigned long flags = 0;
> > @@ -2387,14 +2387,10 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
> >  	 * Ensure that the slab is unfrozen while the list presence
> >  	 * reflects the actual number of objects during unfreeze.
> >  	 *
> > -	 * We setup the list membership and then perform a cmpxchg
> > -	 * with the count. If there is a mismatch then the slab
> > -	 * is not unfrozen but the slab is on the wrong list.
> > -	 *
> > -	 * Then we restart the process which may have to remove
> > -	 * the slab from the list that we just put it on again
> > -	 * because the number of objects in the slab may have
> > -	 * changed.
> > +	 * We first perform cmpxchg holding lock and insert to list
> > +	 * when it succeed. If there is mismatch then slub is not
> > +	 * unfrozen and number of objects in the slab may have changed.
> > +	 * Then release lock and retry cmpxchg again.
> >  	 */
> >  redo:
> >  
> > @@ -2414,57 +2410,44 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
> >  	new.frozen = 0;
> >  
> >  	if (!new.inuse && n->nr_partial >= s->min_partial)
> > -		m = M_FREE;
> > +		mode = M_FREE;
> >  	else if (new.freelist) {
> > -		m = M_PARTIAL;
> > -		if (!lock) {
> > -			lock = 1;
> > -			/*
> > -			 * Taking the spinlock removes the possibility that
> > -			 * acquire_slab() will see a slab that is frozen
> > -			 */
> > -			spin_lock_irqsave(&n->list_lock, flags);
> > -		}
> > -	} else {
> > -		m = M_FULL;
> > -		if (kmem_cache_debug_flags(s, SLAB_STORE_USER) && !lock) {
> 
> This used to set m = M_FULL; always.
> 
> > -			lock = 1;
> > -			/*
> > -			 * This also ensures that the scanning of full
> > -			 * slabs from diagnostic functions will not see
> > -			 * any frozen slabs.
> > -			 */
> > -			spin_lock_irqsave(&n->list_lock, flags);
> > -		}
> > +		mode = M_PARTIAL;
> > +		/*
> > +		 * Taking the spinlock removes the possibility that
> > +		 * acquire_slab() will see a slab that is frozen
> > +		 */
> > +		spin_lock_irqsave(&n->list_lock, flags);
> > +	} else if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) {
> > +		mode = M_FULL;
> 
> Now you only set it for SLAB_STORE_USER caches.
> 
> > +		/*
> > +		 * This also ensures that the scanning of full
> > +		 * slabs from diagnostic functions will not see
> > +		 * any frozen slabs.
> > +		 */
> > +		spin_lock_irqsave(&n->list_lock, flags);
> >  	}
> >  
> > -	if (l != m) {
> > -		if (l == M_PARTIAL)
> > -			remove_partial(n, slab);
> > -		else if (l == M_FULL)
> > -			remove_full(s, n, slab);
> > -
> > -		if (m == M_PARTIAL)
> > -			add_partial(n, slab, tail);
> > -		else if (m == M_FULL)
> > -			add_full(s, n, slab);
> > -	}
> >  
> > -	l = m;
> >  	if (!cmpxchg_double_slab(s, slab,
> >  				old.freelist, old.counters,
> >  				new.freelist, new.counters,
> > -				"unfreezing slab"))
> > +				"unfreezing slab")) {
> > +		if (mode == M_PARTIAL || mode == M_FULL)
> > +			spin_unlock_irqrestore(&n->list_lock, flags);
> >  		goto redo;
> > +	}
> >  
> > -	if (lock)
> > -		spin_unlock_irqrestore(&n->list_lock, flags);
> >  
> > -	if (m == M_PARTIAL)
> > +	if (mode == M_PARTIAL) {
> > +		add_partial(n, slab, tail);
> > +		spin_unlock_irqrestore(&n->list_lock, flags);
> >  		stat(s, tail);
> > -	else if (m == M_FULL)
> > +	} else if (mode == M_FULL) {
> > +		add_full(s, n, slab);
> > +		spin_unlock_irqrestore(&n->list_lock, flags);
> >  		stat(s, DEACTIVATE_FULL);
> 
> As a result, full slabs without SLAB_STORE_USER will not count
> DEACTIVATE_FULL anymore.

Oh, thank you for catching this.

We usually only deactivate full slabs for debugging, to keep track of the
full list of slabs.

But as you pointed out, in the rare case when the pfmemalloc flag does not
match, full slabs can be deactivated (even though they are not put on any
list).

> I guess the easiest way to solve it is to e.g. add a M_FULL_NOLIST mode that
> only does the DEACTIVATE_FULL counting.

That will be a sufficient solution.

Will fix this in v3. Thank you!

> > -	else if (m == M_FREE) {
> > +	} else if (mode == M_FREE) {
> >  		stat(s, DEACTIVATE_EMPTY);
> >  		discard_slab(s, slab);
> >  		stat(s, FREE_SLAB);
> 

-- 
Thank you, You are awesome!
Hyeonggon :-)

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 1/5] mm/slab: kmalloc: pass requests larger than order-1 page to page allocator
  2022-03-04 12:45   ` Vlastimil Babka
@ 2022-03-05  5:10     ` Hyeonggon Yoo
  0 siblings, 0 replies; 21+ messages in thread
From: Hyeonggon Yoo @ 2022-03-05  5:10 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: linux-mm, Christoph Lameter, Pekka Enberg, David Rientjes,
	Joonsoo Kim, Andrew Morton, Marco Elver, Matthew WilCox,
	Roman Gushchin, linux-kernel

On Fri, Mar 04, 2022 at 01:45:16PM +0100, Vlastimil Babka wrote:
> On 3/4/22 07:34, Hyeonggon Yoo wrote:
> > There is not much benefit for serving large objects in kmalloc().
> > Let's pass large requests to page allocator like SLUB for better
> > maintenance of common code.
> > 
> > Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> 
> Looks like it's on the right path! Some suggestions follow.
> 

Thank you for reviewing this!
You have a really good eye.

> > ---
> >  include/linux/slab.h | 35 ++++++++++++++++-------------------
> >  mm/slab.c            | 31 +++++++++++++++++++++++++++----
> >  mm/slab.h            | 19 +++++++++++++++++++
> >  mm/slub.c            | 19 -------------------
> >  4 files changed, 62 insertions(+), 42 deletions(-)
> > 
> > diff --git a/include/linux/slab.h b/include/linux/slab.h
> > index 37bde99b74af..e7b3330db4f3 100644
> > --- a/include/linux/slab.h
> > +++ b/include/linux/slab.h
> > @@ -224,29 +224,19 @@ void kmem_dump_obj(void *object);
> >   * Kmalloc array related definitions
> >   */
> >  
> > -#ifdef CONFIG_SLAB
> >  /*
> > - * The largest kmalloc size supported by the SLAB allocators is
> > - * 32 megabyte (2^25) or the maximum allocatable page order if that is
> > - * less than 32 MB.
> > - *
> > - * WARNING: Its not easy to increase this value since the allocators have
> > - * to do various tricks to work around compiler limitations in order to
> > - * ensure proper constant folding.
> > + * SLAB and SLUB directly allocates requests fitting in to an order-1 page
> > + * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
> >   */
> > -#define KMALLOC_SHIFT_HIGH	((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
> > -				(MAX_ORDER + PAGE_SHIFT - 1) : 25)
> > -#define KMALLOC_SHIFT_MAX	KMALLOC_SHIFT_HIGH
> > +#ifdef CONFIG_SLAB
> > +#define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
> > +#define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
> 
> I think we should be able to also remove the larger sizes from
> __kmalloc_index() later in this file.
>

Right. We can remove the entries above max(PAGE_SHIFT) + 1.

Searching with cscope, the maximum PAGE_SHIFT is 20 (1MB), on hexagon.
I think we can remove anything larger than 2MB.

But hmm, I'll add something like static_assert(PAGE_SHIFT <= 20).
Let's see if any architecture/config complains about this.
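
i.e. something like (hypothetical, untested):

	/*
	 * With large requests going to the page allocator, __kmalloc_index()
	 * only needs entries up to 2 * PAGE_SIZE; guard against surprises:
	 */
	static_assert(PAGE_SHIFT <= 20);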

> >  #ifndef KMALLOC_SHIFT_LOW
> >  #define KMALLOC_SHIFT_LOW	5
> >  #endif
> >  #endif
> >  
> >  #ifdef CONFIG_SLUB
> > -/*
> > - * SLUB directly allocates requests fitting in to an order-1 page
> > - * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
> > - */
> >  #define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
> >  #define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
> >  #ifndef KMALLOC_SHIFT_LOW
> > @@ -564,15 +554,15 @@ static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t fl
> >   *	Try really hard to succeed the allocation but fail
> >   *	eventually.
> >   */
> > +#ifndef CONFIG_SLOB
> >  static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
> >  {
> >  	if (__builtin_constant_p(size)) {
> > -#ifndef CONFIG_SLOB
> >  		unsigned int index;
> > -#endif
> > +
> >  		if (size > KMALLOC_MAX_CACHE_SIZE)
> >  			return kmalloc_large(size, flags);
> > -#ifndef CONFIG_SLOB
> > +
> >  		index = kmalloc_index(size);
> >  
> >  		if (!index)
> > @@ -581,10 +571,17 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
> >  		return kmem_cache_alloc_trace(
> >  				kmalloc_caches[kmalloc_type(flags)][index],
> >  				flags, size);
> > -#endif
> >  	}
> >  	return __kmalloc(size, flags);
> >  }
> > +#else
> > +static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
> > +{
> > +	if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE)
> > +		return kmalloc_large(size, flags);
> > +	return __kmalloc(size, flags);
> > +}
> > +#endif
> >  
> >  static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
> >  {
> > diff --git a/mm/slab.c b/mm/slab.c
> > index ddf5737c63d9..570af6dc3478 100644
> > --- a/mm/slab.c
> > +++ b/mm/slab.c
> > @@ -3624,7 +3624,8 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
> >  	void *ret;
> >  
> >  	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
> > -		return NULL;
> > +		return kmalloc_large(size, flags);
> 
> This loses the node information and NUMA locality.

Oh, my fault. Thank you for catching this.

> It would be better to generalize kmalloc_large_node() from mm/slub.c instead.

Sounds nice.
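
So roughly: move SLUB's version somewhere common and call it from both, e.g.
(a loose sketch based on SLUB's current kmalloc_large_node(); the final form
in v3 may well differ):

	void *kmalloc_large_node(size_t size, gfp_t flags, int node)
	{
		struct page *page;
		void *ptr = NULL;
		unsigned int order = get_order(size);

		flags |= __GFP_COMP;
		page = alloc_pages_node(node, flags, order);
		if (page) {
			ptr = page_address(page);
			mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
					      PAGE_SIZE << order);
		}

		return kmalloc_large_node_hook(ptr, size, flags);
	}

(kmalloc_large_node_hook() would have to become available to both, of course.)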

> > +
> >  	cachep = kmalloc_slab(size, flags);
> >  	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
> >  		return cachep;
> > @@ -3685,7 +3686,8 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
> >  	void *ret;
> >  
> >  	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
> > -		return NULL;
> > +		return kmalloc_large(size, flags);
> > +
> >  	cachep = kmalloc_slab(size, flags);
> >  	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
> >  		return cachep;
> > @@ -3739,14 +3741,21 @@ void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
> >  {
> >  	struct kmem_cache *s;
> >  	size_t i;
> > +	struct folio *folio;
> >  
> >  	local_irq_disable();
> >  	for (i = 0; i < size; i++) {
> >  		void *objp = p[i];
> >  
> > -		if (!orig_s) /* called via kfree_bulk */
> > +		if (!orig_s) {
> > +			/* called via kfree_bulk */
> > +			folio = virt_to_folio(objp);
> > +			if (unlikely(!folio_test_slab(folio))) {
> > +				free_large_kmalloc(folio, objp);
> 
> Hmm I'm not sure it's a good idea to call this and the page allocator with
> disabled irq's. Maybe simply re-enable around it?

Seems reasonable. Re-enabling IRQs around it will be sufficient for this
function.
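
i.e. something like this in kmem_cache_free_bulk() (untested):

	if (unlikely(!folio_test_slab(folio))) {
		/* don't call the page allocator with IRQs disabled */
		local_irq_enable();
		free_large_kmalloc(folio, objp);
		local_irq_disable();
		continue;
	}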

By the way, it seems SLAB really does not care about disabling IRQs even
when it's unnecessary.

I would like to try reducing the IRQ-disabled area in SLAB and see how it
affects macro/micro benchmarks after this series is done :)

> > +				continue;
> > +			}
> >  			s = virt_to_cache(objp);
> 
> Since we already have the folio, we shoold instead use folio_slab and
> slab->slab_cache (see how mm/slub.c does it).
> The same applies to functions below as well.

Right. I'll change them in v3.
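
i.e. just reuse the folio we already have, something like:

	s = folio_slab(folio)->slab_cache;

instead of the extra virt_to_cache(objp) lookup.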

> > -		else
> > +		} else
> >  			s = cache_from_obj(orig_s, objp);
> >  		if (!s)
> >  			continue;
> > @@ -3776,11 +3785,20 @@ void kfree(const void *objp)
> >  {
> >  	struct kmem_cache *c;
> >  	unsigned long flags;
> > +	struct folio *folio;
> > +	void *object = (void *) objp;
> >  
> >  	trace_kfree(_RET_IP_, objp);
> >  
> >  	if (unlikely(ZERO_OR_NULL_PTR(objp)))
> >  		return;
> > +
> > +	folio = virt_to_folio(objp);
> > +	if (unlikely(!folio_test_slab(folio))) {
> > +		free_large_kmalloc(folio, object);
> > +		return;
> > +	}
> > +
> >  	local_irq_save(flags);
> >  	kfree_debugcheck(objp);
> >  	c = virt_to_cache(objp);
> > @@ -4211,12 +4229,17 @@ void __check_heap_object(const void *ptr, unsigned long n,
> >  size_t __ksize(const void *objp)
> >  {
> >  	struct kmem_cache *c;
> > +	struct folio *folio;
> >  	size_t size;
> >  
> >  	BUG_ON(!objp);
> >  	if (unlikely(objp == ZERO_SIZE_PTR))
> >  		return 0;
> >  
> > +	folio = virt_to_folio(objp);
> > +	if (!folio_test_slab(folio))
> > +		return folio_size(folio);
> > +
> >  	c = virt_to_cache(objp);
> >  	size = c ? c->object_size : 0;
> >  
> > diff --git a/mm/slab.h b/mm/slab.h
> > index c7f2abc2b154..31e98beb47a3 100644
> > --- a/mm/slab.h
> > +++ b/mm/slab.h
> > @@ -664,6 +664,25 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
> >  		print_tracking(cachep, x);
> >  	return cachep;
> >  }
> > +
> > +static __always_inline void kfree_hook(void *x)
> > +{
> > +	kmemleak_free(x);
> > +	kasan_kfree_large(x);
> 
> Looks like there's only one caller of kfree_hook() so we could do that
> directly there.
>

Right.
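
Then free_large_kmalloc() would just become (sketch):

	static inline void free_large_kmalloc(struct folio *folio, void *object)
	{
		unsigned int order = folio_order(folio);

		if (WARN_ON_ONCE(order == 0))
			pr_warn_once("object pointer: 0x%p\n", object);

		/* open-coded former kfree_hook() */
		kmemleak_free(object);
		kasan_kfree_large(object);

		mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
				      -(PAGE_SIZE << order));
		__free_pages(folio_page(folio, 0), order);
	}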

> > +}
> > +
> > +static inline void free_large_kmalloc(struct folio *folio, void *object)
> > +{
> > +	unsigned int order = folio_order(folio);
> > +
> > +	if (WARN_ON_ONCE(order == 0))
> > +		pr_warn_once("object pointer: 0x%p\n", object);
> > +
> > +	kfree_hook(object);
> > +	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
> > +			      -(PAGE_SIZE << order));
> > +	__free_pages(folio_page(folio, 0), order);
> > +}
> >  #endif /* CONFIG_SLOB */
> >  
> >  static inline size_t slab_ksize(const struct kmem_cache *s)
> > diff --git a/mm/slub.c b/mm/slub.c
> > index 261474092e43..04fd084f4709 100644
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
> > @@ -1686,12 +1686,6 @@ static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
> >  	return ptr;
> >  }
> >  
> > -static __always_inline void kfree_hook(void *x)
> > -{
> > -	kmemleak_free(x);
> > -	kasan_kfree_large(x);
> > -}
> > -
> >  static __always_inline bool slab_free_hook(struct kmem_cache *s,
> >  						void *x, bool init)
> >  {
> > @@ -3535,19 +3529,6 @@ struct detached_freelist {
> >  	struct kmem_cache *s;
> >  };
> >  
> > -static inline void free_large_kmalloc(struct folio *folio, void *object)
> > -{
> > -	unsigned int order = folio_order(folio);
> > -
> > -	if (WARN_ON_ONCE(order == 0))
> > -		pr_warn_once("object pointer: 0x%p\n", object);
> > -
> > -	kfree_hook(object);
> > -	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
> > -			      -(PAGE_SIZE << order));
> > -	__free_pages(folio_page(folio, 0), order);
> > -}
> > -
> >  /*
> >   * This function progressively scans the array with free objects (with
> >   * a limited look ahead) and extract objects belonging to the same
>

Thank you so much!

-- 
Thank you, You are awesome!
Hyeonggon :-)

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2022-03-05  5:10 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-04  6:34 [PATCH v2 0/5] slab cleanups Hyeonggon Yoo
2022-03-04  6:34 ` [PATCH v2 1/5] mm/slab: kmalloc: pass requests larger than order-1 page to page allocator Hyeonggon Yoo
2022-03-04 12:45   ` Vlastimil Babka
2022-03-05  5:10     ` Hyeonggon Yoo
2022-03-04  6:34 ` [PATCH v2 2/5] mm/sl[au]b: unify __ksize() Hyeonggon Yoo
2022-03-04 18:25   ` Vlastimil Babka
2022-03-05  4:02     ` Hyeonggon Yoo
2022-03-04  6:34 ` [PATCH v2 3/5] mm/sl[auo]b: move definition of __ksize() to mm/slab.h Hyeonggon Yoo
2022-03-04 18:29   ` Vlastimil Babka
2022-03-05  4:03     ` Hyeonggon Yoo
2022-03-04  6:34 ` [PATCH v2 4/5] mm/slub: limit number of node partial slabs only in cache creation Hyeonggon Yoo
2022-03-04 18:33   ` Vlastimil Babka
2022-03-04  6:34 ` [PATCH v2 5/5] mm/slub: refactor deactivate_slab() Hyeonggon Yoo
2022-03-04 19:01   ` Vlastimil Babka
2022-03-05  4:21     ` Hyeonggon Yoo
2022-03-04 11:50 ` [PATCH v2 0/5] slab cleanups Marco Elver
2022-03-04 12:02   ` Hyeonggon Yoo
2022-03-04 13:11     ` Marco Elver
2022-03-04 16:42       ` Vlastimil Babka
2022-03-04 16:45         ` Marco Elver
2022-03-05  4:00       ` Hyeonggon Yoo
