linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 1/3] mm/slub: fix the race between validate_slab and slab_free
@ 2022-07-12  2:28 Rongwei Wang
  2022-07-12  2:28 ` [PATCH v2 2/3] mm/slub: improve consistency of nr_slabs count Rongwei Wang
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: Rongwei Wang @ 2022-07-12  2:28 UTC (permalink / raw)
  To: akpm, vbabka, 42.hyeyoo, roman.gushchin, iamjoonsoo.kim,
	rientjes, penberg, cl
  Cc: linux-mm, linux-kernel

In use cases where slabs are allocated and freed frequently, error
messages such as "Left Redzone overwritten" or "First byte 0xbb
instead of 0xcc" can be printed when validating slabs. That's
because an object may have been filled with SLAB_RED_INACTIVE but
not yet added to the slab's freelist, and slab validation is likely
to run between these two states.

This does not mean the slab cannot work stably, but these confusing
messages make slab debugging harder.

Signed-off-by: Rongwei Wang <rongwei.wang@linux.alibaba.com>
---
 mm/slub.c | 43 +++++++++++++++++++++++++------------------
 1 file changed, 25 insertions(+), 18 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index b1281b8654bd..e950d8df8380 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1391,18 +1391,16 @@ static noinline int free_debug_processing(
 	void *head, void *tail, int bulk_cnt,
 	unsigned long addr)
 {
-	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
 	void *object = head;
 	int cnt = 0;
-	unsigned long flags, flags2;
+	unsigned long flags;
 	int ret = 0;
 	depot_stack_handle_t handle = 0;
 
 	if (s->flags & SLAB_STORE_USER)
 		handle = set_track_prepare();
 
-	spin_lock_irqsave(&n->list_lock, flags);
-	slab_lock(slab, &flags2);
+	slab_lock(slab, &flags);
 
 	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
 		if (!check_slab(s, slab))
@@ -1435,8 +1433,7 @@ static noinline int free_debug_processing(
 		slab_err(s, slab, "Bulk freelist count(%d) invalid(%d)\n",
 			 bulk_cnt, cnt);
 
-	slab_unlock(slab, &flags2);
-	spin_unlock_irqrestore(&n->list_lock, flags);
+	slab_unlock(slab, &flags);
 	if (!ret)
 		slab_fix(s, "Object at 0x%p not freed", object);
 	return ret;
@@ -3330,7 +3327,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
 
 {
 	void *prior;
-	int was_frozen;
+	int was_frozen, to_take_off = 0;
 	struct slab new;
 	unsigned long counters;
 	struct kmem_cache_node *n = NULL;
@@ -3341,14 +3338,23 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
 	if (kfence_free(head))
 		return;
 
-	if (kmem_cache_debug(s) &&
-	    !free_debug_processing(s, slab, head, tail, cnt, addr))
-		return;
+	n = get_node(s, slab_nid(slab));
+	if (kmem_cache_debug(s)) {
+		int ret;
 
-	do {
-		if (unlikely(n)) {
+		spin_lock_irqsave(&n->list_lock, flags);
+		ret = free_debug_processing(s, slab, head, tail, cnt, addr);
+		if (!ret) {
 			spin_unlock_irqrestore(&n->list_lock, flags);
-			n = NULL;
+			return;
+		}
+	}
+
+	do {
+		if (unlikely(to_take_off)) {
+			if (!kmem_cache_debug(s))
+				spin_unlock_irqrestore(&n->list_lock, flags);
+			to_take_off = 0;
 		}
 		prior = slab->freelist;
 		counters = slab->counters;
@@ -3369,8 +3375,6 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
 				new.frozen = 1;
 
 			} else { /* Needs to be taken off a list */
-
-				n = get_node(s, slab_nid(slab));
 				/*
 				 * Speculatively acquire the list_lock.
 				 * If the cmpxchg does not succeed then we may
@@ -3379,8 +3383,10 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
 				 * Otherwise the list_lock will synchronize with
 				 * other processors updating the list of slabs.
 				 */
-				spin_lock_irqsave(&n->list_lock, flags);
+				if (!kmem_cache_debug(s))
+					spin_lock_irqsave(&n->list_lock, flags);
 
+				to_take_off = 1;
 			}
 		}
 
@@ -3389,8 +3395,9 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
 		head, new.counters,
 		"__slab_free"));
 
-	if (likely(!n)) {
-
+	if (likely(!to_take_off)) {
+		if (kmem_cache_debug(s))
+			spin_unlock_irqrestore(&n->list_lock, flags);
 		if (likely(was_frozen)) {
 			/*
 			 * The list lock was not taken therefore no list
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v2 2/3] mm/slub: improve consistency of nr_slabs count
  2022-07-12  2:28 [PATCH v2 1/3] mm/slub: fix the race between validate_slab and slab_free Rongwei Wang
@ 2022-07-12  2:28 ` Rongwei Wang
  2022-07-12  2:28 ` [PATCH v2 3/3] mm/slub: delete confusing pr_err when debugging slub Rongwei Wang
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 6+ messages in thread
From: Rongwei Wang @ 2022-07-12  2:28 UTC (permalink / raw)
  To: akpm, vbabka, 42.hyeyoo, roman.gushchin, iamjoonsoo.kim,
	rientjes, penberg, cl
  Cc: linux-mm, linux-kernel

Currently, discard_slab() can change the nr_slabs count
without holding the node's list_lock. This can lead to
error messages being printed when scanning the node's
partial or full list, e.g. when validating all slabs.
In effect, it breaks the consistency of the nr_slabs count.

Here, discard_slab() is removed, and dec_slabs_node()
is called before releasing the node's list_lock.
dec_slabs_node() and free_slab() can be called separately
to ensure consistency of the nr_slabs count.

Signed-off-by: Rongwei Wang <rongwei.wang@linux.alibaba.com>
---
 mm/slub.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index e950d8df8380..587416e39292 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2065,12 +2065,6 @@ static void free_slab(struct kmem_cache *s, struct slab *slab)
 		__free_slab(s, slab);
 }
 
-static void discard_slab(struct kmem_cache *s, struct slab *slab)
-{
-	dec_slabs_node(s, slab_nid(slab), slab->objects);
-	free_slab(s, slab);
-}
-
 /*
  * Management of partially allocated slabs.
  */
@@ -2439,6 +2433,7 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
 
 	if (!new.inuse && n->nr_partial >= s->min_partial) {
 		mode = M_FREE;
+		spin_lock_irqsave(&n->list_lock, flags);
 	} else if (new.freelist) {
 		mode = M_PARTIAL;
 		/*
@@ -2463,7 +2458,7 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
 				old.freelist, old.counters,
 				new.freelist, new.counters,
 				"unfreezing slab")) {
-		if (mode == M_PARTIAL || mode == M_FULL)
+		if (mode != M_FULL_NOLIST)
 			spin_unlock_irqrestore(&n->list_lock, flags);
 		goto redo;
 	}
@@ -2475,7 +2470,10 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
 		stat(s, tail);
 	} else if (mode == M_FREE) {
 		stat(s, DEACTIVATE_EMPTY);
-		discard_slab(s, slab);
+		dec_slabs_node(s, slab_nid(slab), slab->objects);
+		spin_unlock_irqrestore(&n->list_lock, flags);
+
+		free_slab(s, slab);
 		stat(s, FREE_SLAB);
 	} else if (mode == M_FULL) {
 		add_full(s, n, slab);
@@ -2528,6 +2526,7 @@ static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab)
 		if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
 			slab->next = slab_to_discard;
 			slab_to_discard = slab;
+			dec_slabs_node(s, slab_nid(slab), slab->objects);
 		} else {
 			add_partial(n, slab, DEACTIVATE_TO_TAIL);
 			stat(s, FREE_ADD_PARTIAL);
@@ -2542,7 +2541,7 @@ static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab)
 		slab_to_discard = slab_to_discard->next;
 
 		stat(s, DEACTIVATE_EMPTY);
-		discard_slab(s, slab);
+		free_slab(s, slab);
 		stat(s, FREE_SLAB);
 	}
 }
@@ -3443,9 +3442,10 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
 		remove_full(s, n, slab);
 	}
 
+	dec_slabs_node(s, slab_nid(slab), slab->objects);
 	spin_unlock_irqrestore(&n->list_lock, flags);
 	stat(s, FREE_SLAB);
-	discard_slab(s, slab);
+	free_slab(s, slab);
 }
 
 /*
@@ -4302,6 +4302,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
 		if (!slab->inuse) {
 			remove_partial(n, slab);
 			list_add(&slab->slab_list, &discard);
+			dec_slabs_node(s, slab_nid(slab), slab->objects);
 		} else {
 			list_slab_objects(s, slab,
 			  "Objects remaining in %s on __kmem_cache_shutdown()");
@@ -4310,7 +4311,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
 	spin_unlock_irq(&n->list_lock);
 
 	list_for_each_entry_safe(slab, h, &discard, slab_list)
-		discard_slab(s, slab);
+		free_slab(s, slab);
 }
 
 bool __kmem_cache_empty(struct kmem_cache *s)
@@ -4640,6 +4641,7 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
 			if (free == slab->objects) {
 				list_move(&slab->slab_list, &discard);
 				n->nr_partial--;
+				dec_slabs_node(s, slab_nid(slab), slab->objects);
 			} else if (free <= SHRINK_PROMOTE_MAX)
 				list_move(&slab->slab_list, promote + free - 1);
 		}
@@ -4655,7 +4657,7 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
 
 		/* Release empty slabs */
 		list_for_each_entry_safe(slab, t, &discard, slab_list)
-			discard_slab(s, slab);
+			free_slab(s, slab);
 
 		if (slabs_node(s, node))
 			ret = 1;
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v2 3/3] mm/slub: delete confusing pr_err when debugging slub
  2022-07-12  2:28 [PATCH v2 1/3] mm/slub: fix the race between validate_slab and slab_free Rongwei Wang
  2022-07-12  2:28 ` [PATCH v2 2/3] mm/slub: improve consistency of nr_slabs count Rongwei Wang
@ 2022-07-12  2:28 ` Rongwei Wang
  2022-07-12  2:57 ` [PATCH v2 1/3] mm/slub: fix the race between validate_slab and slab_free Rongwei Wang
  2022-07-13 10:22 ` Hyeonggon Yoo
  3 siblings, 0 replies; 6+ messages in thread
From: Rongwei Wang @ 2022-07-12  2:28 UTC (permalink / raw)
  To: akpm, vbabka, 42.hyeyoo, roman.gushchin, iamjoonsoo.kim,
	rientjes, penberg, cl
  Cc: linux-mm, linux-kernel

The n->nr_slabs count is updated when a slab is actually
allocated or freed, but that slab is not necessarily on a
node's full or partial list. That means the total count of
slabs on a node's full and partial lists is not necessarily
equal to n->nr_slabs, even after flush_all() has been called.

An example here, an error message likes below will be
printed when 'slabinfo -v' is executed:

SLUB: kmemleak_object 4157 slabs counted but counter=4161
SLUB: kmemleak_object 4072 slabs counted but counter=4077
SLUB: kmalloc-2k 19 slabs counted but counter=20
SLUB: kmalloc-2k 12 slabs counted but counter=13
SLUB: kmemleak_object 4205 slabs counted but counter=4209

Here, deleting this pr_err() directly.

Signed-off-by: Rongwei Wang <rongwei.wang@linux.alibaba.com>
---
 mm/slub.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 587416e39292..cdac004f232f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5059,11 +5059,6 @@ static int validate_slab_node(struct kmem_cache *s,
 		validate_slab(s, slab, obj_map);
 		count++;
 	}
-	if (count != atomic_long_read(&n->nr_slabs)) {
-		pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
-		       s->name, count, atomic_long_read(&n->nr_slabs));
-		slab_add_kunit_errors();
-	}
 
 out:
 	spin_unlock_irqrestore(&n->list_lock, flags);
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 1/3] mm/slub: fix the race between validate_slab and slab_free
  2022-07-12  2:28 [PATCH v2 1/3] mm/slub: fix the race between validate_slab and slab_free Rongwei Wang
  2022-07-12  2:28 ` [PATCH v2 2/3] mm/slub: improve consistency of nr_slabs count Rongwei Wang
  2022-07-12  2:28 ` [PATCH v2 3/3] mm/slub: delete confusing pr_err when debugging slub Rongwei Wang
@ 2022-07-12  2:57 ` Rongwei Wang
  2022-07-13 10:22 ` Hyeonggon Yoo
  3 siblings, 0 replies; 6+ messages in thread
From: Rongwei Wang @ 2022-07-12  2:57 UTC (permalink / raw)
  To: akpm, vbabka, 42.hyeyoo, roman.gushchin, iamjoonsoo.kim,
	rientjes, penberg, cl
  Cc: linux-mm, linux-kernel

Hi

Following everyone's feedback on PATCH v1 [1], I rewrote the first patch
"mm/slub: fix the race between validate_slab and slab_free". These
changes now only take effect when SLUB debugging is enabled. Some
performance test results can be found in [2] (thanks to Christoph's
suggestion).

changelog
v1->v2:
*mm/slub: fix the race between validate_slab and slab_free
make these changes take effect only when slub debugging is enabled.

*mm/slub: improve consistency of nr_slabs count
nothing

*mm/slub: delete confusing pr_err when debugging slub
only deleting the confusing pr_err().

For convenience, the latest test data is shown here (copied from [2]):

testcase used: https://github.com/netoptimizer/prototype-kernel.git
(slab_test)

Single thread testing
1. Kmalloc: Repeatedly allocate then free test
                    before                fix
                    kmalloc    kfree      kmalloc     kfree
10000 times 8     4 cycles   5 cycles	4 cycles    5 cycles
10000 times 16    3 cycles   5 cycles	3 cycles    5 cycles
10000 times 32    3 cycles   5 cycles	3 cycles    5 cycles
10000 times 64    3 cycles   5 cycles	3 cycles    5 cycles
10000 times 128   3 cycles   5 cycles	3 cycles    5 cycles
10000 times 256   14 cycles  9 cycles	6 cycles    8 cycles
10000 times 512   9 cycles   8 cycles	9 cycles    10 cycles
10000 times 1024  48 cycles  10 cycles	6 cycles    10 cycles
10000 times 2048  31 cycles  12 cycles	35 cycles   13 cycles
10000 times 4096  96 cycles  17 cycles	96 cycles   18 cycles
10000 times 8192  188 cycles 27 cycles	190 cycles  27 cycles
10000 times 16384 117 cycles 38 cycles  115 cycles  38 cycles
			
2. Kmalloc: alloc/free test
                                    before        fix
10000 times kmalloc(8)/kfree      3 cycles      3 cycles
10000 times kmalloc(16)/kfree     3 cycles      3 cycles
10000 times kmalloc(32)/kfree     3 cycles      3 cycles
10000 times kmalloc(64)/kfree     3 cycles      3 cycles
10000 times kmalloc(128)/kfree    3 cycles      3 cycles
10000 times kmalloc(256)/kfree    3 cycles      3 cycles
10000 times kmalloc(512)/kfree    3 cycles      3 cycles
10000 times kmalloc(1024)/kfree   3 cycles      3 cycles
10000 times kmalloc(2048)/kfree   3 cycles      3 cycles
10000 times kmalloc(4096)/kfree   3 cycles      3 cycles
10000 times kmalloc(8192)/kfree   3 cycles      3 cycles
10000 times kmalloc(16384)/kfree  33 cycles     33 cycles

Concurrent allocs
                                  before            fix
Kmalloc N*alloc N*free(8)       Average=13/14     Average=14/15
Kmalloc N*alloc N*free(16)      Average=13/15     Average=13/15
Kmalloc N*alloc N*free(32)      Average=13/15     Average=13/15
Kmalloc N*alloc N*free(64)      Average=13/15     Average=13/15
Kmalloc N*alloc N*free(128)     Average=13/15     Average=13/15
Kmalloc N*alloc N*free(256)     Average=137/29    Average=134/39
Kmalloc N*alloc N*free(512)     Average=61/29     Average=64/28
Kmalloc N*alloc N*free(1024)    Average=465/50    Average=656/55
Kmalloc N*alloc N*free(2048)    Average=503/97    Average=422/97
Kmalloc N*alloc N*free(4096)    Average=1592/206  Average=1624/207
		
Kmalloc N*(alloc free)(8)       Average=3         Average=3
Kmalloc N*(alloc free)(16)      Average=3         Average=3
Kmalloc N*(alloc free)(32)      Average=3         Average=3
Kmalloc N*(alloc free)(64)      Average=3         Average=3
Kmalloc N*(alloc free)(128)     Average=3         Average=3
Kmalloc N*(alloc free)(256)     Average=3         Average=3
Kmalloc N*(alloc free)(512)     Average=3         Average=3
Kmalloc N*(alloc free)(1024)    Average=3         Average=3
Kmalloc N*(alloc free)(2048)    Average=3         Average=3
Kmalloc N*(alloc free)(4096)    Average=3         Average=3

The above data seems to indicate that this modification (which only
takes effect when kmem_cache_debug(s) is true) does not introduce a
significant performance impact. If you have a suggestion for a better
test case, please let me know. Thanks!

[1] 
https://lore.kernel.org/linux-mm/alpine.DEB.2.22.394.2206081417370.465021@gentwo.de/T/#m2832b1983a229183aabfd6eb71a2eb39ecd0d08a

[2] 
https://lore.kernel.org/linux-mm/alpine.DEB.2.22.394.2206081417370.465021@gentwo.de/T/#m75f1f32ad590fb13ac9e771030fafd15c7db8cb1

Thanks for your time!

On 7/12/22 10:28 AM, Rongwei Wang wrote:
> In use cases where allocating and freeing slab frequently, some
> error messages, such as "Left Redzone overwritten", "First byte
> 0xbb instead of 0xcc" would be printed when validating slabs.
> That's because an object has been filled with SLAB_RED_INACTIVE,
> but has not been added to slab's freelist. And between these
> two states, the behaviour of validating slab is likely to occur.
> 
> Actually, it doesn't mean the slab can not work stably. But, these
> confusing messages will disturb slab debugging more or less.
> 
> Signed-off-by: Rongwei Wang <rongwei.wang@linux.alibaba.com>
> ---
>   mm/slub.c | 43 +++++++++++++++++++++++++------------------
>   1 file changed, 25 insertions(+), 18 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index b1281b8654bd..e950d8df8380 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1391,18 +1391,16 @@ static noinline int free_debug_processing(
>   	void *head, void *tail, int bulk_cnt,
>   	unsigned long addr)
>   {
> -	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
>   	void *object = head;
>   	int cnt = 0;
> -	unsigned long flags, flags2;
> +	unsigned long flags;
>   	int ret = 0;
>   	depot_stack_handle_t handle = 0;
>   
>   	if (s->flags & SLAB_STORE_USER)
>   		handle = set_track_prepare();
>   
> -	spin_lock_irqsave(&n->list_lock, flags);
> -	slab_lock(slab, &flags2);
> +	slab_lock(slab, &flags);
>   
>   	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
>   		if (!check_slab(s, slab))
> @@ -1435,8 +1433,7 @@ static noinline int free_debug_processing(
>   		slab_err(s, slab, "Bulk freelist count(%d) invalid(%d)\n",
>   			 bulk_cnt, cnt);
>   
> -	slab_unlock(slab, &flags2);
> -	spin_unlock_irqrestore(&n->list_lock, flags);
> +	slab_unlock(slab, &flags);
>   	if (!ret)
>   		slab_fix(s, "Object at 0x%p not freed", object);
>   	return ret;
> @@ -3330,7 +3327,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>   
>   {
>   	void *prior;
> -	int was_frozen;
> +	int was_frozen, to_take_off = 0;
>   	struct slab new;
>   	unsigned long counters;
>   	struct kmem_cache_node *n = NULL;
> @@ -3341,14 +3338,23 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>   	if (kfence_free(head))
>   		return;
>   
> -	if (kmem_cache_debug(s) &&
> -	    !free_debug_processing(s, slab, head, tail, cnt, addr))
> -		return;
> +	n = get_node(s, slab_nid(slab));
> +	if (kmem_cache_debug(s)) {
> +		int ret;
>   
> -	do {
> -		if (unlikely(n)) {
> +		spin_lock_irqsave(&n->list_lock, flags);
> +		ret = free_debug_processing(s, slab, head, tail, cnt, addr);
> +		if (!ret) {
>   			spin_unlock_irqrestore(&n->list_lock, flags);
> -			n = NULL;
> +			return;
> +		}
> +	}
> +
> +	do {
> +		if (unlikely(to_take_off)) {
> +			if (!kmem_cache_debug(s))
> +				spin_unlock_irqrestore(&n->list_lock, flags);
> +			to_take_off = 0;
>   		}
>   		prior = slab->freelist;
>   		counters = slab->counters;
> @@ -3369,8 +3375,6 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>   				new.frozen = 1;
>   
>   			} else { /* Needs to be taken off a list */
> -
> -				n = get_node(s, slab_nid(slab));
>   				/*
>   				 * Speculatively acquire the list_lock.
>   				 * If the cmpxchg does not succeed then we may
> @@ -3379,8 +3383,10 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>   				 * Otherwise the list_lock will synchronize with
>   				 * other processors updating the list of slabs.
>   				 */
> -				spin_lock_irqsave(&n->list_lock, flags);
> +				if (!kmem_cache_debug(s))
> +					spin_lock_irqsave(&n->list_lock, flags);
>   
> +				to_take_off = 1;
>   			}
>   		}
>   
> @@ -3389,8 +3395,9 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>   		head, new.counters,
>   		"__slab_free"));
>   
> -	if (likely(!n)) {
> -
> +	if (likely(!to_take_off)) {
> +		if (kmem_cache_debug(s))
> +			spin_unlock_irqrestore(&n->list_lock, flags);
>   		if (likely(was_frozen)) {
>   			/*
>   			 * The list lock was not taken therefore no list


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 1/3] mm/slub: fix the race between validate_slab and slab_free
  2022-07-12  2:28 [PATCH v2 1/3] mm/slub: fix the race between validate_slab and slab_free Rongwei Wang
                   ` (2 preceding siblings ...)
  2022-07-12  2:57 ` [PATCH v2 1/3] mm/slub: fix the race between validate_slab and slab_free Rongwei Wang
@ 2022-07-13 10:22 ` Hyeonggon Yoo
  2022-07-13 12:10   ` Rongwei Wang
  3 siblings, 1 reply; 6+ messages in thread
From: Hyeonggon Yoo @ 2022-07-13 10:22 UTC (permalink / raw)
  To: Rongwei Wang
  Cc: akpm, vbabka, roman.gushchin, iamjoonsoo.kim, rientjes, penberg,
	cl, linux-mm, linux-kernel, Feng Tang

On Tue, Jul 12, 2022 at 10:28:05AM +0800, Rongwei Wang wrote:
> In use cases where allocating and freeing slab frequently, some
> error messages, such as "Left Redzone overwritten", "First byte
> 0xbb instead of 0xcc" would be printed when validating slabs.
> That's because an object has been filled with SLAB_RED_INACTIVE,
> but has not been added to slab's freelist. And between these
> two states, the behaviour of validating slab is likely to occur.
> 
> Actually, it doesn't mean the slab can not work stably. But, these
> confusing messages will disturb slab debugging more or less.
> 
> Signed-off-by: Rongwei Wang <rongwei.wang@linux.alibaba.com>
> ---
>  mm/slub.c | 43 +++++++++++++++++++++++++------------------
>  1 file changed, 25 insertions(+), 18 deletions(-)
>

This makes the code more complex.

A part of me says it may be more pleasant to split implementation
allocating from caches for debugging. That would make it simpler.

something like:

__slab_alloc() {
	if (kmem_cache_debug(s))
		slab_alloc_debug()
	else
		___slab_alloc()
}

slab_free() {
	if (kmem_cache_debug(s))
		slab_free_debug()
	else
		__do_slab_free()
}

See also:
	https://lore.kernel.org/lkml/faf416b9-f46c-8534-7fb7-557c046a564d@suse.cz/

> diff --git a/mm/slub.c b/mm/slub.c
> index b1281b8654bd..e950d8df8380 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1391,18 +1391,16 @@ static noinline int free_debug_processing(
>  	void *head, void *tail, int bulk_cnt,
>  	unsigned long addr)
>  {
> -	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
>  	void *object = head;
>  	int cnt = 0;
> -	unsigned long flags, flags2;
> +	unsigned long flags;
>  	int ret = 0;
>  	depot_stack_handle_t handle = 0;
>  
>  	if (s->flags & SLAB_STORE_USER)
>  		handle = set_track_prepare();
>  
> -	spin_lock_irqsave(&n->list_lock, flags);
> -	slab_lock(slab, &flags2);
> +	slab_lock(slab, &flags);
>  
>  	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
>  		if (!check_slab(s, slab))
> @@ -1435,8 +1433,7 @@ static noinline int free_debug_processing(
>  		slab_err(s, slab, "Bulk freelist count(%d) invalid(%d)\n",
>  			 bulk_cnt, cnt);
>  
> -	slab_unlock(slab, &flags2);
> -	spin_unlock_irqrestore(&n->list_lock, flags);
> +	slab_unlock(slab, &flags);
>  	if (!ret)
>  		slab_fix(s, "Object at 0x%p not freed", object);
>  	return ret;
> @@ -3330,7 +3327,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>  
>  {
>  	void *prior;
> -	int was_frozen;
> +	int was_frozen, to_take_off = 0;
>  	struct slab new;
>  	unsigned long counters;
>  	struct kmem_cache_node *n = NULL;
> @@ -3341,14 +3338,23 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>  	if (kfence_free(head))
>  		return;
>  
> -	if (kmem_cache_debug(s) &&
> -	    !free_debug_processing(s, slab, head, tail, cnt, addr))
> -		return;
> +	n = get_node(s, slab_nid(slab));
> +	if (kmem_cache_debug(s)) {
> +		int ret;
>  
> -	do {
> -		if (unlikely(n)) {
> +		spin_lock_irqsave(&n->list_lock, flags);
> +		ret = free_debug_processing(s, slab, head, tail, cnt, addr);
> +		if (!ret) {
>  			spin_unlock_irqrestore(&n->list_lock, flags);
> -			n = NULL;
> +			return;
> +		}
> +	}
> +
> +	do {
> +		if (unlikely(to_take_off)) {
> +			if (!kmem_cache_debug(s))
> +				spin_unlock_irqrestore(&n->list_lock, flags);
> +			to_take_off = 0;
>  		}
>  		prior = slab->freelist;
>  		counters = slab->counters;
> @@ -3369,8 +3375,6 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>  				new.frozen = 1;
>  
>  			} else { /* Needs to be taken off a list */
> -
> -				n = get_node(s, slab_nid(slab));
>  				/*
>  				 * Speculatively acquire the list_lock.
>  				 * If the cmpxchg does not succeed then we may
> @@ -3379,8 +3383,10 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>  				 * Otherwise the list_lock will synchronize with
>  				 * other processors updating the list of slabs.
>  				 */
> -				spin_lock_irqsave(&n->list_lock, flags);
> +				if (!kmem_cache_debug(s))
> +					spin_lock_irqsave(&n->list_lock, flags);
>  
> +				to_take_off = 1;
>  			}
>  		}
>  
> @@ -3389,8 +3395,9 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>  		head, new.counters,
>  		"__slab_free"));
>  
> -	if (likely(!n)) {
> -
> +	if (likely(!to_take_off)) {
> +		if (kmem_cache_debug(s))
> +			spin_unlock_irqrestore(&n->list_lock, flags);
>  		if (likely(was_frozen)) {
>  			/*
>  			 * The list lock was not taken therefore no list
> -- 
> 2.27.0
> 


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 1/3] mm/slub: fix the race between validate_slab and slab_free
  2022-07-13 10:22 ` Hyeonggon Yoo
@ 2022-07-13 12:10   ` Rongwei Wang
  0 siblings, 0 replies; 6+ messages in thread
From: Rongwei Wang @ 2022-07-13 12:10 UTC (permalink / raw)
  To: Hyeonggon Yoo
  Cc: akpm, vbabka, roman.gushchin, iamjoonsoo.kim, rientjes, penberg,
	cl, linux-mm, linux-kernel, Feng Tang



On 7/13/22 6:22 PM, Hyeonggon Yoo wrote:
> On Tue, Jul 12, 2022 at 10:28:05AM +0800, Rongwei Wang wrote:
>> In use cases where allocating and freeing slab frequently, some
>> error messages, such as "Left Redzone overwritten", "First byte
>> 0xbb instead of 0xcc" would be printed when validating slabs.
>> That's because an object has been filled with SLAB_RED_INACTIVE,
>> but has not been added to slab's freelist. And between these
>> two states, the behaviour of validating slab is likely to occur.
>>
>> Actually, it doesn't mean the slab can not work stably. But, these
>> confusing messages will disturb slab debugging more or less.
>>
>> Signed-off-by: Rongwei Wang <rongwei.wang@linux.alibaba.com>
>> ---
>>   mm/slub.c | 43 +++++++++++++++++++++++++------------------
>>   1 file changed, 25 insertions(+), 18 deletions(-)
>>
> 
> This makes the code more complex.
> 
> A part of me says it may be more pleasant to split implementation
> allocating from caches for debugging. That would make it simpler.
> 
> something like:
> 
> __slab_alloc() {
> 	if (kmem_cache_debug(s))
> 		slab_alloc_debug()
> 	else
> 		___slab_alloc()
> }
> 
> slab_free() {
> 	if (kmem_cache_debug(s))
> 		slab_free_debug()
> 	else
> 		__do_slab_free()
> }
Oh, I had the same idea too, but I wasn't sure whether it would be
accepted, since it needs more changes than the current approach. Since
you agree with this way, I can rewrite the code.

Thanks.
> 
> See also:
> 	https://lore.kernel.org/lkml/faf416b9-f46c-8534-7fb7-557c046a564d@suse.cz/
Thanks, it seems that I had missed it.
> 
>> diff --git a/mm/slub.c b/mm/slub.c
>> index b1281b8654bd..e950d8df8380 100644
>> --- a/mm/slub.c
>> +++ b/mm/slub.c
>> @@ -1391,18 +1391,16 @@ static noinline int free_debug_processing(
>>   	void *head, void *tail, int bulk_cnt,
>>   	unsigned long addr)
>>   {
>> -	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
>>   	void *object = head;
>>   	int cnt = 0;
>> -	unsigned long flags, flags2;
>> +	unsigned long flags;
>>   	int ret = 0;
>>   	depot_stack_handle_t handle = 0;
>>   
>>   	if (s->flags & SLAB_STORE_USER)
>>   		handle = set_track_prepare();
>>   
>> -	spin_lock_irqsave(&n->list_lock, flags);
>> -	slab_lock(slab, &flags2);
>> +	slab_lock(slab, &flags);
>>   
>>   	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
>>   		if (!check_slab(s, slab))
>> @@ -1435,8 +1433,7 @@ static noinline int free_debug_processing(
>>   		slab_err(s, slab, "Bulk freelist count(%d) invalid(%d)\n",
>>   			 bulk_cnt, cnt);
>>   
>> -	slab_unlock(slab, &flags2);
>> -	spin_unlock_irqrestore(&n->list_lock, flags);
>> +	slab_unlock(slab, &flags);
>>   	if (!ret)
>>   		slab_fix(s, "Object at 0x%p not freed", object);
>>   	return ret;
>> @@ -3330,7 +3327,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>>   
>>   {
>>   	void *prior;
>> -	int was_frozen;
>> +	int was_frozen, to_take_off = 0;
>>   	struct slab new;
>>   	unsigned long counters;
>>   	struct kmem_cache_node *n = NULL;
>> @@ -3341,14 +3338,23 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>>   	if (kfence_free(head))
>>   		return;
>>   
>> -	if (kmem_cache_debug(s) &&
>> -	    !free_debug_processing(s, slab, head, tail, cnt, addr))
>> -		return;
>> +	n = get_node(s, slab_nid(slab));
>> +	if (kmem_cache_debug(s)) {
>> +		int ret;
>>   
>> -	do {
>> -		if (unlikely(n)) {
>> +		spin_lock_irqsave(&n->list_lock, flags);
>> +		ret = free_debug_processing(s, slab, head, tail, cnt, addr);
>> +		if (!ret) {
>>   			spin_unlock_irqrestore(&n->list_lock, flags);
>> -			n = NULL;
>> +			return;
>> +		}
>> +	}
>> +
>> +	do {
>> +		if (unlikely(to_take_off)) {
>> +			if (!kmem_cache_debug(s))
>> +				spin_unlock_irqrestore(&n->list_lock, flags);
>> +			to_take_off = 0;
>>   		}
>>   		prior = slab->freelist;
>>   		counters = slab->counters;
>> @@ -3369,8 +3375,6 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>>   				new.frozen = 1;
>>   
>>   			} else { /* Needs to be taken off a list */
>> -
>> -				n = get_node(s, slab_nid(slab));
>>   				/*
>>   				 * Speculatively acquire the list_lock.
>>   				 * If the cmpxchg does not succeed then we may
>> @@ -3379,8 +3383,10 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>>   				 * Otherwise the list_lock will synchronize with
>>   				 * other processors updating the list of slabs.
>>   				 */
>> -				spin_lock_irqsave(&n->list_lock, flags);
>> +				if (!kmem_cache_debug(s))
>> +					spin_lock_irqsave(&n->list_lock, flags);
>>   
>> +				to_take_off = 1;
>>   			}
>>   		}
>>   
>> @@ -3389,8 +3395,9 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>>   		head, new.counters,
>>   		"__slab_free"));
>>   
>> -	if (likely(!n)) {
>> -
>> +	if (likely(!to_take_off)) {
>> +		if (kmem_cache_debug(s))
>> +			spin_unlock_irqrestore(&n->list_lock, flags);
>>   		if (likely(was_frozen)) {
>>   			/*
>>   			 * The list lock was not taken therefore no list
>> -- 
>> 2.27.0
>>


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-07-13 12:10 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-12  2:28 [PATCH v2 1/3] mm/slub: fix the race between validate_slab and slab_free Rongwei Wang
2022-07-12  2:28 ` [PATCH v2 2/3] mm/slub: improve consistency of nr_slabs count Rongwei Wang
2022-07-12  2:28 ` [PATCH v2 3/3] mm/slub: delete confusing pr_err when debugging slub Rongwei Wang
2022-07-12  2:57 ` [PATCH v2 1/3] mm/slub: fix the race between validate_slab and slab_free Rongwei Wang
2022-07-13 10:22 ` Hyeonggon Yoo
2022-07-13 12:10   ` Rongwei Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).