Re: [PATCH] mm: slub: annotate kmem_cache_node->list_lock as raw_spinlock

* Re: [PATCH] mm: slub: annotate kmem_cache_node->list_lock as raw_spinlock
       [not found] <20230411130854.46795-1-zhengqi.arch@bytedance.com>
@ 2023-04-11 13:40 ` Vlastimil Babka
  2023-04-11 14:08   ` Qi Zheng
  0 siblings, 1 reply; 23+ messages in thread
From: Vlastimil Babka @ 2023-04-11 13:40 UTC (permalink / raw)
  To: Qi Zheng, 42.hyeyoo, akpm, roman.gushchin, iamjoonsoo.kim,
	rientjes, penberg, cl
  Cc: linux-mm, linux-kernel, Zhao Gongyi, Sebastian Andrzej Siewior,
	Thomas Gleixner, RCU, Paul E . McKenney

On 4/11/23 15:08, Qi Zheng wrote:
> The list_lock can be held in the critical section of
> raw_spinlock, and then lockdep will complain about it
> like below:
> 
>  =============================
>  [ BUG: Invalid wait context ]
>  6.3.0-rc6-next-20230411 #7 Not tainted
>  -----------------------------
>  swapper/0/1 is trying to lock:
>  ffff888100055418 (&n->list_lock){....}-{3:3}, at: ___slab_alloc+0x73d/0x1330
>  other info that might help us debug this:
>  context-{5:5}
>  2 locks held by swapper/0/1:
>   #0: ffffffff824e8160 (rcu_tasks.cbs_gbl_lock){....}-{2:2}, at: cblist_init_generic+0x22/0x2d0
>   #1: ffff888136bede50 (&ACCESS_PRIVATE(rtpcp, lock)){....}-{2:2}, at: cblist_init_generic+0x232/0x2d0
>  stack backtrace:
>  CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.3.0-rc6-next-20230411 #7
>  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
>  Call Trace:
>   <TASK>
>   dump_stack_lvl+0x77/0xc0
>   __lock_acquire+0xa65/0x2950
>   ? arch_stack_walk+0x65/0xf0
>   ? arch_stack_walk+0x65/0xf0
>   ? unwind_next_frame+0x602/0x8d0
>   lock_acquire+0xe0/0x300
>   ? ___slab_alloc+0x73d/0x1330
>   ? find_usage_forwards+0x39/0x50
>   ? check_irq_usage+0x162/0xa70
>   ? __bfs+0x10c/0x2c0
>   _raw_spin_lock_irqsave+0x4f/0x90
>   ? ___slab_alloc+0x73d/0x1330
>   ___slab_alloc+0x73d/0x1330
>   ? fill_pool+0x16b/0x2a0
>   ? look_up_lock_class+0x5d/0x160
>   ? register_lock_class+0x48/0x500
>   ? __lock_acquire+0xabc/0x2950
>   ? fill_pool+0x16b/0x2a0
>   kmem_cache_alloc+0x358/0x3b0
>   ? __lock_acquire+0xabc/0x2950
>   fill_pool+0x16b/0x2a0
>   ? __debug_object_init+0x292/0x560
>   ? lock_acquire+0xe0/0x300
>   ? cblist_init_generic+0x232/0x2d0
>   __debug_object_init+0x2c/0x560
>   cblist_init_generic+0x147/0x2d0
>   rcu_init_tasks_generic+0x15/0x190
>   kernel_init_freeable+0x6e/0x3e0
>   ? rest_init+0x1e0/0x1e0
>   kernel_init+0x1b/0x1d0
>   ? rest_init+0x1e0/0x1e0
>   ret_from_fork+0x1f/0x30
>   </TASK>
> 
> The fill_pool() can only be called in the !PREEMPT_RT kernel
> or in the preemptible context of the PREEMPT_RT kernel, so
> the above warning is not a real issue, but it's better to
> annotate kmem_cache_node->list_lock as raw_spinlock to get
> rid of such issue.

+ CC some RT and RCU people

AFAIK raw_spinlock is not just an annotation, but on RT it changes the
implementation from preemptible mutex to actual spin lock, so it would be
rather unfortunate to do that for a spurious warning. Can it be somehow
fixed in a better way?

> Reported-by: Zhao Gongyi <zhaogongyi@bytedance.com>
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> ---
>  mm/slab.h |  4 ++--
>  mm/slub.c | 66 +++++++++++++++++++++++++++----------------------------
>  2 files changed, 35 insertions(+), 35 deletions(-)
> 
> diff --git a/mm/slab.h b/mm/slab.h
> index f01ac256a8f5..43f3436d13b4 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -723,8 +723,9 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
>   * The slab lists for all objects.
>   */
>  struct kmem_cache_node {
> -#ifdef CONFIG_SLAB
>  	raw_spinlock_t list_lock;
> +
> +#ifdef CONFIG_SLAB
>  	struct list_head slabs_partial;	/* partial list first, better asm code */
>  	struct list_head slabs_full;
>  	struct list_head slabs_free;
> @@ -740,7 +741,6 @@ struct kmem_cache_node {
>  #endif
>  
>  #ifdef CONFIG_SLUB
> -	spinlock_t list_lock;
>  	unsigned long nr_partial;
>  	struct list_head partial;
>  #ifdef CONFIG_SLUB_DEBUG
> diff --git a/mm/slub.c b/mm/slub.c
> index c87628cd8a9a..e66a35643624 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1331,7 +1331,7 @@ static void add_full(struct kmem_cache *s,
>  	if (!(s->flags & SLAB_STORE_USER))
>  		return;
>  
> -	lockdep_assert_held(&n->list_lock);
> +	assert_raw_spin_locked(&n->list_lock);
>  	list_add(&slab->slab_list, &n->full);
>  }
>  
> @@ -1340,7 +1340,7 @@ static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct
>  	if (!(s->flags & SLAB_STORE_USER))
>  		return;
>  
> -	lockdep_assert_held(&n->list_lock);
> +	assert_raw_spin_locked(&n->list_lock);
>  	list_del(&slab->slab_list);
>  }
>  
> @@ -2113,14 +2113,14 @@ __add_partial(struct kmem_cache_node *n, struct slab *slab, int tail)
>  static inline void add_partial(struct kmem_cache_node *n,
>  				struct slab *slab, int tail)
>  {
> -	lockdep_assert_held(&n->list_lock);
> +	assert_raw_spin_locked(&n->list_lock);
>  	__add_partial(n, slab, tail);
>  }
>  
>  static inline void remove_partial(struct kmem_cache_node *n,
>  					struct slab *slab)
>  {
> -	lockdep_assert_held(&n->list_lock);
> +	assert_raw_spin_locked(&n->list_lock);
>  	list_del(&slab->slab_list);
>  	n->nr_partial--;
>  }
> @@ -2136,7 +2136,7 @@ static void *alloc_single_from_partial(struct kmem_cache *s,
>  {
>  	void *object;
>  
> -	lockdep_assert_held(&n->list_lock);
> +	assert_raw_spin_locked(&n->list_lock);
>  
>  	object = slab->freelist;
>  	slab->freelist = get_freepointer(s, object);
> @@ -2181,7 +2181,7 @@ static void *alloc_single_from_new_slab(struct kmem_cache *s,
>  		 */
>  		return NULL;
>  
> -	spin_lock_irqsave(&n->list_lock, flags);
> +	raw_spin_lock_irqsave(&n->list_lock, flags);
>  
>  	if (slab->inuse == slab->objects)
>  		add_full(s, n, slab);
> @@ -2189,7 +2189,7 @@ static void *alloc_single_from_new_slab(struct kmem_cache *s,
>  		add_partial(n, slab, DEACTIVATE_TO_HEAD);
>  
>  	inc_slabs_node(s, nid, slab->objects);
> -	spin_unlock_irqrestore(&n->list_lock, flags);
> +	raw_spin_unlock_irqrestore(&n->list_lock, flags);
>  
>  	return object;
>  }
> @@ -2208,7 +2208,7 @@ static inline void *acquire_slab(struct kmem_cache *s,
>  	unsigned long counters;
>  	struct slab new;
>  
> -	lockdep_assert_held(&n->list_lock);
> +	assert_raw_spin_locked(&n->list_lock);
>  
>  	/*
>  	 * Zap the freelist and set the frozen bit.
> @@ -2267,7 +2267,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
>  	if (!n || !n->nr_partial)
>  		return NULL;
>  
> -	spin_lock_irqsave(&n->list_lock, flags);
> +	raw_spin_lock_irqsave(&n->list_lock, flags);
>  	list_for_each_entry_safe(slab, slab2, &n->partial, slab_list) {
>  		void *t;
>  
> @@ -2304,7 +2304,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
>  #endif
>  
>  	}
> -	spin_unlock_irqrestore(&n->list_lock, flags);
> +	raw_spin_unlock_irqrestore(&n->list_lock, flags);
>  	return object;
>  }
>  
> @@ -2548,7 +2548,7 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
>  		 * Taking the spinlock removes the possibility that
>  		 * acquire_slab() will see a slab that is frozen
>  		 */
> -		spin_lock_irqsave(&n->list_lock, flags);
> +		raw_spin_lock_irqsave(&n->list_lock, flags);
>  	} else {
>  		mode = M_FULL_NOLIST;
>  	}
> @@ -2559,14 +2559,14 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
>  				new.freelist, new.counters,
>  				"unfreezing slab")) {
>  		if (mode == M_PARTIAL)
> -			spin_unlock_irqrestore(&n->list_lock, flags);
> +			raw_spin_unlock_irqrestore(&n->list_lock, flags);
>  		goto redo;
>  	}
>  
>  
>  	if (mode == M_PARTIAL) {
>  		add_partial(n, slab, tail);
> -		spin_unlock_irqrestore(&n->list_lock, flags);
> +		raw_spin_unlock_irqrestore(&n->list_lock, flags);
>  		stat(s, tail);
>  	} else if (mode == M_FREE) {
>  		stat(s, DEACTIVATE_EMPTY);
> @@ -2594,10 +2594,10 @@ static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab)
>  		n2 = get_node(s, slab_nid(slab));
>  		if (n != n2) {
>  			if (n)
> -				spin_unlock_irqrestore(&n->list_lock, flags);
> +				raw_spin_unlock_irqrestore(&n->list_lock, flags);
>  
>  			n = n2;
> -			spin_lock_irqsave(&n->list_lock, flags);
> +			raw_spin_lock_irqsave(&n->list_lock, flags);
>  		}
>  
>  		do {
> @@ -2626,7 +2626,7 @@ static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab)
>  	}
>  
>  	if (n)
> -		spin_unlock_irqrestore(&n->list_lock, flags);
> +		raw_spin_unlock_irqrestore(&n->list_lock, flags);
>  
>  	while (slab_to_discard) {
>  		slab = slab_to_discard;
> @@ -2951,10 +2951,10 @@ static unsigned long count_partial(struct kmem_cache_node *n,
>  	unsigned long x = 0;
>  	struct slab *slab;
>  
> -	spin_lock_irqsave(&n->list_lock, flags);
> +	raw_spin_lock_irqsave(&n->list_lock, flags);
>  	list_for_each_entry(slab, &n->partial, slab_list)
>  		x += get_count(slab);
> -	spin_unlock_irqrestore(&n->list_lock, flags);
> +	raw_spin_unlock_irqrestore(&n->list_lock, flags);
>  	return x;
>  }
>  #endif /* CONFIG_SLUB_DEBUG || SLAB_SUPPORTS_SYSFS */
> @@ -3515,7 +3515,7 @@ static noinline void free_to_partial_list(
>  	if (s->flags & SLAB_STORE_USER)
>  		handle = set_track_prepare();
>  
> -	spin_lock_irqsave(&n->list_lock, flags);
> +	raw_spin_lock_irqsave(&n->list_lock, flags);
>  
>  	if (free_debug_processing(s, slab, head, tail, &cnt, addr, handle)) {
>  		void *prior = slab->freelist;
> @@ -3554,7 +3554,7 @@ static noinline void free_to_partial_list(
>  		dec_slabs_node(s, slab_nid(slab_free), slab_free->objects);
>  	}
>  
> -	spin_unlock_irqrestore(&n->list_lock, flags);
> +	raw_spin_unlock_irqrestore(&n->list_lock, flags);
>  
>  	if (slab_free) {
>  		stat(s, FREE_SLAB);
> @@ -3594,7 +3594,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>  
>  	do {
>  		if (unlikely(n)) {
> -			spin_unlock_irqrestore(&n->list_lock, flags);
> +			raw_spin_unlock_irqrestore(&n->list_lock, flags);
>  			n = NULL;
>  		}
>  		prior = slab->freelist;
> @@ -3626,7 +3626,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>  				 * Otherwise the list_lock will synchronize with
>  				 * other processors updating the list of slabs.
>  				 */
> -				spin_lock_irqsave(&n->list_lock, flags);
> +				raw_spin_lock_irqsave(&n->list_lock, flags);
>  
>  			}
>  		}
> @@ -3668,7 +3668,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>  		add_partial(n, slab, DEACTIVATE_TO_TAIL);
>  		stat(s, FREE_ADD_PARTIAL);
>  	}
> -	spin_unlock_irqrestore(&n->list_lock, flags);
> +	raw_spin_unlock_irqrestore(&n->list_lock, flags);
>  	return;
>  
>  slab_empty:
> @@ -3683,7 +3683,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
>  		remove_full(s, n, slab);
>  	}
>  
> -	spin_unlock_irqrestore(&n->list_lock, flags);
> +	raw_spin_unlock_irqrestore(&n->list_lock, flags);
>  	stat(s, FREE_SLAB);
>  	discard_slab(s, slab);
>  }
> @@ -4180,7 +4180,7 @@ static void
>  init_kmem_cache_node(struct kmem_cache_node *n)
>  {
>  	n->nr_partial = 0;
> -	spin_lock_init(&n->list_lock);
> +	raw_spin_lock_init(&n->list_lock);
>  	INIT_LIST_HEAD(&n->partial);
>  #ifdef CONFIG_SLUB_DEBUG
>  	atomic_long_set(&n->nr_slabs, 0);
> @@ -4576,7 +4576,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
>  	struct slab *slab, *h;
>  
>  	BUG_ON(irqs_disabled());
> -	spin_lock_irq(&n->list_lock);
> +	raw_spin_lock_irq(&n->list_lock);
>  	list_for_each_entry_safe(slab, h, &n->partial, slab_list) {
>  		if (!slab->inuse) {
>  			remove_partial(n, slab);
> @@ -4586,7 +4586,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
>  			  "Objects remaining in %s on __kmem_cache_shutdown()");
>  		}
>  	}
> -	spin_unlock_irq(&n->list_lock);
> +	raw_spin_unlock_irq(&n->list_lock);
>  
>  	list_for_each_entry_safe(slab, h, &discard, slab_list)
>  		discard_slab(s, slab);
> @@ -4790,7 +4790,7 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
>  		for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
>  			INIT_LIST_HEAD(promote + i);
>  
> -		spin_lock_irqsave(&n->list_lock, flags);
> +		raw_spin_lock_irqsave(&n->list_lock, flags);
>  
>  		/*
>  		 * Build lists of slabs to discard or promote.
> @@ -4822,7 +4822,7 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
>  		for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
>  			list_splice(promote + i, &n->partial);
>  
> -		spin_unlock_irqrestore(&n->list_lock, flags);
> +		raw_spin_unlock_irqrestore(&n->list_lock, flags);
>  
>  		/* Release empty slabs */
>  		list_for_each_entry_safe(slab, t, &discard, slab_list)
> @@ -5147,7 +5147,7 @@ static int validate_slab_node(struct kmem_cache *s,
>  	struct slab *slab;
>  	unsigned long flags;
>  
> -	spin_lock_irqsave(&n->list_lock, flags);
> +	raw_spin_lock_irqsave(&n->list_lock, flags);
>  
>  	list_for_each_entry(slab, &n->partial, slab_list) {
>  		validate_slab(s, slab, obj_map);
> @@ -5173,7 +5173,7 @@ static int validate_slab_node(struct kmem_cache *s,
>  	}
>  
>  out:
> -	spin_unlock_irqrestore(&n->list_lock, flags);
> +	raw_spin_unlock_irqrestore(&n->list_lock, flags);
>  	return count;
>  }
>  
> @@ -6399,12 +6399,12 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep)
>  		if (!atomic_long_read(&n->nr_slabs))
>  			continue;
>  
> -		spin_lock_irqsave(&n->list_lock, flags);
> +		raw_spin_lock_irqsave(&n->list_lock, flags);
>  		list_for_each_entry(slab, &n->partial, slab_list)
>  			process_slab(t, s, slab, alloc, obj_map);
>  		list_for_each_entry(slab, &n->full, slab_list)
>  			process_slab(t, s, slab, alloc, obj_map);
> -		spin_unlock_irqrestore(&n->list_lock, flags);
> +		raw_spin_unlock_irqrestore(&n->list_lock, flags);
>  	}
>  
>  	/* Sort locations by count */

^ permalink raw reply	[flat|nested] 23+ messages in thread