linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Michal Hocko <mhocko@suse.com>
To: Sultan Alsawaf <sultan@kerneltoast.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH] mm: vmscan: Replace shrinker_rwsem trylocks with SRCU protection
Date: Mon, 27 Sep 2021 10:36:36 +0200	[thread overview]
Message-ID: <YVGClLWLXO8AWaEk@dhcp22.suse.cz> (raw)
In-Reply-To: <20210927074823.5825-1-sultan@kerneltoast.com>

On Mon 27-09-21 00:48:23, Sultan Alsawaf wrote:
> From: Sultan Alsawaf <sultan@kerneltoast.com>
> 
> The shrinker rwsem is problematic because the actual shrinking path must
> back off when contention appears, causing some or all shrinkers to be
> skipped. This can be especially bad when shrinkers are frequently
> registered and unregistered. A high frequency of shrinker registrations/
> unregistrations can effectively DoS the shrinker mechanism, rendering it
> useless.

Can you be more specific about those scenarios please?

> Using SRCU to protect iteration through the shrinker list and idr
> eliminates this issue entirely. Now, shrinking can happen concurrently
> with shrinker registrations/unregistrations.

I have a vague recollection that this approach has been proposed in the
past. Have you checked previous attempts?

> Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
> ---
>  mm/vmscan.c | 68 ++++++++++++++++++++++++++++-------------------------
>  1 file changed, 36 insertions(+), 32 deletions(-)
> 
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 74296c2d1fed..3dea927be5ad 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -190,6 +190,7 @@ static void set_task_reclaim_state(struct task_struct *task,
>  
>  static LIST_HEAD(shrinker_list);
>  static DECLARE_RWSEM(shrinker_rwsem);
> +DEFINE_STATIC_SRCU(shrinker_srcu);
>  
>  #ifdef CONFIG_MEMCG
>  static int shrinker_nr_max;
> @@ -212,6 +213,20 @@ static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
>  					 lockdep_is_held(&shrinker_rwsem));
>  }
>  
> +static struct shrinker_info *shrinker_info_srcu(struct mem_cgroup *memcg,
> +						int nid)
> +{
> +	return srcu_dereference(memcg->nodeinfo[nid]->shrinker_info,
> +				&shrinker_srcu);
> +}
> +
> +static void free_shrinker_info_srcu(struct rcu_head *head)
> +{
> +	struct shrinker_info *info = container_of(head, typeof(*info), rcu);
> +
> +	kvfree_rcu(info, rcu);
> +}
> +
>  static int expand_one_shrinker_info(struct mem_cgroup *memcg,
>  				    int map_size, int defer_size,
>  				    int old_map_size, int old_defer_size)
> @@ -244,7 +259,12 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg,
>  		       defer_size - old_defer_size);
>  
>  		rcu_assign_pointer(pn->shrinker_info, new);
> -		kvfree_rcu(old, rcu);
> +
> +		/*
> +		 * Shrinker info is used under both SRCU and regular RCU, so
> +		 * synchronize the free against both of them.
> +		 */
> +		call_srcu(&shrinker_srcu, &old->rcu, free_shrinker_info_srcu);
>  	}
>  
>  	return 0;
> @@ -357,7 +377,6 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
>  		return -ENOSYS;
>  
>  	down_write(&shrinker_rwsem);
> -	/* This may call shrinker, so it must use down_read_trylock() */
>  	id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
>  	if (id < 0)
>  		goto unlock;
> @@ -391,7 +410,7 @@ static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
>  {
>  	struct shrinker_info *info;
>  
> -	info = shrinker_info_protected(memcg, nid);
> +	info = shrinker_info_srcu(memcg, nid);
>  	return atomic_long_xchg(&info->nr_deferred[shrinker->id], 0);
>  }
>  
> @@ -400,7 +419,7 @@ static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
>  {
>  	struct shrinker_info *info;
>  
> -	info = shrinker_info_protected(memcg, nid);
> +	info = shrinker_info_srcu(memcg, nid);
>  	return atomic_long_add_return(nr, &info->nr_deferred[shrinker->id]);
>  }
>  
> @@ -641,6 +660,7 @@ void free_prealloced_shrinker(struct shrinker *shrinker)
>  		down_write(&shrinker_rwsem);
>  		unregister_memcg_shrinker(shrinker);
>  		up_write(&shrinker_rwsem);
> +		synchronize_srcu(&shrinker_srcu);
>  		return;
>  	}
>  
> @@ -651,7 +671,7 @@ void free_prealloced_shrinker(struct shrinker *shrinker)
>  void register_shrinker_prepared(struct shrinker *shrinker)
>  {
>  	down_write(&shrinker_rwsem);
> -	list_add_tail(&shrinker->list, &shrinker_list);
> +	list_add_tail_rcu(&shrinker->list, &shrinker_list);
>  	shrinker->flags |= SHRINKER_REGISTERED;
>  	up_write(&shrinker_rwsem);
>  }
> @@ -676,11 +696,12 @@ void unregister_shrinker(struct shrinker *shrinker)
>  		return;
>  
>  	down_write(&shrinker_rwsem);
> -	list_del(&shrinker->list);
> +	list_del_rcu(&shrinker->list);
>  	shrinker->flags &= ~SHRINKER_REGISTERED;
>  	if (shrinker->flags & SHRINKER_MEMCG_AWARE)
>  		unregister_memcg_shrinker(shrinker);
>  	up_write(&shrinker_rwsem);
> +	synchronize_srcu(&shrinker_srcu);
>  
>  	kfree(shrinker->nr_deferred);
>  	shrinker->nr_deferred = NULL;
> @@ -792,15 +813,13 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
>  {
>  	struct shrinker_info *info;
>  	unsigned long ret, freed = 0;
> -	int i;
> +	int i, srcu_idx;
>  
>  	if (!mem_cgroup_online(memcg))
>  		return 0;
>  
> -	if (!down_read_trylock(&shrinker_rwsem))
> -		return 0;
> -
> -	info = shrinker_info_protected(memcg, nid);
> +	srcu_idx = srcu_read_lock(&shrinker_srcu);
> +	info = shrinker_info_srcu(memcg, nid);
>  	if (unlikely(!info))
>  		goto unlock;
>  
> @@ -850,14 +869,9 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
>  				set_shrinker_bit(memcg, nid, i);
>  		}
>  		freed += ret;
> -
> -		if (rwsem_is_contended(&shrinker_rwsem)) {
> -			freed = freed ? : 1;
> -			break;
> -		}
>  	}
>  unlock:
> -	up_read(&shrinker_rwsem);
> +	srcu_read_unlock(&shrinker_srcu, srcu_idx);
>  	return freed;
>  }
>  #else /* CONFIG_MEMCG */
> @@ -894,6 +908,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
>  {
>  	unsigned long ret, freed = 0;
>  	struct shrinker *shrinker;
> +	int srcu_idx;
>  
>  	/*
>  	 * The root memcg might be allocated even though memcg is disabled
> @@ -905,10 +920,9 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
>  	if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
>  		return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
>  
> -	if (!down_read_trylock(&shrinker_rwsem))
> -		goto out;
> -
> -	list_for_each_entry(shrinker, &shrinker_list, list) {
> +	srcu_idx = srcu_read_lock(&shrinker_srcu);
> +	list_for_each_entry_srcu(shrinker, &shrinker_list, list,
> +				 srcu_read_lock_held(&shrinker_srcu)) {
>  		struct shrink_control sc = {
>  			.gfp_mask = gfp_mask,
>  			.nid = nid,
> @@ -919,19 +933,9 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
>  		if (ret == SHRINK_EMPTY)
>  			ret = 0;
>  		freed += ret;
> -		/*
> -		 * Bail out if someone want to register a new shrinker to
> -		 * prevent the registration from being stalled for long periods
> -		 * by parallel ongoing shrinking.
> -		 */
> -		if (rwsem_is_contended(&shrinker_rwsem)) {
> -			freed = freed ? : 1;
> -			break;
> -		}
>  	}
> +	srcu_read_unlock(&shrinker_srcu, srcu_idx);
>  
> -	up_read(&shrinker_rwsem);
> -out:
>  	cond_resched();
>  	return freed;
>  }
> -- 
> 2.33.0

-- 
Michal Hocko
SUSE Labs

  reply	other threads:[~2021-09-27  8:36 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-27  7:48 [PATCH] mm: vmscan: Replace shrinker_rwsem trylocks with SRCU protection Sultan Alsawaf
2021-09-27  8:36 ` Michal Hocko [this message]
2021-09-27 16:53   ` Sultan Alsawaf
2021-09-29  8:06     ` Michal Hocko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YVGClLWLXO8AWaEk@dhcp22.suse.cz \
    --to=mhocko@suse.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=sultan@kerneltoast.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).