From: Qi Zheng <zhengqi.arch@bytedance.com>
To: Kirill Tkhai <tkhai@ya.ru>
Cc: sultan@kerneltoast.com, dave@stgolabs.net,
	penguin-kernel@I-love.SAKURA.ne.jp, paulmck@kernel.org,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	Andrew Morton <akpm@linux-foundation.org>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Shakeel Butt <shakeelb@google.com>,
	Michal Hocko <mhocko@kernel.org>,
	Roman Gushchin <roman.gushchin@linux.dev>,
	Muchun Song <muchun.song@linux.dev>,
	David Hildenbrand <david@redhat.com>,
	Yang Shi <shy828301@gmail.com>
Subject: Re: [PATCH 2/5] mm: vmscan: make memcg slab shrink lockless
Date: Thu, 23 Feb 2023 12:37:19 +0800
Message-ID: <ee7852cc-e37e-5602-7729-5bb1500a1f2a@bytedance.com>
In-Reply-To: <23fde2e7-e9e2-2b0d-dfd8-1a654bc5503c@ya.ru>



On 2023/2/23 04:05, Kirill Tkhai wrote:
> On 22.02.2023 11:21, Qi Zheng wrote:
>>
>>
>> On 2023/2/22 16:16, Qi Zheng wrote:
>>> Hi Kirill,
>>>
>>> On 2023/2/22 05:43, Kirill Tkhai wrote:
>>>> On 20.02.2023 12:16, Qi Zheng wrote:
>>>>> Like global slab shrink, since commit 1cd0bd06093c<...>
>>>>>    static bool cgroup_reclaim(struct scan_control *sc)
>>>>> @@ -891,15 +905,14 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
>>>>>    {
>>>>>        struct shrinker_info *info;
>>>>>        unsigned long ret, freed = 0;
>>>>> +    int srcu_idx;
>>>>>        int i;
>>>>>        if (!mem_cgroup_online(memcg))
>>>>>            return 0;
>>>>> -    if (!down_read_trylock(&shrinker_rwsem))
>>>>> -        return 0;
>>>>> -
>>>>> -    info = shrinker_info_protected(memcg, nid);
>>>>> +    srcu_idx = srcu_read_lock(&shrinker_srcu);
>>>>> +    info = shrinker_info_srcu(memcg, nid);
>>>>>        if (unlikely(!info))
>>>>>            goto unlock;
>>>>
>>>> There is shrinker_nr_max dereference under this hunk. It's not in the patch:
>>>>
>>>>           for_each_set_bit(i, info->map, shrinker_nr_max) {
>>>>
>>>> Since shrinker_nr_max may grow in parallel, this leads to access beyond allocated memory :(
>>>
>>> Oh, indeed.
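
To spell the race out for the record (illustrative interleaving only, not
real code):

    CPU0 (shrink_slab_memcg)              CPU1 (prealloc_memcg_shrinker)
    srcu_read_lock(&shrinker_srcu);
    info = shrinker_info_srcu(memcg, nid);
    /* info->map sized for the old max */
                                          expand_shrinker_info(new_id);
                                          shrinker_nr_max = new_id + 1;
    for_each_set_bit(i, info->map,
                     shrinker_nr_max);
    /* walks past the end of the old map */

so the iteration bound has to come from the info object itself rather than
from the global shrinker_nr_max.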
>>>
>>>>
>>>> It looks like we should save the size of info->map as a new member of struct shrinker_info.
>>>
>>> Agree, then we only traverse the first info->map_size bits each time. Like below:
>>>
>>> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>>> index b6eda2ab205d..f1b5d2803007 100644
>>> --- a/include/linux/memcontrol.h
>>> +++ b/include/linux/memcontrol.h
>>> @@ -97,6 +97,7 @@ struct shrinker_info {
>>>           struct rcu_head rcu;
>>>           atomic_long_t *nr_deferred;
>>>           unsigned long *map;
>>> +       int map_size;
> 
> Sure, like this. The only thing (after rethinking) I want to say is that although "size" was
> my suggestion, it now makes me think that the name "size" reads as a size in bytes.
> 
> Possibly, something like map_nr_max would be better here.

Agree. Will change it to map_nr_max.
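
Something like this on top of the above diff (just an untested sketch):

    struct shrinker_info {
    	struct rcu_head rcu;
    	atomic_long_t *nr_deferred;
    	unsigned long *map;
    	int map_nr_max;	/* upper bound of the map's bit range */
    };

and correspondingly on the reader side:

    	for_each_set_bit(i, info->map, info->map_nr_max) {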

> 
>>>    };
>>>
>>>    struct lruvec_stats_percpu {
>>> diff --git a/mm/vmscan.c b/mm/vmscan.c
>>> index f94bfe540675..dd07eb107915 100644
>>> --- a/mm/vmscan.c
>>> +++ b/mm/vmscan.c
>>> @@ -239,14 +239,20 @@ static void free_shrinker_info_rcu(struct rcu_head *head)
>>>           kvfree(container_of(head, struct shrinker_info, rcu));
>>>    }
>>>
>>> -static int expand_one_shrinker_info(struct mem_cgroup *memcg,
>>> -                                   int map_size, int defer_size,
>>> -                                   int old_map_size, int old_defer_size)
>>> +static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_nr_max,
>>> +                                   int old_nr_max)
>>>    {
>>> +       int map_size, defer_size, old_map_size, old_defer_size;
>>>           struct shrinker_info *new, *old;
>>>           struct mem_cgroup_per_node *pn;
>>>           int nid;
>>> -       int size = map_size + defer_size;
>>> +       int size;
>>> +
>>> +       map_size = shrinker_map_size(new_nr_max);
>>> +       defer_size = shrinker_defer_size(new_nr_max);
>>> +       old_map_size = shrinker_map_size(shrinker_nr_max);
>>> +       old_defer_size = shrinker_defer_size(shrinker_nr_max);
>>
>> Perhaps these should still be calculated once in expand_shrinker_info(), outside the memcg iteration loop, as before.
> 
> Yeah, for me it's also better.
>   
>>> +       size = map_size + defer_size;
>>>
>>>           for_each_node(nid) {
>>>                   pn = memcg->nodeinfo[nid];
>>> @@ -261,6 +267,7 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg,
>>>
>>>                   new->nr_deferred = (atomic_long_t *)(new + 1);
>>>                   new->map = (void *)new->nr_deferred + defer_size;
>>> +               new->map_size = new_nr_max;
>>>
>>>                   /* map: set all old bits, clear all new bits */
>>>                   memset(new->map, (int)0xff, old_map_size);
>>> @@ -310,6 +317,7 @@ int alloc_shrinker_info(struct mem_cgroup *memcg)
>>>                   }
>>>                   info->nr_deferred = (atomic_long_t *)(info + 1);
>>>                   info->map = (void *)info->nr_deferred + defer_size;
>>> +               info->map_size = shrinker_nr_max;
>>>                   rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info);
>>>           }
>>>           mutex_unlock(&shrinker_mutex);
>>> @@ -327,8 +335,6 @@ static int expand_shrinker_info(int new_id)
>>>    {
>>>           int ret = 0;
>>>           int new_nr_max = new_id + 1;
>>> -       int map_size, defer_size = 0;
>>> -       int old_map_size, old_defer_size = 0;
>>>           struct mem_cgroup *memcg;
>>>
>>>           if (!need_expand(new_nr_max))
>>> @@ -339,15 +345,9 @@ static int expand_shrinker_info(int new_id)
>>>
>>>           lockdep_assert_held(&shrinker_mutex);
>>>
>>> -       map_size = shrinker_map_size(new_nr_max);
>>> -       defer_size = shrinker_defer_size(new_nr_max);
>>> -       old_map_size = shrinker_map_size(shrinker_nr_max);
>>> -       old_defer_size = shrinker_defer_size(shrinker_nr_max);
>>> -
>>>           memcg = mem_cgroup_iter(NULL, NULL, NULL);
>>>           do {
>>> -               ret = expand_one_shrinker_info(memcg, map_size, defer_size,
>>> -                                              old_map_size, old_defer_size);
>>> +               ret = expand_one_shrinker_info(memcg, new_nr_max, shrinker_nr_max);
>>>                   if (ret) {
>>>                           mem_cgroup_iter_break(NULL, memcg);
>>>                           goto out;
>>> @@ -912,7 +912,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
>>>           if (unlikely(!info))
>>>                   goto unlock;
>>>
>>> -       for_each_set_bit(i, info->map, shrinker_nr_max) {
>>> +       for_each_set_bit(i, info->map, info->map_size) {
>>>                   struct shrink_control sc = {
>>>                           .gfp_mask = gfp_mask,
>>>                           .nid = nid,
>>>
>>> I will send the v2.
>>>
>>> Thanks,
>>> Qi
>>>
>>>>
>>>>> @@ -949,14 +962,9 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
>>>>>                    set_shrinker_bit(memcg, nid, i);
>>>>>            }
>>>>>            freed += ret;
>>>>> -
>>>>> -        if (rwsem_is_contended(&shrinker_rwsem)) {
>>>>> -            freed = freed ? : 1;
>>>>> -            break;
>>>>> -        }
>>>>>        }
>>>>>    unlock:
>>>>> -    up_read(&shrinker_rwsem);
>>>>> +    srcu_read_unlock(&shrinker_srcu, srcu_idx);
>>>>>        return freed;
>>>>>    }
>>>>>    #else /* CONFIG_MEMCG */
>>>>
>>>
>>
> 
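
So for the v2 my plan is: add the map_nr_max field as above, set it wherever
a shrinker_info is (re)allocated, and keep the map_size/defer_size
calculations in expand_shrinker_info() outside the memcg loop as before,
only passing the new maximum down. Roughly (untested sketch, not the final
patch):

    static int expand_one_shrinker_info(struct mem_cgroup *memcg,
    				    int map_size, int defer_size,
    				    int old_map_size, int old_defer_size,
    				    int new_nr_max)
    {
    	...
    		new->nr_deferred = (atomic_long_t *)(new + 1);
    		new->map = (void *)new->nr_deferred + defer_size;
    		new->map_nr_max = new_nr_max;
    	...
    }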

-- 
Thanks,
Qi
