From: Andrey Ryabinin <aryabinin@virtuozzo.com>
To: Michal Hocko <mhocko@kernel.org>,
Thomas Lindroth <thomas.lindroth@gmail.com>
Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>,
Andrey Ryabinin <aryabinin@virtuozzo.com>,
linux-mm@kvack.org
Subject: Re: [BUG] kmemcg limit defeats __GFP_NOFAIL allocation
Date: Fri, 6 Sep 2019 13:54:30 +0300 [thread overview]
Message-ID: <940ea5a4-b580-34f8-2e5f-0bd2534b7426@virtuozzo.com> (raw)
In-Reply-To: <20190906072711.GD14491@dhcp22.suse.cz>
On 9/6/19 10:27 AM, Michal Hocko wrote:
> On Fri 06-09-19 01:11:53, Thomas Lindroth wrote:
>> On 9/4/19 6:39 PM, Tetsuo Handa wrote:
>>> On 2019/09/04 23:29, Michal Hocko wrote:
>>>> Ohh, right. We are trying to uncharge something that hasn't been charged
>>>> because page_counter_try_charge has failed. So the fix needs to be more
>>>> involved. Sorry, I should have realized that.
>>>
>>> OK. Survived the test. Thomas, please try.
>>>
>>>> ---
>>>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>>>> index 9ec5e12486a7..e18108b2b786 100644
>>>> --- a/mm/memcontrol.c
>>>> +++ b/mm/memcontrol.c
>>>> @@ -2821,6 +2821,16 @@ int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
>>>> if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
>>>> !page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) {
>>>> +
>>>> + /*
>>>> + * Enforce __GFP_NOFAIL allocation because callers are not
>>>> + * prepared to see failures and likely do not have any failure
>>>> + * handling code.
>>>> + */
>>>> + if (gfp & __GFP_NOFAIL) {
>>>> + page_counter_charge(&memcg->kmem, nr_pages);
>>>> + return 0;
>>>> + }
>>>> cancel_charge(memcg, nr_pages);
>>>> return -ENOMEM;
>>>> }
>>>>
>>
>> I tried the patch with 5.2.11 and wasn't able to trigger any null pointer
>> deref crashes with it. Testing is tricky because the OOM killer will still
>> run and eventually kill bash and whatever runs in the cgroup.
>
> Yeah, this is unfortunate but also unfixable I am afraid.
I think there are two possible ways to fix this. If we decide to keep kmem.limit_in_bytes broken,
then we can just always bypass the limit. Also we could add something like pr_warn_once("kmem limit doesn't work");
than we can just always bypass limit. Also we could add something like pr_warn_once("kmem limit doesn't work");
when user changes kmem.limit_in_bytes
Or we can fix kmem.limit_in_bytes like this:
---
mm/memcontrol.c | 76 +++++++++++++++++++++++++++++++------------------
1 file changed, 48 insertions(+), 28 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2d1d598d9554..71b9065e4b31 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1314,7 +1314,7 @@ void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
* Returns the maximum amount of memory @mem can be charged with, in
* pages.
*/
-static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
+static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg, bool kmem)
{
unsigned long margin = 0;
unsigned long count;
@@ -1334,6 +1334,15 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
margin = 0;
}
+ if (kmem && margin) {
+ count = page_counter_read(&memcg->kmem);
+ limit = READ_ONCE(memcg->kmem.max);
+ if (count <= limit)
+ margin = min(margin, limit - count);
+ else
+ margin = 0;
+ }
+
return margin;
}
@@ -2505,7 +2514,7 @@ void mem_cgroup_handle_over_high(void)
}
static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
- unsigned int nr_pages)
+ unsigned int nr_pages, bool kmem_charge)
{
unsigned int batch = max(MEMCG_CHARGE_BATCH, nr_pages);
int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
@@ -2519,21 +2528,42 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
if (mem_cgroup_is_root(memcg))
return 0;
retry:
- if (consume_stock(memcg, nr_pages))
+ if (consume_stock(memcg, nr_pages)) {
+ if (kmem_charge && !page_counter_try_charge(&memcg->kmem,
+ nr_pages, &counter)) {
+ refill_stock(memcg, nr_pages);
+ goto charge;
+ }
return 0;
+ }
+charge:
+ mem_over_limit = NULL;
if (!do_memsw_account() ||
page_counter_try_charge(&memcg->memsw, batch, &counter)) {
- if (page_counter_try_charge(&memcg->memory, batch, &counter))
- goto done_restock;
- if (do_memsw_account())
- page_counter_uncharge(&memcg->memsw, batch);
- mem_over_limit = mem_cgroup_from_counter(counter, memory);
+ if (!page_counter_try_charge(&memcg->memory, batch, &counter)) {
+ if (do_memsw_account())
+ page_counter_uncharge(&memcg->memsw, batch);
+ mem_over_limit = mem_cgroup_from_counter(counter, memory);
+ }
} else {
mem_over_limit = mem_cgroup_from_counter(counter, memsw);
may_swap = false;
}
+ if (!mem_over_limit && kmem_charge) {
+ if (!page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) {
+ may_swap = false;
+ mem_over_limit = mem_cgroup_from_counter(counter, kmem);
+ page_counter_uncharge(&memcg->memory, batch);
+ if (do_memsw_account())
+ page_counter_uncharge(&memcg->memsw, batch);
+ }
+ }
+
+ if (!mem_over_limit)
+ goto done_restock;
+
if (batch > nr_pages) {
batch = nr_pages;
goto retry;
@@ -2568,7 +2598,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
gfp_mask, may_swap);
- if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
+ if (mem_cgroup_margin(mem_over_limit, kmem_charge) >= nr_pages)
goto retry;
if (!drained) {
@@ -2637,6 +2667,8 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
page_counter_charge(&memcg->memory, nr_pages);
if (do_memsw_account())
page_counter_charge(&memcg->memsw, nr_pages);
+ if (kmem_charge)
+ page_counter_charge(&memcg->kmem, nr_pages);
css_get_many(&memcg->css, nr_pages);
return 0;
@@ -2943,20 +2975,8 @@ void memcg_kmem_put_cache(struct kmem_cache *cachep)
int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
struct mem_cgroup *memcg)
{
- unsigned int nr_pages = 1 << order;
- struct page_counter *counter;
- int ret;
-
- ret = try_charge(memcg, gfp, nr_pages);
- if (ret)
- return ret;
-
- if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
- !page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) {
- cancel_charge(memcg, nr_pages);
- return -ENOMEM;
- }
- return 0;
+ return try_charge(memcg, gfp, 1 << order,
+ !cgroup_subsys_on_dfl(memory_cgrp_subsys));
}
/**
@@ -5053,7 +5073,7 @@ static int mem_cgroup_do_precharge(unsigned long count)
int ret;
/* Try a single bulk charge without reclaim first, kswapd may wake */
- ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_DIRECT_RECLAIM, count);
+ ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_DIRECT_RECLAIM, count, false);
if (!ret) {
mc.precharge += count;
return ret;
@@ -5061,7 +5081,7 @@ static int mem_cgroup_do_precharge(unsigned long count)
/* Try charges one by one with reclaim, but do not retry */
while (count--) {
- ret = try_charge(mc.to, GFP_KERNEL | __GFP_NORETRY, 1);
+ ret = try_charge(mc.to, GFP_KERNEL | __GFP_NORETRY, 1, false);
if (ret)
return ret;
mc.precharge++;
@@ -6255,7 +6275,7 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
if (!memcg)
memcg = get_mem_cgroup_from_mm(mm);
- ret = try_charge(memcg, gfp_mask, nr_pages);
+ ret = try_charge(memcg, gfp_mask, nr_pages, false);
css_put(&memcg->css);
out:
@@ -6634,10 +6654,10 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
mod_memcg_state(memcg, MEMCG_SOCK, nr_pages);
- if (try_charge(memcg, gfp_mask, nr_pages) == 0)
+ if (try_charge(memcg, gfp_mask, nr_pages, false) == 0)
return true;
- try_charge(memcg, gfp_mask|__GFP_NOFAIL, nr_pages);
+ try_charge(memcg, gfp_mask|__GFP_NOFAIL, nr_pages, false);
return false;
}
--
2.21.0
next prev parent reply other threads:[~2019-09-06 10:54 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-09-01 20:43 [BUG] Early OOM and kernel NULL pointer dereference in 4.19.69 Thomas Lindroth
2019-09-02 7:16 ` Michal Hocko
2019-09-02 7:27 ` Michal Hocko
2019-09-02 19:34 ` Thomas Lindroth
2019-09-03 7:41 ` Michal Hocko
2019-09-03 12:01 ` Thomas Lindroth
2019-09-03 12:05 ` Andrey Ryabinin
2019-09-03 12:22 ` Michal Hocko
2019-09-03 18:20 ` Thomas Lindroth
2019-09-03 19:36 ` Michal Hocko
[not found] ` <666dbcde-1b8a-9e2d-7d1f-48a117c78ae1@I-love.SAKURA.ne.jp>
2019-09-03 18:25 ` Thomas Lindroth
[not found] ` <4d0eda9a-319d-1a7d-1eed-71da90902367@i-love.sakura.ne.jp>
2019-09-04 11:25 ` [BUG] kmemcg limit defeats __GFP_NOFAIL allocation Michal Hocko
[not found] ` <4d87d770-c110-224f-6c0c-d6fada90417d@i-love.sakura.ne.jp>
2019-09-04 11:59 ` Michal Hocko
[not found] ` <0056063b-46ff-0ebd-ff0d-c96a1f9ae6b1@i-love.sakura.ne.jp>
2019-09-04 14:29 ` Michal Hocko
[not found] ` <405ce28b-c0b4-780c-c883-42d741ec60e0@i-love.sakura.ne.jp>
2019-09-05 23:11 ` Thomas Lindroth
2019-09-06 7:27 ` Michal Hocko
2019-09-06 10:54 ` Andrey Ryabinin [this message]
2019-09-06 11:29 ` Michal Hocko
[not found] ` <20190906125608.32129-1-mhocko@kernel.org>
2019-09-06 18:24 ` [PATCH] memcg, kmem: do not fail __GFP_NOFAIL charges Shakeel Butt
2019-09-09 11:22 ` Michal Hocko
2019-09-11 12:00 ` Michal Hocko
2019-09-11 14:37 ` Andrew Morton
2019-09-11 15:16 ` Michal Hocko
2019-09-13 2:46 ` Shakeel Butt
2019-09-24 10:53 ` Michal Hocko
2019-09-24 23:06 ` Andrew Morton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=940ea5a4-b580-34f8-2e5f-0bd2534b7426@virtuozzo.com \
--to=aryabinin@virtuozzo.com \
--cc=linux-mm@kvack.org \
--cc=mhocko@kernel.org \
--cc=penguin-kernel@i-love.sakura.ne.jp \
--cc=thomas.lindroth@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).