All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
To: Michal Hocko <mhocko@kernel.org>,
	Thomas Lindroth <thomas.lindroth@gmail.com>
Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>,
	Andrey Ryabinin <aryabinin@virtuozzo.com>,
	linux-mm@kvack.org
Subject: Re: [BUG] kmemcg limit defeats __GFP_NOFAIL allocation
Date: Fri, 6 Sep 2019 13:54:30 +0300	[thread overview]
Message-ID: <940ea5a4-b580-34f8-2e5f-0bd2534b7426@virtuozzo.com> (raw)
In-Reply-To: <20190906072711.GD14491@dhcp22.suse.cz>



On 9/6/19 10:27 AM, Michal Hocko wrote:
> On Fri 06-09-19 01:11:53, Thomas Lindroth wrote:
>> On 9/4/19 6:39 PM, Tetsuo Handa wrote:
>>> On 2019/09/04 23:29, Michal Hocko wrote:
>>>> Ohh, right. We are trying to uncharge something that hasn't been charged
>>>> because page_counter_try_charge has failed. So the fix needs to be more
>>>> involved. Sorry, I should have realized that.
>>>
>>> OK. Survived the test. Thomas, please try.
>>>
>>>> ---
>>>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>>>> index 9ec5e12486a7..e18108b2b786 100644
>>>> --- a/mm/memcontrol.c
>>>> +++ b/mm/memcontrol.c
>>>> @@ -2821,6 +2821,16 @@ int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
>>>>   	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
>>>>   	    !page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) {
>>>> +
>>>> +		/*
>>>> +		 * Enforce __GFP_NOFAIL allocation because callers are not
>>>> +		 * prepared to see failures and likely do not have any failure
>>>> +		 * handling code.
>>>> +		 */
>>>> +		if (gfp & __GFP_NOFAIL) {
>>>> +			page_counter_charge(&memcg->kmem, nr_pages);
>>>> +			return 0;
>>>> +		}
>>>>   		cancel_charge(memcg, nr_pages);
>>>>   		return -ENOMEM;
>>>>   	}
>>>>
>>
>> I tried the patch with 5.2.11 and wasn't able to trigger any null pointer
>> deref crashes with it. Testing is tricky because the OOM killer will still
>> run and eventually kill bash and whatever runs in the cgroup.
> 
> Yeah, this is unfortunate but also unfixable I am afraid. 

I think there are two possible ways to fix this. If we decide to keep kmem.limit_in_bytes broken,
than we can just always bypass limit. Also we could add something like pr_warn_once("kmem limit doesn't work");
when user changes kmem.limit_in_bytes 


Or we can fix kmem.limit_in_bytes like this:


---
 mm/memcontrol.c | 76 +++++++++++++++++++++++++++++++------------------
 1 file changed, 48 insertions(+), 28 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2d1d598d9554..71b9065e4b31 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1314,7 +1314,7 @@ void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
  * Returns the maximum amount of memory @mem can be charged with, in
  * pages.
  */
-static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
+static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg, bool kmem)
 {
 	unsigned long margin = 0;
 	unsigned long count;
@@ -1334,6 +1334,15 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
 			margin = 0;
 	}
 
+	if (kmem && margin) {
+		count = page_counter_read(&memcg->kmem);
+		limit = READ_ONCE(memcg->kmem.max);
+		if (count <= limit)
+			margin = min(margin, limit - count);
+		else
+			margin = 0;
+	}
+
 	return margin;
 }
 
@@ -2505,7 +2514,7 @@ void mem_cgroup_handle_over_high(void)
 }
 
 static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
-		      unsigned int nr_pages)
+		      unsigned int nr_pages, bool kmem_charge)
 {
 	unsigned int batch = max(MEMCG_CHARGE_BATCH, nr_pages);
 	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
@@ -2519,21 +2528,42 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	if (mem_cgroup_is_root(memcg))
 		return 0;
 retry:
-	if (consume_stock(memcg, nr_pages))
+	if (consume_stock(memcg, nr_pages)) {
+		if (kmem_charge && !page_counter_try_charge(&memcg->kmem,
+							nr_pages, &counter)) {
+			refill_stock(memcg, nr_pages);
+			goto charge;
+		}
 		return 0;
+	}
 
+charge:
+	mem_over_limit = NULL;
 	if (!do_memsw_account() ||
 	    page_counter_try_charge(&memcg->memsw, batch, &counter)) {
-		if (page_counter_try_charge(&memcg->memory, batch, &counter))
-			goto done_restock;
-		if (do_memsw_account())
-			page_counter_uncharge(&memcg->memsw, batch);
-		mem_over_limit = mem_cgroup_from_counter(counter, memory);
+		if (!page_counter_try_charge(&memcg->memory, batch, &counter)) {
+			if (do_memsw_account())
+				page_counter_uncharge(&memcg->memsw, batch);
+			mem_over_limit = mem_cgroup_from_counter(counter, memory);
+		}
 	} else {
 		mem_over_limit = mem_cgroup_from_counter(counter, memsw);
 		may_swap = false;
 	}
 
+	if (!mem_over_limit && kmem_charge) {
+		if (!page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) {
+			may_swap = false;
+			mem_over_limit = mem_cgroup_from_counter(counter, kmem);
+			page_counter_uncharge(&memcg->memory, batch);
+			if (do_memsw_account())
+				page_counter_uncharge(&memcg->memsw, batch);
+		}
+	}
+
+	if (!mem_over_limit)
+		goto done_restock;
+
 	if (batch > nr_pages) {
 		batch = nr_pages;
 		goto retry;
@@ -2568,7 +2598,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
 						    gfp_mask, may_swap);
 
-	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
+	if (mem_cgroup_margin(mem_over_limit, kmem_charge) >= nr_pages)
 		goto retry;
 
 	if (!drained) {
@@ -2637,6 +2667,8 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	page_counter_charge(&memcg->memory, nr_pages);
 	if (do_memsw_account())
 		page_counter_charge(&memcg->memsw, nr_pages);
+	if (kmem_charge)
+		page_counter_charge(&memcg->kmem, nr_pages);
 	css_get_many(&memcg->css, nr_pages);
 
 	return 0;
@@ -2943,20 +2975,8 @@ void memcg_kmem_put_cache(struct kmem_cache *cachep)
 int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
 			    struct mem_cgroup *memcg)
 {
-	unsigned int nr_pages = 1 << order;
-	struct page_counter *counter;
-	int ret;
-
-	ret = try_charge(memcg, gfp, nr_pages);
-	if (ret)
-		return ret;
-
-	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
-	    !page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) {
-		cancel_charge(memcg, nr_pages);
-		return -ENOMEM;
-	}
-	return 0;
+	return try_charge(memcg, gfp, 1 << order,
+			!cgroup_subsys_on_dfl(memory_cgrp_subsys));
 }
 
 /**
@@ -5053,7 +5073,7 @@ static int mem_cgroup_do_precharge(unsigned long count)
 	int ret;
 
 	/* Try a single bulk charge without reclaim first, kswapd may wake */
-	ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_DIRECT_RECLAIM, count);
+	ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_DIRECT_RECLAIM, count, false);
 	if (!ret) {
 		mc.precharge += count;
 		return ret;
@@ -5061,7 +5081,7 @@ static int mem_cgroup_do_precharge(unsigned long count)
 
 	/* Try charges one by one with reclaim, but do not retry */
 	while (count--) {
-		ret = try_charge(mc.to, GFP_KERNEL | __GFP_NORETRY, 1);
+		ret = try_charge(mc.to, GFP_KERNEL | __GFP_NORETRY, 1, false);
 		if (ret)
 			return ret;
 		mc.precharge++;
@@ -6255,7 +6275,7 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 	if (!memcg)
 		memcg = get_mem_cgroup_from_mm(mm);
 
-	ret = try_charge(memcg, gfp_mask, nr_pages);
+	ret = try_charge(memcg, gfp_mask, nr_pages, false);
 
 	css_put(&memcg->css);
 out:
@@ -6634,10 +6654,10 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
 
 	mod_memcg_state(memcg, MEMCG_SOCK, nr_pages);
 
-	if (try_charge(memcg, gfp_mask, nr_pages) == 0)
+	if (try_charge(memcg, gfp_mask, nr_pages, false) == 0)
 		return true;
 
-	try_charge(memcg, gfp_mask|__GFP_NOFAIL, nr_pages);
+	try_charge(memcg, gfp_mask|__GFP_NOFAIL, nr_pages, false);
 	return false;
 }
 
-- 
2.21.0


  reply	other threads:[~2019-09-06 10:54 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-01 20:43 [BUG] Early OOM and kernel NULL pointer dereference in 4.19.69 Thomas Lindroth
2019-09-02  7:16 ` Michal Hocko
2019-09-02  7:27   ` Michal Hocko
2019-09-02 19:34   ` Thomas Lindroth
2019-09-03  7:41     ` Michal Hocko
2019-09-03 12:01       ` Thomas Lindroth
2019-09-03 12:05       ` Andrey Ryabinin
2019-09-03 12:22         ` Michal Hocko
2019-09-03 18:20           ` Thomas Lindroth
2019-09-03 19:36             ` Michal Hocko
     [not found] ` <666dbcde-1b8a-9e2d-7d1f-48a117c78ae1@I-love.SAKURA.ne.jp>
2019-09-03 18:25   ` Thomas Lindroth
     [not found]     ` <4d0eda9a-319d-1a7d-1eed-71da90902367@i-love.sakura.ne.jp>
2019-09-04 11:25       ` [BUG] kmemcg limit defeats __GFP_NOFAIL allocation Michal Hocko
     [not found]         ` <4d87d770-c110-224f-6c0c-d6fada90417d@i-love.sakura.ne.jp>
2019-09-04 11:59           ` Michal Hocko
     [not found]         ` <0056063b-46ff-0ebd-ff0d-c96a1f9ae6b1@i-love.sakura.ne.jp>
2019-09-04 14:29           ` Michal Hocko
     [not found]             ` <405ce28b-c0b4-780c-c883-42d741ec60e0@i-love.sakura.ne.jp>
2019-09-05 23:11               ` Thomas Lindroth
2019-09-06  7:27                 ` Michal Hocko
2019-09-06 10:54                   ` Andrey Ryabinin [this message]
2019-09-06 11:29                     ` Michal Hocko
2019-09-06 12:56 ` [PATCH] memcg, kmem: do not fail __GFP_NOFAIL charges Michal Hocko
2019-09-06 18:24   ` Shakeel Butt
2019-09-06 18:24     ` Shakeel Butt
2019-09-09 11:22     ` Michal Hocko
2019-09-11 12:00       ` Michal Hocko
2019-09-11 14:37         ` Andrew Morton
2019-09-11 15:16           ` Michal Hocko
2019-09-13  2:46             ` Shakeel Butt
2019-09-13  2:46               ` Shakeel Butt
2019-09-24 10:53   ` Michal Hocko
2019-09-24 23:06     ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=940ea5a4-b580-34f8-2e5f-0bd2534b7426@virtuozzo.com \
    --to=aryabinin@virtuozzo.com \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=penguin-kernel@i-love.sakura.ne.jp \
    --cc=thomas.lindroth@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.