LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
To: mhocko@kernel.org, Vladimir Davydov <vdavydov@virtuozzo.com>,
	Oleg Nesterov <oleg@redhat.com>,
	David Rientjes <rientjes@google.com>
Cc: syzbot <syzbot+bab151e82a4e973fa325@syzkaller.appspotmail.com>,
	cgroups@vger.kernel.org, hannes@cmpxchg.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	syzkaller-bugs@googlegroups.com,
	Andrew Morton <akpm@linux-foundation.org>
Subject: Re: WARNING in try_charge
Date: Thu, 9 Aug 2018 22:57:43 +0900
Message-ID: <e2869136-9f59-9ce8-8b9f-f75b157ee31d@I-love.SAKURA.ne.jp> (raw)
In-Reply-To: <0000000000005e979605729c1564@google.com>

From b1f38168f14397c7af9c122cd8207663d96e02ec Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Thu, 9 Aug 2018 22:49:40 +0900
Subject: [PATCH] mm, oom: task_will_free_mem(current) should retry until
 memory reserve fails

Commit 696453e66630ad45 ("mm, oom: task_will_free_mem should skip
oom_reaped tasks") changed to select next OOM victim as soon as
MMF_OOM_SKIP is set. But we don't need to select next OOM victim as
long as ALLOC_OOM allocation can succeed. And syzbot is hitting WARN(1)
caused by this race window [1].

Since memcg OOM case uses forced charge if current thread is killed,
out_of_memory() can return true without selecting next OOM victim.
Therefore, this patch changes task_will_free_mem(current) to ignore
MMF_OOM_SKIP unless ALLOC_OOM allocation failed.

[1] https://syzkaller.appspot.com/bug?id=ea8c7912757d253537375e981b61749b2da69258

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot <syzbot+bab151e82a4e973fa325@syzkaller.appspotmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Vladimir Davydov <vdavydov@virtuozzo.com>
Cc: David Rientjes <rientjes@google.com>
---
 include/linux/oom.h | 3 +++
 mm/oom_kill.c       | 8 ++++----
 mm/page_alloc.c     | 7 +++++--
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/include/linux/oom.h b/include/linux/oom.h
index 69864a5..b5abacd 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -38,6 +38,9 @@ struct oom_control {
 	 */
 	const int order;
 
+	/* Did we already try ALLOC_OOM allocation? i*/
+	const bool reserve_tried;
+
 	/* Used by oom implementation, do not set */
 	unsigned long totalpages;
 	struct task_struct *chosen;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 0e10b86..95453e8 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -782,7 +782,7 @@ static inline bool __task_will_free_mem(struct task_struct *task)
  * Caller has to make sure that task->mm is stable (hold task_lock or
  * it operates on the current).
  */
-static bool task_will_free_mem(struct task_struct *task)
+static bool task_will_free_mem(struct task_struct *task, bool select_new)
 {
 	struct mm_struct *mm = task->mm;
 	struct task_struct *p;
@@ -803,7 +803,7 @@ static bool task_will_free_mem(struct task_struct *task)
 	 * This task has already been drained by the oom reaper so there are
 	 * only small chances it will free some more
 	 */
-	if (test_bit(MMF_OOM_SKIP, &mm->flags))
+	if (test_bit(MMF_OOM_SKIP, &mm->flags) && select_new)
 		return false;
 
 	if (atomic_read(&mm->mm_users) <= 1)
@@ -939,7 +939,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 	 * so it can die quickly
 	 */
 	task_lock(p);
-	if (task_will_free_mem(p)) {
+	if (task_will_free_mem(p, true)) {
 		mark_oom_victim(p);
 		wake_oom_reaper(p);
 		task_unlock(p);
@@ -1069,7 +1069,7 @@ bool out_of_memory(struct oom_control *oc)
 	 * select it.  The goal is to allow it to allocate so that it may
 	 * quickly exit and free its memory.
 	 */
-	if (task_will_free_mem(current)) {
+	if (task_will_free_mem(current, oc->reserve_tried)) {
 		mark_oom_victim(current);
 		wake_oom_reaper(current);
 		return true;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 879b861..03ca29a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3455,7 +3455,7 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
 }
 
 static inline struct page *
-__alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
+__alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, bool reserve_tried,
 	const struct alloc_context *ac, unsigned long *did_some_progress)
 {
 	struct oom_control oc = {
@@ -3464,6 +3464,7 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
 		.memcg = NULL,
 		.gfp_mask = gfp_mask,
 		.order = order,
+		.reserve_tried = reserve_tried,
 	};
 	struct page *page;
 
@@ -4239,7 +4240,9 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
 		goto retry_cpuset;
 
 	/* Reclaim has failed us, start killing things */
-	page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
+	page = __alloc_pages_may_oom(gfp_mask, order, alloc_flags == ALLOC_OOM
+				     || (gfp_mask & __GFP_NOMEMALLOC), ac,
+				     &did_some_progress);
 	if (page)
 		goto got_pg;
 
-- 
1.8.3.1



  parent reply index

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-08-04 13:33 syzbot
2018-08-04 13:45 ` Tetsuo Handa
2018-08-05 11:33   ` Tetsuo Handa
2018-08-05  8:14 ` syzbot
2018-08-06  9:15 ` Michal Hocko
2018-08-06  9:30   ` Dmitry Vyukov
2018-08-06  9:48     ` Michal Hocko
2018-08-06 10:34       ` Dmitry Vyukov
2018-08-06 11:02         ` Michal Hocko
2018-08-06 11:57           ` Dmitry Vyukov
2018-08-06 14:21             ` Michal Hocko
2018-08-06 14:58               ` Dmitry Vyukov
2018-08-06 17:30                 ` Michal Hocko
2018-08-06 17:53                   ` Dmitry Vyukov
2018-08-06 15:07               ` Dmitry Vyukov
2018-08-06 15:31               ` Johannes Weiner
2018-08-06 10:39       ` Dmitry Vyukov
2018-08-06 10:47         ` Tetsuo Handa
2018-08-06 11:09           ` Michal Hocko
2018-08-06 11:27           ` syzbot
2018-08-06 11:32             ` Michal Hocko
2018-08-06 11:58               ` Dmitry Vyukov
2018-08-06 14:41               ` Tetsuo Handa
2018-08-06 14:58                 ` Michal Hocko
2018-08-06 15:12                   ` Tetsuo Handa
2018-08-06 14:54               ` David Howells
2018-08-06 15:04                 ` Tetsuo Handa
2018-08-06 11:00         ` syzbot
2018-08-06 15:32         ` Tetsuo Handa
2018-08-06 15:42           ` syzbot
2018-08-06 16:02             ` Tetsuo Handa
2018-08-06 17:44             ` Michal Hocko
2018-08-06 17:49               ` Dmitry Vyukov
2018-08-06 17:56               ` Michal Hocko
2018-08-06 18:13                 ` Michal Hocko
2018-08-06 18:23                   ` syzbot
2018-08-06 18:55                     ` Michal Hocko
2018-08-06 19:12                       ` syzbot
2018-08-06 19:45                         ` Michal Hocko
2018-08-06 19:46                           ` Michal Hocko
2018-08-07 11:18                       ` Dmitry Vyukov
2018-08-07 11:25                         ` Michal Hocko
2018-08-06 18:39                   ` Michal Hocko
2018-08-06 20:26                 ` Tetsuo Handa
2018-08-06 20:34                   ` Michal Hocko
2018-08-06 20:46                     ` Tetsuo Handa
2018-08-06 20:55                       ` Michal Hocko
2018-08-06 21:50                         ` Tetsuo Handa
2018-08-07 10:19                           ` Tetsuo Handa
2018-08-09 13:57 ` Tetsuo Handa [this message]
2018-08-09 15:07   ` Michal Hocko
2018-08-09 21:05     ` Tetsuo Handa
2018-08-09 15:34   ` Johannes Weiner

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e2869136-9f59-9ce8-8b9f-f75b157ee31d@I-love.SAKURA.ne.jp \
    --to=penguin-kernel@i-love.sakura.ne.jp \
    --cc=akpm@linux-foundation.org \
    --cc=cgroups@vger.kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=oleg@redhat.com \
    --cc=rientjes@google.com \
    --cc=syzbot+bab151e82a4e973fa325@syzkaller.appspotmail.com \
    --cc=syzkaller-bugs@googlegroups.com \
    --cc=vdavydov@virtuozzo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git