From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
To: mhocko@kernel.org
Cc: rientjes@google.com, oleg@redhat.com,
torvalds@linux-foundation.org, kwalker@redhat.com, cl@linux.com,
akpm@linux-foundation.org, hannes@cmpxchg.org,
vdavydov@parallels.com, linux-mm@kvack.org,
linux-kernel@vger.kernel.org, skozina@redhat.com
Subject: Silent hang up caused by pages being not scanned?
Date: Tue, 13 Oct 2015 00:25:53 +0900 [thread overview]
Message-ID: <201510130025.EJF21331.FFOQJtVOMLFHSO@I-love.SAKURA.ne.jp> (raw)
In-Reply-To: <201510121543.EJF21858.LtJFHOOOSQVMFF@I-love.SAKURA.ne.jp>
Tetsuo Handa wrote:
> Uptime between 101 and 300 is a silent hang up (i.e. no OOM killer messages,
> no SIGKILL pending tasks, no TIF_MEMDIE tasks) which I solved using SysRq-f
> at uptime = 289. I don't know the reason of this silent hang up, but the
> memory unzapping kernel thread will not help because there is no OOM victim.
>
> ----------
> [ 101.438951] MemAlloc-Info: 10 stalling task, 0 dying task, 0 victim task.
> (...snipped...)
> [ 111.817922] MemAlloc-Info: 12 stalling task, 0 dying task, 0 victim task.
> (...snipped...)
> [ 122.281828] MemAlloc-Info: 13 stalling task, 0 dying task, 0 victim task.
> (...snipped...)
> [ 132.793724] MemAlloc-Info: 14 stalling task, 0 dying task, 0 victim task.
> (...snipped...)
> [ 143.336154] MemAlloc-Info: 16 stalling task, 0 dying task, 0 victim task.
> (...snipped...)
> [ 289.343187] sysrq: SysRq : Manual OOM execution
> (...snipped...)
> [ 292.065650] MemAlloc-Info: 16 stalling task, 0 dying task, 0 victim task.
> (...snipped...)
> [ 302.590736] kworker/3:2 invoked oom-killer: gfp_mask=0x24000c0, order=-1, oom_score_adj=0
> (...snipped...)
> [ 302.690047] MemAlloc-Info: 4 stalling task, 0 dying task, 0 victim task.
> ----------
I examined this hang up using an additional debug printk() patch, and
observed that when this silent hang up occurs, zone_reclaimable() called from
shrink_zones() called from a __GFP_FS memory allocation request keeps returning
true forever. Since the __GFP_FS memory allocation request can never call
out_of_memory() due to did_some_progress > 0, the system will silently hang up
with 100% CPU usage.
----------
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0473eec..fda0bb5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2821,6 +2821,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
}
#endif /* CONFIG_COMPACTION */
+pid_t dump_target_pid;
+
/* Perform direct synchronous page reclaim */
static int
__perform_reclaim(gfp_t gfp_mask, unsigned int order,
@@ -2847,6 +2849,9 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
cond_resched();
+ if (dump_target_pid == current->pid)
+ printk(KERN_INFO "__perform_reclaim returned %u at line %u\n",
+ progress, __LINE__);
return progress;
}
@@ -3007,6 +3012,7 @@ static int malloc_watchdog(void *unused)
unsigned int memdie_pending;
unsigned int stalling_tasks;
u8 index;
+ pid_t pid;
not_stalling: /* Healty case. */
/*
@@ -3025,12 +3031,16 @@ static int malloc_watchdog(void *unused)
* and stop_memalloc_timer() within timeout duration.
*/
if (likely(!memalloc_counter[index]))
+ {
+ dump_target_pid = 0;
goto not_stalling;
+ }
maybe_stalling: /* Maybe something is wrong. Let's check. */
/* First, report whether there are SIGKILL tasks and/or OOM victims. */
sigkill_pending = 0;
memdie_pending = 0;
stalling_tasks = 0;
+ pid = 0;
preempt_disable();
rcu_read_lock();
for_each_process_thread(g, p) {
@@ -3062,8 +3072,11 @@ static int malloc_watchdog(void *unused)
(fatal_signal_pending(p) ? "-dying" : ""),
p->comm, p->pid, m->gfp, m->order, spent);
show_stack(p, NULL);
+ if (!pid && (m->gfp & __GFP_FS))
+ pid = p->pid;
}
spin_unlock(&memalloc_list_lock);
+ dump_target_pid = -pid;
/* Wait until next timeout duration. */
schedule_timeout_interruptible(timeout);
if (memalloc_counter[index])
@@ -3155,6 +3168,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
goto nopage;
retry:
+ if (dump_target_pid == -current->pid)
+ dump_target_pid = -dump_target_pid;
+
if (gfp_mask & __GFP_KSWAPD_RECLAIM)
wake_all_kswapds(order, ac);
@@ -3280,6 +3296,11 @@ retry:
goto noretry;
/* Keep reclaiming pages as long as there is reasonable progress */
+ if (dump_target_pid == current->pid) {
+ printk(KERN_INFO "did_some_progress=%lu at line %u\n",
+ did_some_progress, __LINE__);
+ dump_target_pid = 0;
+ }
pages_reclaimed += did_some_progress;
if ((did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER) ||
((gfp_mask & __GFP_REPEAT) && pages_reclaimed < (1 << order))) {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 27d580b..cb0c22e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2527,6 +2527,8 @@ static inline bool compaction_ready(struct zone *zone, int order)
return watermark_ok;
}
+extern pid_t dump_target_pid;
+
/*
* This is the direct reclaim path, for page-allocating processes. We only
* try to reclaim pages from zones which will satisfy the caller's allocation
@@ -2619,16 +2621,41 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
sc->nr_reclaimed += nr_soft_reclaimed;
sc->nr_scanned += nr_soft_scanned;
if (nr_soft_reclaimed)
+ {
+ if (dump_target_pid == current->pid)
+ printk(KERN_INFO "nr_soft_reclaimed=%lu at line %u\n",
+ nr_soft_reclaimed, __LINE__);
reclaimable = true;
+ }
/* need some check for avoid more shrink_zone() */
}
if (shrink_zone(zone, sc, zone_idx(zone) == classzone_idx))
+ {
+ if (dump_target_pid == current->pid)
+ printk(KERN_INFO "shrink_zone returned 1 at line %u\n",
+ __LINE__);
reclaimable = true;
+ }
if (global_reclaim(sc) &&
!reclaimable && zone_reclaimable(zone))
+ {
+ if (dump_target_pid == current->pid) {
+ printk(KERN_INFO "zone_reclaimable returned 1 at line %u\n",
+ __LINE__);
+ printk(KERN_INFO "(ACTIVE_FILE=%lu+INACTIVE_FILE=%lu",
+ zone_page_state(zone, NR_ACTIVE_FILE),
+ zone_page_state(zone, NR_INACTIVE_FILE));
+ if (get_nr_swap_pages() > 0)
+ printk(KERN_CONT "+ACTIVE_ANON=%lu+INACTIVE_ANON=%lu",
+ zone_page_state(zone, NR_ACTIVE_ANON),
+ zone_page_state(zone, NR_INACTIVE_ANON));
+ printk(KERN_CONT ") * 6 > PAGES_SCANNED=%lu\n",
+ zone_page_state(zone, NR_PAGES_SCANNED));
+ }
reclaimable = true;
+ }
}
/*
@@ -2674,6 +2701,9 @@ retry:
sc->priority);
sc->nr_scanned = 0;
zones_reclaimable = shrink_zones(zonelist, sc);
+ if (dump_target_pid == current->pid)
+ printk(KERN_INFO "shrink_zones returned %u at line %u\n",
+ zones_reclaimable, __LINE__);
total_scanned += sc->nr_scanned;
if (sc->nr_reclaimed >= sc->nr_to_reclaim)
@@ -2707,11 +2737,21 @@ retry:
delayacct_freepages_end();
if (sc->nr_reclaimed)
+ {
+ if (dump_target_pid == current->pid)
+ printk(KERN_INFO "sc->nr_reclaimed=%lu at line %u\n",
+ sc->nr_reclaimed, __LINE__);
return sc->nr_reclaimed;
+ }
/* Aborted reclaim to try compaction? don't OOM, then */
if (sc->compaction_ready)
+ {
+ if (dump_target_pid == current->pid)
+ printk(KERN_INFO "sc->compaction_ready=%u at line %u\n",
+ sc->compaction_ready, __LINE__);
return 1;
+ }
/* Untapped cgroup reserves? Don't OOM, retry. */
if (!sc->may_thrash) {
@@ -2720,6 +2760,9 @@ retry:
goto retry;
}
+ if (dump_target_pid == current->pid)
+ printk(KERN_INFO "zones_reclaimable=%u at line %u\n",
+ zones_reclaimable, __LINE__);
/* Any of the zones still reclaimable? Don't OOM. */
if (zones_reclaimable)
return 1;
@@ -2875,7 +2918,12 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
* point.
*/
if (throttle_direct_reclaim(gfp_mask, zonelist, nodemask))
+ {
+ if (dump_target_pid == current->pid)
+ printk(KERN_INFO "throttle_direct_reclaim returned 1 at line %u\n",
+ __LINE__);
return 1;
+ }
trace_mm_vmscan_direct_reclaim_begin(order,
sc.may_writepage,
@@ -2885,6 +2933,9 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
trace_mm_vmscan_direct_reclaim_end(nr_reclaimed);
+ if (dump_target_pid == current->pid)
+ printk(KERN_INFO "do_try_to_free_pages returned %lu at line %u\n",
+ nr_reclaimed, __LINE__);
return nr_reclaimed;
}
----------
What is strange is that the values printed by this debug printk() patch did
not change as time went by. Thus, I think that this is not a problem of lack of
CPU time for scanning pages. I suspect that there is a bug which results in
nobody scanning pages.
----------
[ 66.821450] zone_reclaimable returned 1 at line 2646
[ 66.823020] (ACTIVE_FILE=26+INACTIVE_FILE=10) * 6 > PAGES_SCANNED=32
[ 66.824935] shrink_zones returned 1 at line 2706
[ 66.826392] zones_reclaimable=1 at line 2765
[ 66.827865] do_try_to_free_pages returned 1 at line 2938
[ 67.102322] __perform_reclaim returned 1 at line 2854
[ 67.103968] did_some_progress=1 at line 3301
(...snipped...)
[ 281.439977] zone_reclaimable returned 1 at line 2646
[ 281.439977] (ACTIVE_FILE=26+INACTIVE_FILE=10) * 6 > PAGES_SCANNED=32
[ 281.439978] shrink_zones returned 1 at line 2706
[ 281.439978] zones_reclaimable=1 at line 2765
[ 281.439979] do_try_to_free_pages returned 1 at line 2938
[ 281.439979] __perform_reclaim returned 1 at line 2854
[ 281.439980] did_some_progress=1 at line 3301
----------
Complete log is at http://I-love.SAKURA.ne.jp/tmp/serial-20151013.txt.xz
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2015-10-12 15:26 UTC|newest]
Thread overview: 110+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-09-17 17:59 [PATCH] mm/oom_kill.c: don't kill TASK_UNINTERRUPTIBLE tasks Kyle Walker
2015-09-17 19:22 ` Oleg Nesterov
2015-09-18 15:41 ` Christoph Lameter
2015-09-18 16:24 ` Oleg Nesterov
2015-09-18 16:39 ` Tetsuo Handa
2015-09-18 16:54 ` Oleg Nesterov
2015-09-18 17:00 ` Christoph Lameter
2015-09-18 19:07 ` Oleg Nesterov
2015-09-18 19:19 ` Christoph Lameter
2015-09-18 21:28 ` Kyle Walker
2015-09-18 22:07 ` Christoph Lameter
2015-09-19 8:32 ` Michal Hocko
2015-09-19 14:33 ` Tetsuo Handa
2015-09-19 15:51 ` Michal Hocko
2015-09-21 23:33 ` David Rientjes
2015-09-22 5:33 ` Tetsuo Handa
2015-09-22 23:32 ` David Rientjes
2015-09-23 12:03 ` Kyle Walker
2015-09-24 11:50 ` Tetsuo Handa
2015-09-19 14:44 ` Oleg Nesterov
2015-09-21 23:27 ` David Rientjes
2015-09-19 8:25 ` Michal Hocko
2015-09-19 8:22 ` Michal Hocko
2015-09-21 23:08 ` David Rientjes
2015-09-19 15:03 ` can't oom-kill zap the victim's memory? Oleg Nesterov
2015-09-19 15:10 ` Oleg Nesterov
2015-09-19 15:58 ` Michal Hocko
2015-09-20 13:16 ` Oleg Nesterov
2015-09-19 22:24 ` Linus Torvalds
2015-09-19 22:54 ` Raymond Jennings
2015-09-19 23:00 ` Raymond Jennings
2015-09-19 23:13 ` Linus Torvalds
2015-09-20 9:33 ` Michal Hocko
2015-09-20 13:06 ` Oleg Nesterov
2015-09-20 12:56 ` Oleg Nesterov
2015-09-20 18:05 ` Linus Torvalds
2015-09-20 18:21 ` Raymond Jennings
2015-09-20 18:23 ` Raymond Jennings
2015-09-20 19:07 ` Raymond Jennings
2015-09-21 13:57 ` Oleg Nesterov
2015-09-21 13:44 ` Oleg Nesterov
2015-09-21 14:24 ` Michal Hocko
2015-09-21 15:32 ` Oleg Nesterov
2015-09-21 16:12 ` Michal Hocko
2015-09-22 16:06 ` Oleg Nesterov
2015-09-22 23:04 ` David Rientjes
2015-09-23 20:59 ` Michal Hocko
2015-09-24 21:15 ` David Rientjes
2015-09-25 9:35 ` Michal Hocko
2015-09-25 16:14 ` Tetsuo Handa
2015-09-28 16:18 ` Tetsuo Handa
2015-09-28 22:28 ` David Rientjes
2015-10-02 12:36 ` Michal Hocko
2015-10-02 19:01 ` Linus Torvalds
2015-10-05 14:44 ` Michal Hocko
2015-10-07 5:16 ` Vlastimil Babka
2015-10-07 10:43 ` Tetsuo Handa
2015-10-08 9:40 ` Vlastimil Babka
2015-10-06 7:55 ` Eric W. Biederman
2015-10-06 8:49 ` Linus Torvalds
2015-10-06 8:55 ` Linus Torvalds
2015-10-06 14:52 ` Eric W. Biederman
2015-10-03 6:02 ` Can't we use timeout based OOM warning/killing? Tetsuo Handa
2015-10-06 14:51 ` Tetsuo Handa
2015-10-12 6:43 ` Tetsuo Handa
2015-10-12 15:25 ` Tetsuo Handa [this message]
2015-10-12 21:23 ` Silent hang up caused by pages being not scanned? Linus Torvalds
2015-10-13 12:21 ` Tetsuo Handa
2015-10-13 16:37 ` Linus Torvalds
2015-10-14 12:21 ` Tetsuo Handa
2015-10-15 13:14 ` Michal Hocko
2015-10-16 15:57 ` Michal Hocko
2015-10-16 18:34 ` Linus Torvalds
2015-10-16 18:49 ` Tetsuo Handa
2015-10-19 12:57 ` Michal Hocko
2015-10-19 12:53 ` Michal Hocko
2015-10-13 13:32 ` Michal Hocko
2015-10-13 16:19 ` Tetsuo Handa
2015-10-14 13:22 ` Michal Hocko
2015-10-14 14:38 ` Tetsuo Handa
2015-10-14 14:59 ` Michal Hocko
2015-10-14 15:06 ` Tetsuo Handa
2015-10-26 11:44 ` Newbie's question: memory allocation when reclaiming memory Tetsuo Handa
2015-11-05 8:46 ` Vlastimil Babka
2015-10-06 15:25 ` Can't we use timeout based OOM warning/killing? Linus Torvalds
2015-10-08 15:33 ` Tetsuo Handa
2015-10-10 12:50 ` Tetsuo Handa
2015-09-28 22:24 ` can't oom-kill zap the victim's memory? David Rientjes
2015-09-29 7:57 ` Tetsuo Handa
2015-09-29 22:56 ` David Rientjes
2015-09-30 4:25 ` Tetsuo Handa
2015-09-30 10:21 ` Tetsuo Handa
2015-09-30 21:11 ` David Rientjes
2015-10-01 12:13 ` Tetsuo Handa
2015-10-01 14:48 ` Michal Hocko
2015-10-02 13:06 ` Tetsuo Handa
2015-10-06 18:45 ` Oleg Nesterov
2015-10-07 11:03 ` Tetsuo Handa
2015-10-07 12:00 ` Oleg Nesterov
2015-10-08 14:04 ` Michal Hocko
2015-10-08 14:01 ` Michal Hocko
2015-09-21 16:51 ` Tetsuo Handa
2015-09-22 12:43 ` Oleg Nesterov
2015-09-22 14:30 ` Tetsuo Handa
2015-09-22 14:45 ` Oleg Nesterov
2015-09-21 23:42 ` David Rientjes
2015-09-21 16:55 ` Linus Torvalds
2015-09-20 14:50 ` Tetsuo Handa
2015-09-20 14:55 ` Oleg Nesterov
2015-10-14 8:03 Silent hang up caused by pages being not scanned? Hillf Danton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=201510130025.EJF21331.FFOQJtVOMLFHSO@I-love.SAKURA.ne.jp \
--to=penguin-kernel@i-love.sakura.ne.jp \
--cc=akpm@linux-foundation.org \
--cc=cl@linux.com \
--cc=hannes@cmpxchg.org \
--cc=kwalker@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@kernel.org \
--cc=oleg@redhat.com \
--cc=rientjes@google.com \
--cc=skozina@redhat.com \
--cc=torvalds@linux-foundation.org \
--cc=vdavydov@parallels.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).