From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
To: mhocko@kernel.org
Cc: rientjes@google.com, akpm@linux-foundation.org,
torvalds@linux-foundation.org, hannes@cmpxchg.org,
mgorman@suse.de, hillf.zj@alibaba-inc.com,
kamezawa.hiroyu@jp.fujitsu.com, linux-mm@kvack.org
Subject: Re: How to handle infinite too_many_isolated() loop (for OOM detection rework v4) ?
Date: Thu, 11 Feb 2016 16:06:27 +0900 [thread overview]
Message-ID: <201602111606.IIG81724.QOLFJOSMtFHOFV@I-love.SAKURA.ne.jp> (raw)
In-Reply-To: <201602092349.ACG81273.OSVtMJQHLOFOFF@I-love.SAKURA.ne.jp>
Tetsuo Handa wrote:
> The result is that, we have no TIF_MEMDIE tasks but nobody is calling
> out_of_memory(). That is, OOM livelock without invoking the OOM killer.
> They seem to be waiting at congestion_wait() from too_many_isolated()
> loop called from shrink_inactive_list() because nobody can make forward
> progress. I think we must not wait forever at too_many_isolated() loop.
I used the delta patch shown below to confirm that they are actually
waiting at congestion_wait() in the too_many_isolated() loop called from
shrink_inactive_list().
---------- delta patch (for linux-next-20160209 + kmallocwd) ----------
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0aeff29..e954ac3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1400,6 +1400,7 @@ struct memalloc_info {
* bit 0: Will be reported as OOM victim.
* bit 1: Will be reported as dying task.
* bit 2: Will be reported as stalling task.
+ * bit 3: Will be reported as exiting task.
*/
u8 type;
/* Started time in jiffies as of valid == 1. */
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 745a78c..d804d7e 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -17,6 +17,7 @@
#include <linux/sysctl.h>
#include <linux/utsname.h>
#include <linux/oom.h> /* out_of_memory_count */
+#include <linux/console.h> /* console_trylock()/console_unlock() */
#include <trace/events/sched.h>
/*
@@ -153,10 +154,24 @@ static bool is_stalling_task(const struct task_struct *task,
return time_after_eq(expire, memalloc.start);
}
+static bool wait_console_flushed(unsigned int max_wait)
+{
+ while (1) {
+ if (console_trylock()) {
+ console_unlock();
+ return true;
+ }
+ if (max_wait--)
+ schedule_timeout_interruptible(1);
+ else
+ return false;
+ }
+}
+
/* Check for memory allocation stalls. */
static void check_memalloc_stalling_tasks(unsigned long timeout)
{
- char buf[128];
+ char buf[256];
struct task_struct *g, *p;
unsigned long now;
unsigned long expire;
@@ -205,8 +220,9 @@ static void check_memalloc_stalling_tasks(unsigned long timeout)
preempt_enable();
if (!stalling_tasks)
return;
+ wait_console_flushed(10);
/* Report stalling tasks, dying and victim tasks. */
- pr_warn("MemAlloc-Info: %u stalling task, %u dying task, %u exiting task, %u victim task. oom_count=%u\n",
+ pr_warn("MemAlloc-Info: stalling=%u dying=%u exiting=%u, victim=%u oom_count=%u\n",
stalling_tasks, sigkill_pending, exiting_tasks, memdie_pending, out_of_memory_count);
cond_resched();
preempt_disable();
@@ -240,15 +256,14 @@ static void check_memalloc_stalling_tasks(unsigned long timeout)
* Victim tasks get pending SIGKILL removed before arriving at
* do_exit(). Therefore, print " exiting" instead for " dying".
*/
- pr_warn("MemAlloc: %s(%u)%s%s%s%s%s\n", p->comm, p->pid,
- (type & 4) ? buf : "",
+ pr_warn("MemAlloc: %s(%u) flags=0x%x%s%s%s%s%s\n", p->comm,
+ p->pid, p->flags, (type & 4) ? buf : "",
(p->state & TASK_UNINTERRUPTIBLE) ?
" uninterruptible" : "",
(type & 8) ? " exiting" : "",
(type & 2) ? " dying" : "",
(type & 1) ? " victim" : "");
sched_show_task(p);
- debug_show_held_locks(p);
/*
* Since there could be thousands of tasks to report, we always
* sleep and try to flush printk() buffer after each report, in
@@ -262,7 +277,8 @@ static void check_memalloc_stalling_tasks(unsigned long timeout)
get_task_struct(p);
rcu_read_unlock();
preempt_enable();
- schedule_timeout_interruptible(1);
+ cond_resched();
+ wait_console_flushed(1);
preempt_disable();
rcu_read_lock();
can_cont = pid_alive(g) && pid_alive(p);
@@ -278,6 +294,8 @@ static void check_memalloc_stalling_tasks(unsigned long timeout)
show_mem(0);
/* Show workqueue state. */
show_workqueue_state();
+ /* Show lock information. (SysRq-d) */
+ debug_show_all_locks();
}
#endif /* CONFIG_DETECT_MEMALLOC_STALL_TASK */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 18b3767..0d94523 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1576,6 +1576,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
int file = is_file_lru(lru);
struct zone *zone = lruvec_zone(lruvec);
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
+ unsigned char counter = 0;
while (unlikely(too_many_isolated(zone, file, sc))) {
congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -1583,6 +1584,18 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
/* We are about to die and free our memory. Return now. */
if (fatal_signal_pending(current))
return SWAP_CLUSTER_MAX;
+ if (!++counter) {
+ if (file)
+ printk(KERN_WARNING "zone=%s NR_INACTIVE_FILE=%lu NR_ISOLATED_FILE=%lu\n",
+ zone->name,
+ zone_page_state(zone, NR_INACTIVE_FILE),
+ zone_page_state(zone, NR_ISOLATED_FILE));
+ else
+ printk(KERN_WARNING "zone=%s NR_INACTIVE_ANON=%lu NR_ISOLATED_ANON=%lu\n",
+ zone->name,
+ zone_page_state(zone, NR_INACTIVE_ANON),
+ zone_page_state(zone, NR_ISOLATED_ANON));
+ }
}
lru_add_drain();
---------- delta patch (for linux-next-20160209 + kmallocwd) ----------
Complete log is at http://I-love.SAKURA.ne.jp/tmp/serial-20160211.txt.xz .
---------- console log ----------
[ 101.471027] MemAlloc-Info: stalling=46 dying=2 exiting=0, victim=0 oom_count=182
[ 117.187128] zone=DMA NR_INACTIVE_FILE=4 NR_ISOLATED_FILE=19
[ 121.199151] MemAlloc-Info: stalling=50 dying=2 exiting=0, victim=0 oom_count=182
[ 123.777398] zone=DMA NR_INACTIVE_FILE=4 NR_ISOLATED_FILE=19
[ 141.184386] MemAlloc-Info: stalling=50 dying=2 exiting=0, victim=0 oom_count=182
[ 142.944292] zone=DMA NR_INACTIVE_FILE=4 NR_ISOLATED_FILE=19
[ 161.188356] MemAlloc-Info: stalling=51 dying=2 exiting=0, victim=0 oom_count=182
[ 163.541083] zone=DMA NR_INACTIVE_FILE=4 NR_ISOLATED_FILE=19
[ 181.211690] MemAlloc-Info: stalling=51 dying=2 exiting=0, victim=0 oom_count=182
[ 189.423559] zone=DMA NR_INACTIVE_FILE=4 NR_ISOLATED_FILE=19
[ 201.404914] MemAlloc-Info: stalling=51 dying=2 exiting=0, victim=0 oom_count=182
[ 204.456970] zone=DMA NR_INACTIVE_FILE=4 NR_ISOLATED_FILE=19
[ 213.753982] MemAlloc-Info: stalling=53 dying=2 exiting=0, victim=0 oom_count=182
[ 215.117586] zone=DMA NR_INACTIVE_FILE=4 NR_ISOLATED_FILE=19
---------- console log ----------
The zone that causes this silent hang-up is not DMA32 but DMA. Nobody except
kswapd can escape this too_many_isolated() loop, because isolated > inactive is
always true (NR_ISOLATED_FILE=19 versus NR_INACTIVE_FILE=4 in the log above).
Unless kswapd does something that makes isolated > inactive false, we will
silently hang up. And I think kswapd did nothing for this zone.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
next prev parent reply other threads:[~2016-02-11 7:06 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-02-09 14:49 How to handle infinite too_many_isolated() loop (for OOM detection rework v4) ? Tetsuo Handa
2016-02-11 7:06 ` Tetsuo Handa [this message]
2016-02-11 11:45 ` Tetsuo Handa
2016-02-11 22:59 ` Dave Chinner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=201602111606.IIG81724.QOLFJOSMtFHOFV@I-love.SAKURA.ne.jp \
--to=penguin-kernel@i-love.sakura.ne.jp \
--cc=akpm@linux-foundation.org \
--cc=hannes@cmpxchg.org \
--cc=hillf.zj@alibaba-inc.com \
--cc=kamezawa.hiroyu@jp.fujitsu.com \
--cc=linux-mm@kvack.org \
--cc=mgorman@suse.de \
--cc=mhocko@kernel.org \
--cc=rientjes@google.com \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).