linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
To: mhocko@kernel.org
Cc: rientjes@google.com, oleg@redhat.com,
	torvalds@linux-foundation.org, kwalker@redhat.com, cl@linux.com,
	akpm@linux-foundation.org, hannes@cmpxchg.org,
	vdavydov@parallels.com, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org, skozina@redhat.com
Subject: Silent hang up caused by pages being not scanned?
Date: Tue, 13 Oct 2015 00:25:53 +0900	[thread overview]
Message-ID: <201510130025.EJF21331.FFOQJtVOMLFHSO@I-love.SAKURA.ne.jp> (raw)
In-Reply-To: <201510121543.EJF21858.LtJFHOOOSQVMFF@I-love.SAKURA.ne.jp>

Tetsuo Handa wrote:
> Uptime between 101 and 300 is a silent hang up (i.e. no OOM killer messages,
> no SIGKILL pending tasks, no TIF_MEMDIE tasks) which I solved using SysRq-f
> at uptime = 289. I don't know the reason of this silent hang up, but the
> memory unzapping kernel thread will not help because there is no OOM victim.
> 
> ----------
> [  101.438951] MemAlloc-Info: 10 stalling task, 0 dying task, 0 victim task.
> (...snipped...)
> [  111.817922] MemAlloc-Info: 12 stalling task, 0 dying task, 0 victim task.
> (...snipped...)
> [  122.281828] MemAlloc-Info: 13 stalling task, 0 dying task, 0 victim task.
> (...snipped...)
> [  132.793724] MemAlloc-Info: 14 stalling task, 0 dying task, 0 victim task.
> (...snipped...)
> [  143.336154] MemAlloc-Info: 16 stalling task, 0 dying task, 0 victim task.
> (...snipped...)
> [  289.343187] sysrq: SysRq : Manual OOM execution
> (...snipped...)
> [  292.065650] MemAlloc-Info: 16 stalling task, 0 dying task, 0 victim task.
> (...snipped...)
> [  302.590736] kworker/3:2 invoked oom-killer: gfp_mask=0x24000c0, order=-1, oom_score_adj=0
> (...snipped...)
> [  302.690047] MemAlloc-Info: 4 stalling task, 0 dying task, 0 victim task.
> ----------

I examined this hang up using an additional debug printk() patch. And it was
observed that when this silent hang up occurs, zone_reclaimable() called from
shrink_zones() called from a __GFP_FS memory allocation request is returning
true forever. Since the __GFP_FS memory allocation request can never call
out_of_memory() due to did_some_progress > 0, the system will silently hang up
with 100% CPU usage.

----------
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0473eec..fda0bb5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2821,6 +2821,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 }
 #endif /* CONFIG_COMPACTION */
 
+pid_t dump_target_pid;
+
 /* Perform direct synchronous page reclaim */
 static int
 __perform_reclaim(gfp_t gfp_mask, unsigned int order,
@@ -2847,6 +2849,9 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
 
 	cond_resched();
 
+	if (dump_target_pid == current->pid)
+		printk(KERN_INFO "__perform_reclaim returned %u at line %u\n",
+		       progress, __LINE__);
 	return progress;
 }
 
@@ -3007,6 +3012,7 @@ static int malloc_watchdog(void *unused)
 	unsigned int memdie_pending;
 	unsigned int stalling_tasks;
 	u8 index;
+	pid_t pid;
 
  not_stalling: /* Healty case. */
 	/*
@@ -3025,12 +3031,16 @@ static int malloc_watchdog(void *unused)
 	 * and stop_memalloc_timer() within timeout duration.
 	 */
 	if (likely(!memalloc_counter[index]))
+	{
+		dump_target_pid = 0;
 		goto not_stalling;
+	}
  maybe_stalling: /* Maybe something is wrong. Let's check. */
 	/* First, report whether there are SIGKILL tasks and/or OOM victims. */
 	sigkill_pending = 0;
 	memdie_pending = 0;
 	stalling_tasks = 0;
+	pid = 0;
 	preempt_disable();
 	rcu_read_lock();
 	for_each_process_thread(g, p) {
@@ -3062,8 +3072,11 @@ static int malloc_watchdog(void *unused)
 			(fatal_signal_pending(p) ? "-dying" : ""),
 			p->comm, p->pid, m->gfp, m->order, spent);
 		show_stack(p, NULL);
+		if (!pid && (m->gfp & __GFP_FS))
+			pid = p->pid;
 	}
 	spin_unlock(&memalloc_list_lock);
+	dump_target_pid = -pid;
 	/* Wait until next timeout duration. */
 	schedule_timeout_interruptible(timeout);
 	if (memalloc_counter[index])
@@ -3155,6 +3168,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		goto nopage;
 
 retry:
+	if (dump_target_pid == -current->pid)
+		dump_target_pid = -dump_target_pid;
+
 	if (gfp_mask & __GFP_KSWAPD_RECLAIM)
 		wake_all_kswapds(order, ac);
 
@@ -3280,6 +3296,11 @@ retry:
 		goto noretry;
 
 	/* Keep reclaiming pages as long as there is reasonable progress */
+	if (dump_target_pid == current->pid) {
+		printk(KERN_INFO "did_some_progress=%lu at line %u\n",
+		       did_some_progress, __LINE__);
+		dump_target_pid = 0;
+	}
 	pages_reclaimed += did_some_progress;
 	if ((did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER) ||
 	    ((gfp_mask & __GFP_REPEAT) && pages_reclaimed < (1 << order))) {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 27d580b..cb0c22e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2527,6 +2527,8 @@ static inline bool compaction_ready(struct zone *zone, int order)
 	return watermark_ok;
 }
 
+extern pid_t dump_target_pid;
+
 /*
  * This is the direct reclaim path, for page-allocating processes.  We only
  * try to reclaim pages from zones which will satisfy the caller's allocation
@@ -2619,16 +2621,41 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 			sc->nr_reclaimed += nr_soft_reclaimed;
 			sc->nr_scanned += nr_soft_scanned;
 			if (nr_soft_reclaimed)
+			{
+				if (dump_target_pid == current->pid)
+					printk(KERN_INFO "nr_soft_reclaimed=%lu at line %u\n",
+					       nr_soft_reclaimed, __LINE__);
 				reclaimable = true;
+			}
 			/* need some check for avoid more shrink_zone() */
 		}
 
 		if (shrink_zone(zone, sc, zone_idx(zone) == classzone_idx))
+		{
+			if (dump_target_pid == current->pid)
+				printk(KERN_INFO "shrink_zone returned 1 at line %u\n",
+				       __LINE__);
 			reclaimable = true;
+		}
 
 		if (global_reclaim(sc) &&
 		    !reclaimable && zone_reclaimable(zone))
+		{
+			if (dump_target_pid == current->pid) {
+				printk(KERN_INFO "zone_reclaimable returned 1 at line %u\n",
+				       __LINE__);
+				printk(KERN_INFO "(ACTIVE_FILE=%lu+INACTIVE_FILE=%lu",
+				       zone_page_state(zone, NR_ACTIVE_FILE),
+				       zone_page_state(zone, NR_INACTIVE_FILE));
+				if (get_nr_swap_pages() > 0)
+					printk(KERN_CONT "+ACTIVE_ANON=%lu+INACTIVE_ANON=%lu",
+					       zone_page_state(zone, NR_ACTIVE_ANON),
+					       zone_page_state(zone, NR_INACTIVE_ANON));
+				printk(KERN_CONT ") * 6 > PAGES_SCANNED=%lu\n",
+				       zone_page_state(zone, NR_PAGES_SCANNED));
+			}
 			reclaimable = true;
+		}
 	}
 
 	/*
@@ -2674,6 +2701,9 @@ retry:
 				sc->priority);
 		sc->nr_scanned = 0;
 		zones_reclaimable = shrink_zones(zonelist, sc);
+		if (dump_target_pid == current->pid)
+			printk(KERN_INFO "shrink_zones returned %u at line %u\n",
+			       zones_reclaimable, __LINE__);
 
 		total_scanned += sc->nr_scanned;
 		if (sc->nr_reclaimed >= sc->nr_to_reclaim)
@@ -2707,11 +2737,21 @@ retry:
 	delayacct_freepages_end();
 
 	if (sc->nr_reclaimed)
+	{
+		if (dump_target_pid == current->pid)
+			printk(KERN_INFO "sc->nr_reclaimed=%lu at line %u\n",
+			       sc->nr_reclaimed, __LINE__);
 		return sc->nr_reclaimed;
+	}
 
 	/* Aborted reclaim to try compaction? don't OOM, then */
 	if (sc->compaction_ready)
+	{
+		if (dump_target_pid == current->pid)
+			printk(KERN_INFO "sc->compaction_ready=%u at line %u\n",
+			       sc->compaction_ready, __LINE__);
 		return 1;
+	}
 
 	/* Untapped cgroup reserves?  Don't OOM, retry. */
 	if (!sc->may_thrash) {
@@ -2720,6 +2760,9 @@ retry:
 		goto retry;
 	}
 
+	if (dump_target_pid == current->pid)
+		printk(KERN_INFO "zones_reclaimable=%u at line %u\n",
+		       zones_reclaimable, __LINE__);
 	/* Any of the zones still reclaimable?  Don't OOM. */
 	if (zones_reclaimable)
 		return 1;
@@ -2875,7 +2918,12 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 	 * point.
 	 */
 	if (throttle_direct_reclaim(gfp_mask, zonelist, nodemask))
+	{
+		if (dump_target_pid == current->pid)
+			printk(KERN_INFO "throttle_direct_reclaim returned 1 at line %u\n",
+			       __LINE__);
 		return 1;
+	}
 
 	trace_mm_vmscan_direct_reclaim_begin(order,
 				sc.may_writepage,
@@ -2885,6 +2933,9 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 
 	trace_mm_vmscan_direct_reclaim_end(nr_reclaimed);
 
+	if (dump_target_pid == current->pid)
+		printk(KERN_INFO "do_try_to_free_pages returned %lu at line %u\n",
+		       nr_reclaimed, __LINE__);
 	return nr_reclaimed;
 }
 
----------

What is strange is that the values printed by this debug printk() patch did
not change as time went by. Thus, I think that this is not a problem of lack of
CPU time for scanning pages. I suspect that there is a bug that nobody is
scanning pages.

----------
[   66.821450] zone_reclaimable returned 1 at line 2646
[   66.823020] (ACTIVE_FILE=26+INACTIVE_FILE=10) * 6 > PAGES_SCANNED=32
[   66.824935] shrink_zones returned 1 at line 2706
[   66.826392] zones_reclaimable=1 at line 2765
[   66.827865] do_try_to_free_pages returned 1 at line 2938
[   67.102322] __perform_reclaim returned 1 at line 2854
[   67.103968] did_some_progress=1 at line 3301
(...snipped...)
[  281.439977] zone_reclaimable returned 1 at line 2646
[  281.439977] (ACTIVE_FILE=26+INACTIVE_FILE=10) * 6 > PAGES_SCANNED=32
[  281.439978] shrink_zones returned 1 at line 2706
[  281.439978] zones_reclaimable=1 at line 2765
[  281.439979] do_try_to_free_pages returned 1 at line 2938
[  281.439979] __perform_reclaim returned 1 at line 2854
[  281.439980] did_some_progress=1 at line 3301
----------

Complete log is at http://I-love.SAKURA.ne.jp/tmp/serial-20151013.txt.xz

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2015-10-12 15:26 UTC|newest]

Thread overview: 110+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-09-17 17:59 [PATCH] mm/oom_kill.c: don't kill TASK_UNINTERRUPTIBLE tasks Kyle Walker
2015-09-17 19:22 ` Oleg Nesterov
2015-09-18 15:41   ` Christoph Lameter
2015-09-18 16:24     ` Oleg Nesterov
2015-09-18 16:39       ` Tetsuo Handa
2015-09-18 16:54         ` Oleg Nesterov
2015-09-18 17:00       ` Christoph Lameter
2015-09-18 19:07         ` Oleg Nesterov
2015-09-18 19:19           ` Christoph Lameter
2015-09-18 21:28             ` Kyle Walker
2015-09-18 22:07               ` Christoph Lameter
2015-09-19  8:32         ` Michal Hocko
2015-09-19 14:33           ` Tetsuo Handa
2015-09-19 15:51             ` Michal Hocko
2015-09-21 23:33             ` David Rientjes
2015-09-22  5:33               ` Tetsuo Handa
2015-09-22 23:32                 ` David Rientjes
2015-09-23 12:03                   ` Kyle Walker
2015-09-24 11:50                     ` Tetsuo Handa
2015-09-19 14:44           ` Oleg Nesterov
2015-09-21 23:27         ` David Rientjes
2015-09-19  8:25     ` Michal Hocko
2015-09-19  8:22 ` Michal Hocko
2015-09-21 23:08   ` David Rientjes
2015-09-19 15:03 ` can't oom-kill zap the victim's memory? Oleg Nesterov
2015-09-19 15:10   ` Oleg Nesterov
2015-09-19 15:58   ` Michal Hocko
2015-09-20 13:16     ` Oleg Nesterov
2015-09-19 22:24   ` Linus Torvalds
2015-09-19 22:54     ` Raymond Jennings
2015-09-19 23:00     ` Raymond Jennings
2015-09-19 23:13       ` Linus Torvalds
2015-09-20  9:33     ` Michal Hocko
2015-09-20 13:06       ` Oleg Nesterov
2015-09-20 12:56     ` Oleg Nesterov
2015-09-20 18:05       ` Linus Torvalds
2015-09-20 18:21         ` Raymond Jennings
2015-09-20 18:23         ` Raymond Jennings
2015-09-20 19:07         ` Raymond Jennings
2015-09-21 13:57           ` Oleg Nesterov
2015-09-21 13:44         ` Oleg Nesterov
2015-09-21 14:24           ` Michal Hocko
2015-09-21 15:32             ` Oleg Nesterov
2015-09-21 16:12               ` Michal Hocko
2015-09-22 16:06                 ` Oleg Nesterov
2015-09-22 23:04                   ` David Rientjes
2015-09-23 20:59                   ` Michal Hocko
2015-09-24 21:15                     ` David Rientjes
2015-09-25  9:35                       ` Michal Hocko
2015-09-25 16:14                         ` Tetsuo Handa
2015-09-28 16:18                           ` Tetsuo Handa
2015-09-28 22:28                             ` David Rientjes
2015-10-02 12:36                             ` Michal Hocko
2015-10-02 19:01                               ` Linus Torvalds
2015-10-05 14:44                                 ` Michal Hocko
2015-10-07  5:16                                   ` Vlastimil Babka
2015-10-07 10:43                                     ` Tetsuo Handa
2015-10-08  9:40                                       ` Vlastimil Babka
2015-10-06  7:55                                 ` Eric W. Biederman
2015-10-06  8:49                                   ` Linus Torvalds
2015-10-06  8:55                                     ` Linus Torvalds
2015-10-06 14:52                                       ` Eric W. Biederman
2015-10-03  6:02                               ` Can't we use timeout based OOM warning/killing? Tetsuo Handa
2015-10-06 14:51                                 ` Tetsuo Handa
2015-10-12  6:43                                   ` Tetsuo Handa
2015-10-12 15:25                                     ` Tetsuo Handa [this message]
2015-10-12 21:23                                       ` Silent hang up caused by pages being not scanned? Linus Torvalds
2015-10-13 12:21                                         ` Tetsuo Handa
2015-10-13 16:37                                           ` Linus Torvalds
2015-10-14 12:21                                             ` Tetsuo Handa
2015-10-15 13:14                                             ` Michal Hocko
2015-10-16 15:57                                               ` Michal Hocko
2015-10-16 18:34                                                 ` Linus Torvalds
2015-10-16 18:49                                                   ` Tetsuo Handa
2015-10-19 12:57                                                     ` Michal Hocko
2015-10-19 12:53                                                   ` Michal Hocko
2015-10-13 13:32                                       ` Michal Hocko
2015-10-13 16:19                                         ` Tetsuo Handa
2015-10-14 13:22                                           ` Michal Hocko
2015-10-14 14:38                                             ` Tetsuo Handa
2015-10-14 14:59                                               ` Michal Hocko
2015-10-14 15:06                                                 ` Tetsuo Handa
2015-10-26 11:44                                     ` Newbie's question: memory allocation when reclaiming memory Tetsuo Handa
2015-11-05  8:46                                       ` Vlastimil Babka
2015-10-06 15:25                                 ` Can't we use timeout based OOM warning/killing? Linus Torvalds
2015-10-08 15:33                                   ` Tetsuo Handa
2015-10-10 12:50                                 ` Tetsuo Handa
2015-09-28 22:24                         ` can't oom-kill zap the victim's memory? David Rientjes
2015-09-29  7:57                           ` Tetsuo Handa
2015-09-29 22:56                             ` David Rientjes
2015-09-30  4:25                               ` Tetsuo Handa
2015-09-30 10:21                                 ` Tetsuo Handa
2015-09-30 21:11                                 ` David Rientjes
2015-10-01 12:13                                   ` Tetsuo Handa
2015-10-01 14:48                           ` Michal Hocko
2015-10-02 13:06                             ` Tetsuo Handa
2015-10-06 18:45                     ` Oleg Nesterov
2015-10-07 11:03                       ` Tetsuo Handa
2015-10-07 12:00                         ` Oleg Nesterov
2015-10-08 14:04                           ` Michal Hocko
2015-10-08 14:01                       ` Michal Hocko
2015-09-21 16:51               ` Tetsuo Handa
2015-09-22 12:43                 ` Oleg Nesterov
2015-09-22 14:30                   ` Tetsuo Handa
2015-09-22 14:45                     ` Oleg Nesterov
2015-09-21 23:42               ` David Rientjes
2015-09-21 16:55           ` Linus Torvalds
2015-09-20 14:50   ` Tetsuo Handa
2015-09-20 14:55     ` Oleg Nesterov
2015-10-14  8:03 Silent hang up caused by pages being not scanned? Hillf Danton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=201510130025.EJF21331.FFOQJtVOMLFHSO@I-love.SAKURA.ne.jp \
    --to=penguin-kernel@i-love.sakura.ne.jp \
    --cc=akpm@linux-foundation.org \
    --cc=cl@linux.com \
    --cc=hannes@cmpxchg.org \
    --cc=kwalker@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=oleg@redhat.com \
    --cc=rientjes@google.com \
    --cc=skozina@redhat.com \
    --cc=torvalds@linux-foundation.org \
    --cc=vdavydov@parallels.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).