linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: Marcelo Tosatti <marcelo.tosatti@cyclades.com>
Cc: Andrew Morton <akpm@osdl.org>,
	Martin MOKREJ__ <mmokrejs@ribosome.natur.cuni.cz>,
	piggin@cyberone.com.au, chris@tebibyte.org, andrea@novell.com,
	LKML <linux-kernel@vger.kernel.org>,
	linux-mm@kvack.org, Rik van Riel <riel@redhat.com>
Subject: Re: [PATCH] fix spurious OOM kills
Date: Fri, 19 Nov 2004 17:17:22 +0100	[thread overview]
Message-ID: <1100881042.2635.140.camel@thomas> (raw)
In-Reply-To: <20041119080946.GA30845@logos.cnet>

[-- Attachment #1: Type: text/plain, Size: 682 bytes --]

On Fri, 2004-11-19 at 06:09 -0200, Marcelo Tosatti wrote:
> As Thomas Gleixner has investigated, the OOM killer selection is problematic.
> 
> When testing your ignore-page-referenced patch it first killed the memory hog
> then shortly afterwards the shell I was running it on.
> 
> You've seen Thomas emails, he has nice description there.

I had another go on 2.6.10-rc2-mm2. 

The reentrancy blocking and the additional test of freepages in
out_of_memory() make all the ugly time and counter checks superfluous.

I think they were necessary to make the spurious kill triggering less
obvious. :)

Can somebody else check with their own test cases whether the behaviour
is correct?

tglx



[-- Attachment #2: 2.6.10-rc2-mm2-oom.diff --]
[-- Type: text/x-patch, Size: 4954 bytes --]

diff -urN 2.6.10-rc2-mm2.orig/mm/oom_kill.c 2.6.10-rc2-mm2/mm/oom_kill.c
--- 2.6.10-rc2-mm2.orig/mm/oom_kill.c	2004-11-19 14:52:16.000000000 +0100
+++ 2.6.10-rc2-mm2/mm/oom_kill.c	2004-11-19 17:12:40.000000000 +0100
@@ -45,8 +45,10 @@
 static unsigned long badness(struct task_struct *p, unsigned long uptime)
 {
 	unsigned long points, cpu_time, run_time, s;
+        struct list_head *tsk;
 
-	if (!p->mm)
+	/* Ignore mm-less tasks and init */
+	if (!p->mm || p->pid == 1)
 		return 0;
 
 	if (p->flags & PF_MEMDIE)
@@ -57,6 +59,19 @@
 	points = p->mm->total_vm;
 
 	/*
+	 * Processes which fork a lot of child processes are likely
+	 * a good choice. We add the vmsize of the children if they
+	 * have their own mm. This prevents forking servers from flooding
+	 * the machine with an endless number of children.
+	 */
+	list_for_each(tsk, &p->children) {
+		struct task_struct *chld;
+		chld = list_entry(tsk, struct task_struct, sibling);
+		if (chld->mm != p->mm && chld->mm)
+			points += chld->mm->total_vm;
+	}
+
+	/*
 	 * CPU time is in tens of seconds and run time is in thousands
          * of seconds. There is no particular reason for this other than
          * that it turned out to work very well in practice.
@@ -176,6 +191,27 @@
 	return mm;
 }
 
+static struct mm_struct *oom_kill_process(task_t *p)
+{
+	struct mm_struct *mm;
+	struct task_struct *g, *q;
+
+	mm = oom_kill_task(p);
+	if (!mm)
+		return NULL;
+	/*
+	 * kill all processes that share the ->mm (i.e. all threads),
+	 * but are in a different thread group
+	 */
+	do_each_thread(g, q)
+		if (q->mm == mm && q->tgid != p->tgid)
+			__oom_kill_task(q);
+
+	while_each_thread(g, q);
+	if (!p->mm)
+		printk(KERN_INFO "Fixed up OOM kill of mm-less task\n");
+	return mm;
+}
 
 /**
  * oom_kill - kill the "best" process when we run out of memory
@@ -188,7 +224,9 @@
 void oom_kill(void)
 {
 	struct mm_struct *mm;
-	struct task_struct *g, *p, *q;
+	struct task_struct *c, *p;
+	struct list_head *tsk;
+	int mmcnt = 0;
 	
 	read_lock(&tasklist_lock);
 retry:
@@ -200,21 +238,25 @@
 		panic("Out of memory and no killable processes...\n");
 	}
 
-	mm = oom_kill_task(p);
-	if (!mm)
-		goto retry;
 	/*
-	 * kill all processes that share the ->mm (i.e. all threads),
-	 * but are in a different thread group
+	 * Kill the child processes first
 	 */
-	do_each_thread(g, q)
-		if (q->mm == mm && q->tgid != p->tgid)
-			__oom_kill_task(q);
-	while_each_thread(g, q);
-	if (!p->mm)
-		printk(KERN_INFO "Fixed up OOM kill of mm-less task\n");
+	list_for_each(tsk, &p->children) {
+		c = list_entry(tsk, struct task_struct, sibling);
+		if (c->mm == p->mm)
+			continue;
+		mm = oom_kill_process(c);
+		if (mm) {
+			mmcnt ++;
+			mmput(mm);
+		}
+	}
+	mm = oom_kill_process(p);
+	if (!mmcnt && !mm)
+		goto retry;
+	if (mm)
+		mmput(mm);
 	read_unlock(&tasklist_lock);
-	mmput(mm);
 	return;
 }
 
@@ -224,59 +266,23 @@
 void out_of_memory(int gfp_mask)
 {
 	/*
-	 * oom_lock protects out_of_memory()'s static variables.
-	 * It's a global lock; this is not performance-critical.
-	 */
-	static DEFINE_SPINLOCK(oom_lock);
-	static unsigned long first, last, count, lastkill;
-	unsigned long now, since;
-
-	spin_lock(&oom_lock);
-	now = jiffies;
-	since = now - last;
-	last = now;
-
-	/*
-	 * If it's been a long time since last failure,
-	 * we're not oom.
-	 */
-	if (since > 5*HZ)
-		goto reset;
-
-	/*
-	 * If we haven't tried for at least one second,
-	 * we're not really oom.
-	 */
-	since = now - first;
-	if (since < HZ)
-		goto out_unlock;
-
-	/*
-	 * If we have gotten only a few failures,
-	 * we're not really oom. 
-	 */
-	if (++count < 10)
-		goto out_unlock;
-
-	/*
-	 * If we just killed a process, wait a while
-	 * to give that task a chance to exit. This
-	 * avoids killing multiple processes needlessly.
-	 */
-	since = now - lastkill;
-	if (since < HZ*5)
-		goto out_unlock;
-
-	/*
-	 * Ok, really out of memory. Kill something.
-	 */
-	lastkill = now;
+ 	 * inprogress protects out_of_memory()'s static variables
+	 * and prevents reentrancy
+  	 */
+ 	static unsigned long inprogress;
+ 	static unsigned int  freepages = 1000000;
+
+ 	if (test_and_set_bit(0, &inprogress))
+ 		return;
+ 	
+ 	/* Check, if memory was freed since the last oom kill */
+ 	if (freepages < nr_free_pages())
+ 		goto out_unlock;
 
 	printk("oom-killer: gfp_mask=0x%x\n", gfp_mask);
 	show_free_areas();
-
-	/* oom_kill() sleeps */
-	spin_unlock(&oom_lock);
+	/* Store free pages  * 2 for the check above */
+	freepages = (nr_free_pages() << 1);
 	oom_kill();
 	/*
 	 * Make kswapd go out of the way, so "p" has a good chance of
@@ -284,17 +290,7 @@
 	 * for more memory.
 	 */
 	yield();
-	spin_lock(&oom_lock);
-
-reset:
-	/*
-	 * We dropped the lock above, so check to be sure the variable
-	 * first only ever increases to prevent false OOM's.
-	 */
-	if (time_after(now, first))
-		first = now;
-	count = 0;
-
+	
 out_unlock:
-	spin_unlock(&oom_lock);
+	clear_bit(0, &inprogress);
 }

  reply	other threads:[~2004-11-19 16:31 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-11-11 11:29 [PATCH] fix spurious OOM kills Marcelo Tosatti
2004-11-11 15:42 ` Andrea Arcangeli
2004-11-11 12:38   ` Marcelo Tosatti
2004-11-11 16:50     ` Andrea Arcangeli
2004-11-11 13:56       ` Marcelo Tosatti
2004-11-11 21:45         ` Andrea Arcangeli
2004-11-11 19:19           ` Marcelo Tosatti
2004-11-11 17:42       ` Martin J. Bligh
2004-11-11 21:50         ` Andrea Arcangeli
2004-11-12 11:13       ` fix for mpol mm corruption on tmpfs Andrea Arcangeli
2004-11-11 21:57 ` [PATCH] fix spurious OOM kills Chris Ross
2004-11-12 16:52   ` Chris Ross
2004-11-12 23:56     ` Nick Piggin
2004-11-13 23:37     ` Andrea Arcangeli
2004-11-14  9:44       ` Marcelo Tosatti
2004-11-14 10:02         ` Marcelo Tosatti
2004-11-14 17:11           ` Andrea Arcangeli
2004-11-14 17:03         ` Andrea Arcangeli
2004-11-14 18:16           ` Martin J. Bligh
2004-11-14 18:27             ` Andrea Arcangeli
2004-11-14 20:21           ` Marcelo Tosatti
2004-11-16 16:30             ` Chris Ross
2004-11-17  9:08               ` Chris Ross
2004-11-17  9:23                 ` Andrew Morton
2004-11-17  6:06                   ` Marcelo Tosatti
2004-11-17  6:08                     ` Marcelo Tosatti
2004-11-17  6:38                       ` Marcelo Tosatti
2004-11-17 11:04                         ` Chris Ross
2004-11-17 10:26                       ` Andrew Morton
2004-11-17 10:50                       ` Chris Ross
2004-11-17  7:09                         ` Marcelo Tosatti
2004-11-17 11:49                           ` Chris Ross
2004-11-17 12:09                           ` Rik van Riel
2004-11-17 13:12                   ` Chris Ross
     [not found]                   ` <419CD8C1.4030506@ribosome.natur.cuni.cz>
2004-11-18 21:16                     ` Andrew Morton
     [not found]                       ` <419D25B5.1060504@ribosome.natur.cuni.cz>
     [not found]                         ` <419D2987.8010305@cyberone.com.au>
2004-11-19  0:03                           ` Martin MOKREJŠ
2004-11-19  0:08                             ` Andrew Morton
2004-11-19  8:09                               ` Marcelo Tosatti
2004-11-19 16:17                                 ` Thomas Gleixner [this message]
     [not found]                               ` <419E821F.7010601@ribosome.natur.cuni.cz>
2004-11-20 10:23                                 ` Thomas Gleixner
2004-11-20 10:45                                   ` Martin MOKREJŠ
2004-11-20 11:29                                   ` Martin MOKREJŠ
2004-11-20 13:29                                     ` Thomas Gleixner
2004-11-20 21:19                                       ` Martin MOKREJŠ
2004-11-21 11:53                                         ` Thomas Gleixner
2004-11-21 12:17                                           ` Martin MOKREJŠ
2004-11-21 13:57                                             ` Thomas Gleixner
2004-11-22 10:55                                               ` Thomas Gleixner
2004-11-23  7:41                                                 ` Martin MOKREJŠ
2004-11-23 10:27                                                   ` Thomas Gleixner
2004-11-24 15:52                                                     ` Martin MOKREJŠ
2004-11-24 16:36                                                       ` Thomas Gleixner
2004-12-14 16:04                                                     ` Martin MOKREJŠ
2004-12-14 17:38                                                       ` Andrea Arcangeli
2004-12-14 23:30                                                         ` Nick Piggin
2004-12-14 23:55                                                           ` Andrea Arcangeli
2004-12-15  0:16                                                             ` Thomas Gleixner
2004-12-15  0:37                                                               ` Andrea Arcangeli
2004-12-15  0:48                                                                 ` Thomas Gleixner
2004-11-21 19:01                   ` Chris Ross
2004-11-22 12:15                     ` Chris Ross
2004-11-22  8:35                       ` Marcelo Tosatti
2004-11-16  8:37           ` Chris Ross
2004-11-17  3:45   ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1100881042.2635.140.camel@thomas \
    --to=tglx@linutronix.de \
    --cc=akpm@osdl.org \
    --cc=andrea@novell.com \
    --cc=chris@tebibyte.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=marcelo.tosatti@cyclades.com \
    --cc=mmokrejs@ribosome.natur.cuni.cz \
    --cc=piggin@cyberone.com.au \
    --cc=riel@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).