* [PATCH 1/2] mm: serialize OOM kill operations
From: Dave Peterson @ 2006-04-26  0:01 UTC
  To: linux-mm; +Cc: linux-kernel, riel, akpm

The patch below modifies the behavior of the OOM killer so that only
one OOM kill operation can be in progress at a time.  When running a
test program that eats lots of memory, I was observing behavior where
the OOM killer gets impatient and shoots one or more system daemons
in addition to the program that is eating lots of memory.  This fixes
the problematic behavior.

Signed-off-by: David S. Peterson <dsp@llnl.gov>
---
This patch applies to kernel 2.6.17-rc2-git7.

Index: git7-oom/include/linux/sched.h
===================================================================
--- git7-oom.orig/include/linux/sched.h	2006-04-25 16:19:40.000000000 -0700
+++ git7-oom/include/linux/sched.h	2006-04-25 16:21:48.000000000 -0700
@@ -350,6 +350,8 @@ struct mm_struct {
 	/* aio bits */
 	rwlock_t		ioctx_list_lock;
 	struct kioctx		*ioctx_list;
+
+	int oom_notify;
 };
 
 struct sighand_struct {
Index: git7-oom/include/linux/swap.h
===================================================================
--- git7-oom.orig/include/linux/swap.h	2006-04-25 16:18:06.000000000 -0700
+++ git7-oom/include/linux/swap.h	2006-04-25 16:21:48.000000000 -0700
@@ -147,6 +147,7 @@ struct swap_list_t {
 #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages)
 
 /* linux/mm/oom_kill.c */
+extern void oom_kill_finish(void);
 extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
 
 /* linux/mm/memory.c */
Index: git7-oom/kernel/fork.c
===================================================================
--- git7-oom.orig/kernel/fork.c	2006-04-25 16:19:40.000000000 -0700
+++ git7-oom/kernel/fork.c	2006-04-25 16:21:48.000000000 -0700
@@ -328,6 +328,7 @@ static struct mm_struct * mm_init(struct
 	mm->ioctx_list = NULL;
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	mm->cached_hole_size = ~0UL;
+	mm->oom_notify = 0;
 
 	if (likely(!mm_alloc_pgd(mm))) {
 		mm->def_flags = 0;
@@ -379,6 +380,10 @@ void mmput(struct mm_struct *mm)
 			spin_unlock(&mmlist_lock);
 		}
 		put_swap_token(mm);
+
+		if (unlikely(mm->oom_notify))
+			oom_kill_finish();
+
 		mmdrop(mm);
 	}
 }
Index: git7-oom/mm/oom_kill.c
===================================================================
--- git7-oom.orig/mm/oom_kill.c	2006-04-25 16:19:40.000000000 -0700
+++ git7-oom/mm/oom_kill.c	2006-04-25 16:21:48.000000000 -0700
@@ -21,9 +21,34 @@
 #include <linux/timex.h>
 #include <linux/jiffies.h>
 #include <linux/cpuset.h>
+#include <asm/bitops.h>
 
 /* #define DEBUG */
 
+volatile unsigned long oom_kill_in_progress = 0;
+
+/*
+ * Attempt to start an OOM kill operation.  Return 0 on success, or 1 if an
+ * OOM kill is already in progress.
+ */
+static inline int oom_kill_start(void)
+{
+	return test_and_set_bit(0, &oom_kill_in_progress);
+}
+
+/*
+ * Terminate an OOM kill operation.
+ *
+ * When the OOM killer chooses a victim, it sets the oom_notify flag of the
+ * victim's mm_struct.  mmput() then calls this function when the mm_users
+ * count has reached 0 and the contents of the mm_struct have been cleaned
+ * out.
+ */
+void oom_kill_finish(void)
+{
+	clear_bit(0, &oom_kill_in_progress);
+}
+
 /**
  * oom_badness - calculate a numeric value for how bad this task has been
  * @p: task struct of which task we should calculate
@@ -259,27 +284,31 @@ static int oom_kill_task(task_t *p, cons
 	struct mm_struct *mm;
 	task_t * g, * q;
 
+	task_lock(p);
 	mm = p->mm;
 
-	/* WARNING: mm may not be dereferenced since we did not obtain its
-	 * value from get_task_mm(p).  This is OK since all we need to do is
-	 * compare mm to q->mm below.
+	if (mm == NULL || mm == &init_mm) {
+		task_unlock(p);
+		return 1;
+	}
+
+	mm->oom_notify = 1;
+	task_unlock(p);
+
+	/* WARNING: mm may no longer be dereferenced since we did not obtain
+	 * its value from get_task_mm(p).  This is OK since all we need to do
+	 * is compare mm to q->mm below.
 	 *
 	 * Furthermore, even if mm contains a non-NULL value, p->mm may
-	 * change to NULL at any time since we do not hold task_lock(p).
+	 * change to NULL at any time since we no longer hold task_lock(p).
 	 * However, this is of no concern to us.
 	 */
 
-	if (mm == NULL || mm == &init_mm)
-		return 1;
-
-	__oom_kill_task(p, message);
 	/*
-	 * kill all processes that share the ->mm (i.e. all threads),
-	 * but are in a different thread group
+	 * kill all processes that share the ->mm (i.e. all threads)
 	 */
 	do_each_thread(g, q)
-		if (q->mm == mm && q->tgid != p->tgid)
+		if (q->mm == mm)
 			__oom_kill_task(q, message);
 	while_each_thread(g, q);
 
@@ -317,13 +346,14 @@ void out_of_memory(struct zonelist *zone
 {
 	task_t *p;
 	unsigned long points = 0;
+	int cancel = 0;
 
-	if (printk_ratelimit()) {
-		printk("oom-killer: gfp_mask=0x%x, order=%d\n",
-			gfp_mask, order);
-		dump_stack();
-		show_mem();
-	}
+	if (oom_kill_start())
+		return;  /* OOM kill already in progress */
+
+	printk("oom-killer: gfp_mask=0x%x, order=%d\n", gfp_mask, order);
+	dump_stack();
+	show_mem();
 
 	cpuset_lock();
 	read_lock(&tasklist_lock);
@@ -334,12 +364,12 @@ void out_of_memory(struct zonelist *zone
 	 */
 	switch (constrained_alloc(zonelist, gfp_mask)) {
 	case CONSTRAINT_MEMORY_POLICY:
-		oom_kill_process(current, points,
+		cancel = oom_kill_process(current, points,
 				"No available memory (MPOL_BIND)");
 		break;
 
 	case CONSTRAINT_CPUSET:
-		oom_kill_process(current, points,
+		cancel = oom_kill_process(current, points,
 				"No available memory in cpuset");
 		break;
 
@@ -351,8 +381,10 @@ retry:
 		 */
 		p = select_bad_process(&points);
 
-		if (PTR_ERR(p) == -1UL)
+		if (PTR_ERR(p) == -1UL) {
+			cancel = 1;
 			goto out;
+		}
 
 		/* Found nothing?!?! Either we hang forever, or we panic. */
 		if (!p) {
@@ -371,6 +403,9 @@ out:
 	read_unlock(&tasklist_lock);
 	cpuset_unlock();
 
+	if (cancel)
+		oom_kill_finish();  /* cancel OOM kill operation */
+
 	/*
 	 * Give "p" a good chance of killing itself before we
 	 * retry to allocate memory unless "p" is current

* Re: [PATCH 1/2] mm: serialize OOM kill operations
From: Nick Piggin @ 2006-04-26  4:10 UTC
  To: Dave Peterson; +Cc: linux-mm, linux-kernel, riel, akpm

Dave Peterson wrote:

>The patch below modifies the behavior of the OOM killer so that only
>one OOM kill operation can be in progress at a time.  When running a
>test program that eats lots of memory, I was observing behavior where
>the OOM killer gets impatient and shoots one or more system daemons
>in addition to the program that is eating lots of memory.  This fixes
>the problematic behavior.
>

Hi Dave,

Firstly why not use a semaphore and trylocks instead of your homebrew
lock?

Second, can you arrange it without using the extra field in mm_struct
and operation in the mmput fast path?

--

Send instant messages to your online friends http://au.messenger.yahoo.com 

* Re: [PATCH 1/2] mm: serialize OOM kill operations
From: Andi Kleen @ 2006-04-26  4:46 UTC
  To: Dave Peterson; +Cc: linux-kernel, riel, akpm

Dave Peterson <dsp@llnl.gov> writes:
> 
> Index: git7-oom/include/linux/sched.h
> ===================================================================
> --- git7-oom.orig/include/linux/sched.h	2006-04-25 16:19:40.000000000 -0700
> +++ git7-oom/include/linux/sched.h	2006-04-25 16:21:48.000000000 -0700
> @@ -350,6 +350,8 @@ struct mm_struct {
>  	/* aio bits */
>  	rwlock_t		ioctx_list_lock;
>  	struct kioctx		*ioctx_list;
> +
> +	int oom_notify;

That wastes 4 bytes for a single bit. Please put a flag into some of the existing 
flag bitmaps instead.

-Andi

* Re: [PATCH 1/2] mm: serialize OOM kill operations
From: Dave Peterson @ 2006-04-26 17:14 UTC
  To: Nick Piggin; +Cc: linux-mm, linux-kernel, riel, akpm

On Tuesday 25 April 2006 21:10, Nick Piggin wrote:
> Firstly why not use a semaphore and trylocks instead of your homebrew
> lock?

Are you suggesting something like this?

	spinlock_t oom_kill_lock = SPIN_LOCK_UNLOCKED;

	static inline int oom_kill_start(void)
	{
		return !spin_trylock(&oom_kill_lock);
	}

	static inline void oom_kill_finish()
	{
		spin_unlock(&oom_kill_lock);
	}

If you prefer the above implementation, I can rework the patch as
above.

> Second, can you arrange it without using the extra field in mm_struct
> and operation in the mmput fast path?

I'm open to suggestions on other ways of implementing this.  However I
think the performance impact of the proposed implementation should be
minuscule.  The code added to mmput() executes only when the reference
count has reached 0, not on every decrement of the reference count.
Once the reference count has reached 0, the common-case behavior is
still only testing a boolean flag followed by a not-taken branch.  The
use of unlikely() should help the compiler and CPU branch prediction
hardware minimize overhead in the typical case where oom_kill_finish()
is not called.

* Re: [PATCH 1/2] mm: serialize OOM kill operations
From: Dave Peterson @ 2006-04-26 17:15 UTC
  To: Andi Kleen; +Cc: linux-kernel, riel, akpm

On Tuesday 25 April 2006 21:46, Andi Kleen wrote:
> Dave Peterson <dsp@llnl.gov> writes:
> > Index: git7-oom/include/linux/sched.h
> > ===================================================================
> > --- git7-oom.orig/include/linux/sched.h	2006-04-25 16:19:40.000000000
> > -0700 +++ git7-oom/include/linux/sched.h	2006-04-25 16:21:48.000000000
> > -0700 @@ -350,6 +350,8 @@ struct mm_struct {
> >  	/* aio bits */
> >  	rwlock_t		ioctx_list_lock;
> >  	struct kioctx		*ioctx_list;
> > +
> > +	int oom_notify;
>
> That wastes 4 bytes for a single bit. Please put a flag into some of the
> existing flag bitmaps instead.

Any suggestions for a location?

* Re: [PATCH 1/2] mm: serialize OOM kill operations
From: Andi Kleen @ 2006-04-26 18:05 UTC
  To: Dave Peterson; +Cc: linux-kernel, riel, akpm


> Any suggestions for a location?

->flags

-Andi


* Re: [PATCH 1/2] mm: serialize OOM kill operations
From: Dave Peterson @ 2006-04-26 18:29 UTC
  To: Andi Kleen; +Cc: linux-kernel, riel, akpm

On Wednesday 26 April 2006 11:05, Andi Kleen wrote:
> > Any suggestions for a location?
>
> ->flags

What struct are you looking in?  I don't see a ->flags member in
mm_struct.  Perhaps I should create a ->flags member and use one of
the bits?

* Re: [PATCH 1/2] mm: serialize OOM kill operations
From: Nick Piggin @ 2006-04-27  3:33 UTC
  To: Dave Peterson; +Cc: linux-mm, linux-kernel, riel, akpm

Dave Peterson wrote:

>On Tuesday 25 April 2006 21:10, Nick Piggin wrote:
>
>>Firstly why not use a semaphore and trylocks instead of your homebrew
>>lock?
>>
>
>Are you suggesting something like this?
>
>	spinlock_t oom_kill_lock = SPIN_LOCK_UNLOCKED;
>
>	static inline int oom_kill_start(void)
>	{
>		return !spin_trylock(&oom_kill_lock);
>	}
>
>	static inline void oom_kill_finish()
>	{
>		spin_unlock(&oom_kill_lock);
>	}
>
>If you prefer the above implementation, I can rework the patch as
>above.
>

I think you need a semaphore? Either way, drop the trivial wrappers.

>
>>Second, can you arrange it without using the extra field in mm_struct
>>and operation in the mmput fast path?
>>
>
>I'm open to suggestions on other ways of implementing this.  However I
>think the performance impact of the proposed implementation should be
>minuscule.  The code added to mmput() executes only when the reference
>count has reached 0, not on every decrement of the reference count.
>Once the reference count has reached 0, the common-case behavior is
>still only testing a boolean flag followed by a not-taken branch.  The
>use of unlikely() should help the compiler and CPU branch prediction
>hardware minimize overhead in the typical case where oom_kill_finish()
>is not called.
>

Mainly the cost of increasing cacheline footprint. I think someone
suggested using a flag bit somewhere... that'd be preferable.

--

Send instant messages to your online friends http://au.messenger.yahoo.com 

* Re: [PATCH 1/2] mm: serialize OOM kill operations
From: Dave Peterson @ 2006-04-27 16:56 UTC
  To: Nick Piggin; +Cc: linux-mm, linux-kernel, riel, akpm

On Wednesday 26 April 2006 20:33, Nick Piggin wrote:
> Dave Peterson wrote:
> >If you prefer the above implementation, I can rework the patch as
> >above.
>
> I think you need a semaphore?

In this particular case, I think a semaphore is unnecessary because
we just want out_of_memory() to return to its caller if an OOM kill
is already in progress (as opposed to waiting in out_of_memory() and
then starting a new OOM kill operation).  What I want to avoid is
the following type of behavior (sketched in code below):

    1.  Two processes (A and B) call out_of_memory() at roughly the
        same time and race for oom_kill_lock.  Let's say A wins and B
        is delayed.

    2.  Process A shoots some process and releases oom_kill_lock.

    3.  Process B now acquires oom_kill_lock and shoots another
        process.  However this isn't really what we want to do if
        the OOM kill done by A above freed enough memory to resolve
        the OOM condition.
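
In code, the difference is roughly this (an illustrative sketch only,
reusing the oom_kill_lock name from the sketch above):

	/* trylock version: B backs off and simply retries its allocation */
	if (!spin_trylock(&oom_kill_lock))
		return;

	/* blocking version: B waits for A, then shoots a second victim
	 * even though A's kill may already have freed enough memory */
	spin_lock(&oom_kill_lock);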

> Either way, drop the trivial wrappers.

Ok, I'll drop the wrappers.

> >>Second, can you arrange it without using the extra field in mm_struct
> >>and operation in the mmput fast path?
> >
> >I'm open to suggestions on other ways of implementing this.  However I
> >think the performance impact of the proposed implementation should be
> >minuscule.  The code added to mmput() executes only when the reference
> >count has reached 0, not on every decrement of the reference count.
> >Once the reference count has reached 0, the common-case behavior is
> >still only testing a boolean flag followed by a not-taken branch.  The
> >use of unlikely() should help the compiler and CPU branch prediction
> >hardware minimize overhead in the typical case where oom_kill_finish()
> >is not called.
>
> Mainly the cost of increasing cacheline footprint. I think someone
> suggested using a flag bit somewhere... that'd be preferable.

Ok, I'll add a ->flags member to mm_struct and just use one bit for
the oom_notify value.  Then if other users of mm_struct need flag
bits for other things in the future they can all share ->flags.  I'll
rework my patches and repost shortly...
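
Roughly like this (an untested sketch; the member and bit names below
are placeholders, not final):

	/* in struct mm_struct */
	unsigned long flags;

	#define MM_FLAG_OOM_NOTIFY	0	/* bit number in mm->flags */

	/* in oom_kill_task(), under task_lock(p): mark the victim's mm */
	set_bit(MM_FLAG_OOM_NOTIFY, &mm->flags);

	/* in mmput(), once mm_users reaches 0 */
	if (unlikely(test_bit(MM_FLAG_OOM_NOTIFY, &mm->flags)))
		oom_kill_finish();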

Dave

* Re: [PATCH 1/2] mm: serialize OOM kill operations
From: Nick Piggin @ 2006-04-28  5:00 UTC
  To: Dave Peterson; +Cc: linux-mm, linux-kernel, riel, akpm

Dave Peterson wrote:
> On Wednesday 26 April 2006 20:33, Nick Piggin wrote:
> 
>>Dave Peterson wrote:
>>
>>>If you prefer the above implementation, I can rework the patch as
>>>above.
>>
>>I think you need a semaphore?
> 
> 
> In this particular case, I think a semaphore is unnecessary because
> we just want out_of_memory() to return to its caller if an OOM kill
> is already in progress (as opposed to waiting in out_of_memory() and
> then starting a new OOM kill operation).  What I want to avoid is the

When you are holding the spinlock, you can't schedule and the lock
really should be released by the same process that took it. Are you
OK with that?
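
For what you describe, a semaphore avoids both problems (a rough,
untested sketch; oom_sem is just a placeholder name):

	static DECLARE_MUTEX(oom_sem);	/* semaphore with a count of 1 */

	/* in out_of_memory(): */
	if (down_trylock(&oom_sem))
		return;	/* an OOM kill is already in progress */

	/* later, in mmput() -- legitimately from a different task: */
	up(&oom_sem);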

>>
>>Mainly the cost of increasing cacheline footprint. I think someone
>>suggested using a flag bit somewhere... that'd be preferable.
> 
> 
> Ok, I'll add a ->flags member to mm_struct and just use one bit for
> the oom_notify value.  Then if other users of mm_struct need flag
> bits for other things in the future they can all share ->flags.  I'll
> rework my patches and repost shortly...

mm_struct already has what you want -- dumpable:2 -- if you just put
your bit in an adjacent bitfield, you'll be right.
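
i.e. something like (sketch only):

	/* in struct mm_struct */
	unsigned dumpable:2;
	unsigned oom_notify:1;	/* packs into the same word as dumpable */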

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

* Re: [PATCH 1/2] mm: serialize OOM kill operations
From: Nick Piggin @ 2006-04-28  5:05 UTC
  To: Nick Piggin; +Cc: Dave Peterson, linux-mm, linux-kernel, riel, akpm

Nick Piggin wrote:

> mm_struct already has what you want -- dumpable:2 -- if you just put
> your bit in an adjacent bitfield, you'll be right.

I should have read all my email first. Ignore me ;)

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

Thread overview: 11 messages

2006-04-26  0:01 [PATCH 1/2] mm: serialize OOM kill operations Dave Peterson
2006-04-26  4:10 ` Nick Piggin
2006-04-26 17:14   ` Dave Peterson
2006-04-27  3:33     ` Nick Piggin
2006-04-27 16:56       ` Dave Peterson
2006-04-28  5:00         ` Nick Piggin
2006-04-28  5:05           ` Nick Piggin
2006-04-26  4:46 ` Andi Kleen
2006-04-26 17:15   ` Dave Peterson
2006-04-26 18:05     ` Andi Kleen
2006-04-26 18:29       ` Dave Peterson