* [patch v2] oom: fix oom_score_adj consistency with oom_disable_count
       [not found]   ` <alpine.DEB.2.00.1011011738200.26266@chino.kir.corp.google.com>
@ 2010-11-03  0:41     ` David Rientjes
  2010-11-03 11:23       ` Oleg Nesterov
  0 siblings, 1 reply; 14+ messages in thread
From: David Rientjes @ 2010-11-03  0:41 UTC (permalink / raw)
  To: Andrew Morton
  Cc: KOSAKI Motohiro, KAMEZAWA Hiroyuki, Rik van Riel, Ying Han,
	Oleg Nesterov, linux-mm

p->mm->oom_disable_count tracks how many threads sharing p->mm have an
oom_score_adj value of OOM_SCORE_ADJ_MIN, which disables the oom killer
for that task.  If non-zero, p->mm->oom_disable_count indicates that
killing a task sharing p->mm won't help: the oom-disabled threads cannot
be killed, so the memory cannot be freed.
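
In other words, a non-zero count marks the whole mm as not worth killing
for.  As a purely hypothetical helper (not a quote of mm/oom_kill.c; the
function name is made up for illustration):

	/* sketch only: what a non-zero count means during candidate selection */
	static bool killing_is_pointless(struct task_struct *p)
	{
		/*
		 * Some thread sharing p->mm is oom-disabled, so killing p
		 * (or any other task sharing that mm) cannot free the memory.
		 */
		return p->mm && atomic_read(&p->mm->oom_disable_count);
	}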

oom_score_adj is a per-process value stored in p->signal->oom_score_adj,
which is protected by p->sighand->siglock.  Thus, it's necessary to take
this lock whenever the value is tested.

This patch introduces the necessary locking to ensure oom_score_adj can
be tested and/or changed with consistency.  This isn't the only locking
necessary to work with oom_score_adj: task_lock(p) must also be held or
the mm otherwise pinned in memory to ensure it doesn't change while
siglock is held.  That locking is already in place.

Reported-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: David Rientjes <rientjes@google.com>
---
 v2: cleaned up locking and fixed lockdep warnings

 For 2.6.37-rc-series.

 fs/exec.c     |   10 +++++++---
 kernel/exit.c |    8 ++++++--
 kernel/fork.c |   31 ++++++++++++++++++++++---------
 3 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -741,6 +741,7 @@ static int exec_mmap(struct mm_struct *mm)
 {
 	struct task_struct *tsk;
 	struct mm_struct * old_mm, *active_mm;
+	unsigned long flags;
 
 	/* Notify parent that we're no longer interested in the old VM */
 	tsk = current;
@@ -766,9 +767,12 @@ static int exec_mmap(struct mm_struct *mm)
 	tsk->mm = mm;
 	tsk->active_mm = mm;
 	activate_mm(active_mm, mm);
-	if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
-		atomic_dec(&old_mm->oom_disable_count);
-		atomic_inc(&tsk->mm->oom_disable_count);
+	if (lock_task_sighand(tsk, &flags)) {
+		if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
+			atomic_dec(&old_mm->oom_disable_count);
+			atomic_inc(&tsk->mm->oom_disable_count);
+		}
+		unlock_task_sighand(tsk, &flags);
 	}
 	task_unlock(tsk);
 	arch_pick_mmap_layout(mm);
diff --git a/kernel/exit.c b/kernel/exit.c
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -644,6 +644,7 @@ static void exit_mm(struct task_struct * tsk)
 {
 	struct mm_struct *mm = tsk->mm;
 	struct core_state *core_state;
+	unsigned long flags;
 
 	mm_release(tsk, mm);
 	if (!mm)
@@ -688,8 +689,11 @@ static void exit_mm(struct task_struct * tsk)
 	enter_lazy_tlb(mm, current);
 	/* We don't want this task to be frozen prematurely */
 	clear_freeze_flag(tsk);
-	if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
-		atomic_dec(&mm->oom_disable_count);
+	if (lock_task_sighand(tsk, &flags)) {
+		if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
+			atomic_dec(&mm->oom_disable_count);
+		unlock_task_sighand(tsk, &flags);
+	}
 	task_unlock(tsk);
 	mm_update_next_owner(mm);
 	mmput(mm);
diff --git a/kernel/fork.c b/kernel/fork.c
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -708,6 +708,7 @@ fail_nocontext:
 static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
 {
 	struct mm_struct * mm, *oldmm;
+	unsigned long flags;
 	int retval;
 
 	tsk->min_flt = tsk->maj_flt = 0;
@@ -743,8 +744,11 @@ good_mm:
 	/* Initializing for Swap token stuff */
 	mm->token_priority = 0;
 	mm->last_interval = 0;
-	if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
-		atomic_inc(&mm->oom_disable_count);
+	if (lock_task_sighand(tsk, &flags)) {
+		if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
+			atomic_inc(&mm->oom_disable_count);
+		unlock_task_sighand(tsk, &flags);
+	}
 
 	tsk->mm = mm;
 	tsk->active_mm = mm;
@@ -1306,10 +1310,13 @@ bad_fork_cleanup_namespaces:
 	exit_task_namespaces(p);
 bad_fork_cleanup_mm:
 	if (p->mm) {
-		task_lock(p);
-		if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
-			atomic_dec(&p->mm->oom_disable_count);
-		task_unlock(p);
+		unsigned long flags;
+
+		if (lock_task_sighand(p, &flags)) {
+			if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
+				atomic_dec(&p->mm->oom_disable_count);
+			unlock_task_sighand(p, &flags);
+		}
 		mmput(p->mm);
 	}
 bad_fork_cleanup_signal:
@@ -1700,13 +1707,19 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 		}
 
 		if (new_mm) {
+			unsigned long flags;
+
 			mm = current->mm;
 			active_mm = current->active_mm;
 			current->mm = new_mm;
 			current->active_mm = new_mm;
-			if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
-				atomic_dec(&mm->oom_disable_count);
-				atomic_inc(&new_mm->oom_disable_count);
+			if (lock_task_sighand(current, &flags)) {
+				if (current->signal->oom_score_adj ==
+							OOM_SCORE_ADJ_MIN) {
+					atomic_dec(&mm->oom_disable_count);
+					atomic_inc(&new_mm->oom_disable_count);
+				}
+				unlock_task_sighand(current, &flags);
 			}
 			activate_mm(active_mm, new_mm);
 			new_mm = mm;


* Re: [patch v2] oom: fix oom_score_adj consistency with oom_disable_count
  2010-11-03  0:41     ` [patch v2] oom: fix oom_score_adj consistency with oom_disable_count David Rientjes
@ 2010-11-03 11:23       ` Oleg Nesterov
  2010-11-03 20:28         ` David Rientjes
  2010-11-05 17:41           ` Oleg Nesterov
  0 siblings, 2 replies; 14+ messages in thread
From: Oleg Nesterov @ 2010-11-03 11:23 UTC (permalink / raw)
  To: David Rientjes
  Cc: Andrew Morton, KOSAKI Motohiro, KAMEZAWA Hiroyuki, Rik van Riel,
	Ying Han, linux-mm

Hmm. I did a quick grep trying to understand what ->oom_disable_count
means, and the whole idea behind this counter looks very wrong to me.
This patch doesn't look right either...

IOW. I believe that 3d5992d2ac7dc09aed8ab537cba074589f0f0a52
"oom: add per-mm oom disable count" should be reverted or fixed.

Trivial example. A process with 2 threads, T1 and T2.
->mm->oom_disable_count = 0.

oom_score_adj_write() sets OOM_SCORE_ADJ_MIN and increments
oom_disable_count.

T2 exits, notices OOM_SCORE_ADJ_MIN and decrements ->oom_disable_count
back to zero.

Now, T1 runs with OOM_SCORE_ADJ_MIN, but its ->oom_disable_count == 0.

No?
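
In userspace terms the same skew can be shown with a plain counter (a model
only, not kernel code; the variables just mirror the kernel fields named
above):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define OOM_SCORE_ADJ_MIN	(-1000)

static atomic_int oom_disable_count;	/* stands in for mm->oom_disable_count */
static int oom_score_adj;		/* stands in for signal->oom_score_adj */

/* what oom_score_adj_write() does: one increment for the whole process */
static void set_oom_score_adj_min(void)
{
	oom_score_adj = OOM_SCORE_ADJ_MIN;
	atomic_fetch_add(&oom_disable_count, 1);
}

/* what the exit path does: every exiting thread that sees MIN decrements */
static void *t2_exit_path(void *arg)
{
	if (oom_score_adj == OOM_SCORE_ADJ_MIN)
		atomic_fetch_sub(&oom_disable_count, 1);
	return NULL;
}

int main(void)
{
	pthread_t t2;

	set_oom_score_adj_min();		/* 0 -> 1, once per process */
	pthread_create(&t2, NULL, t2_exit_path, NULL);
	pthread_join(t2, NULL);			/* T2 exits: 1 -> 0 */

	/* prints oom_score_adj=-1000, oom_disable_count=0 */
	printf("T1: oom_score_adj=%d, oom_disable_count=%d\n",
	       oom_score_adj, atomic_load(&oom_disable_count));
	return 0;
}

The increment is per-process while the decrement is per-thread, which is
what lets the counter drop to zero while T1 is still oom-disabled.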


On 11/02, David Rientjes wrote:
>
> p->mm->oom_disable_count tracks how many threads sharing p->mm have an
> oom_score_adj value of OOM_SCORE_ADJ_MIN, which disables the oom killer
> for that task.

Another reason to move ->oom_score_adj into ->mm ;)

> This patch introduces the necessary locking to ensure oom_score_adj can
> be tested and/or changed with consistency.

Oh. We should avoid abusing ->siglock, but OK, we don't have
anything else right now.

David, nothing in this patch needs lock_task_sighand(), ->sighand
can't go away in copy_process/exec_mmap/unshare. You can just do
spin_lock_irq(->siglock). This is minor, but personally I dislike
the fact the code looks as if lock_task_sighand() can fail.

> @@ -741,6 +741,7 @@ static int exec_mmap(struct mm_struct *mm)
>  {
>  	struct task_struct *tsk;
>  	struct mm_struct * old_mm, *active_mm;
> +	unsigned long flags;
>
>  	/* Notify parent that we're no longer interested in the old VM */
>  	tsk = current;
> @@ -766,9 +767,12 @@ static int exec_mmap(struct mm_struct *mm)
>  	tsk->mm = mm;
>  	tsk->active_mm = mm;
>  	activate_mm(active_mm, mm);
> -	if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
> -		atomic_dec(&old_mm->oom_disable_count);
> -		atomic_inc(&tsk->mm->oom_disable_count);
> +	if (lock_task_sighand(tsk, &flags)) {
> +		if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
> +			atomic_dec(&old_mm->oom_disable_count);
> +			atomic_inc(&tsk->mm->oom_disable_count);
> +		}

Not sure this needs additional locking. exec_mmap() is called when
there are no other threads, we can rely on task_lock() we hold.

>  static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
>  {
>  	struct mm_struct * mm, *oldmm;
> +	unsigned long flags;
>  	int retval;
>
>  	tsk->min_flt = tsk->maj_flt = 0;
> @@ -743,8 +744,11 @@ good_mm:
>  	/* Initializing for Swap token stuff */
>  	mm->token_priority = 0;
>  	mm->last_interval = 0;
> -	if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
> -		atomic_inc(&mm->oom_disable_count);
> +	if (lock_task_sighand(tsk, &flags)) {
> +		if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
> +			atomic_inc(&mm->oom_disable_count);
> +		unlock_task_sighand(tsk, &flags);
> +	}

This doesn't need ->siglock too. Nobody can see this new child,
nobody can access its tsk->signal.

> @@ -1700,13 +1707,19 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
>  		}
>
>  		if (new_mm) {
> +			unsigned long flags;
> +
>  			mm = current->mm;
>  			active_mm = current->active_mm;
>  			current->mm = new_mm;
>  			current->active_mm = new_mm;
> -			if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
> -				atomic_dec(&mm->oom_disable_count);
> -				atomic_inc(&new_mm->oom_disable_count);
> +			if (lock_task_sighand(current, &flags)) {
> +				if (current->signal->oom_score_adj ==
> +							OOM_SCORE_ADJ_MIN) {
> +					atomic_dec(&mm->oom_disable_count);
> +					atomic_inc(&new_mm->oom_disable_count);
> +				}

This is racy anyway, even if we take ->siglock.

If we need the protection from oom_score_adj_write(), then we have
to change ->mm under ->siglock as well. Otherwise, suppose that
oom_score_adj_write() sets OOM_SCORE_ADJ_MIN right after unshare()
does current->mm = new_mm.

However. Please do not touch this code. It doesn't work anyway,
I'll resend the patch which removes this crap.

Oleg.


* Re: [patch v2] oom: fix oom_score_adj consistency with oom_disable_count
  2010-11-03 11:23       ` Oleg Nesterov
@ 2010-11-03 20:28         ` David Rientjes
  2010-11-04 18:42           ` Oleg Nesterov
  2010-11-05 17:41           ` Oleg Nesterov
  1 sibling, 1 reply; 14+ messages in thread
From: David Rientjes @ 2010-11-03 20:28 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andrew Morton, KOSAKI Motohiro, KAMEZAWA Hiroyuki, Rik van Riel,
	Ying Han, linux-mm

On Wed, 3 Nov 2010, Oleg Nesterov wrote:

> Hmm. I did a quick grep trying to understand what ->oom_disable_count
> means, and the whole idea behind this counter looks very wrong to me.
> This patch doesn't look right either...
> 
> IOW. I believe that 3d5992d2ac7dc09aed8ab537cba074589f0f0a52
> "oom: add per-mm oom disable count" should be reverted or fixed.
> 
> Trivial example. A process with 2 threads, T1 and T2.
> ->mm->oom_disable_count = 0.
> 
> oom_score_adj_write() sets OOM_SCORE_ADJ_MIN and increments
> oom_disable_count.
> 
> T2 exits, notices OOM_SCORE_ADJ_MIN and decrements ->oom_disable_count
> back to zero.
> 
> Now, T1 runs with OOM_SCORE_ADJ_MIN, but its ->oom_disable_count == 0.
> 
> No?
> 

The intent of Ying's patch was for mm->oom_disable_count to map the number 
of threads sharing the ->mm that have p->signal->oom_score_adj == 
OOM_SCORE_ADJ_MIN.

> > p->mm->oom_disable_count tracks how many threads sharing p->mm have an
> > oom_score_adj value of OOM_SCORE_ADJ_MIN, which disables the oom killer
> > for that task.
> 
> Another reason to move ->oom_score_adj into ->mm ;)
> 

I would _love_ to move oom_score_adj into struct mm_struct, and I fought 
very strongly to do so, but people complained about its inheritance 
property.  They insist that oom_score_adj be able to be changed after 
vfork() and before exec() without changing the oom_score_adj of the 
parent.  The usual usecase is a job scheduler that is set with 
OOM_SCORE_ADJ_MIN that vforks a child, sets the child's oom_score_adj to 
0, and then execs.
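
Roughly, that usecase looks like this from userspace (a sketch only:
"some-job" is a placeholder binary, plain fork() is used even though the
scenario above is normally described with vfork(), and the parent is
assumed to already be running with oom_score_adj = -1000):

#include <fcntl.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {
		/*
		 * The child inherited oom_score_adj = -1000 from the job
		 * scheduler; make it killable again before exec.  Raising
		 * the value needs no extra privilege (lowering it would).
		 */
		int fd = open("/proc/self/oom_score_adj", O_WRONLY);

		if (fd >= 0) {
			write(fd, "0", 1);
			close(fd);
		}
		execlp("some-job", "some-job", (char *)NULL);
		_exit(127);			/* exec failed */
	} else if (pid > 0) {
		waitpid(pid, NULL, 0);
	} else {
		perror("fork");
	}
	return 0;
}

With vfork(), where the mm is shared until exec, this only works if the
value lives in the child's signal_struct rather than in the shared mm --
which is the inheritance property people insisted on.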

> > This patch introduces the necessary locking to ensure oom_score_adj can
> > be tested and/or changed with consistency.
> 
> Oh. We should avoid abusing ->siglock, but OK, we don't have
> anything else right now.
> 
> David, nothing in this patch needs lock_task_sighand(), ->sighand
> can't go away in copy_process/exec_mmap/unshare. You can just do
> spin_lock_irq(->siglock). This is minor, but personally I dislike
> the fact the code looks as if lock_task_sighand() can fail.
> 

Ok, I thought that lock_task_sighand() was some kind of API to do this, 
but I can certainly change this in a subsequent change.  Thanks!

> > @@ -741,6 +741,7 @@ static int exec_mmap(struct mm_struct *mm)
> >  {
> >  	struct task_struct *tsk;
> >  	struct mm_struct * old_mm, *active_mm;
> > +	unsigned long flags;
> >
> >  	/* Notify parent that we're no longer interested in the old VM */
> >  	tsk = current;
> > @@ -766,9 +767,12 @@ static int exec_mmap(struct mm_struct *mm)
> >  	tsk->mm = mm;
> >  	tsk->active_mm = mm;
> >  	activate_mm(active_mm, mm);
> > -	if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
> > -		atomic_dec(&old_mm->oom_disable_count);
> > -		atomic_inc(&tsk->mm->oom_disable_count);
> > +	if (lock_task_sighand(tsk, &flags)) {
> > +		if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
> > +			atomic_dec(&old_mm->oom_disable_count);
> > +			atomic_inc(&tsk->mm->oom_disable_count);
> > +		}
> 
> Not sure this needs additional locking. exec_mmap() is called when
> there are no other threads, we can rely on task_lock() we hold.
> 

There are no other threads that can share tsk->signal at this point?  I 
was misled by the de_thread() comment about CLONE_SIGHAND.

> >  static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
> >  {
> >  	struct mm_struct * mm, *oldmm;
> > +	unsigned long flags;
> >  	int retval;
> >
> >  	tsk->min_flt = tsk->maj_flt = 0;
> > @@ -743,8 +744,11 @@ good_mm:
> >  	/* Initializing for Swap token stuff */
> >  	mm->token_priority = 0;
> >  	mm->last_interval = 0;
> > -	if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
> > -		atomic_inc(&mm->oom_disable_count);
> > +	if (lock_task_sighand(tsk, &flags)) {
> > +		if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
> > +			atomic_inc(&mm->oom_disable_count);
> > +		unlock_task_sighand(tsk, &flags);
> > +	}
> 
> This doesn't need ->siglock too. Nobody can see this new child,
> nobody can access its tsk->signal.
> 

Ok!

> > @@ -1700,13 +1707,19 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
> >  		}
> >
> >  		if (new_mm) {
> > +			unsigned long flags;
> > +
> >  			mm = current->mm;
> >  			active_mm = current->active_mm;
> >  			current->mm = new_mm;
> >  			current->active_mm = new_mm;
> > -			if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
> > -				atomic_dec(&mm->oom_disable_count);
> > -				atomic_inc(&new_mm->oom_disable_count);
> > +			if (lock_task_sighand(current, &flags)) {
> > +				if (current->signal->oom_score_adj ==
> > +							OOM_SCORE_ADJ_MIN) {
> > +					atomic_dec(&mm->oom_disable_count);
> > +					atomic_inc(&new_mm->oom_disable_count);
> > +				}
> 
> This is racy anyway, even if we take ->siglock.
> 
> If we need the protection from oom_score_adj_write(), then we have
> to change ->mm under ->siglock as well. Otherwise, suppose that
> oom_score_adj_write() sets OOM_SCORE_ADJ_MIN right after unshare()
> does current->mm = new_mm.
> 

We're protected by task_lock(current) in unshare, it can't do 
current->mm = new_mm while task_lock() is held in oom_score_adj_write().

> However. Please do not touch this code. It doesn't work anyway,
> I'll resend the patch which removes this crap.
> 

Ok, I'll look forward to that :)

Do you see issues with the mapping of threads attached to an mm being 
counted appropriately in mm->oom_disable_count?


* Re: [patch v2] oom: fix oom_score_adj consistency with oom_disable_count
  2010-11-03 20:28         ` David Rientjes
@ 2010-11-04 18:42           ` Oleg Nesterov
  0 siblings, 0 replies; 14+ messages in thread
From: Oleg Nesterov @ 2010-11-04 18:42 UTC (permalink / raw)
  To: David Rientjes
  Cc: Andrew Morton, KOSAKI Motohiro, KAMEZAWA Hiroyuki, Rik van Riel,
	Ying Han, linux-mm

On 11/03, David Rientjes wrote:
>
> On Wed, 3 Nov 2010, Oleg Nesterov wrote:
>
> > IOW. I believe that 3d5992d2ac7dc09aed8ab537cba074589f0f0a52
> > "oom: add per-mm oom disable count" should be reverted or fixed.
> >
> > Trivial example. A process with 2 threads, T1 and T2.
> > ->mm->oom_disable_count = 0.
> >
> > oom_score_adj_write() sets OOM_SCORE_ADJ_MIN and increments
> > oom_disable_count.
> >
> > T2 exits, notices OOM_SCORE_ADJ_MIN and decrements ->oom_disable_count
> > back to zero.
> >
> > Now, T1 runs with OOM_SCORE_ADJ_MIN, but its ->oom_disable_count == 0.
> >
> > No?
> >
>
> The intent of Ying's patch was for mm->oom_disable_count to map the number
> of threads sharing the ->mm that have p->signal->oom_score_adj ==
> OOM_SCORE_ADJ_MIN.

Yes, I see the intent. But the patch is obviously wrong.

> > Another reason to move ->oom_score_adj into ->mm ;)
> >
>
> I would _love_ to move oom_score_adj into struct mm_struct, and I fought
> very strongly to do so,

Yes, I know ;)

> > Not sure this needs additional locking. exec_mmap() is called when
> > there are no other threads, we can rely on task_lock() we hold.
> >
>
> There are no other threads that can share tsk->signal at this point?  I
> was misled by the de_thread() comment about CLONE_SIGHAND.

Agreed, the comment is misleading. "Other processes might share the signal
table" actually means: other processes (not only sub-threads) can share
->sighand. That is why de_thread() checks oldsighand->count at the end
of this function, after we already killed all sub-threads.

But at this point nobody except current uses this ->signal.
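
That kind of sharing is easy to reproduce from userspace, for readers who
want to see it: the child below is a separate process (no CLONE_THREAD),
yet because it is created with CLONE_SIGHAND it shares the signal handler
table with its parent.  (A sketch: names are made up, error handling is
trimmed, and passing the top of the malloc'd buffer assumes the usual
downward-growing stack.)

#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>

static volatile sig_atomic_t got_usr1;

static void handler(int sig)
{
	got_usr1 = 1;
}

static int child_fn(void *arg)
{
	/* installing a handler here changes the parent's disposition too */
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = handler;
	sigaction(SIGUSR1, &sa, NULL);
	return 0;
}

int main(void)
{
	char *stack = malloc(1024 * 1024);
	pid_t pid;

	signal(SIGUSR1, SIG_IGN);	/* the parent starts out ignoring it */

	/* CLONE_SIGHAND requires CLONE_VM; note there is no CLONE_THREAD */
	pid = clone(child_fn, stack + 1024 * 1024,
		    CLONE_VM | CLONE_SIGHAND | SIGCHLD, NULL);
	waitpid(pid, NULL, 0);

	raise(SIGUSR1);			/* runs the handler the child installed */
	printf("handler ran in the parent: %s\n", got_usr1 ? "yes" : "no");
	free(stack);
	return 0;
}

While the child is alive, the parent's sighand is shared by another
process, not a sub-thread -- the situation the de_thread() comment is
talking about.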

> > >  static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
> > >  {
> > >  	struct mm_struct * mm, *oldmm;
> > > +	unsigned long flags;
> > >  	int retval;
> > >
> > >  	tsk->min_flt = tsk->maj_flt = 0;
> > > @@ -743,8 +744,11 @@ good_mm:
> > >  	/* Initializing for Swap token stuff */
> > >  	mm->token_priority = 0;
> > >  	mm->last_interval = 0;
> > > -	if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
> > > -		atomic_inc(&mm->oom_disable_count);
> > > +	if (lock_task_sighand(tsk, &flags)) {
> > > +		if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
> > > +			atomic_inc(&mm->oom_disable_count);
> > > +		unlock_task_sighand(tsk, &flags);
> > > +	}
> >
> > This doesn't need ->siglock too. Nobody can see this new child,
> > nobody can access its tsk->signal.
>
> Ok!

OOPS! Sorry, I didn't notice that this code runs in the CLONE_VM|CLONE_THREAD
case too. In that case we do need the locking.

Wait. And what about the case I meant, the !CLONE_THREAD case? There we
don't need ->siglock, but the atomic_inc() is very wrong. Note that this
(new) mm_struct has a "random" value in ->oom_disable_count, copied from
the parent's ->mm.

> > > @@ -1700,13 +1707,19 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
> > >  		}
> > >
> > >  		if (new_mm) {
> > > +			unsigned long flags;
> > > +
> > >  			mm = current->mm;
> > >  			active_mm = current->active_mm;
> > >  			current->mm = new_mm;
> > >  			current->active_mm = new_mm;
> > > -			if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
> > > -				atomic_dec(&mm->oom_disable_count);
> > > -				atomic_inc(&new_mm->oom_disable_count);
> > > +			if (lock_task_sighand(current, &flags)) {
> > > +				if (current->signal->oom_score_adj ==
> > > +							OOM_SCORE_ADJ_MIN) {
> > > +					atomic_dec(&mm->oom_disable_count);
> > > +					atomic_inc(&new_mm->oom_disable_count);
> > > +				}
> >
> > This is racy anyway, even if we take ->siglock.
> >
> > If we need the protection from oom_score_adj_write(), then we have
> > to change ->mm under ->siglock as well. Otherwise, suppose that
> > oom_score_adj_write() sets OOM_SCORE_ADJ_MIN right after unshare()
> > does current->mm = new_mm.
> >
>
> We're protected by task_lock(current) in unshare, it can't do
> current->mm = new_mm while task_lock() is held in oom_score_adj_write().

Indeed, I was wrong, thanks. I forgot that this code actually never works
(if it did work, it would have to change ->mm for all sub-threads, each of
 which has its own task->alloc_lock).

> > However. Please do not touch this code. It doesn't work anyway,
> > I'll resend the patch which removes this crap.
> >
>
> Ok, I'll look forward to that :)

Sorry, don't have the time today. Will do tomorrow.

> Do you see issues with the mapping of threads attached to an mm being
> counted appropriately in mm->oom_disable_count?

Not sure I understand you.

The main problem is that they are not counted correctly. If exit_mm()
decrements this counter, then oom_score_adj_write() should account for
every live thread (every thread with ->mm != NULL), which is nasty. Or we
should find a way to drop the counter only when the whole process exits
(and in that case CLONE_THREAD shouldn't touch the counter).

Oleg.


* [PATCH 0/1] (Was: oom: fix oom_score_adj consistency with oom_disable_count)
  2010-11-03 11:23       ` Oleg Nesterov
@ 2010-11-05 17:41           ` Oleg Nesterov
  2010-11-05 17:41           ` Oleg Nesterov
  1 sibling, 0 replies; 14+ messages in thread
From: Oleg Nesterov @ 2010-11-05 17:41 UTC (permalink / raw)
  To: Andrew Morton
  Cc: David Rientjes, KOSAKI Motohiro, KAMEZAWA Hiroyuki, Rik van Riel,
	Ying Han, linux-mm, linux-kernel, Roland McGrath,
	Eric W. Biederman, JANAK DESAI

On 11/03, Oleg Nesterov wrote:
>
> However. Please do not touch this code. It doesn't work anyway,
> I'll resend the patch which removes this crap.

This code continues to confuse developers. And this is the only
effect it has.

Once again: the patch removes DEAD code. It is never called and it
can't work (and never has, since it was added in 2006), but this is not
immediately clear. However, it often needs attention/changes because it
_looks_ as if it works.

Let's remove it, finally.

Oleg.



* [PATCH 1/1][2nd resend] sys_unshare: remove the dead CLONE_THREAD/SIGHAND/VM code
  2010-11-05 17:41           ` Oleg Nesterov
@ 2010-11-05 17:43             ` Oleg Nesterov
  -1 siblings, 0 replies; 14+ messages in thread
From: Oleg Nesterov @ 2010-11-05 17:43 UTC (permalink / raw)
  To: Andrew Morton
  Cc: David Rientjes, KOSAKI Motohiro, KAMEZAWA Hiroyuki, Rik van Riel,
	Ying Han, linux-mm, linux-kernel, Roland McGrath,
	Eric W. Biederman, JANAK DESAI

Cleanup: kill the dead code which does nothing but complicate the code
and confuse the reader.

sys_unshare(CLONE_THREAD/SIGHAND/VM) is not really implemented, and I doubt
very much it will ever work. At least, nobody even tried since the original
"unshare system call -v5: system call handler function" commit
99d1419d96d7df9cfa56bc977810be831bd5ef64 was applied more than 4 years ago.

And the code is not consistent. unshare_thread() always fails unconditionally,
while unshare_sighand() and unshare_vm() pretend to work if there is nothing
to unshare.

Remove unshare_thread(), unshare_sighand(), unshare_vm() helpers and related
variables and add a simple CLONE_THREAD | CLONE_SIGHAND| CLONE_VM check into
check_unshare_flags().

Also, move the "CLONE_NEWNS needs CLONE_FS" check from check_unshare_flags()
to sys_unshare(). This looks more consistent and matches the similar
do_sysvsem check in sys_unshare().

Note: with or without this patch "atomic_read(mm->mm_users) > 1" can give
a false positive due to get_task_mm().
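
As an aside, the behaviour being preserved here is easy to probe from
userspace before reading the diff (a sketch; the helper names are made up,
and as noted above a transient get_task_mm() reference could in principle
make even the single-threaded call fail):

#define _GNU_SOURCE
#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void *idle_thread(void *arg)
{
	pause();		/* just keep a second user of the mm alive */
	return NULL;
}

static void try_unshare(const char *when)
{
	int ret = unshare(CLONE_VM);

	printf("%s: unshare(CLONE_VM) = %d%s%s\n", when, ret,
	       ret ? ", errno = " : "", ret ? strerror(errno) : "");
}

int main(void)
{
	pthread_t t;

	try_unshare("single-threaded");		/* "works": returns 0, does nothing */

	pthread_create(&t, NULL, idle_thread, NULL);
	try_unshare("with a second thread");	/* fails with EINVAL */
	return 0;
}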

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
---

 kernel/fork.c |  123 +++++++++++-----------------------------------------------
 1 file changed, 25 insertions(+), 98 deletions(-)

--- 2.6.37/kernel/fork.c~unshare-killcrap	2010-11-05 18:03:28.000000000 +0100
+++ 2.6.37/kernel/fork.c	2010-11-05 18:09:52.000000000 +0100
@@ -1522,38 +1522,24 @@ void __init proc_caches_init(void)
 }
 
 /*
- * Check constraints on flags passed to the unshare system call and
- * force unsharing of additional process context as appropriate.
+ * Check constraints on flags passed to the unshare system call.
  */
-static void check_unshare_flags(unsigned long *flags_ptr)
+static int check_unshare_flags(unsigned long unshare_flags)
 {
+	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
+				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
+				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
+		return -EINVAL;
 	/*
-	 * If unsharing a thread from a thread group, must also
-	 * unshare vm.
-	 */
-	if (*flags_ptr & CLONE_THREAD)
-		*flags_ptr |= CLONE_VM;
-
-	/*
-	 * If unsharing vm, must also unshare signal handlers.
-	 */
-	if (*flags_ptr & CLONE_VM)
-		*flags_ptr |= CLONE_SIGHAND;
-
-	/*
-	 * If unsharing namespace, must also unshare filesystem information.
+	 * Not implemented, but pretend it works if there is nothing to
+	 * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND
+	 * needs to unshare vm.
 	 */
-	if (*flags_ptr & CLONE_NEWNS)
-		*flags_ptr |= CLONE_FS;
-}
-
-/*
- * Unsharing of tasks created with CLONE_THREAD is not supported yet
- */
-static int unshare_thread(unsigned long unshare_flags)
-{
-	if (unshare_flags & CLONE_THREAD)
-		return -EINVAL;
+	if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
+		/* FIXME: get_task_mm() increments ->mm_users */
+		if (atomic_read(&current->mm->mm_users) > 1)
+			return -EINVAL;
+	}
 
 	return 0;
 }
@@ -1580,34 +1566,6 @@ static int unshare_fs(unsigned long unsh
 }
 
 /*
- * Unsharing of sighand is not supported yet
- */
-static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
-{
-	struct sighand_struct *sigh = current->sighand;
-
-	if ((unshare_flags & CLONE_SIGHAND) && atomic_read(&sigh->count) > 1)
-		return -EINVAL;
-	else
-		return 0;
-}
-
-/*
- * Unshare vm if it is being shared
- */
-static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
-{
-	struct mm_struct *mm = current->mm;
-
-	if ((unshare_flags & CLONE_VM) &&
-	    (mm && atomic_read(&mm->mm_users) > 1)) {
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-/*
  * Unshare file descriptor table if it is being shared
  */
 static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
@@ -1635,45 +1593,37 @@ static int unshare_fd(unsigned long unsh
  */
 SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 {
-	int err = 0;
 	struct fs_struct *fs, *new_fs = NULL;
-	struct sighand_struct *new_sigh = NULL;
-	struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
 	struct files_struct *fd, *new_fd = NULL;
 	struct nsproxy *new_nsproxy = NULL;
 	int do_sysvsem = 0;
+	int err;
 
-	check_unshare_flags(&unshare_flags);
-
-	/* Return -EINVAL for all unsupported flags */
-	err = -EINVAL;
-	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
-				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
-				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
+	err = check_unshare_flags(unshare_flags);
+	if (err)
 		goto bad_unshare_out;
 
 	/*
+	 * If unsharing namespace, must also unshare filesystem information.
+	 */
+	if (unshare_flags & CLONE_NEWNS)
+		unshare_flags |= CLONE_FS;
+	/*
 	 * CLONE_NEWIPC must also detach from the undolist: after switching
 	 * to a new ipc namespace, the semaphore arrays from the old
 	 * namespace are unreachable.
 	 */
 	if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
 		do_sysvsem = 1;
-	if ((err = unshare_thread(unshare_flags)))
-		goto bad_unshare_out;
 	if ((err = unshare_fs(unshare_flags, &new_fs)))
-		goto bad_unshare_cleanup_thread;
-	if ((err = unshare_sighand(unshare_flags, &new_sigh)))
-		goto bad_unshare_cleanup_fs;
-	if ((err = unshare_vm(unshare_flags, &new_mm)))
-		goto bad_unshare_cleanup_sigh;
+		goto bad_unshare_out;
 	if ((err = unshare_fd(unshare_flags, &new_fd)))
-		goto bad_unshare_cleanup_vm;
+		goto bad_unshare_cleanup_fs;
 	if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
 			new_fs)))
 		goto bad_unshare_cleanup_fd;
 
-	if (new_fs ||  new_mm || new_fd || do_sysvsem || new_nsproxy) {
+	if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
 		if (do_sysvsem) {
 			/*
 			 * CLONE_SYSVSEM is equivalent to sys_exit().
@@ -1699,19 +1649,6 @@ SYSCALL_DEFINE1(unshare, unsigned long, 
 			spin_unlock(&fs->lock);
 		}
 
-		if (new_mm) {
-			mm = current->mm;
-			active_mm = current->active_mm;
-			current->mm = new_mm;
-			current->active_mm = new_mm;
-			if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
-				atomic_dec(&mm->oom_disable_count);
-				atomic_inc(&new_mm->oom_disable_count);
-			}
-			activate_mm(active_mm, new_mm);
-			new_mm = mm;
-		}
-
 		if (new_fd) {
 			fd = current->files;
 			current->files = new_fd;
@@ -1728,20 +1665,10 @@ bad_unshare_cleanup_fd:
 	if (new_fd)
 		put_files_struct(new_fd);
 
-bad_unshare_cleanup_vm:
-	if (new_mm)
-		mmput(new_mm);
-
-bad_unshare_cleanup_sigh:
-	if (new_sigh)
-		if (atomic_dec_and_test(&new_sigh->count))
-			kmem_cache_free(sighand_cachep, new_sigh);
-
 bad_unshare_cleanup_fs:
 	if (new_fs)
 		free_fs_struct(new_fs);
 
-bad_unshare_cleanup_thread:
 bad_unshare_out:
 	return err;
 }



* Re: [PATCH 1/1][2nd resend] sys_unshare: remove the dead CLONE_THREAD/SIGHAND/VM code
  2010-11-05 17:43             ` Oleg Nesterov
@ 2010-11-09 11:21               ` KOSAKI Motohiro
  -1 siblings, 0 replies; 14+ messages in thread
From: KOSAKI Motohiro @ 2010-11-09 11:21 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: kosaki.motohiro, Andrew Morton, David Rientjes,
	KAMEZAWA Hiroyuki, Rik van Riel, Ying Han, linux-mm,
	linux-kernel, Roland McGrath, Eric W. Biederman, JANAK DESAI

> Cleanup: kill the dead code which does nothing but complicate the code
> and confuse the reader.
> 
> sys_unshare(CLONE_THREAD/SIGHAND/VM) is not really implemented, and I doubt
> very much it will ever work. At least, nobody even tried since the original
> "unshare system call -v5: system call handler function" commit
> 99d1419d96d7df9cfa56bc977810be831bd5ef64 was applied more than 4 years ago.
> 
> And the code is not consistent. unshare_thread() always fails unconditionally,
> while unshare_sighand() and unshare_vm() pretend to work if there is nothing
> to unshare.
> 
> Remove unshare_thread(), unshare_sighand(), unshare_vm() helpers and related
> variables and add a simple CLONE_THREAD | CLONE_SIGHAND| CLONE_VM check into
> check_unshare_flags().
> 
> Also, move the "CLONE_NEWNS needs CLONE_FS" check from check_unshare_flags()
> to sys_unshare(). This looks more consistent and matches the similar
> do_sysvsem check in sys_unshare().
> 
> Note: with or without this patch "atomic_read(mm->mm_users) > 1" can give
> a false positive due to get_task_mm().
> 
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>
> Acked-by: Roland McGrath <roland@redhat.com>
> ---
> 
>  kernel/fork.c |  123 +++++++++++-----------------------------------------------
>  1 file changed, 25 insertions(+), 98 deletions(-)
> 
> --- 2.6.37/kernel/fork.c~unshare-killcrap	2010-11-05 18:03:28.000000000 +0100
> +++ 2.6.37/kernel/fork.c	2010-11-05 18:09:52.000000000 +0100
> @@ -1522,38 +1522,24 @@ void __init proc_caches_init(void)
>  }
>  
>  /*
> - * Check constraints on flags passed to the unshare system call and
> - * force unsharing of additional process context as appropriate.
> + * Check constraints on flags passed to the unshare system call.
>   */
> -static void check_unshare_flags(unsigned long *flags_ptr)
> +static int check_unshare_flags(unsigned long unshare_flags)
>  {
> +	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
> +				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
> +				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
> +		return -EINVAL;

Please put a WARN_ON_ONCE() here explicitly. That's a good way to find
hidden users, if any exist, and to get better bug reports.
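
One possible reading of that suggestion, purely as a sketch on top of the
new check (this is not code from the thread, and whether the warning
belongs here or on the unknown-flag mask is exactly the open question):

	/*
	 * Sketch only: warn once if some caller actually relies on the
	 * CLONE_THREAD/SIGHAND/VM unshare that never worked, so hidden
	 * users show up in bug reports instead of a silent -EINVAL.
	 */
	if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
		if (WARN_ON_ONCE(atomic_read(&current->mm->mm_users) > 1))
			return -EINVAL;
	}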

And, I've reviewed this patch and I've found no fault, but I will not
add my ack because I think I perhaps haven't fully understood the
original intention.

Anyway, thanks Oleg.





* Re: [PATCH 1/1][2nd resend] sys_unshare: remove the dead CLONE_THREAD/SIGHAND/VM code
  2010-11-09 11:21               ` KOSAKI Motohiro
@ 2010-11-09 17:17                 ` Oleg Nesterov
  -1 siblings, 0 replies; 14+ messages in thread
From: Oleg Nesterov @ 2010-11-09 17:17 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Andrew Morton, David Rientjes, KAMEZAWA Hiroyuki, Rik van Riel,
	Ying Han, linux-mm, linux-kernel, Roland McGrath,
	Eric W. Biederman, JANAK DESAI

On 11/09, KOSAKI Motohiro wrote:
>
> > -static void check_unshare_flags(unsigned long *flags_ptr)
> > +static int check_unshare_flags(unsigned long unshare_flags)
> >  {
> > +	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
> > +				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
> > +				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
> > +		return -EINVAL;
>
> Please put a WARN_ON_ONCE() here explicitly. That's a good way to find
> hidden users, if any exist, and to get better bug reports.

Perhaps... but this needs a separate change.

Please note that this check was simply moved from sys_unshare(), this
patch shouldn't have any visible effect.

Personally, I think it would be even better if, say, unshare(CLONE_THREAD)
returned -EINVAL unconditionally.

> And, I've reviewed this patch and I've found no fault, but I will not
> add my ack because I think I perhaps haven't fully understood the
> original intention.

Thanks!

IIRC, the main (only?) motivation for sys_unshare() was unshare_fs().
Most probably unshare_thread/vm were added as placeholders.

Oleg.



* Re: [PATCH 1/1][2nd resend] sys_unshare: remove the dead CLONE_THREAD/SIGHAND/VM code
  2010-11-09 17:17                 ` Oleg Nesterov
@ 2010-11-14  7:14                   ` KOSAKI Motohiro
  -1 siblings, 0 replies; 14+ messages in thread
From: KOSAKI Motohiro @ 2010-11-14  7:14 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: kosaki.motohiro, Andrew Morton, David Rientjes,
	KAMEZAWA Hiroyuki, Rik van Riel, Ying Han, linux-mm,
	linux-kernel, Roland McGrath, Eric W. Biederman, JANAK DESAI

> On 11/09, KOSAKI Motohiro wrote:
> >
> > > -static void check_unshare_flags(unsigned long *flags_ptr)
> > > +static int check_unshare_flags(unsigned long unshare_flags)
> > >  {
> > > +	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
> > > +				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
> > > +				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
> > > +		return -EINVAL;
> >
> > Please put a WARN_ON_ONCE() here explicitly. That's a good way to find
> > hidden users, if any exist, and to get better bug reports.
> 
> Perhaps... but this needs a separate change.
> 
> Please note that this check was simply moved from sys_unshare(), this
> patch shouldn't have any visible effect.
> 
> Personally, I think it would be even better if, say, unshare(CLONE_THREAD)
> returned -EINVAL unconditionally.

Ah, OK. You are right.



> > And, I've reviewed this patch and I've found no fault, but I will not
> > add my ack because I think I perhaps haven't fully understood the
> > original intention.
> 
> Thanks!
> 
> IIRC, the main (only?) motivation for sys_unshare() was unshare_fs().
> Most probably unshare_thread/vm were added as placeholders.






