linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
@ 2006-03-16 16:49 Eric W. Biederman
  2006-03-16 17:31 ` [PATCH] unshare: Use rcu_assign_pointer when setting sighand Eric W. Biederman
                   ` (2 more replies)
  0 siblings, 3 replies; 24+ messages in thread
From: Eric W. Biederman @ 2006-03-16 16:49 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Andrew Morton, Janak Desai, Al Viro,
	Christoph Hellwig, Michael Kerrisk, Andi Kleen, Paul Mackerras,
	JANAK DESAI


Since we have not crossed the magic 2.6.16 line can we please
include this patch.  My apologies for catching this so late in the
cycle.

- Error if we are passed any flags we don't expect.

  This preserves forward compatibility so programs that use new flags that
  run on old kernels will fail instead of silently doing the wrong thing.

- Use separate defines from sys_clone.

  sys_unshare can't implement half of the clone flags under any circumstances
  and those that it does implement have subtlely different semantics than
  the clone flags.  Using a different set of flags sets the
  expectation that things will be different.

  Binary compatibility with current users of the is still maintained
  as the unshare flags and the clone flags have the same values.


Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>


---

 include/linux/sched.h |   17 +++++++++++++++++
 kernel/fork.c         |   34 +++++++++++++++++++---------------
 2 files changed, 36 insertions(+), 15 deletions(-)

1b9e67b9696f1e828ba789d3f6c8247d1171f367
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 62e6314..8e46f05 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -69,6 +69,23 @@ struct exec_domain;
 #define CLONE_KERNEL	(CLONE_FS | CLONE_FILES | CLONE_SIGHAND)
 
 /*
+ * unsharing flags: (Keep in sync with the clone flags if possible)
+ */
+#define UNSHARE_VM	0x00000100	/* stop sharing the VM between processes */
+#define UNSHARE_FS	0x00000200	/* stop sharing the fs info between processes */
+#define UNSHARE_FILES	0x00000400	/* stop sharing open files between processes */
+#define UNSHARE_SIGHAND	0x00000800	/* stop sharing signal handlers and blocked signals */
+#define UNSHARE_THREAD	0x00010000	/* stop sharing a thread group */
+#define UNSHARE_NS	0x00020000	/* stop sharing the mount namespace */
+#define UNSHARE_SYSVSEM	0x00040000	/* stop sharing system V SEM_UNDO semantics */
+
+/*
+ * Helper so sys_unshare can check if it is passed valid flags.
+ */
+#define UNSHARE_VALID	(UNSHARE_VM|UNSHARE_FS|UNSHARE_FILES|UNSHARE_SIGHAND| \
+				UNSHARE_THREAD|UNSHARE_NS|UNSHARE_SYSVSEM)
+
+/*
  * These are the constant used to fake the fixed-point load-average
  * counting. Some notes:
  *  - 11 bit fractions expand to 22 bits by the multiplies: this gives
diff --git a/kernel/fork.c b/kernel/fork.c
index ccdfbb1..d2706e9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1382,28 +1382,28 @@ static inline void check_unshare_flags(u
 	 * If unsharing a thread from a thread group, must also
 	 * unshare vm.
 	 */
-	if (*flags_ptr & CLONE_THREAD)
-		*flags_ptr |= CLONE_VM;
+	if (*flags_ptr & UNSHARE_THREAD)
+		*flags_ptr |= UNSHARE_VM;
 
 	/*
 	 * If unsharing vm, must also unshare signal handlers.
 	 */
-	if (*flags_ptr & CLONE_VM)
-		*flags_ptr |= CLONE_SIGHAND;
+	if (*flags_ptr & UNSHARE_VM)
+		*flags_ptr |= UNSHARE_SIGHAND;
 
 	/*
 	 * If unsharing signal handlers and the task was created
 	 * using CLONE_THREAD, then must unshare the thread
 	 */
-	if ((*flags_ptr & CLONE_SIGHAND) &&
+	if ((*flags_ptr & UNSHARE_SIGHAND) &&
 	    (atomic_read(&current->signal->count) > 1))
-		*flags_ptr |= CLONE_THREAD;
+		*flags_ptr |= UNSHARE_THREAD;
 
 	/*
 	 * If unsharing namespace, must also unshare filesystem information.
 	 */
-	if (*flags_ptr & CLONE_NEWNS)
-		*flags_ptr |= CLONE_FS;
+	if (*flags_ptr & UNSHARE_NS)
+		*flags_ptr |= UNSHARE_FS;
 }
 
 /*
@@ -1411,7 +1411,7 @@ static inline void check_unshare_flags(u
  */
 static int unshare_thread(unsigned long unshare_flags)
 {
-	if (unshare_flags & CLONE_THREAD)
+	if (unshare_flags & UNSHARE_THREAD)
 		return -EINVAL;
 
 	return 0;
@@ -1424,7 +1424,7 @@ static int unshare_fs(unsigned long unsh
 {
 	struct fs_struct *fs = current->fs;
 
-	if ((unshare_flags & CLONE_FS) &&
+	if ((unshare_flags & UNSHARE_FS) &&
 	    (fs && atomic_read(&fs->count) > 1)) {
 		*new_fsp = __copy_fs_struct(current->fs);
 		if (!*new_fsp)
@@ -1441,7 +1441,7 @@ static int unshare_namespace(unsigned lo
 {
 	struct namespace *ns = current->namespace;
 
-	if ((unshare_flags & CLONE_NEWNS) &&
+	if ((unshare_flags & UNSHARE_NS) &&
 	    (ns && atomic_read(&ns->count) > 1)) {
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
@@ -1462,7 +1462,7 @@ static int unshare_sighand(unsigned long
 {
 	struct sighand_struct *sigh = current->sighand;
 
-	if ((unshare_flags & CLONE_SIGHAND) &&
+	if ((unshare_flags & UNSHARE_SIGHAND) &&
 	    (sigh && atomic_read(&sigh->count) > 1))
 		return -EINVAL;
 	else
@@ -1476,7 +1476,7 @@ static int unshare_vm(unsigned long unsh
 {
 	struct mm_struct *mm = current->mm;
 
-	if ((unshare_flags & CLONE_VM) &&
+	if ((unshare_flags & UNSHARE_VM) &&
 	    (mm && atomic_read(&mm->mm_users) > 1)) {
 		*new_mmp = dup_mm(current);
 		if (!*new_mmp)
@@ -1494,7 +1494,7 @@ static int unshare_fd(unsigned long unsh
 	struct files_struct *fd = current->files;
 	int error = 0;
 
-	if ((unshare_flags & CLONE_FILES) &&
+	if ((unshare_flags & UNSHARE_FILES) &&
 	    (fd && atomic_read(&fd->count) > 1)) {
 		*new_fdp = dup_fd(fd, &error);
 		if (!*new_fdp)
@@ -1510,7 +1510,7 @@ static int unshare_fd(unsigned long unsh
  */
 static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp)
 {
-	if (unshare_flags & CLONE_SYSVSEM)
+	if (unshare_flags & UNSHARE_SYSVSEM)
 		return -EINVAL;
 
 	return 0;
@@ -1534,6 +1534,10 @@ asmlinkage long sys_unshare(unsigned lon
 	struct files_struct *fd, *new_fd = NULL;
 	struct sem_undo_list *new_ulist = NULL;
 
+	/* Only accept valid unshare flags */
+	if (unshare_flags & ~UNSHARE_VALID)
+		return -EINVAL;
+
 	check_unshare_flags(&unshare_flags);
 
 	if ((err = unshare_thread(unshare_flags)))
-- 
1.2.4.g2d33


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH] unshare: Use rcu_assign_pointer when setting sighand
  2006-03-16 16:49 [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed Eric W. Biederman
@ 2006-03-16 17:31 ` Eric W. Biederman
  2006-03-17  6:48   ` Paul E. McKenney
  2006-03-17 17:44   ` Oleg Nesterov
  2006-03-16 19:40 ` [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed Michael Kerrisk
  2006-03-16 20:33 ` Andrew Morton
  2 siblings, 2 replies; 24+ messages in thread
From: Eric W. Biederman @ 2006-03-16 17:31 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Andrew Morton, Janak Desai, Al Viro,
	Christoph Hellwig, Michael Kerrisk, Andi Kleen, Paul Mackerras,
	Oleg Nesterov


The sighand pointer only needs the rcu_read_lock on the
read side.  So only depending on task_lock protection
when setting this pointer is not enough.  We also need
a memory barrier to ensure the initialization is seen first.

Use rcu_assign_pointer as it does this for us, and clearly
documents that we are setting an rcu readable pointer.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>


---

 kernel/fork.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

f0cdb649b7140927777f4355631648b396ee235b
diff --git a/kernel/fork.c b/kernel/fork.c
index d2706e9..2f24553 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1573,7 +1573,7 @@ asmlinkage long sys_unshare(unsigned lon
 
 		if (new_sigh) {
 			sigh = current->sighand;
-			current->sighand = new_sigh;
+			rcu_assign_pointer(current->sighand, new_sigh);
 			new_sigh = sigh;
 		}
 
-- 
1.2.4.g2d33


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-16 16:49 [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed Eric W. Biederman
  2006-03-16 17:31 ` [PATCH] unshare: Use rcu_assign_pointer when setting sighand Eric W. Biederman
@ 2006-03-16 19:40 ` Michael Kerrisk
  2006-03-16 20:33 ` Andrew Morton
  2 siblings, 0 replies; 24+ messages in thread
From: Michael Kerrisk @ 2006-03-16 19:40 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: torvalds, linux-kernel, akpm, janak, viro, hch, ak, paulus, janak

Eric,

> Since we have not crossed the magic 2.6.16 line can we please
> include this patch.  My apologies for catching this so late in the
> cycle.
> 
> - Error if we are passed any flags we don't expect.
> 
>   This preserves forward compatibility so programs that use new flags 
>   that
>   run on old kernels will fail instead of silently doing the wrong thing.
> 
> - Use separate defines from sys_clone.
> 
>   sys_unshare can't implement half of the clone flags under any
> circumstances
>   and those that it does implement have subtlely different semantics than
>   the clone flags.  Using a different set of flags sets the
>   expectation that things will be different.
> 
>   Binary compatibility with current users of the is still maintained
>   as the unshare flags and the clone flags have the same values.

Thanks for this.  I had begun to think I must simply be dense
for thinking there was a problem here...

I will update my draft manual page accordingly.

Cheers,

Michael

-- 
Michael Kerrisk
maintainer of Linux man pages Sections 2, 3, 4, 5, and 7 

Want to help with man page maintenance?  
Grab the latest tarball at
ftp://ftp.win.tue.nl/pub/linux-local/manpages/, 
read the HOWTOHELP file and grep the source 
files for 'FIXME'.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-16 16:49 [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed Eric W. Biederman
  2006-03-16 17:31 ` [PATCH] unshare: Use rcu_assign_pointer when setting sighand Eric W. Biederman
  2006-03-16 19:40 ` [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed Michael Kerrisk
@ 2006-03-16 20:33 ` Andrew Morton
  2006-03-16 20:41   ` Linus Torvalds
  2006-03-16 21:36   ` Janak Desai
  2 siblings, 2 replies; 24+ messages in thread
From: Andrew Morton @ 2006-03-16 20:33 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: torvalds, linux-kernel, janak, viro, hch, mtk-manpages, ak, paulus

ebiederm@xmission.com (Eric W. Biederman) wrote:
>
> Since we have not crossed the magic 2.6.16 line can we please
>  include this patch.  My apologies for catching this so late in the
>  cycle.
> 
>  - Error if we are passed any flags we don't expect.
> 
>    This preserves forward compatibility so programs that use new flags that
>    run on old kernels will fail instead of silently doing the wrong thing.

Makes sense.

>  - Use separate defines from sys_clone.
> 
>    sys_unshare can't implement half of the clone flags under any circumstances
>    and those that it does implement have subtlely different semantics than
>    the clone flags.  Using a different set of flags sets the
>    expectation that things will be different.

iirc there was some discussion about this and it was explicitly decided to
keep the CLONE flags.

Maybe Janak or Linus can comment?

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-16 20:33 ` Andrew Morton
@ 2006-03-16 20:41   ` Linus Torvalds
  2006-03-16 21:58     ` Eric W. Biederman
  2006-03-16 21:36   ` Janak Desai
  1 sibling, 1 reply; 24+ messages in thread
From: Linus Torvalds @ 2006-03-16 20:41 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Eric W. Biederman, linux-kernel, janak, viro, hch, mtk-manpages,
	ak, paulus



On Thu, 16 Mar 2006, Andrew Morton wrote:
> 
> iirc there was some discussion about this and it was explicitly decided to
> keep the CLONE flags.
> 
> Maybe Janak or Linus can comment?

My personal opinion is that having a different set of flags is more 
confusing and likely to result in problems later than having the same 
ones. Regardless, I'm not touching this for 2.6.16 any more, 

		Linus

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-16 20:33 ` Andrew Morton
  2006-03-16 20:41   ` Linus Torvalds
@ 2006-03-16 21:36   ` Janak Desai
  1 sibling, 0 replies; 24+ messages in thread
From: Janak Desai @ 2006-03-16 21:36 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Eric W. Biederman, torvalds, linux-kernel, viro, hch,
	mtk-manpages, ak, paulus

Andrew Morton wrote:

>ebiederm@xmission.com (Eric W. Biederman) wrote:
>  
>
>>Since we have not crossed the magic 2.6.16 line can we please
>> include this patch.  My apologies for catching this so late in the
>> cycle.
>>
>> - Error if we are passed any flags we don't expect.
>>
>>   This preserves forward compatibility so programs that use new flags that
>>   run on old kernels will fail instead of silently doing the wrong thing.
>>    
>>
>
>Makes sense.
>
>  
>
>> - Use separate defines from sys_clone.
>>
>>   sys_unshare can't implement half of the clone flags under any circumstances
>>   and those that it does implement have subtlely different semantics than
>>   the clone flags.  Using a different set of flags sets the
>>   expectation that things will be different.
>>    
>>
>
>iirc there was some discussion about this and it was explicitly decided to
>keep the CLONE flags.
>
>Maybe Janak or Linus can comment?
>
>  
>
In the two prior discussions on this, the disagreement was on how much 
confusion
(if any) the use of CLONE_* flags would generate. I personally did not 
think that
it was confusing enough to add new flags, with the same values as CLONE_*
flags, in the kernel. Linus's last email (3/1/06) on the subject seemed 
to lean in that
direction as well. That's why I didn't take any action on it.

-Janak


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-16 20:41   ` Linus Torvalds
@ 2006-03-16 21:58     ` Eric W. Biederman
  2006-03-16 22:19       ` Andrew Morton
  0 siblings, 1 reply; 24+ messages in thread
From: Eric W. Biederman @ 2006-03-16 21:58 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andrew Morton, linux-kernel, janak, viro, hch, mtk-manpages, ak, paulus

Linus Torvalds <torvalds@osdl.org> writes:

> On Thu, 16 Mar 2006, Andrew Morton wrote:
>> 
>> iirc there was some discussion about this and it was explicitly decided to
>> keep the CLONE flags.
>> 
>> Maybe Janak or Linus can comment?
>
> My personal opinion is that having a different set of flags is more 
> confusing and likely to result in problems later than having the same 
> ones. Regardless, I'm not touching this for 2.6.16 any more, 

I am actually a lot more concerned with the fact that we don't test
for invalid bits.  So we have an ABI that will change in the future,
and that doesn't allow us to have a program that runs on old and new
kernels.

I guess I can resend some version of my patch after 2.6.16 is out and
break the ABI for the undefined bits then.  Correct programs shouldn't
care.  But it sure would be nice if they could care.

Eric

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-16 21:58     ` Eric W. Biederman
@ 2006-03-16 22:19       ` Andrew Morton
  0 siblings, 0 replies; 24+ messages in thread
From: Andrew Morton @ 2006-03-16 22:19 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: torvalds, linux-kernel, janak, viro, hch, mtk-manpages, ak, paulus

ebiederm@xmission.com (Eric W. Biederman) wrote:
>
> Linus Torvalds <torvalds@osdl.org> writes:
> 
> > On Thu, 16 Mar 2006, Andrew Morton wrote:
> >> 
> >> iirc there was some discussion about this and it was explicitly decided to
> >> keep the CLONE flags.
> >> 
> >> Maybe Janak or Linus can comment?
> >
> > My personal opinion is that having a different set of flags is more 
> > confusing and likely to result in problems later than having the same 
> > ones. Regardless, I'm not touching this for 2.6.16 any more, 
> 
> I am actually a lot more concerned with the fact that we don't test
> for invalid bits.  So we have an ABI that will change in the future,
> and that doesn't allow us to have a program that runs on old and new
> kernels.

The risk of breaking things is small - it would require someone to write a
sys_unshare-using app which a) they care about and b) has a particular bug
in it.  But yes, we should check.

> I guess I can resend some version of my patch after 2.6.16 is out and
> break the ABI for the undefined bits then.  Correct programs shouldn't
> care.  But it sure would be nice if they could care.
> 

Your single patch did two different things - there's a lesson here ;)

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand
  2006-03-16 17:31 ` [PATCH] unshare: Use rcu_assign_pointer when setting sighand Eric W. Biederman
@ 2006-03-17  6:48   ` Paul E. McKenney
  2006-03-17 17:44   ` Oleg Nesterov
  1 sibling, 0 replies; 24+ messages in thread
From: Paul E. McKenney @ 2006-03-17  6:48 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linus Torvalds, linux-kernel, Andrew Morton, Janak Desai,
	Al Viro, Christoph Hellwig, Michael Kerrisk, Andi Kleen,
	Paul Mackerras, Oleg Nesterov

On Thu, Mar 16, 2006 at 10:31:38AM -0700, Eric W. Biederman wrote:
> 
> The sighand pointer only needs the rcu_read_lock on the
> read side.  So only depending on task_lock protection
> when setting this pointer is not enough.  We also need
> a memory barrier to ensure the initialization is seen first.
> 
> Use rcu_assign_pointer as it does this for us, and clearly
> documents that we are setting an rcu readable pointer.

Good catch!

Acked-by: Paul E. McKenney <paulmck@us.ibm.com>
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
> 
> 
> ---
> 
>  kernel/fork.c |    2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
> 
> f0cdb649b7140927777f4355631648b396ee235b
> diff --git a/kernel/fork.c b/kernel/fork.c
> index d2706e9..2f24553 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1573,7 +1573,7 @@ asmlinkage long sys_unshare(unsigned lon
>  
>  		if (new_sigh) {
>  			sigh = current->sighand;
> -			current->sighand = new_sigh;
> +			rcu_assign_pointer(current->sighand, new_sigh);
>  			new_sigh = sigh;
>  		}
>  
> -- 
> 1.2.4.g2d33
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand
  2006-03-16 17:31 ` [PATCH] unshare: Use rcu_assign_pointer when setting sighand Eric W. Biederman
  2006-03-17  6:48   ` Paul E. McKenney
@ 2006-03-17 17:44   ` Oleg Nesterov
  2006-03-17 20:56     ` Andrew Morton
  1 sibling, 1 reply; 24+ messages in thread
From: Oleg Nesterov @ 2006-03-17 17:44 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linus Torvalds, linux-kernel, Andrew Morton, Janak Desai,
	Al Viro, Christoph Hellwig, Michael Kerrisk, Andi Kleen,
	Paul Mackerras

"Eric W. Biederman" wrote:
> 
> @@ -1573,7 +1573,7 @@ asmlinkage long sys_unshare(unsigned lon
> 
>                 if (new_sigh) {
>                         sigh = current->sighand;
> -                       current->sighand = new_sigh;
> +                       rcu_assign_pointer(current->sighand, new_sigh);
>                         new_sigh = sigh;
>                 }

Isn't it better to just replace this code with
'BUG_ON(new_sigh != NULL)' ?

It is never executed, but totally broken, afaics.
task_lock() has nothing to do with ->sighand changing.

Oleg.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand
  2006-03-17 17:44   ` Oleg Nesterov
@ 2006-03-17 20:56     ` Andrew Morton
  2006-03-18 13:12       ` Oleg Nesterov
                         ` (3 more replies)
  0 siblings, 4 replies; 24+ messages in thread
From: Andrew Morton @ 2006-03-17 20:56 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: ebiederm, torvalds, linux-kernel, janak, viro, hch, mtk-manpages,
	ak, paulus

Oleg Nesterov <oleg@tv-sign.ru> wrote:
>
> "Eric W. Biederman" wrote:
> > 
> > @@ -1573,7 +1573,7 @@ asmlinkage long sys_unshare(unsigned lon
> > 
> >                 if (new_sigh) {
> >                         sigh = current->sighand;
> > -                       current->sighand = new_sigh;
> > +                       rcu_assign_pointer(current->sighand, new_sigh);
> >                         new_sigh = sigh;
> >                 }
> 
> Isn't it better to just replace this code with
> 'BUG_ON(new_sigh != NULL)' ?
> 
> It is never executed, but totally broken, afaics.
> task_lock() has nothing to do with ->sighand changing.
> 

/*
 * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
 * supported yet
 */
static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)

It's all just a place-holder at present.

If we don't plan on ever supporting unshare(CLONE_SIGHAND) we should take
that code out and make it return EINVAL.  Right now.

And because we don't presently support CLONE_SIGHAND we should return
EINVAL if it's set.  Right now.

And we should change sys_unshare() to reject not-understood flags.  Right
now.

If we don't do these things we'll silently break 2.6.16-back-compatibility
of applications which are coded for future kernels.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand
  2006-03-17 20:56     ` Andrew Morton
@ 2006-03-18 13:12       ` Oleg Nesterov
  2006-03-18 15:43         ` Janak Desai
  2006-03-18 13:13       ` [PATCH] implement unshare(CLONE_SIGHAND) for single-thread case Oleg Nesterov
                         ` (2 subsequent siblings)
  3 siblings, 1 reply; 24+ messages in thread
From: Oleg Nesterov @ 2006-03-18 13:12 UTC (permalink / raw)
  To: Andrew Morton
  Cc: ebiederm, torvalds, linux-kernel, janak, viro, hch, mtk-manpages,
	ak, paulus

Andrew Morton wrote:
> 
> Oleg Nesterov <oleg@tv-sign.ru> wrote:
> >
> > Isn't it better to just replace this code with
> > 'BUG_ON(new_sigh != NULL)' ?
> >
> > It is never executed, but totally broken, afaics.
> > task_lock() has nothing to do with ->sighand changing.
> >
> 
> /*
>  * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
>  * supported yet
>  */
> static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
> 
> It's all just a place-holder at present.
> 
> If we don't plan on ever supporting unshare(CLONE_SIGHAND) we should take
> that code out and make it return EINVAL.  Right now.
> 
> And because we don't presently support CLONE_SIGHAND we should return
> EINVAL if it's set.  Right now.
> 
> And we should change sys_unshare() to reject not-understood flags.  Right
> now.
> 
> If we don't do these things we'll silently break 2.6.16-back-compatibility
> of applications which are coded for future kernels.

unshare_sighand() is ok, it never populates *new_sighp, it just returns
errror code: 0 when ->sighand is not shared, EINVAL otherwise.

I argued about 'if (new_sigh)' code in sys_unshare() because it lies about
locking rules.

Btw, copy_process() forbids CLONE_SIGHAND without CLONE_VM (is there a
good reason for that?), but one can do unshare(CLONE_VM). This is odd.

Oleg.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH] implement unshare(CLONE_SIGHAND) for single-thread case
  2006-03-17 20:56     ` Andrew Morton
  2006-03-18 13:12       ` Oleg Nesterov
@ 2006-03-18 13:13       ` Oleg Nesterov
  2006-03-18 15:10       ` [PATCH] unshare: Error if passed unsupported flags Eric W. Biederman
  2006-03-18 16:29       ` [PATCH] unshare: Use rcu_assign_pointer when setting sighand Janak Desai
  3 siblings, 0 replies; 24+ messages in thread
From: Oleg Nesterov @ 2006-03-18 13:13 UTC (permalink / raw)
  To: Andrew Morton
  Cc: ebiederm, torvalds, linux-kernel, janak, viro, hch, mtk-manpages,
	ak, paulus

on top of (already merged)
	unshare-use-rcu_assign_pointer-when-setting-sighand.patch
depends on
	convert-sighand_cache-to-use-slab_destroy_by_rcu.patch

Implement unshare(CLONE_SIGHAND) for single-thread case.

I think it is not possible to do it for multithread case without
complication of copy_process().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>

--- MM/kernel/fork.c~	2006-03-18 16:23:23.000000000 +0300
+++ MM/kernel/fork.c	2006-03-18 17:00:39.000000000 +0300
@@ -1483,18 +1483,24 @@ static int unshare_namespace(unsigned lo
 }
 
 /*
- * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
- * supported yet
+ * Unsharing of sighand is only supported for single-thread applications
  */
 static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
 {
-	struct sighand_struct *sigh = current->sighand;
-
 	if ((unshare_flags & CLONE_SIGHAND) &&
-	    (sigh && atomic_read(&sigh->count) > 1))
-		return -EINVAL;
-	else
-		return 0;
+			atomic_read(&current->sighand->count) > 1) {
+
+		if (!thread_group_empty(current))
+			return -EINVAL;
+
+		*new_sighp = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
+		if (unlikely(*new_sighp == NULL))
+			return -ENOMEM;
+
+		atomic_set(&(*new_sighp)->count, 1);
+	}
+
+	return 0;
 }
 
 /*
@@ -1579,7 +1585,18 @@ asmlinkage long sys_unshare(unsigned lon
 	if ((err = unshare_semundo(unshare_flags, &new_ulist)))
 		goto bad_unshare_cleanup_fd;
 
-	if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) {
+	if (new_sigh) {
+		sigh = current->sighand;
+		write_lock_irq(&tasklist_lock);
+		spin_lock(&sigh->siglock);
+		memcpy(new_sigh->action, sigh->action, sizeof(sigh->action));
+		rcu_assign_pointer(current->sighand, new_sigh);
+		spin_unlock(&sigh->siglock);
+		write_unlock_irq(&tasklist_lock);
+		new_sigh = sigh;
+	}
+
+	if (new_fs || new_ns || new_mm || new_fd || new_ulist) {
 
 		task_lock(current);
 
@@ -1595,12 +1612,6 @@ asmlinkage long sys_unshare(unsigned lon
 			new_ns = ns;
 		}
 
-		if (new_sigh) {
-			sigh = current->sighand;
-			rcu_assign_pointer(current->sighand, new_sigh);
-			new_sigh = sigh;
-		}
-
 		if (new_mm) {
 			mm = current->mm;
 			active_mm = current->active_mm;

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH] unshare: Error if passed unsupported flags
  2006-03-17 20:56     ` Andrew Morton
  2006-03-18 13:12       ` Oleg Nesterov
  2006-03-18 13:13       ` [PATCH] implement unshare(CLONE_SIGHAND) for single-thread case Oleg Nesterov
@ 2006-03-18 15:10       ` Eric W. Biederman
  2006-03-18 15:33         ` Janak Desai
  2006-03-18 16:29       ` [PATCH] unshare: Use rcu_assign_pointer when setting sighand Janak Desai
  3 siblings, 1 reply; 24+ messages in thread
From: Eric W. Biederman @ 2006-03-18 15:10 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Oleg Nesterov, torvalds, linux-kernel, janak, viro, hch,
	mtk-manpages, ak, paulus


This patch does a bare bones trivial patch to ensure we always
get -EINVAL on the unsupported cases for sys_unshare.  If this
goes in before 2.6.16 it allows us to forward compatible with
future applications using sys_unshare.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>


---

 kernel/fork.c |    6 ++++++
 1 files changed, 6 insertions(+), 0 deletions(-)

46868b4b6ebeb9042dded68a6f6301ffe06820c9
diff --git a/kernel/fork.c b/kernel/fork.c
index 46060cb..411b10d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1535,6 +1535,12 @@ asmlinkage long sys_unshare(unsigned lon
 	struct sem_undo_list *new_ulist = NULL;
 
 	check_unshare_flags(&unshare_flags);
+       
+	/* Return -EINVAL for all unsupported flags */
+	err = -EINVAL;
+	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
+				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM))
+		goto bad_unshare_out;
 
 	if ((err = unshare_thread(unshare_flags)))
 		goto bad_unshare_out;
-- 
1.2.4.g2d33


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH] unshare: Error if passed unsupported flags
  2006-03-18 15:10       ` [PATCH] unshare: Error if passed unsupported flags Eric W. Biederman
@ 2006-03-18 15:33         ` Janak Desai
  0 siblings, 0 replies; 24+ messages in thread
From: Janak Desai @ 2006-03-18 15:33 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Andrew Morton, Oleg Nesterov, torvalds, linux-kernel, viro, hch,
	mtk-manpages, ak, paulus


Thanks Eric. I had just started to do this based on Andrew's request.

-Janak

Eric W. Biederman wrote:

>This patch does a bare bones trivial patch to ensure we always
>get -EINVAL on the unsupported cases for sys_unshare.  If this
>goes in before 2.6.16 it allows us to forward compatible with
>future applications using sys_unshare.
>
>Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
>
>
>---
>
> kernel/fork.c |    6 ++++++
> 1 files changed, 6 insertions(+), 0 deletions(-)
>
>46868b4b6ebeb9042dded68a6f6301ffe06820c9
>diff --git a/kernel/fork.c b/kernel/fork.c
>index 46060cb..411b10d 100644
>--- a/kernel/fork.c
>+++ b/kernel/fork.c
>@@ -1535,6 +1535,12 @@ asmlinkage long sys_unshare(unsigned lon
> 	struct sem_undo_list *new_ulist = NULL;
> 
> 	check_unshare_flags(&unshare_flags);
>+       
>+	/* Return -EINVAL for all unsupported flags */
>+	err = -EINVAL;
>+	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
>+				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM))
>+		goto bad_unshare_out;
> 
> 	if ((err = unshare_thread(unshare_flags)))
> 		goto bad_unshare_out;
>  
>


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand
  2006-03-18 13:12       ` Oleg Nesterov
@ 2006-03-18 15:43         ` Janak Desai
  2006-03-18 17:24           ` Oleg Nesterov
  0 siblings, 1 reply; 24+ messages in thread
From: Janak Desai @ 2006-03-18 15:43 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andrew Morton, ebiederm, torvalds, linux-kernel, viro, hch,
	mtk-manpages, ak, paulus

Oleg Nesterov wrote:

>Andrew Morton wrote:
>  
>
>>Oleg Nesterov <oleg@tv-sign.ru> wrote:
>>    
>>
>>>Isn't it better to just replace this code with
>>>'BUG_ON(new_sigh != NULL)' ?
>>>
>>>It is never executed, but totally broken, afaics.
>>>task_lock() has nothing to do with ->sighand changing.
>>>
>>>      
>>>
>>/*
>> * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
>> * supported yet
>> */
>>static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
>>
>>It's all just a place-holder at present.
>>
>>If we don't plan on ever supporting unshare(CLONE_SIGHAND) we should take
>>that code out and make it return EINVAL.  Right now.
>>
>>And because we don't presently support CLONE_SIGHAND we should return
>>EINVAL if it's set.  Right now.
>>
>>And we should change sys_unshare() to reject not-understood flags.  Right
>>now.
>>
>>If we don't do these things we'll silently break 2.6.16-back-compatibility
>>of applications which are coded for future kernels.
>>    
>>
>
>unshare_sighand() is ok, it never populates *new_sighp, it just returns
>errror code: 0 when ->sighand is not shared, EINVAL otherwise.
>
>I argued about 'if (new_sigh)' code in sys_unshare() because it lies about
>locking rules.
>
>Btw, copy_process() forbids CLONE_SIGHAND without CLONE_VM (is there a
>good reason for that?), but one can do unshare(CLONE_VM). This is odd.
>  
>
Yes, copy_process forbids cloning of signal handlers without cloning of vm.
However, it does allow cloning of vm without cloning of signal handlers. For
those processes, that are sharing vm but not signal handlers, unsharing 
of vm
is allowed.

>Oleg.
>-
>To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html
>Please read the FAQ at  http://www.tux.org/lkml/
>
>  
>


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand
  2006-03-17 20:56     ` Andrew Morton
                         ` (2 preceding siblings ...)
  2006-03-18 15:10       ` [PATCH] unshare: Error if passed unsupported flags Eric W. Biederman
@ 2006-03-18 16:29       ` Janak Desai
  3 siblings, 0 replies; 24+ messages in thread
From: Janak Desai @ 2006-03-18 16:29 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Oleg Nesterov, ebiederm, torvalds, linux-kernel, viro, hch,
	mtk-manpages, ak, paulus

Andrew Morton wrote:

>Oleg Nesterov <oleg@tv-sign.ru> wrote:
>  
>
>>"Eric W. Biederman" wrote:
>>    
>>
>>>@@ -1573,7 +1573,7 @@ asmlinkage long sys_unshare(unsigned lon
>>>
>>>                if (new_sigh) {
>>>                        sigh = current->sighand;
>>>-                       current->sighand = new_sigh;
>>>+                       rcu_assign_pointer(current->sighand, new_sigh);
>>>                        new_sigh = sigh;
>>>                }
>>>      
>>>
>>Isn't it better to just replace this code with
>>'BUG_ON(new_sigh != NULL)' ?
>>
>>It is never executed, but totally broken, afaics.
>>task_lock() has nothing to do with ->sighand changing.
>>
>>    
>>
>
>/*
> * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
> * supported yet
> */
>static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
>
>It's all just a place-holder at present.
>
>If we don't plan on ever supporting unshare(CLONE_SIGHAND) we should take
>that code out and make it return EINVAL.  Right now.
>
>And because we don't presently support CLONE_SIGHAND we should return
>EINVAL if it's set.  Right now.
>  
>
unshare does return EINVAL if signal handler unsharing is attempted by a
process that is currently sharing its signal handler with another 
process. However,
because of the interaction between signal handlers and vm, CLONE_SIGHAND
is set anytime vm unsharing is attempted. That is, if you are trying to 
unshare
vm, you must unshare signal handlers. But if they are not being shared 
in the
first place, there is no need to prevent unsharing of vm.

Therefore, unshare returns EINVAL if unsharing of signal handlers and/or vm
is attempted by a process that is sharing its signal handler.

>And we should change sys_unshare() to reject not-understood flags.  Right
>now.
>  
>
Eric just posted a patch to do this.

>If we don't do these things we'll silently break 2.6.16-back-compatibility
>of applications which are coded for future kernels.
>
>-
>To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html
>Please read the FAQ at  http://www.tux.org/lkml/
>
>  
>


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand
  2006-03-18 15:43         ` Janak Desai
@ 2006-03-18 17:24           ` Oleg Nesterov
  2006-03-18 17:41             ` [PATCH] for 2.6.16, disable unshare_vm() Oleg Nesterov
  2016-03-14 13:15             ` unshare(CLONE_VM) Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand Julian Smith
  0 siblings, 2 replies; 24+ messages in thread
From: Oleg Nesterov @ 2006-03-18 17:24 UTC (permalink / raw)
  To: Janak Desai
  Cc: Andrew Morton, ebiederm, torvalds, linux-kernel, viro, hch,
	mtk-manpages, ak, paulus

Janak Desai wrote:
> 
> Oleg Nesterov wrote:
> >
> >Btw, copy_process() forbids CLONE_SIGHAND without CLONE_VM (is there a
> >good reason for that?), but one can do unshare(CLONE_VM). This is odd.
> >
> >
> Yes, copy_process forbids cloning of signal handlers without cloning of vm.
> However, it does allow cloning of vm without cloning of signal handlers. For
> those processes, that are sharing vm but not signal handlers, unsharing
> of vm
> is allowed.

Yes, I was wrong, I missed check_unshare_flags(),

	/*
	 * If unsharing vm, must also unshare signal handlers.
	 */
	if (*flags_ptr & CLONE_VM)
		*flags_ptr |= CLONE_SIGHAND;

Looking below,

	/*
	 * If unsharing signal handlers and the task was created
	 * using CLONE_THREAD, then must unshare the thread
	 */
	if ((*flags_ptr & CLONE_SIGHAND) &&
	    (atomic_read(&current->signal->count) > 1))
		*flags_ptr |= CLONE_THREAD;

Shouldn't this be:

		*flags_ptr |= (CLONE_THREAD | CLONE_VM)

? Currently it doesn't matter, but still.


However, I stronly beleive unshare(CLONE_VM) is buggy.

sys_unshare:


		if (new_mm) {
			...
			new_mm = mm;
		}

	...

	bad_unshare_cleanup_vm:
		if (new_mm)
			mmput(new_mm);


mmput() ignores mm->core_waiters.

No?

Oleg.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH] for 2.6.16, disable unshare_vm()
  2006-03-18 17:24           ` Oleg Nesterov
@ 2006-03-18 17:41             ` Oleg Nesterov
  2006-03-18 18:10               ` Linus Torvalds
  2016-03-14 13:15             ` unshare(CLONE_VM) Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand Julian Smith
  1 sibling, 1 reply; 24+ messages in thread
From: Oleg Nesterov @ 2006-03-18 17:41 UTC (permalink / raw)
  To: Janak Desai, Andrew Morton, torvalds
  Cc: ebiederm, linux-kernel, viro, hch, mtk-manpages, ak, paulus

sys_unshare() does mmput(new_mm). This is not enough
if we have mm->core_waiters. This patch is a temporary
fix for soon to be released 2.6.16.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>

--- MM/kernel/fork.c~	2006-03-18 23:15:13.000000000 +0300
+++ MM/kernel/fork.c	2006-03-18 23:15:56.000000000 +0300
@@ -1512,9 +1512,7 @@ static int unshare_vm(unsigned long unsh
 
 	if ((unshare_flags & CLONE_VM) &&
 	    (mm && atomic_read(&mm->mm_users) > 1)) {
-		*new_mmp = dup_mm(current);
-		if (!*new_mmp)
-			return -ENOMEM;
+		return -EINVAL;
 	}
 
 	return 0;

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] for 2.6.16, disable unshare_vm()
  2006-03-18 17:41             ` [PATCH] for 2.6.16, disable unshare_vm() Oleg Nesterov
@ 2006-03-18 18:10               ` Linus Torvalds
  2006-03-18 18:29                 ` Ulrich Drepper
  2006-03-18 18:48                 ` Janak Desai
  0 siblings, 2 replies; 24+ messages in thread
From: Linus Torvalds @ 2006-03-18 18:10 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Janak Desai, Andrew Morton, ebiederm, Linux Kernel Mailing List,
	viro, hch, mtk-manpages, ak, paulus, Ulrich Drepper



On Sat, 18 Mar 2006, Oleg Nesterov wrote:
>
> sys_unshare() does mmput(new_mm). This is not enough
> if we have mm->core_waiters. This patch is a temporary
> fix for soon to be released 2.6.16.

Yes. Quick raising of hands: is there anybody out there that expects to 
use unshare(CLONE_VM) right now? One of the reasons it was integrated was 
that I thought glibc wanted it for distros. Is disabling the CLONE_VM 
unsharing going to impact that?

		Linus

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] for 2.6.16, disable unshare_vm()
  2006-03-18 18:10               ` Linus Torvalds
@ 2006-03-18 18:29                 ` Ulrich Drepper
  2006-03-18 18:48                 ` Janak Desai
  1 sibling, 0 replies; 24+ messages in thread
From: Ulrich Drepper @ 2006-03-18 18:29 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Oleg Nesterov, Janak Desai, Andrew Morton, ebiederm,
	Linux Kernel Mailing List, viro, hch, mtk-manpages, ak, paulus

[-- Attachment #1: Type: text/plain, Size: 552 bytes --]

Linus Torvalds wrote:
> Yes. Quick raising of hands: is there anybody out there that expects to 
> use unshare(CLONE_VM) right now? One of the reasons it was integrated was 
> that I thought glibc wanted it for distros. Is disabling the CLONE_VM 
> unsharing going to impact that?

I'm not planning to use unshare(CLONE_VM).  It's not needed for any
functionality planned so far.  What we (as in Red Hat) need unshare()
for now is the filesystem side.

-- 
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 253 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] for 2.6.16, disable unshare_vm()
  2006-03-18 18:10               ` Linus Torvalds
  2006-03-18 18:29                 ` Ulrich Drepper
@ 2006-03-18 18:48                 ` Janak Desai
  1 sibling, 0 replies; 24+ messages in thread
From: Janak Desai @ 2006-03-18 18:48 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Oleg Nesterov, Andrew Morton, ebiederm,
	Linux Kernel Mailing List, viro, hch, mtk-manpages, ak, paulus,
	Ulrich Drepper

Linus Torvalds wrote:

>On Sat, 18 Mar 2006, Oleg Nesterov wrote:
>  
>
>>sys_unshare() does mmput(new_mm). This is not enough
>>if we have mm->core_waiters. This patch is a temporary
>>fix for soon to be released 2.6.16.
>>    
>>
>
>Yes. Quick raising of hands: is there anybody out there that expects to 
>use unshare(CLONE_VM) right now? One of the reasons it was integrated was 
>that I thought glibc wanted it for distros. Is disabling the CLONE_VM 
>unsharing going to impact that?
>
>		Linus
>
>  
>
We are using namespace and filesystem unsharing for our common criteria
certification work and would not be impacted by disabling of vm unsharing.

-Janak

^ permalink raw reply	[flat|nested] 24+ messages in thread

* unshare(CLONE_VM) Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand
  2006-03-18 17:24           ` Oleg Nesterov
  2006-03-18 17:41             ` [PATCH] for 2.6.16, disable unshare_vm() Oleg Nesterov
@ 2016-03-14 13:15             ` Julian Smith
  2016-03-14 18:35               ` Linus Torvalds
  1 sibling, 1 reply; 24+ messages in thread
From: Julian Smith @ 2016-03-14 13:15 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Janak Desai, Andrew Morton, ebiederm, Linus Torvalds, linux-kernel

On Sat, 18 Mar 2006 20:24:51 +0300
Oleg Nesterov <oleg@tv-sign.ru> wrote:

[...]

> However, I stronly beleive unshare(CLONE_VM) is buggy.
> 
> sys_unshare:
> 
> 
> 		if (new_mm) {
> 			...
> 			new_mm = mm;
> 		}
> 
> 	...
> 
> 	bad_unshare_cleanup_vm:
> 		if (new_mm)
> 			mmput(new_mm);
> 
> 
> mmput() ignores mm->core_waiters.

Apologies for re-opening a ten-year-old thread.

I'm looking into whether it would be possible to extend the unshare
syscall to support the CLONE_VM flag with multi-threaded processes,
because this would allow us at Undo to record multi-threaded user
processes much more efficiently than at present.

We currently have to serialise threads and so suffer an N-times
slowdown when recording a process with N cpu-bound threads. But if we
could get per-thread memory permissions with unshare(CLONE_VM), we'd be
able record a multi-threaded process with almost no per-thread
slowdown.

When the unshare syscall was introduced, it seems that the
mm->core_waiters issue was the only thing that prevented CLONE_VM being
supported. Is that right, or were there other problems too?

Many thanks for any information about this.

- Julian

-- 
http://undo-software.com

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: unshare(CLONE_VM) Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand
  2016-03-14 13:15             ` unshare(CLONE_VM) Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand Julian Smith
@ 2016-03-14 18:35               ` Linus Torvalds
  0 siblings, 0 replies; 24+ messages in thread
From: Linus Torvalds @ 2016-03-14 18:35 UTC (permalink / raw)
  To: Julian Smith
  Cc: Oleg Nesterov, Janak Desai, Andrew Morton, Eric W. Biederman,
	Linux Kernel Mailing List

On Mon, Mar 14, 2016 at 6:15 AM, Julian Smith <jsmith@undo-software.com> wrote:
>
> I'm looking into whether it would be possible to extend the unshare
> syscall to support the CLONE_VM flag with multi-threaded processes,
> because this would allow us at Undo to record multi-threaded user
> processes much more efficiently than at present.

At the point where you want to unsahe the VM, you should just use a
full clone() instead.

The thing is, unsharing the VM absolutely _requires_ you to also
unshare signals and some other state too (we require that thread
groups are in the same VM, for example, but also the child tid
information etc etc).

And the whole "copy VM" case is so expensive that at that point
there's no advantage to "unshare", you might as well just do a full
clone() (perhaps still sharing file descriptors and fs state).

So while I think a

    unshare(CLONE_VM | CLONE_SIGHAND | CLONE_THREAD |
CLONE_CHILD_CLEARTID | CLONE_CHILD_SETTID);

might be possible from a technical standpoint, I'm not seeing the huge
advantage to users vs just doing something like

    clone(new_vm_function, NULL, CLONE_VFORK | CLONE_FILES | CLONE_FS
| CLONE_PARENT..);
     _exit();

(fixup details to actually work - the above is meant more as a
"something remotely like this" rather than actually equivalent)

The costs of forking and exiting a thread are almost all about just
the VM copying and tear-down, so a "unshare(CLONE_VM)" is
fundamentally not a cheap operation (and at the other range of the
spectrum: an exit of a thread where there are other sharing threads is
fundamentally quite cheap, because it just ends up decrementing
counters).

So my gut feel is that no, we really don't want unshare(CLONE_VM),
because it *is* a very complicated operation and doesn't actually
perform any better than just cloning.

And the "it is a very complicated operation" really comes not from the
fact that we can't copy the VM - we have that support already, but
because CLONE_VM really does go hand-in-hand with so many special
cases. Oleg pointed out that mm->core_waiters thing last time around,
but that just ends up being a detail: the whole VM sharing just ends
up being very central to a lot of small details..

               Linus

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2016-03-14 18:35 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-03-16 16:49 [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed Eric W. Biederman
2006-03-16 17:31 ` [PATCH] unshare: Use rcu_assign_pointer when setting sighand Eric W. Biederman
2006-03-17  6:48   ` Paul E. McKenney
2006-03-17 17:44   ` Oleg Nesterov
2006-03-17 20:56     ` Andrew Morton
2006-03-18 13:12       ` Oleg Nesterov
2006-03-18 15:43         ` Janak Desai
2006-03-18 17:24           ` Oleg Nesterov
2006-03-18 17:41             ` [PATCH] for 2.6.16, disable unshare_vm() Oleg Nesterov
2006-03-18 18:10               ` Linus Torvalds
2006-03-18 18:29                 ` Ulrich Drepper
2006-03-18 18:48                 ` Janak Desai
2016-03-14 13:15             ` unshare(CLONE_VM) Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand Julian Smith
2016-03-14 18:35               ` Linus Torvalds
2006-03-18 13:13       ` [PATCH] implement unshare(CLONE_SIGHAND) for single-thread case Oleg Nesterov
2006-03-18 15:10       ` [PATCH] unshare: Error if passed unsupported flags Eric W. Biederman
2006-03-18 15:33         ` Janak Desai
2006-03-18 16:29       ` [PATCH] unshare: Use rcu_assign_pointer when setting sighand Janak Desai
2006-03-16 19:40 ` [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed Michael Kerrisk
2006-03-16 20:33 ` Andrew Morton
2006-03-16 20:41   ` Linus Torvalds
2006-03-16 21:58     ` Eric W. Biederman
2006-03-16 22:19       ` Andrew Morton
2006-03-16 21:36   ` Janak Desai

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).