linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
@ 2006-03-16 16:49 Eric W. Biederman
  2006-03-16 17:31 ` [PATCH] unshare: Use rcu_assign_pointer when setting sighand Eric W. Biederman
                   ` (2 more replies)
  0 siblings, 3 replies; 37+ messages in thread
From: Eric W. Biederman @ 2006-03-16 16:49 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Andrew Morton, Janak Desai, Al Viro,
	Christoph Hellwig, Michael Kerrisk, Andi Kleen, Paul Mackerras,
	JANAK DESAI


Since we have not crossed the magic 2.6.16 line can we please
include this patch.  My apologies for catching this so late in the
cycle.

- Error if we are passed any flags we don't expect.

  This preserves forward compatibility so programs that use new flags that
  run on old kernels will fail instead of silently doing the wrong thing.

- Use separate defines from sys_clone.

  sys_unshare can't implement half of the clone flags under any circumstances
  and those that it does implement have subtlely different semantics than
  the clone flags.  Using a different set of flags sets the
  expectation that things will be different.

  Binary compatibility with current users of the is still maintained
  as the unshare flags and the clone flags have the same values.


Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>


---

 include/linux/sched.h |   17 +++++++++++++++++
 kernel/fork.c         |   34 +++++++++++++++++++---------------
 2 files changed, 36 insertions(+), 15 deletions(-)

1b9e67b9696f1e828ba789d3f6c8247d1171f367
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 62e6314..8e46f05 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -69,6 +69,23 @@ struct exec_domain;
 #define CLONE_KERNEL	(CLONE_FS | CLONE_FILES | CLONE_SIGHAND)
 
 /*
+ * unsharing flags: (Keep in sync with the clone flags if possible)
+ */
+#define UNSHARE_VM	0x00000100	/* stop sharing the VM between processes */
+#define UNSHARE_FS	0x00000200	/* stop sharing the fs info between processes */
+#define UNSHARE_FILES	0x00000400	/* stop sharing open files between processes */
+#define UNSHARE_SIGHAND	0x00000800	/* stop sharing signal handlers and blocked signals */
+#define UNSHARE_THREAD	0x00010000	/* stop sharing a thread group */
+#define UNSHARE_NS	0x00020000	/* stop sharing the mount namespace */
+#define UNSHARE_SYSVSEM	0x00040000	/* stop sharing system V SEM_UNDO semantics */
+
+/*
+ * Helper so sys_unshare can check if it is passed valid flags.
+ */
+#define UNSHARE_VALID	(UNSHARE_VM|UNSHARE_FS|UNSHARE_FILES|UNSHARE_SIGHAND| \
+				UNSHARE_THREAD|UNSHARE_NS|UNSHARE_SYSVSEM)
+
+/*
  * These are the constant used to fake the fixed-point load-average
  * counting. Some notes:
  *  - 11 bit fractions expand to 22 bits by the multiplies: this gives
diff --git a/kernel/fork.c b/kernel/fork.c
index ccdfbb1..d2706e9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1382,28 +1382,28 @@ static inline void check_unshare_flags(u
 	 * If unsharing a thread from a thread group, must also
 	 * unshare vm.
 	 */
-	if (*flags_ptr & CLONE_THREAD)
-		*flags_ptr |= CLONE_VM;
+	if (*flags_ptr & UNSHARE_THREAD)
+		*flags_ptr |= UNSHARE_VM;
 
 	/*
 	 * If unsharing vm, must also unshare signal handlers.
 	 */
-	if (*flags_ptr & CLONE_VM)
-		*flags_ptr |= CLONE_SIGHAND;
+	if (*flags_ptr & UNSHARE_VM)
+		*flags_ptr |= UNSHARE_SIGHAND;
 
 	/*
 	 * If unsharing signal handlers and the task was created
 	 * using CLONE_THREAD, then must unshare the thread
 	 */
-	if ((*flags_ptr & CLONE_SIGHAND) &&
+	if ((*flags_ptr & UNSHARE_SIGHAND) &&
 	    (atomic_read(&current->signal->count) > 1))
-		*flags_ptr |= CLONE_THREAD;
+		*flags_ptr |= UNSHARE_THREAD;
 
 	/*
 	 * If unsharing namespace, must also unshare filesystem information.
 	 */
-	if (*flags_ptr & CLONE_NEWNS)
-		*flags_ptr |= CLONE_FS;
+	if (*flags_ptr & UNSHARE_NS)
+		*flags_ptr |= UNSHARE_FS;
 }
 
 /*
@@ -1411,7 +1411,7 @@ static inline void check_unshare_flags(u
  */
 static int unshare_thread(unsigned long unshare_flags)
 {
-	if (unshare_flags & CLONE_THREAD)
+	if (unshare_flags & UNSHARE_THREAD)
 		return -EINVAL;
 
 	return 0;
@@ -1424,7 +1424,7 @@ static int unshare_fs(unsigned long unsh
 {
 	struct fs_struct *fs = current->fs;
 
-	if ((unshare_flags & CLONE_FS) &&
+	if ((unshare_flags & UNSHARE_FS) &&
 	    (fs && atomic_read(&fs->count) > 1)) {
 		*new_fsp = __copy_fs_struct(current->fs);
 		if (!*new_fsp)
@@ -1441,7 +1441,7 @@ static int unshare_namespace(unsigned lo
 {
 	struct namespace *ns = current->namespace;
 
-	if ((unshare_flags & CLONE_NEWNS) &&
+	if ((unshare_flags & UNSHARE_NS) &&
 	    (ns && atomic_read(&ns->count) > 1)) {
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
@@ -1462,7 +1462,7 @@ static int unshare_sighand(unsigned long
 {
 	struct sighand_struct *sigh = current->sighand;
 
-	if ((unshare_flags & CLONE_SIGHAND) &&
+	if ((unshare_flags & UNSHARE_SIGHAND) &&
 	    (sigh && atomic_read(&sigh->count) > 1))
 		return -EINVAL;
 	else
@@ -1476,7 +1476,7 @@ static int unshare_vm(unsigned long unsh
 {
 	struct mm_struct *mm = current->mm;
 
-	if ((unshare_flags & CLONE_VM) &&
+	if ((unshare_flags & UNSHARE_VM) &&
 	    (mm && atomic_read(&mm->mm_users) > 1)) {
 		*new_mmp = dup_mm(current);
 		if (!*new_mmp)
@@ -1494,7 +1494,7 @@ static int unshare_fd(unsigned long unsh
 	struct files_struct *fd = current->files;
 	int error = 0;
 
-	if ((unshare_flags & CLONE_FILES) &&
+	if ((unshare_flags & UNSHARE_FILES) &&
 	    (fd && atomic_read(&fd->count) > 1)) {
 		*new_fdp = dup_fd(fd, &error);
 		if (!*new_fdp)
@@ -1510,7 +1510,7 @@ static int unshare_fd(unsigned long unsh
  */
 static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp)
 {
-	if (unshare_flags & CLONE_SYSVSEM)
+	if (unshare_flags & UNSHARE_SYSVSEM)
 		return -EINVAL;
 
 	return 0;
@@ -1534,6 +1534,10 @@ asmlinkage long sys_unshare(unsigned lon
 	struct files_struct *fd, *new_fd = NULL;
 	struct sem_undo_list *new_ulist = NULL;
 
+	/* Only accept valid unshare flags */
+	if (unshare_flags & ~UNSHARE_VALID)
+		return -EINVAL;
+
 	check_unshare_flags(&unshare_flags);
 
 	if ((err = unshare_thread(unshare_flags)))
-- 
1.2.4.g2d33


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH] unshare: Use rcu_assign_pointer when setting sighand
  2006-03-16 16:49 [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed Eric W. Biederman
@ 2006-03-16 17:31 ` Eric W. Biederman
  2006-03-17  6:48   ` Paul E. McKenney
  2006-03-17 17:44   ` Oleg Nesterov
  2006-03-16 19:40 ` [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed Michael Kerrisk
  2006-03-16 20:33 ` Andrew Morton
  2 siblings, 2 replies; 37+ messages in thread
From: Eric W. Biederman @ 2006-03-16 17:31 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Andrew Morton, Janak Desai, Al Viro,
	Christoph Hellwig, Michael Kerrisk, Andi Kleen, Paul Mackerras,
	Oleg Nesterov


The sighand pointer only needs the rcu_read_lock on the
read side.  So only depending on task_lock protection
when setting this pointer is not enough.  We also need
a memory barrier to ensure the initialization is seen first.

Use rcu_assign_pointer as it does this for us, and clearly
documents that we are setting an rcu readable pointer.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>


---

 kernel/fork.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

f0cdb649b7140927777f4355631648b396ee235b
diff --git a/kernel/fork.c b/kernel/fork.c
index d2706e9..2f24553 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1573,7 +1573,7 @@ asmlinkage long sys_unshare(unsigned lon
 
 		if (new_sigh) {
 			sigh = current->sighand;
-			current->sighand = new_sigh;
+			rcu_assign_pointer(current->sighand, new_sigh);
 			new_sigh = sigh;
 		}
 
-- 
1.2.4.g2d33


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-16 16:49 [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed Eric W. Biederman
  2006-03-16 17:31 ` [PATCH] unshare: Use rcu_assign_pointer when setting sighand Eric W. Biederman
@ 2006-03-16 19:40 ` Michael Kerrisk
  2006-03-16 20:33 ` Andrew Morton
  2 siblings, 0 replies; 37+ messages in thread
From: Michael Kerrisk @ 2006-03-16 19:40 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: torvalds, linux-kernel, akpm, janak, viro, hch, ak, paulus, janak

Eric,

> Since we have not crossed the magic 2.6.16 line can we please
> include this patch.  My apologies for catching this so late in the
> cycle.
> 
> - Error if we are passed any flags we don't expect.
> 
>   This preserves forward compatibility so programs that use new flags 
>   that
>   run on old kernels will fail instead of silently doing the wrong thing.
> 
> - Use separate defines from sys_clone.
> 
>   sys_unshare can't implement half of the clone flags under any
> circumstances
>   and those that it does implement have subtlely different semantics than
>   the clone flags.  Using a different set of flags sets the
>   expectation that things will be different.
> 
>   Binary compatibility with current users of the is still maintained
>   as the unshare flags and the clone flags have the same values.

Thanks for this.  I had begun to think I must simply be dense
for thinking there was a problem here...

I will update my draft manual page accordingly.

Cheers,

Michael

-- 
Michael Kerrisk
maintainer of Linux man pages Sections 2, 3, 4, 5, and 7 

Want to help with man page maintenance?  
Grab the latest tarball at
ftp://ftp.win.tue.nl/pub/linux-local/manpages/, 
read the HOWTOHELP file and grep the source 
files for 'FIXME'.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-16 16:49 [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed Eric W. Biederman
  2006-03-16 17:31 ` [PATCH] unshare: Use rcu_assign_pointer when setting sighand Eric W. Biederman
  2006-03-16 19:40 ` [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed Michael Kerrisk
@ 2006-03-16 20:33 ` Andrew Morton
  2006-03-16 20:41   ` Linus Torvalds
  2006-03-16 21:36   ` Janak Desai
  2 siblings, 2 replies; 37+ messages in thread
From: Andrew Morton @ 2006-03-16 20:33 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: torvalds, linux-kernel, janak, viro, hch, mtk-manpages, ak, paulus

ebiederm@xmission.com (Eric W. Biederman) wrote:
>
> Since we have not crossed the magic 2.6.16 line can we please
>  include this patch.  My apologies for catching this so late in the
>  cycle.
> 
>  - Error if we are passed any flags we don't expect.
> 
>    This preserves forward compatibility so programs that use new flags that
>    run on old kernels will fail instead of silently doing the wrong thing.

Makes sense.

>  - Use separate defines from sys_clone.
> 
>    sys_unshare can't implement half of the clone flags under any circumstances
>    and those that it does implement have subtlely different semantics than
>    the clone flags.  Using a different set of flags sets the
>    expectation that things will be different.

iirc there was some discussion about this and it was explicitly decided to
keep the CLONE flags.

Maybe Janak or Linus can comment?

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-16 20:33 ` Andrew Morton
@ 2006-03-16 20:41   ` Linus Torvalds
  2006-03-16 21:58     ` Eric W. Biederman
  2006-03-16 21:36   ` Janak Desai
  1 sibling, 1 reply; 37+ messages in thread
From: Linus Torvalds @ 2006-03-16 20:41 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Eric W. Biederman, linux-kernel, janak, viro, hch, mtk-manpages,
	ak, paulus



On Thu, 16 Mar 2006, Andrew Morton wrote:
> 
> iirc there was some discussion about this and it was explicitly decided to
> keep the CLONE flags.
> 
> Maybe Janak or Linus can comment?

My personal opinion is that having a different set of flags is more 
confusing and likely to result in problems later than having the same 
ones. Regardless, I'm not touching this for 2.6.16 any more, 

		Linus

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-16 20:33 ` Andrew Morton
  2006-03-16 20:41   ` Linus Torvalds
@ 2006-03-16 21:36   ` Janak Desai
  1 sibling, 0 replies; 37+ messages in thread
From: Janak Desai @ 2006-03-16 21:36 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Eric W. Biederman, torvalds, linux-kernel, viro, hch,
	mtk-manpages, ak, paulus

Andrew Morton wrote:

>ebiederm@xmission.com (Eric W. Biederman) wrote:
>  
>
>>Since we have not crossed the magic 2.6.16 line can we please
>> include this patch.  My apologies for catching this so late in the
>> cycle.
>>
>> - Error if we are passed any flags we don't expect.
>>
>>   This preserves forward compatibility so programs that use new flags that
>>   run on old kernels will fail instead of silently doing the wrong thing.
>>    
>>
>
>Makes sense.
>
>  
>
>> - Use separate defines from sys_clone.
>>
>>   sys_unshare can't implement half of the clone flags under any circumstances
>>   and those that it does implement have subtlely different semantics than
>>   the clone flags.  Using a different set of flags sets the
>>   expectation that things will be different.
>>    
>>
>
>iirc there was some discussion about this and it was explicitly decided to
>keep the CLONE flags.
>
>Maybe Janak or Linus can comment?
>
>  
>
In the two prior discussions on this, the disagreement was on how much 
confusion
(if any) the use of CLONE_* flags would generate. I personally did not 
think that
it was confusing enough to add new flags, with the same values as CLONE_*
flags, in the kernel. Linus's last email (3/1/06) on the subject seemed 
to lean in that
direction as well. That's why I didn't take any action on it.

-Janak


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-16 20:41   ` Linus Torvalds
@ 2006-03-16 21:58     ` Eric W. Biederman
  2006-03-16 22:19       ` Andrew Morton
  0 siblings, 1 reply; 37+ messages in thread
From: Eric W. Biederman @ 2006-03-16 21:58 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andrew Morton, linux-kernel, janak, viro, hch, mtk-manpages, ak, paulus

Linus Torvalds <torvalds@osdl.org> writes:

> On Thu, 16 Mar 2006, Andrew Morton wrote:
>> 
>> iirc there was some discussion about this and it was explicitly decided to
>> keep the CLONE flags.
>> 
>> Maybe Janak or Linus can comment?
>
> My personal opinion is that having a different set of flags is more 
> confusing and likely to result in problems later than having the same 
> ones. Regardless, I'm not touching this for 2.6.16 any more, 

I am actually a lot more concerned with the fact that we don't test
for invalid bits.  So we have an ABI that will change in the future,
and that doesn't allow us to have a program that runs on old and new
kernels.

I guess I can resend some version of my patch after 2.6.16 is out and
break the ABI for the undefined bits then.  Correct programs shouldn't
care.  But it sure would be nice if they could care.

Eric

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-16 21:58     ` Eric W. Biederman
@ 2006-03-16 22:19       ` Andrew Morton
  0 siblings, 0 replies; 37+ messages in thread
From: Andrew Morton @ 2006-03-16 22:19 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: torvalds, linux-kernel, janak, viro, hch, mtk-manpages, ak, paulus

ebiederm@xmission.com (Eric W. Biederman) wrote:
>
> Linus Torvalds <torvalds@osdl.org> writes:
> 
> > On Thu, 16 Mar 2006, Andrew Morton wrote:
> >> 
> >> iirc there was some discussion about this and it was explicitly decided to
> >> keep the CLONE flags.
> >> 
> >> Maybe Janak or Linus can comment?
> >
> > My personal opinion is that having a different set of flags is more 
> > confusing and likely to result in problems later than having the same 
> > ones. Regardless, I'm not touching this for 2.6.16 any more, 
> 
> I am actually a lot more concerned with the fact that we don't test
> for invalid bits.  So we have an ABI that will change in the future,
> and that doesn't allow us to have a program that runs on old and new
> kernels.

The risk of breaking things is small - it would require someone to write a
sys_unshare-using app which a) they care about and b) has a particular bug
in it.  But yes, we should check.

> I guess I can resend some version of my patch after 2.6.16 is out and
> break the ABI for the undefined bits then.  Correct programs shouldn't
> care.  But it sure would be nice if they could care.
> 

Your single patch did two different things - there's a lesson here ;)

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand
  2006-03-16 17:31 ` [PATCH] unshare: Use rcu_assign_pointer when setting sighand Eric W. Biederman
@ 2006-03-17  6:48   ` Paul E. McKenney
  2006-03-17 17:44   ` Oleg Nesterov
  1 sibling, 0 replies; 37+ messages in thread
From: Paul E. McKenney @ 2006-03-17  6:48 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linus Torvalds, linux-kernel, Andrew Morton, Janak Desai,
	Al Viro, Christoph Hellwig, Michael Kerrisk, Andi Kleen,
	Paul Mackerras, Oleg Nesterov

On Thu, Mar 16, 2006 at 10:31:38AM -0700, Eric W. Biederman wrote:
> 
> The sighand pointer only needs the rcu_read_lock on the
> read side.  So only depending on task_lock protection
> when setting this pointer is not enough.  We also need
> a memory barrier to ensure the initialization is seen first.
> 
> Use rcu_assign_pointer as it does this for us, and clearly
> documents that we are setting an rcu readable pointer.

Good catch!

Acked-by: Paul E. McKenney <paulmck@us.ibm.com>
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
> 
> 
> ---
> 
>  kernel/fork.c |    2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
> 
> f0cdb649b7140927777f4355631648b396ee235b
> diff --git a/kernel/fork.c b/kernel/fork.c
> index d2706e9..2f24553 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1573,7 +1573,7 @@ asmlinkage long sys_unshare(unsigned lon
>  
>  		if (new_sigh) {
>  			sigh = current->sighand;
> -			current->sighand = new_sigh;
> +			rcu_assign_pointer(current->sighand, new_sigh);
>  			new_sigh = sigh;
>  		}
>  
> -- 
> 1.2.4.g2d33
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand
  2006-03-16 17:31 ` [PATCH] unshare: Use rcu_assign_pointer when setting sighand Eric W. Biederman
  2006-03-17  6:48   ` Paul E. McKenney
@ 2006-03-17 17:44   ` Oleg Nesterov
  2006-03-17 20:56     ` Andrew Morton
  1 sibling, 1 reply; 37+ messages in thread
From: Oleg Nesterov @ 2006-03-17 17:44 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linus Torvalds, linux-kernel, Andrew Morton, Janak Desai,
	Al Viro, Christoph Hellwig, Michael Kerrisk, Andi Kleen,
	Paul Mackerras

"Eric W. Biederman" wrote:
> 
> @@ -1573,7 +1573,7 @@ asmlinkage long sys_unshare(unsigned lon
> 
>                 if (new_sigh) {
>                         sigh = current->sighand;
> -                       current->sighand = new_sigh;
> +                       rcu_assign_pointer(current->sighand, new_sigh);
>                         new_sigh = sigh;
>                 }

Isn't it better to just replace this code with
'BUG_ON(new_sigh != NULL)' ?

It is never executed, but totally broken, afaics.
task_lock() has nothing to do with ->sighand changing.

Oleg.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand
  2006-03-17 17:44   ` Oleg Nesterov
@ 2006-03-17 20:56     ` Andrew Morton
  2006-03-18 13:12       ` Oleg Nesterov
                         ` (3 more replies)
  0 siblings, 4 replies; 37+ messages in thread
From: Andrew Morton @ 2006-03-17 20:56 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: ebiederm, torvalds, linux-kernel, janak, viro, hch, mtk-manpages,
	ak, paulus

Oleg Nesterov <oleg@tv-sign.ru> wrote:
>
> "Eric W. Biederman" wrote:
> > 
> > @@ -1573,7 +1573,7 @@ asmlinkage long sys_unshare(unsigned lon
> > 
> >                 if (new_sigh) {
> >                         sigh = current->sighand;
> > -                       current->sighand = new_sigh;
> > +                       rcu_assign_pointer(current->sighand, new_sigh);
> >                         new_sigh = sigh;
> >                 }
> 
> Isn't it better to just replace this code with
> 'BUG_ON(new_sigh != NULL)' ?
> 
> It is never executed, but totally broken, afaics.
> task_lock() has nothing to do with ->sighand changing.
> 

/*
 * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
 * supported yet
 */
static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)

It's all just a place-holder at present.

If we don't plan on ever supporting unshare(CLONE_SIGHAND) we should take
that code out and make it return EINVAL.  Right now.

And because we don't presently support CLONE_SIGHAND we should return
EINVAL if it's set.  Right now.

And we should change sys_unshare() to reject not-understood flags.  Right
now.

If we don't do these things we'll silently break 2.6.16-back-compatibility
of applications which are coded for future kernels.


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand
  2006-03-17 20:56     ` Andrew Morton
@ 2006-03-18 13:12       ` Oleg Nesterov
  2006-03-18 15:43         ` Janak Desai
  2006-03-18 13:13       ` [PATCH] implement unshare(CLONE_SIGHAND) for single-thread case Oleg Nesterov
                         ` (2 subsequent siblings)
  3 siblings, 1 reply; 37+ messages in thread
From: Oleg Nesterov @ 2006-03-18 13:12 UTC (permalink / raw)
  To: Andrew Morton
  Cc: ebiederm, torvalds, linux-kernel, janak, viro, hch, mtk-manpages,
	ak, paulus

Andrew Morton wrote:
> 
> Oleg Nesterov <oleg@tv-sign.ru> wrote:
> >
> > Isn't it better to just replace this code with
> > 'BUG_ON(new_sigh != NULL)' ?
> >
> > It is never executed, but totally broken, afaics.
> > task_lock() has nothing to do with ->sighand changing.
> >
> 
> /*
>  * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
>  * supported yet
>  */
> static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
> 
> It's all just a place-holder at present.
> 
> If we don't plan on ever supporting unshare(CLONE_SIGHAND) we should take
> that code out and make it return EINVAL.  Right now.
> 
> And because we don't presently support CLONE_SIGHAND we should return
> EINVAL if it's set.  Right now.
> 
> And we should change sys_unshare() to reject not-understood flags.  Right
> now.
> 
> If we don't do these things we'll silently break 2.6.16-back-compatibility
> of applications which are coded for future kernels.

unshare_sighand() is ok, it never populates *new_sighp, it just returns
errror code: 0 when ->sighand is not shared, EINVAL otherwise.

I argued about 'if (new_sigh)' code in sys_unshare() because it lies about
locking rules.

Btw, copy_process() forbids CLONE_SIGHAND without CLONE_VM (is there a
good reason for that?), but one can do unshare(CLONE_VM). This is odd.

Oleg.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH] implement unshare(CLONE_SIGHAND) for single-thread case
  2006-03-17 20:56     ` Andrew Morton
  2006-03-18 13:12       ` Oleg Nesterov
@ 2006-03-18 13:13       ` Oleg Nesterov
  2006-03-18 15:10       ` [PATCH] unshare: Error if passed unsupported flags Eric W. Biederman
  2006-03-18 16:29       ` [PATCH] unshare: Use rcu_assign_pointer when setting sighand Janak Desai
  3 siblings, 0 replies; 37+ messages in thread
From: Oleg Nesterov @ 2006-03-18 13:13 UTC (permalink / raw)
  To: Andrew Morton
  Cc: ebiederm, torvalds, linux-kernel, janak, viro, hch, mtk-manpages,
	ak, paulus

on top of (already merged)
	unshare-use-rcu_assign_pointer-when-setting-sighand.patch
depends on
	convert-sighand_cache-to-use-slab_destroy_by_rcu.patch

Implement unshare(CLONE_SIGHAND) for single-thread case.

I think it is not possible to do it for multithread case without
complication of copy_process().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>

--- MM/kernel/fork.c~	2006-03-18 16:23:23.000000000 +0300
+++ MM/kernel/fork.c	2006-03-18 17:00:39.000000000 +0300
@@ -1483,18 +1483,24 @@ static int unshare_namespace(unsigned lo
 }
 
 /*
- * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
- * supported yet
+ * Unsharing of sighand is only supported for single-thread applications
  */
 static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
 {
-	struct sighand_struct *sigh = current->sighand;
-
 	if ((unshare_flags & CLONE_SIGHAND) &&
-	    (sigh && atomic_read(&sigh->count) > 1))
-		return -EINVAL;
-	else
-		return 0;
+			atomic_read(&current->sighand->count) > 1) {
+
+		if (!thread_group_empty(current))
+			return -EINVAL;
+
+		*new_sighp = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
+		if (unlikely(*new_sighp == NULL))
+			return -ENOMEM;
+
+		atomic_set(&(*new_sighp)->count, 1);
+	}
+
+	return 0;
 }
 
 /*
@@ -1579,7 +1585,18 @@ asmlinkage long sys_unshare(unsigned lon
 	if ((err = unshare_semundo(unshare_flags, &new_ulist)))
 		goto bad_unshare_cleanup_fd;
 
-	if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) {
+	if (new_sigh) {
+		sigh = current->sighand;
+		write_lock_irq(&tasklist_lock);
+		spin_lock(&sigh->siglock);
+		memcpy(new_sigh->action, sigh->action, sizeof(sigh->action));
+		rcu_assign_pointer(current->sighand, new_sigh);
+		spin_unlock(&sigh->siglock);
+		write_unlock_irq(&tasklist_lock);
+		new_sigh = sigh;
+	}
+
+	if (new_fs || new_ns || new_mm || new_fd || new_ulist) {
 
 		task_lock(current);
 
@@ -1595,12 +1612,6 @@ asmlinkage long sys_unshare(unsigned lon
 			new_ns = ns;
 		}
 
-		if (new_sigh) {
-			sigh = current->sighand;
-			rcu_assign_pointer(current->sighand, new_sigh);
-			new_sigh = sigh;
-		}
-
 		if (new_mm) {
 			mm = current->mm;
 			active_mm = current->active_mm;

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH] unshare: Error if passed unsupported flags
  2006-03-17 20:56     ` Andrew Morton
  2006-03-18 13:12       ` Oleg Nesterov
  2006-03-18 13:13       ` [PATCH] implement unshare(CLONE_SIGHAND) for single-thread case Oleg Nesterov
@ 2006-03-18 15:10       ` Eric W. Biederman
  2006-03-18 15:33         ` Janak Desai
  2006-03-18 16:29       ` [PATCH] unshare: Use rcu_assign_pointer when setting sighand Janak Desai
  3 siblings, 1 reply; 37+ messages in thread
From: Eric W. Biederman @ 2006-03-18 15:10 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Oleg Nesterov, torvalds, linux-kernel, janak, viro, hch,
	mtk-manpages, ak, paulus


This patch does a bare bones trivial patch to ensure we always
get -EINVAL on the unsupported cases for sys_unshare.  If this
goes in before 2.6.16 it allows us to forward compatible with
future applications using sys_unshare.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>


---

 kernel/fork.c |    6 ++++++
 1 files changed, 6 insertions(+), 0 deletions(-)

46868b4b6ebeb9042dded68a6f6301ffe06820c9
diff --git a/kernel/fork.c b/kernel/fork.c
index 46060cb..411b10d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1535,6 +1535,12 @@ asmlinkage long sys_unshare(unsigned lon
 	struct sem_undo_list *new_ulist = NULL;
 
 	check_unshare_flags(&unshare_flags);
+       
+	/* Return -EINVAL for all unsupported flags */
+	err = -EINVAL;
+	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
+				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM))
+		goto bad_unshare_out;
 
 	if ((err = unshare_thread(unshare_flags)))
 		goto bad_unshare_out;
-- 
1.2.4.g2d33


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Error if passed unsupported flags
  2006-03-18 15:10       ` [PATCH] unshare: Error if passed unsupported flags Eric W. Biederman
@ 2006-03-18 15:33         ` Janak Desai
  0 siblings, 0 replies; 37+ messages in thread
From: Janak Desai @ 2006-03-18 15:33 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Andrew Morton, Oleg Nesterov, torvalds, linux-kernel, viro, hch,
	mtk-manpages, ak, paulus


Thanks Eric. I had just started to do this based on Andrew's request.

-Janak

Eric W. Biederman wrote:

>This patch does a bare bones trivial patch to ensure we always
>get -EINVAL on the unsupported cases for sys_unshare.  If this
>goes in before 2.6.16 it allows us to forward compatible with
>future applications using sys_unshare.
>
>Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
>
>
>---
>
> kernel/fork.c |    6 ++++++
> 1 files changed, 6 insertions(+), 0 deletions(-)
>
>46868b4b6ebeb9042dded68a6f6301ffe06820c9
>diff --git a/kernel/fork.c b/kernel/fork.c
>index 46060cb..411b10d 100644
>--- a/kernel/fork.c
>+++ b/kernel/fork.c
>@@ -1535,6 +1535,12 @@ asmlinkage long sys_unshare(unsigned lon
> 	struct sem_undo_list *new_ulist = NULL;
> 
> 	check_unshare_flags(&unshare_flags);
>+       
>+	/* Return -EINVAL for all unsupported flags */
>+	err = -EINVAL;
>+	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
>+				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM))
>+		goto bad_unshare_out;
> 
> 	if ((err = unshare_thread(unshare_flags)))
> 		goto bad_unshare_out;
>  
>


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand
  2006-03-18 13:12       ` Oleg Nesterov
@ 2006-03-18 15:43         ` Janak Desai
  2006-03-18 17:24           ` Oleg Nesterov
  0 siblings, 1 reply; 37+ messages in thread
From: Janak Desai @ 2006-03-18 15:43 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andrew Morton, ebiederm, torvalds, linux-kernel, viro, hch,
	mtk-manpages, ak, paulus

Oleg Nesterov wrote:

>Andrew Morton wrote:
>  
>
>>Oleg Nesterov <oleg@tv-sign.ru> wrote:
>>    
>>
>>>Isn't it better to just replace this code with
>>>'BUG_ON(new_sigh != NULL)' ?
>>>
>>>It is never executed, but totally broken, afaics.
>>>task_lock() has nothing to do with ->sighand changing.
>>>
>>>      
>>>
>>/*
>> * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
>> * supported yet
>> */
>>static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
>>
>>It's all just a place-holder at present.
>>
>>If we don't plan on ever supporting unshare(CLONE_SIGHAND) we should take
>>that code out and make it return EINVAL.  Right now.
>>
>>And because we don't presently support CLONE_SIGHAND we should return
>>EINVAL if it's set.  Right now.
>>
>>And we should change sys_unshare() to reject not-understood flags.  Right
>>now.
>>
>>If we don't do these things we'll silently break 2.6.16-back-compatibility
>>of applications which are coded for future kernels.
>>    
>>
>
>unshare_sighand() is ok, it never populates *new_sighp, it just returns
>errror code: 0 when ->sighand is not shared, EINVAL otherwise.
>
>I argued about 'if (new_sigh)' code in sys_unshare() because it lies about
>locking rules.
>
>Btw, copy_process() forbids CLONE_SIGHAND without CLONE_VM (is there a
>good reason for that?), but one can do unshare(CLONE_VM). This is odd.
>  
>
Yes, copy_process forbids cloning of signal handlers without cloning of vm.
However, it does allow cloning of vm without cloning of signal handlers. For
those processes, that are sharing vm but not signal handlers, unsharing 
of vm
is allowed.

>Oleg.
>-
>To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html
>Please read the FAQ at  http://www.tux.org/lkml/
>
>  
>


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand
  2006-03-17 20:56     ` Andrew Morton
                         ` (2 preceding siblings ...)
  2006-03-18 15:10       ` [PATCH] unshare: Error if passed unsupported flags Eric W. Biederman
@ 2006-03-18 16:29       ` Janak Desai
  3 siblings, 0 replies; 37+ messages in thread
From: Janak Desai @ 2006-03-18 16:29 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Oleg Nesterov, ebiederm, torvalds, linux-kernel, viro, hch,
	mtk-manpages, ak, paulus

Andrew Morton wrote:

>Oleg Nesterov <oleg@tv-sign.ru> wrote:
>  
>
>>"Eric W. Biederman" wrote:
>>    
>>
>>>@@ -1573,7 +1573,7 @@ asmlinkage long sys_unshare(unsigned lon
>>>
>>>                if (new_sigh) {
>>>                        sigh = current->sighand;
>>>-                       current->sighand = new_sigh;
>>>+                       rcu_assign_pointer(current->sighand, new_sigh);
>>>                        new_sigh = sigh;
>>>                }
>>>      
>>>
>>Isn't it better to just replace this code with
>>'BUG_ON(new_sigh != NULL)' ?
>>
>>It is never executed, but totally broken, afaics.
>>task_lock() has nothing to do with ->sighand changing.
>>
>>    
>>
>
>/*
> * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
> * supported yet
> */
>static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
>
>It's all just a place-holder at present.
>
>If we don't plan on ever supporting unshare(CLONE_SIGHAND) we should take
>that code out and make it return EINVAL.  Right now.
>
>And because we don't presently support CLONE_SIGHAND we should return
>EINVAL if it's set.  Right now.
>  
>
unshare does return EINVAL if signal handler unsharing is attempted by a
process that is currently sharing its signal handler with another 
process. However,
because of the interaction between signal handlers and vm, CLONE_SIGHAND
is set anytime vm unsharing is attempted. That is, if you are trying to 
unshare
vm, you must unshare signal handlers. But if they are not being shared 
in the
first place, there is no need to prevent unsharing of vm.

Therefore, unshare returns EINVAL if unsharing of signal handlers and/or vm
is attempted by a process that is sharing its signal handler.

>And we should change sys_unshare() to reject not-understood flags.  Right
>now.
>  
>
Eric just posted a patch to do this.

>If we don't do these things we'll silently break 2.6.16-back-compatibility
>of applications which are coded for future kernels.
>
>-
>To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html
>Please read the FAQ at  http://www.tux.org/lkml/
>
>  
>


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand
  2006-03-18 15:43         ` Janak Desai
@ 2006-03-18 17:24           ` Oleg Nesterov
  2006-03-18 17:41             ` [PATCH] for 2.6.16, disable unshare_vm() Oleg Nesterov
  2016-03-14 13:15             ` unshare(CLONE_VM) Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand Julian Smith
  0 siblings, 2 replies; 37+ messages in thread
From: Oleg Nesterov @ 2006-03-18 17:24 UTC (permalink / raw)
  To: Janak Desai
  Cc: Andrew Morton, ebiederm, torvalds, linux-kernel, viro, hch,
	mtk-manpages, ak, paulus

Janak Desai wrote:
> 
> Oleg Nesterov wrote:
> >
> >Btw, copy_process() forbids CLONE_SIGHAND without CLONE_VM (is there a
> >good reason for that?), but one can do unshare(CLONE_VM). This is odd.
> >
> >
> Yes, copy_process forbids cloning of signal handlers without cloning of vm.
> However, it does allow cloning of vm without cloning of signal handlers. For
> those processes, that are sharing vm but not signal handlers, unsharing
> of vm
> is allowed.

Yes, I was wrong, I missed check_unshare_flags(),

	/*
	 * If unsharing vm, must also unshare signal handlers.
	 */
	if (*flags_ptr & CLONE_VM)
		*flags_ptr |= CLONE_SIGHAND;

Looking below,

	/*
	 * If unsharing signal handlers and the task was created
	 * using CLONE_THREAD, then must unshare the thread
	 */
	if ((*flags_ptr & CLONE_SIGHAND) &&
	    (atomic_read(&current->signal->count) > 1))
		*flags_ptr |= CLONE_THREAD;

Shouldn't this be:

		*flags_ptr |= (CLONE_THREAD | CLONE_VM)

? Currently it doesn't matter, but still.


However, I stronly beleive unshare(CLONE_VM) is buggy.

sys_unshare:


		if (new_mm) {
			...
			new_mm = mm;
		}

	...

	bad_unshare_cleanup_vm:
		if (new_mm)
			mmput(new_mm);


mmput() ignores mm->core_waiters.

No?

Oleg.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH] for 2.6.16, disable unshare_vm()
  2006-03-18 17:24           ` Oleg Nesterov
@ 2006-03-18 17:41             ` Oleg Nesterov
  2006-03-18 18:10               ` Linus Torvalds
  2016-03-14 13:15             ` unshare(CLONE_VM) Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand Julian Smith
  1 sibling, 1 reply; 37+ messages in thread
From: Oleg Nesterov @ 2006-03-18 17:41 UTC (permalink / raw)
  To: Janak Desai, Andrew Morton, torvalds
  Cc: ebiederm, linux-kernel, viro, hch, mtk-manpages, ak, paulus

sys_unshare() does mmput(new_mm). This is not enough
if we have mm->core_waiters. This patch is a temporary
fix for soon to be released 2.6.16.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>

--- MM/kernel/fork.c~	2006-03-18 23:15:13.000000000 +0300
+++ MM/kernel/fork.c	2006-03-18 23:15:56.000000000 +0300
@@ -1512,9 +1512,7 @@ static int unshare_vm(unsigned long unsh
 
 	if ((unshare_flags & CLONE_VM) &&
 	    (mm && atomic_read(&mm->mm_users) > 1)) {
-		*new_mmp = dup_mm(current);
-		if (!*new_mmp)
-			return -ENOMEM;
+		return -EINVAL;
 	}
 
 	return 0;

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] for 2.6.16, disable unshare_vm()
  2006-03-18 17:41             ` [PATCH] for 2.6.16, disable unshare_vm() Oleg Nesterov
@ 2006-03-18 18:10               ` Linus Torvalds
  2006-03-18 18:29                 ` Ulrich Drepper
  2006-03-18 18:48                 ` Janak Desai
  0 siblings, 2 replies; 37+ messages in thread
From: Linus Torvalds @ 2006-03-18 18:10 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Janak Desai, Andrew Morton, ebiederm, Linux Kernel Mailing List,
	viro, hch, mtk-manpages, ak, paulus, Ulrich Drepper



On Sat, 18 Mar 2006, Oleg Nesterov wrote:
>
> sys_unshare() does mmput(new_mm). This is not enough
> if we have mm->core_waiters. This patch is a temporary
> fix for soon to be released 2.6.16.

Yes. Quick raising of hands: is there anybody out there that expects to 
use unshare(CLONE_VM) right now? One of the reasons it was integrated was 
that I thought glibc wanted it for distros. Is disabling the CLONE_VM 
unsharing going to impact that?

		Linus

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] for 2.6.16, disable unshare_vm()
  2006-03-18 18:10               ` Linus Torvalds
@ 2006-03-18 18:29                 ` Ulrich Drepper
  2006-03-18 18:48                 ` Janak Desai
  1 sibling, 0 replies; 37+ messages in thread
From: Ulrich Drepper @ 2006-03-18 18:29 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Oleg Nesterov, Janak Desai, Andrew Morton, ebiederm,
	Linux Kernel Mailing List, viro, hch, mtk-manpages, ak, paulus

[-- Attachment #1: Type: text/plain, Size: 552 bytes --]

Linus Torvalds wrote:
> Yes. Quick raising of hands: is there anybody out there that expects to 
> use unshare(CLONE_VM) right now? One of the reasons it was integrated was 
> that I thought glibc wanted it for distros. Is disabling the CLONE_VM 
> unsharing going to impact that?

I'm not planning to use unshare(CLONE_VM).  It's not needed for any
functionality planned so far.  What we (as in Red Hat) need unshare()
for now is the filesystem side.

-- 
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 253 bytes --]

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] for 2.6.16, disable unshare_vm()
  2006-03-18 18:10               ` Linus Torvalds
  2006-03-18 18:29                 ` Ulrich Drepper
@ 2006-03-18 18:48                 ` Janak Desai
  1 sibling, 0 replies; 37+ messages in thread
From: Janak Desai @ 2006-03-18 18:48 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Oleg Nesterov, Andrew Morton, ebiederm,
	Linux Kernel Mailing List, viro, hch, mtk-manpages, ak, paulus,
	Ulrich Drepper

Linus Torvalds wrote:

>On Sat, 18 Mar 2006, Oleg Nesterov wrote:
>  
>
>>sys_unshare() does mmput(new_mm). This is not enough
>>if we have mm->core_waiters. This patch is a temporary
>>fix for soon to be released 2.6.16.
>>    
>>
>
>Yes. Quick raising of hands: is there anybody out there that expects to 
>use unshare(CLONE_VM) right now? One of the reasons it was integrated was 
>that I thought glibc wanted it for distros. Is disabling the CLONE_VM 
>unsharing going to impact that?
>
>		Linus
>
>  
>
We are using namespace and filesystem unsharing for our common criteria
certification work and would not be impacted by disabling of vm unsharing.

-Janak

^ permalink raw reply	[flat|nested] 37+ messages in thread

* unshare(CLONE_VM) Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand
  2006-03-18 17:24           ` Oleg Nesterov
  2006-03-18 17:41             ` [PATCH] for 2.6.16, disable unshare_vm() Oleg Nesterov
@ 2016-03-14 13:15             ` Julian Smith
  2016-03-14 18:35               ` Linus Torvalds
  1 sibling, 1 reply; 37+ messages in thread
From: Julian Smith @ 2016-03-14 13:15 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Janak Desai, Andrew Morton, ebiederm, Linus Torvalds, linux-kernel

On Sat, 18 Mar 2006 20:24:51 +0300
Oleg Nesterov <oleg@tv-sign.ru> wrote:

[...]

> However, I stronly beleive unshare(CLONE_VM) is buggy.
> 
> sys_unshare:
> 
> 
> 		if (new_mm) {
> 			...
> 			new_mm = mm;
> 		}
> 
> 	...
> 
> 	bad_unshare_cleanup_vm:
> 		if (new_mm)
> 			mmput(new_mm);
> 
> 
> mmput() ignores mm->core_waiters.

Apologies for re-opening a ten-year-old thread.

I'm looking into whether it would be possible to extend the unshare
syscall to support the CLONE_VM flag with multi-threaded processes,
because this would allow us at Undo to record multi-threaded user
processes much more efficiently than at present.

We currently have to serialise threads and so suffer an N-times
slowdown when recording a process with N cpu-bound threads. But if we
could get per-thread memory permissions with unshare(CLONE_VM), we'd be
able record a multi-threaded process with almost no per-thread
slowdown.

When the unshare syscall was introduced, it seems that the
mm->core_waiters issue was the only thing that prevented CLONE_VM being
supported. Is that right, or were there other problems too?

Many thanks for any information about this.

- Julian

-- 
http://undo-software.com

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: unshare(CLONE_VM) Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand
  2016-03-14 13:15             ` unshare(CLONE_VM) Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand Julian Smith
@ 2016-03-14 18:35               ` Linus Torvalds
  0 siblings, 0 replies; 37+ messages in thread
From: Linus Torvalds @ 2016-03-14 18:35 UTC (permalink / raw)
  To: Julian Smith
  Cc: Oleg Nesterov, Janak Desai, Andrew Morton, Eric W. Biederman,
	Linux Kernel Mailing List

On Mon, Mar 14, 2016 at 6:15 AM, Julian Smith <jsmith@undo-software.com> wrote:
>
> I'm looking into whether it would be possible to extend the unshare
> syscall to support the CLONE_VM flag with multi-threaded processes,
> because this would allow us at Undo to record multi-threaded user
> processes much more efficiently than at present.

At the point where you want to unsahe the VM, you should just use a
full clone() instead.

The thing is, unsharing the VM absolutely _requires_ you to also
unshare signals and some other state too (we require that thread
groups are in the same VM, for example, but also the child tid
information etc etc).

And the whole "copy VM" case is so expensive that at that point
there's no advantage to "unshare", you might as well just do a full
clone() (perhaps still sharing file descriptors and fs state).

So while I think a

    unshare(CLONE_VM | CLONE_SIGHAND | CLONE_THREAD |
CLONE_CHILD_CLEARTID | CLONE_CHILD_SETTID);

might be possible from a technical standpoint, I'm not seeing the huge
advantage to users vs just doing something like

    clone(new_vm_function, NULL, CLONE_VFORK | CLONE_FILES | CLONE_FS
| CLONE_PARENT..);
     _exit();

(fixup details to actually work - the above is meant more as a
"something remotely like this" rather than actually equivalent)

The costs of forking and exiting a thread are almost all about just
the VM copying and tear-down, so a "unshare(CLONE_VM)" is
fundamentally not a cheap operation (and at the other range of the
spectrum: an exit of a thread where there are other sharing threads is
fundamentally quite cheap, because it just ends up decrementing
counters).

So my gut feel is that no, we really don't want unshare(CLONE_VM),
because it *is* a very complicated operation and doesn't actually
perform any better than just cloning.

And the "it is a very complicated operation" really comes not from the
fact that we can't copy the VM - we have that support already, but
because CLONE_VM really does go hand-in-hand with so many special
cases. Oleg pointed out that mm->core_waiters thing last time around,
but that just ends up being a detail: the whole VM sharing just ends
up being very central to a lot of small details..

               Linus

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-20  4:45 Albert Cahalan
@ 2006-03-20 16:52 ` Eric W. Biederman
  0 siblings, 0 replies; 37+ messages in thread
From: Eric W. Biederman @ 2006-03-20 16:52 UTC (permalink / raw)
  To: Albert Cahalan
  Cc: linux-kernel, Linus Torvalds, akpm, ebiederm, janak, viro, hch,
	ak, paulus, mtk-manpages

"Albert Cahalan" <acahalan@gmail.com> writes:

> The unshare() syscall is in fact a clone() syscall minus one
> CLONE_* flag that is normally implied: CLONE_TASK_STRUCT.
> (conceptually -- it has no name because it is always implied)
>
> We already have one flag with inverted action: CLONE_NEWNS.
> Adding another such flag (for the task struct) makes sense.
> The new system call is thus not needed at all.
>
> Suggested names: CLONE_NO_TASK, CLONE_SAMETASK, CLONE_SHARETASK

The practical issue there is that even in the best case the implementations
are enough different that it probably would not make a lot of sense.

Eric


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
@ 2006-03-20  4:45 Albert Cahalan
  2006-03-20 16:52 ` Eric W. Biederman
  0 siblings, 1 reply; 37+ messages in thread
From: Albert Cahalan @ 2006-03-20  4:45 UTC (permalink / raw)
  To: linux-kernel, Linus Torvalds, akpm, ebiederm, janak, viro, hch,
	ak, paulus, mtk-manpages

The unshare() syscall is in fact a clone() syscall minus one
CLONE_* flag that is normally implied: CLONE_TASK_STRUCT.
(conceptually -- it has no name because it is always implied)

We already have one flag with inverted action: CLONE_NEWNS.
Adding another such flag (for the task struct) makes sense.
The new system call is thus not needed at all.

Suggested names: CLONE_NO_TASK, CLONE_SAMETASK, CLONE_SHARETASK

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-18 19:54           ` Janak Desai
@ 2006-03-19 13:58             ` Eric W. Biederman
  0 siblings, 0 replies; 37+ messages in thread
From: Eric W. Biederman @ 2006-03-19 13:58 UTC (permalink / raw)
  To: Janak Desai
  Cc: Linus Torvalds, Michael Kerrisk, akpm, linux-kernel, viro, hch,
	ak, paulus

Janak Desai <janak@us.ibm.com> writes:

> Eric W. Biederman wrote:
>
>>Was there ever a good reason why we decided to flip the sense of
>>the bits?
>>
>>I put a together a patch to see what the code would look like:
>>
>>- We actually can reuse between clone and unshare.
>>- We don't need the confusing case of when to add additional resources
>>  to unshare.
>>- There is less total code.
>>- We don't confuse users and developers about the inverted values of
>>  the clone bits.
>>
>>
> I guess confusion is subjective. With this patch if I want to unshare files and
> leave the rest as is, I would have to call
>
> unshare(CLONE_VM | CLONE_FS | CLONE_SIGHAND | ...)

Yes. In my patch unshare(0) unshares all resources that a fork won't
share.

Does anyone use unshare on threads?

My corresponding objection with the current interface is that it
appears to be an implementation of "Do what I mean".  Whenever
you do more than is asked for you run into trouble.

> That is, to unshare one type of context, I have to remember and use flags
> corresponding to all other contexts. If I forget to include one of them, I
> might unwittingly unshare it. Unless I am reading the patch incorrectly,
> this to me is more confusing than the current scheme.

Not exactly.  unshare(CLONE_NEWNS) does not need any additional flags
and I believe it is the primary case of interest.

Beyond that for any version of unshare to be predictable you need
to know what you have shared and what you don't.  Which means
either another syscall or a /proc file that will give you that
information.

Personally I think being unthreaded is easier to work with
than the existing rule of everything necessary to unshare the
specified resources.  Especially since I can be explicit and
tell it to leaves things the way they are.

With a query interface mine becomes unshare(get_shared() & ~CLONE_FILES).
Which is the normal paradigm for modifying something expressed as
flags.

With the current kernel implementation I can't see how calling
unshare(CLONE_NEWNS) is any less surprising when called in a user
space thread.  Without a deep understanding of the kernel I don't
see how you can predict that will unshare your current filesystem
root and cwd pointers.   

My rule that you stop being a thread I think is a little easier
to remember if not understand.  Who would suspect that in the current
kernel unshare(CLONE_THREAD) does not completely unshare all of the
resources that a thread shares?

Anyway I think unshare needs to carry a big fat:
WARNING dangerous when used with threads be very very careful!

Unfortunately that isn't something I can think of a general
test for right now.  Which makes using unshare in a library fairly
dangerous.

Eric

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-18 18:41         ` Eric W. Biederman
  2006-03-18 19:54           ` Janak Desai
@ 2006-03-18 23:41           ` Paul Mackerras
  1 sibling, 0 replies; 37+ messages in thread
From: Paul Mackerras @ 2006-03-18 23:41 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linus Torvalds, Michael Kerrisk, akpm, linux-kernel, janak, viro,
	hch, ak

Eric W. Biederman writes:

> - We don't confuse users and developers about the inverted values of
>   the clone bits.

Given that the name starts with "un", I would *expect*
unshare(CLONE_FOO) to mean that I no longer want to share FOO.

If you want the argument to have the same sense as in the clone system
call, you will have to call it "share" rather than "unshare" (and then
explain to confused developers why they can't use it to start sharing
things that previously weren't shared :).

Paul.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-18 18:41         ` Eric W. Biederman
@ 2006-03-18 19:54           ` Janak Desai
  2006-03-19 13:58             ` Eric W. Biederman
  2006-03-18 23:41           ` Paul Mackerras
  1 sibling, 1 reply; 37+ messages in thread
From: Janak Desai @ 2006-03-18 19:54 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linus Torvalds, Michael Kerrisk, akpm, linux-kernel, viro, hch,
	ak, paulus

Eric W. Biederman wrote:

>Linus Torvalds <torvalds@osdl.org> writes:
>
>  
>
>>On Fri, 17 Mar 2006, Michael Kerrisk wrote:
>>    
>>
>>>> - it's all the same issues that clone() has
>>>>        
>>>>
>>>At the moment, but possibly not in the future (if one day
>>>usnhare() needs a flag that has no analogue in clone()).
>>>      
>>>
>>I don't believe that.
>>
>>If we have something we might want to unshare, that implies by definition 
>>that it was something we wanted to conditionally share in the first place.
>>
>>IOW, it ends up being something that would be a clone() flag.
>>
>>So I really do believe that there is a fundamental 1:1 between the flags. 
>>They aren't just "similar". They are very fundamentally about the same 
>>thing, and giving two different names to the same thing is CONFUSING.
>>
>>		Linus
>>    
>>
>
>Was there ever a good reason why we decided to flip the sense of
>the bits?
>
>I put a together a patch to see what the code would look like:
>
>- We actually can reuse between clone and unshare.
>- We don't need the confusing case of when to add additional resources
>  to unshare.
>- There is less total code.
>- We don't confuse users and developers about the inverted values of
>  the clone bits.
>  
>
I guess confusion is subjective. With this patch if I want to unshare 
files and
leave the rest as is, I would have to call

unshare(CLONE_VM | CLONE_FS | CLONE_SIGHAND | ...)

That is, to unshare one type of context, I have to remember and use flags
corresponding to all other contexts. If I forget to include one of them, I
might unwittingly unshare it. Unless I am reading the patch incorrectly,
this to me is more confusing than the current scheme.

-Janak

> kernel/fork.c |  105 ++++++++++++++++++++++++---------------------------------
> 1 files changed, 45 insertions(+), 60 deletions(-)
>
>28e4502c6d2ca48e0b4a08581123b2c3cf94454e
>diff --git a/kernel/fork.c b/kernel/fork.c
>index 411b10d..0eb0a37 100644
>--- a/kernel/fork.c
>+++ b/kernel/fork.c
>@@ -900,6 +900,36 @@ asmlinkage long sys_set_tid_address(int 
> }
> 
> /*
>+ * Check constraints on flags passed to the clone or unshare system call.
>+ */
>+static int check_clone_flags(unsigned long clone_flags)
>+{
>+	int err = -EINVAL;
>+	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
>+		goto out;
>+
>+	/*
>+	 * Thread groups must share signals as well, and detached threads
>+	 * can only be started up within the thread group.
>+	 */
>+	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
>+		goto out;
>+
>+	/*
>+	 * Shared signal handlers imply shared VM. By way of the above,
>+	 * thread groups also imply shared VM. Blocking this case allows
>+	 * for various simplifications in other code.
>+	 */
>+	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
>+		goto out;
>+	
>+	/* We made it here without problems */
>+	err = 0;
>+out:	
>+	return err;
>+}
>+
>+/*
>  * This creates a new process as a copy of the old one,
>  * but does not actually start it yet.
>  *
>@@ -918,23 +948,9 @@ static task_t *copy_process(unsigned lon
> 	int retval;
> 	struct task_struct *p = NULL;
> 
>-	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
>-		return ERR_PTR(-EINVAL);
>-
>-	/*
>-	 * Thread groups must share signals as well, and detached threads
>-	 * can only be started up within the thread group.
>-	 */
>-	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
>-		return ERR_PTR(-EINVAL);
>-
>-	/*
>-	 * Shared signal handlers imply shared VM. By way of the above,
>-	 * thread groups also imply shared VM. Blocking this case allows
>-	 * for various simplifications in other code.
>-	 */
>-	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
>-		return ERR_PTR(-EINVAL);
>+	retval = check_clone_flags(clone_flags);
>+	if (retval)
>+		return ERR_PTR(retval);
> 
> 	retval = security_task_create(clone_flags);
> 	if (retval)
>@@ -1371,47 +1387,12 @@ void __init proc_caches_init(void)
> 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
> }
> 
>-
>-/*
>- * Check constraints on flags passed to the unshare system call and
>- * force unsharing of additional process context as appropriate.
>- */
>-static inline void check_unshare_flags(unsigned long *flags_ptr)
>-{
>-	/*
>-	 * If unsharing a thread from a thread group, must also
>-	 * unshare vm.
>-	 */
>-	if (*flags_ptr & CLONE_THREAD)
>-		*flags_ptr |= CLONE_VM;
>-
>-	/*
>-	 * If unsharing vm, must also unshare signal handlers.
>-	 */
>-	if (*flags_ptr & CLONE_VM)
>-		*flags_ptr |= CLONE_SIGHAND;
>-
>-	/*
>-	 * If unsharing signal handlers and the task was created
>-	 * using CLONE_THREAD, then must unshare the thread
>-	 */
>-	if ((*flags_ptr & CLONE_SIGHAND) &&
>-	    (atomic_read(&current->signal->count) > 1))
>-		*flags_ptr |= CLONE_THREAD;
>-
>-	/*
>-	 * If unsharing namespace, must also unshare filesystem information.
>-	 */
>-	if (*flags_ptr & CLONE_NEWNS)
>-		*flags_ptr |= CLONE_FS;
>-}
>-
> /*
>  * Unsharing of tasks created with CLONE_THREAD is not supported yet
>  */
> static int unshare_thread(unsigned long unshare_flags)
> {
>-	if (unshare_flags & CLONE_THREAD)
>+	if (!(unshare_flags & CLONE_THREAD) && !thread_group_empty(current))
> 		return -EINVAL;
> 
> 	return 0;
>@@ -1424,7 +1405,7 @@ static int unshare_fs(unsigned long unsh
> {
> 	struct fs_struct *fs = current->fs;
> 
>-	if ((unshare_flags & CLONE_FS) &&
>+	if (!(unshare_flags & CLONE_FS) &&
> 	    (fs && atomic_read(&fs->count) > 1)) {
> 		*new_fsp = __copy_fs_struct(current->fs);
> 		if (!*new_fsp)
>@@ -1441,7 +1422,7 @@ static int unshare_namespace(unsigned lo
> {
> 	struct namespace *ns = current->namespace;
> 
>-	if ((unshare_flags & CLONE_NEWNS) &&
>+	if (!(unshare_flags & CLONE_NEWNS) &&
> 	    (ns && atomic_read(&ns->count) > 1)) {
> 		if (!capable(CAP_SYS_ADMIN))
> 			return -EPERM;
>@@ -1462,7 +1443,7 @@ static int unshare_sighand(unsigned long
> {
> 	struct sighand_struct *sigh = current->sighand;
> 
>-	if ((unshare_flags & CLONE_SIGHAND) &&
>+	if (!(unshare_flags & CLONE_SIGHAND) &&
> 	    (sigh && atomic_read(&sigh->count) > 1))
> 		return -EINVAL;
> 	else
>@@ -1476,7 +1457,7 @@ static int unshare_vm(unsigned long unsh
> {
> 	struct mm_struct *mm = current->mm;
> 
>-	if ((unshare_flags & CLONE_VM) &&
>+	if (!(unshare_flags & CLONE_VM) &&
> 	    (mm && atomic_read(&mm->mm_users) > 1)) {
> 		*new_mmp = dup_mm(current);
> 		if (!*new_mmp)
>@@ -1494,7 +1475,7 @@ static int unshare_fd(unsigned long unsh
> 	struct files_struct *fd = current->files;
> 	int error = 0;
> 
>-	if ((unshare_flags & CLONE_FILES) &&
>+	if (!(unshare_flags & CLONE_FILES) &&
> 	    (fd && atomic_read(&fd->count) > 1)) {
> 		*new_fdp = dup_fd(fd, &error);
> 		if (!*new_fdp)
>@@ -1510,7 +1491,9 @@ static int unshare_fd(unsigned long unsh
>  */
> static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp)
> {
>-	if (unshare_flags & CLONE_SYSVSEM)
>+	struct sem_undo_list *undo_list = current->sysvsem.undo_list;
>+	if (!(unshare_flags & CLONE_SYSVSEM) && 
>+	    undo_list && (atomic_read(&undo_list->refcnt) > 1))
> 		return -EINVAL;
> 
> 	return 0;
>@@ -1534,7 +1517,9 @@ asmlinkage long sys_unshare(unsigned lon
> 	struct files_struct *fd, *new_fd = NULL;
> 	struct sem_undo_list *new_ulist = NULL;
> 
>-	check_unshare_flags(&unshare_flags);
>+	err = check_clone_flags(unshare_flags);
>+	if (err)
>+		goto bad_unshare_out;
>        
> 	/* Return -EINVAL for all unsupported flags */
> 	err = -EINVAL;
>
>-
>To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html
>Please read the FAQ at  http://www.tux.org/lkml/
>
>  
>


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-17  5:42       ` Linus Torvalds
  2006-03-17 16:04         ` Eric W. Biederman
  2006-03-17 20:27         ` Michael Kerrisk
@ 2006-03-18 18:41         ` Eric W. Biederman
  2006-03-18 19:54           ` Janak Desai
  2006-03-18 23:41           ` Paul Mackerras
  2 siblings, 2 replies; 37+ messages in thread
From: Eric W. Biederman @ 2006-03-18 18:41 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Michael Kerrisk, akpm, linux-kernel, janak, viro, hch, ak, paulus

Linus Torvalds <torvalds@osdl.org> writes:

> On Fri, 17 Mar 2006, Michael Kerrisk wrote:
>> 
>> >  - it's all the same issues that clone() has
>> 
>> At the moment, but possibly not in the future (if one day
>> usnhare() needs a flag that has no analogue in clone()).
>
> I don't believe that.
>
> If we have something we might want to unshare, that implies by definition 
> that it was something we wanted to conditionally share in the first place.
>
> IOW, it ends up being something that would be a clone() flag.
>
> So I really do believe that there is a fundamental 1:1 between the flags. 
> They aren't just "similar". They are very fundamentally about the same 
> thing, and giving two different names to the same thing is CONFUSING.
>
> 		Linus

Was there ever a good reason why we decided to flip the sense of
the bits?

I put a together a patch to see what the code would look like:

- We actually can reuse between clone and unshare.
- We don't need the confusing case of when to add additional resources
  to unshare.
- There is less total code.
- We don't confuse users and developers about the inverted values of
  the clone bits.

 kernel/fork.c |  105 ++++++++++++++++++++++++---------------------------------
 1 files changed, 45 insertions(+), 60 deletions(-)

28e4502c6d2ca48e0b4a08581123b2c3cf94454e
diff --git a/kernel/fork.c b/kernel/fork.c
index 411b10d..0eb0a37 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -900,6 +900,36 @@ asmlinkage long sys_set_tid_address(int 
 }
 
 /*
+ * Check constraints on flags passed to the clone or unshare system call.
+ */
+static int check_clone_flags(unsigned long clone_flags)
+{
+	int err = -EINVAL;
+	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
+		goto out;
+
+	/*
+	 * Thread groups must share signals as well, and detached threads
+	 * can only be started up within the thread group.
+	 */
+	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
+		goto out;
+
+	/*
+	 * Shared signal handlers imply shared VM. By way of the above,
+	 * thread groups also imply shared VM. Blocking this case allows
+	 * for various simplifications in other code.
+	 */
+	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
+		goto out;
+	
+	/* We made it here without problems */
+	err = 0;
+out:	
+	return err;
+}
+
+/*
  * This creates a new process as a copy of the old one,
  * but does not actually start it yet.
  *
@@ -918,23 +948,9 @@ static task_t *copy_process(unsigned lon
 	int retval;
 	struct task_struct *p = NULL;
 
-	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
-		return ERR_PTR(-EINVAL);
-
-	/*
-	 * Thread groups must share signals as well, and detached threads
-	 * can only be started up within the thread group.
-	 */
-	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
-		return ERR_PTR(-EINVAL);
-
-	/*
-	 * Shared signal handlers imply shared VM. By way of the above,
-	 * thread groups also imply shared VM. Blocking this case allows
-	 * for various simplifications in other code.
-	 */
-	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
-		return ERR_PTR(-EINVAL);
+	retval = check_clone_flags(clone_flags);
+	if (retval)
+		return ERR_PTR(retval);
 
 	retval = security_task_create(clone_flags);
 	if (retval)
@@ -1371,47 +1387,12 @@ void __init proc_caches_init(void)
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 }
 
-
-/*
- * Check constraints on flags passed to the unshare system call and
- * force unsharing of additional process context as appropriate.
- */
-static inline void check_unshare_flags(unsigned long *flags_ptr)
-{
-	/*
-	 * If unsharing a thread from a thread group, must also
-	 * unshare vm.
-	 */
-	if (*flags_ptr & CLONE_THREAD)
-		*flags_ptr |= CLONE_VM;
-
-	/*
-	 * If unsharing vm, must also unshare signal handlers.
-	 */
-	if (*flags_ptr & CLONE_VM)
-		*flags_ptr |= CLONE_SIGHAND;
-
-	/*
-	 * If unsharing signal handlers and the task was created
-	 * using CLONE_THREAD, then must unshare the thread
-	 */
-	if ((*flags_ptr & CLONE_SIGHAND) &&
-	    (atomic_read(&current->signal->count) > 1))
-		*flags_ptr |= CLONE_THREAD;
-
-	/*
-	 * If unsharing namespace, must also unshare filesystem information.
-	 */
-	if (*flags_ptr & CLONE_NEWNS)
-		*flags_ptr |= CLONE_FS;
-}
-
 /*
  * Unsharing of tasks created with CLONE_THREAD is not supported yet
  */
 static int unshare_thread(unsigned long unshare_flags)
 {
-	if (unshare_flags & CLONE_THREAD)
+	if (!(unshare_flags & CLONE_THREAD) && !thread_group_empty(current))
 		return -EINVAL;
 
 	return 0;
@@ -1424,7 +1405,7 @@ static int unshare_fs(unsigned long unsh
 {
 	struct fs_struct *fs = current->fs;
 
-	if ((unshare_flags & CLONE_FS) &&
+	if (!(unshare_flags & CLONE_FS) &&
 	    (fs && atomic_read(&fs->count) > 1)) {
 		*new_fsp = __copy_fs_struct(current->fs);
 		if (!*new_fsp)
@@ -1441,7 +1422,7 @@ static int unshare_namespace(unsigned lo
 {
 	struct namespace *ns = current->namespace;
 
-	if ((unshare_flags & CLONE_NEWNS) &&
+	if (!(unshare_flags & CLONE_NEWNS) &&
 	    (ns && atomic_read(&ns->count) > 1)) {
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
@@ -1462,7 +1443,7 @@ static int unshare_sighand(unsigned long
 {
 	struct sighand_struct *sigh = current->sighand;
 
-	if ((unshare_flags & CLONE_SIGHAND) &&
+	if (!(unshare_flags & CLONE_SIGHAND) &&
 	    (sigh && atomic_read(&sigh->count) > 1))
 		return -EINVAL;
 	else
@@ -1476,7 +1457,7 @@ static int unshare_vm(unsigned long unsh
 {
 	struct mm_struct *mm = current->mm;
 
-	if ((unshare_flags & CLONE_VM) &&
+	if (!(unshare_flags & CLONE_VM) &&
 	    (mm && atomic_read(&mm->mm_users) > 1)) {
 		*new_mmp = dup_mm(current);
 		if (!*new_mmp)
@@ -1494,7 +1475,7 @@ static int unshare_fd(unsigned long unsh
 	struct files_struct *fd = current->files;
 	int error = 0;
 
-	if ((unshare_flags & CLONE_FILES) &&
+	if (!(unshare_flags & CLONE_FILES) &&
 	    (fd && atomic_read(&fd->count) > 1)) {
 		*new_fdp = dup_fd(fd, &error);
 		if (!*new_fdp)
@@ -1510,7 +1491,9 @@ static int unshare_fd(unsigned long unsh
  */
 static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp)
 {
-	if (unshare_flags & CLONE_SYSVSEM)
+	struct sem_undo_list *undo_list = current->sysvsem.undo_list;
+	if (!(unshare_flags & CLONE_SYSVSEM) && 
+	    undo_list && (atomic_read(&undo_list->refcnt) > 1))
 		return -EINVAL;
 
 	return 0;
@@ -1534,7 +1517,9 @@ asmlinkage long sys_unshare(unsigned lon
 	struct files_struct *fd, *new_fd = NULL;
 	struct sem_undo_list *new_ulist = NULL;
 
-	check_unshare_flags(&unshare_flags);
+	err = check_clone_flags(unshare_flags);
+	if (err)
+		goto bad_unshare_out;
        
 	/* Return -EINVAL for all unsupported flags */
 	err = -EINVAL;


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-17  5:42       ` Linus Torvalds
  2006-03-17 16:04         ` Eric W. Biederman
@ 2006-03-17 20:27         ` Michael Kerrisk
  2006-03-18 18:41         ` Eric W. Biederman
  2 siblings, 0 replies; 37+ messages in thread
From: Michael Kerrisk @ 2006-03-17 20:27 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: akpm, ebiederm, linux-kernel, janak, viro, hch, ak, paulus, mtk-manpages

Linus,

> On Fri, 17 Mar 2006, Michael Kerrisk wrote:
> > 
> > >  - it's all the same issues that clone() has
> > 
> > At the moment, but possibly not in the future (if one day
> > usnhare() needs a flag that has no analogue in clone()).
> 
> I don't believe that.
> 
> If we have something we might want to unshare, that implies by definition 
> that it was something we wanted to conditionally share in the first 
> place.
> 
> IOW, it ends up being something that would be a clone() flag.

I should have been a little clearer.  I was thinking of 
some orthogonal flag that would change the operation of unshare() 
itself (i.e., not some resource that was unshared, but something 
that changes how unshare() goes about its job).  It 
would not make sense to call such a flag CLONE_xxx.

> So I really do believe that there is a fundamental 1:1 between the flags. 
> They aren't just "similar". They are very fundamentally about the same 
> thing, and giving two different names to the same thing is CONFUSING.

This is your viewpoint ;-).  Actually, it cuts through to the
crux of the matter:

The existing flags are *about* the same fundamental things, 
but they *treat* those things in subtly different ways.  

I believe that sentence summarises how our viewpoints differ, 
and explains why you think the names should be the *same*, 
while I think they should better be just *similar*.

As I said in an earlier message, when I wrote my test program
I found myself getting confused about how CLONE_NEWNS worked,
even though I had just drafted a manual page that clearly
told me that CLONE_NEWNS did not reverse the clone() of the
same name.  I was CONFUSED.  (That's a "fact" ;-).)  

Maybe that is just a demonstration of the obvious: I'm not 
the sharpest tack in the box.  But my feeling is that part 
of my confusion *arose from the naming of the flags*, and
some others will be like me.  This is my belief 
about the two possible alternatives:

a) Flags with names that are *similar but not the same* 
   (CLONE_* vs UNSHARE_*) will give userland programmers what I
   think is the right clue about the reality: these flags are 
   working with the same things, but treating them somewhat 
   differently.

b) The flags have the *same* names.  This will lead *some* 
   programmers into thinking that the correspondence
   between the flags is *exact*.  But that is not so.

I believe that the first possibility is less likely to lead to 
traps (bugs) based on false understanding.  It also fits better 
with the possibility of a hypothetical "orthogonal unshare() 
flag".

Of course, I can construct a counterargument: some 
programmers will be smarter than me, and the ones who are 
like me will at least have my manual page to help them avoid 
trouble.  Nevertheless, my gut feel is that the status quo is 
an example of weak user interface design which could bear 
improvement.

Cheers,

Michael

-- 
Michael Kerrisk
maintainer of Linux man pages Sections 2, 3, 4, 5, and 7 

Want to help with man page maintenance?  
Grab the latest tarball at
ftp://ftp.win.tue.nl/pub/linux-local/manpages/, 
read the HOWTOHELP file and grep the source 
files for 'FIXME'.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-17 16:04         ` Eric W. Biederman
@ 2006-03-17 16:49           ` Janak Desai
  0 siblings, 0 replies; 37+ messages in thread
From: Janak Desai @ 2006-03-17 16:49 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linus Torvalds, Michael Kerrisk, akpm, linux-kernel, viro, hch,
	ak, paulus

Eric W. Biederman wrote:

>Linus Torvalds <torvalds@osdl.org> writes:
>
>  
>
>>On Fri, 17 Mar 2006, Michael Kerrisk wrote:
>>    
>>
>>>> - it's all the same issues that clone() has
>>>>        
>>>>
>>>At the moment, but possibly not in the future (if one day
>>>usnhare() needs a flag that has no analogue in clone()).
>>>      
>>>
>>I don't believe that.
>>
>>If we have something we might want to unshare, that implies by definition 
>>that it was something we wanted to conditionally share in the first place.
>>
>>IOW, it ends up being something that would be a clone() flag.
>>
>>So I really do believe that there is a fundamental 1:1 between the flags. 
>>They aren't just "similar". They are very fundamentally about the same 
>>thing, and giving two different names to the same thing is CONFUSING.
>>    
>>
>
>The scary thing is that with only 7 things we can share or not,
>and a 32bit field we have only 7 bits left that we can define.
>Last count I think I know of at least that many additional global
>namespaces in the kernel.
>
>
>
>On the confusing side.  Unshare largely because it doesn't default to
>unsharing all of the thread state.  Has the weird issue that unshare
>will automatically add bits you didn't ask for (so it can satisfy your
>request) and unsharing more than you requested.  Clone when presented
>with the same situation returns an error.
>  
>
We are probably digressing from the original discussion but just wanted to
point out that the situations presented to clone and unshare are a bit 
different.
Clone returns an error because in the same call you are trying to provide
illegal combination of flags. Like saying that you want a new namespace
but want to share the filesystem. There is no way for clone to know which
of the two flags to honor or ignore. Where as with unshare if you are
trying to unshare namespace but are currently sharing filesystem, then
it is possible to complete the request by unsharing filesystem as well .
You may remember that the earlier incarnation of the unshare patch did
return an error. However it was pointed out, and correctly so, that it is
better to unshare appropriate related contexts.

-Janak

>So even while the resources are the same the interaction of the
>bits really is quite different between the two calls.
>
>Eric
>-
>To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html
>Please read the FAQ at  http://www.tux.org/lkml/
>
>  
>


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-17  5:42       ` Linus Torvalds
@ 2006-03-17 16:04         ` Eric W. Biederman
  2006-03-17 16:49           ` Janak Desai
  2006-03-17 20:27         ` Michael Kerrisk
  2006-03-18 18:41         ` Eric W. Biederman
  2 siblings, 1 reply; 37+ messages in thread
From: Eric W. Biederman @ 2006-03-17 16:04 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Michael Kerrisk, akpm, linux-kernel, janak, viro, hch, ak, paulus

Linus Torvalds <torvalds@osdl.org> writes:

> On Fri, 17 Mar 2006, Michael Kerrisk wrote:
>> 
>> >  - it's all the same issues that clone() has
>> 
>> At the moment, but possibly not in the future (if one day
>> usnhare() needs a flag that has no analogue in clone()).
>
> I don't believe that.
>
> If we have something we might want to unshare, that implies by definition 
> that it was something we wanted to conditionally share in the first place.
>
> IOW, it ends up being something that would be a clone() flag.
>
> So I really do believe that there is a fundamental 1:1 between the flags. 
> They aren't just "similar". They are very fundamentally about the same 
> thing, and giving two different names to the same thing is CONFUSING.

The scary thing is that with only 7 things we can share or not,
and a 32bit field we have only 7 bits left that we can define.
Last count I think I know of at least that many additional global
namespaces in the kernel.



On the confusing side.  Unshare largely because it doesn't default to
unsharing all of the thread state.  Has the weird issue that unshare
will automatically add bits you didn't ask for (so it can satisfy your
request) and unsharing more than you requested.  Clone when presented
with the same situation returns an error.

So even while the resources are the same the interaction of the
bits really is quite different between the two calls.

Eric

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-17  1:11     ` Michael Kerrisk
@ 2006-03-17  5:42       ` Linus Torvalds
  2006-03-17 16:04         ` Eric W. Biederman
                           ` (2 more replies)
  0 siblings, 3 replies; 37+ messages in thread
From: Linus Torvalds @ 2006-03-17  5:42 UTC (permalink / raw)
  To: Michael Kerrisk
  Cc: akpm, ebiederm, linux-kernel, janak, viro, hch, ak, paulus



On Fri, 17 Mar 2006, Michael Kerrisk wrote:
> 
> >  - it's all the same issues that clone() has
> 
> At the moment, but possibly not in the future (if one day
> usnhare() needs a flag that has no analogue in clone()).

I don't believe that.

If we have something we might want to unshare, that implies by definition 
that it was something we wanted to conditionally share in the first place.

IOW, it ends up being something that would be a clone() flag.

So I really do believe that there is a fundamental 1:1 between the flags. 
They aren't just "similar". They are very fundamentally about the same 
thing, and giving two different names to the same thing is CONFUSING.

		Linus

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-16 23:57   ` Linus Torvalds
@ 2006-03-17  1:11     ` Michael Kerrisk
  2006-03-17  5:42       ` Linus Torvalds
  0 siblings, 1 reply; 37+ messages in thread
From: Michael Kerrisk @ 2006-03-17  1:11 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: akpm, ebiederm, linux-kernel, janak, viro, hch, ak, paulus

Linus,

> On Fri, 17 Mar 2006, Michael Kerrisk wrote:
> > > 
> > > My personal opinion is that having a different set of flags is more 
> > > confusing 
> > 
> > How is it confusing?  And who is it confusing for?
> 
> It's confusing because
>  - it's just more flags to keep track of

Agreed, there is a "confusion cost" to that.

>  - it's all the same issues that clone() has

At the moment, but possibly not in the future (if one day
usnhare() needs a flag that has no analogue in clone()).

>  - it's an opportunity for future incoherence

Not sure what future incoherence you mean here.  Anyway,
we inject some incoherence *now* (some unshare() flags reverse
their clone() counterparts, one does not).

> > It will potentially require kernel developers to think for just 
> > a moment about what is going on.  But why care about them -- 
> > they don't have to *use* this interface; userland programmers do.
> 
> All the confusion is equally a userland issue, don't try to just enforce 
> your own opinions as somehow being "facts" by repeating them over 
> and over again.

I'm not trying to do that.  I've repeated my statements 
because I haven't seen any clear counterarguments.  I have a
particular opinion about what constitutes confusion, and it
comes from a perspective that focuses on the kernel-userland 
interface.  I agree that it does create some "confusion cost"
for userland programmers to add new flags.  But _in my opinion_
that cost is outweighed by the greater "confusion costs" that
I have described.  Eric seemed to agree.  Unfortunately for my
argument, you and Janak don't ;-).

Cheers,

Michael

-- 
Michael Kerrisk
maintainer of Linux man pages Sections 2, 3, 4, 5, and 7 

Want to help with man page maintenance?  
Grab the latest tarball at
ftp://ftp.win.tue.nl/pub/linux-local/manpages/, 
read the HOWTOHELP file and grep the source 
files for 'FIXME'.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
  2006-03-16 23:34 ` Michael Kerrisk
@ 2006-03-16 23:57   ` Linus Torvalds
  2006-03-17  1:11     ` Michael Kerrisk
  0 siblings, 1 reply; 37+ messages in thread
From: Linus Torvalds @ 2006-03-16 23:57 UTC (permalink / raw)
  To: Michael Kerrisk
  Cc: akpm, ebiederm, linux-kernel, janak, viro, hch, ak, paulus



On Fri, 17 Mar 2006, Michael Kerrisk wrote:
> > 
> > My personal opinion is that having a different set of flags is more 
> > confusing 
> 
> How is it confusing?  And who is it confusing for?

It's confusing because
 - it's just more flags to keep track of
 - it's all the same issues that clone() has
 - it's an opportunity for future incoherence

> It will potentially require kernel developers to think for just 
> a moment about what is going on.  But why care about them -- 
> they don't have to *use* this interface; userland programmers do.

All the confusion is equally a userland issue, don't try to just enforce 
your own opinions as somehow being "facts" by repeating them over and over 
again.

			Linus

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed.
       [not found] <1359.1142546753@www064.gmx.net>
@ 2006-03-16 23:34 ` Michael Kerrisk
  2006-03-16 23:57   ` Linus Torvalds
  0 siblings, 1 reply; 37+ messages in thread
From: Michael Kerrisk @ 2006-03-16 23:34 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: akpm, ebiederm, linux-kernel, janak, viro, hch, ak, paulus

[I seem to have had a send problem with the previous version of 
this message, so this is a slightly modified resend]

Linus,

> On Thu, 16 Mar 2006, Andrew Morton wrote:
> > 
> > iirc there was some discussion about this and it was explicitly decided
> > to keep the CLONE flags.
> > 
> > Maybe Janak or Linus can comment?
> 
> My personal opinion is that having a different set of flags is more 
> confusing 

How is it confusing?  And who is it confusing for?

It will potentially require kernel developers to think for just 
a moment about what is going on.  But why care about them -- 
they don't have to *use* this interface; userland programmers do.

I have tried to argue in the clearest way I can that the current 
interface (uschare(CLONE_*) is confusing for *users* of this API.
Do you care about the interface that is inflicted on users?

When you give users an interface with the same name, they
expect it to do the same thing.  When their expectations are 
broken, they write bugs.  There are already precedents, 
with much less justification, for defining separate flag names
for different interfaces.  For example, poll() uses constants
with names of the form POLL*, while epoll uses constant
with *EXACTLY* the same meanings, but named EPOLL*.

> and likely to result in problems later than having the same 
> ones. 

What problems do you think can occur?  And what happens on the 
day when unshare() needs a flag that clone() does not have?

> Regardless, I'm not touching this for 2.6.16 any more, 

By which time, the discussion is over.  Since "we don't 
break userspace", we can't (shouldn't) reverse whatever goes
into 2.6.16.

Cheers,

Michael

-- 
Michael Kerrisk
maintainer of Linux man pages Sections 2, 3, 4, 5, and 7 

Want to help with man page maintenance?  
Grab the latest tarball at
ftp://ftp.win.tue.nl/pub/linux-local/manpages/, 
read the HOWTOHELP file and grep the source 
files for 'FIXME'.

^ permalink raw reply	[flat|nested] 37+ messages in thread

end of thread, other threads:[~2016-03-14 18:35 UTC | newest]

Thread overview: 37+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-03-16 16:49 [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed Eric W. Biederman
2006-03-16 17:31 ` [PATCH] unshare: Use rcu_assign_pointer when setting sighand Eric W. Biederman
2006-03-17  6:48   ` Paul E. McKenney
2006-03-17 17:44   ` Oleg Nesterov
2006-03-17 20:56     ` Andrew Morton
2006-03-18 13:12       ` Oleg Nesterov
2006-03-18 15:43         ` Janak Desai
2006-03-18 17:24           ` Oleg Nesterov
2006-03-18 17:41             ` [PATCH] for 2.6.16, disable unshare_vm() Oleg Nesterov
2006-03-18 18:10               ` Linus Torvalds
2006-03-18 18:29                 ` Ulrich Drepper
2006-03-18 18:48                 ` Janak Desai
2016-03-14 13:15             ` unshare(CLONE_VM) Re: [PATCH] unshare: Use rcu_assign_pointer when setting sighand Julian Smith
2016-03-14 18:35               ` Linus Torvalds
2006-03-18 13:13       ` [PATCH] implement unshare(CLONE_SIGHAND) for single-thread case Oleg Nesterov
2006-03-18 15:10       ` [PATCH] unshare: Error if passed unsupported flags Eric W. Biederman
2006-03-18 15:33         ` Janak Desai
2006-03-18 16:29       ` [PATCH] unshare: Use rcu_assign_pointer when setting sighand Janak Desai
2006-03-16 19:40 ` [PATCH] unshare: Cleanup up the sys_unshare interface before we are committed Michael Kerrisk
2006-03-16 20:33 ` Andrew Morton
2006-03-16 20:41   ` Linus Torvalds
2006-03-16 21:58     ` Eric W. Biederman
2006-03-16 22:19       ` Andrew Morton
2006-03-16 21:36   ` Janak Desai
     [not found] <1359.1142546753@www064.gmx.net>
2006-03-16 23:34 ` Michael Kerrisk
2006-03-16 23:57   ` Linus Torvalds
2006-03-17  1:11     ` Michael Kerrisk
2006-03-17  5:42       ` Linus Torvalds
2006-03-17 16:04         ` Eric W. Biederman
2006-03-17 16:49           ` Janak Desai
2006-03-17 20:27         ` Michael Kerrisk
2006-03-18 18:41         ` Eric W. Biederman
2006-03-18 19:54           ` Janak Desai
2006-03-19 13:58             ` Eric W. Biederman
2006-03-18 23:41           ` Paul Mackerras
2006-03-20  4:45 Albert Cahalan
2006-03-20 16:52 ` Eric W. Biederman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).