All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] sparc: make copy_thread honor pid namespaces
@ 2021-02-17  8:00 ` Dmitry V. Levin
  0 siblings, 0 replies; 11+ messages in thread
From: Dmitry V. Levin @ 2021-02-17  8:00 UTC (permalink / raw)
  To: David S. Miller
  Cc: Eric W. Biederman, Alexey Gladkov, Gleb Fotengauer-Malinovskiy,
	Anatoly Pugachev, sparclinux, linux-kernel

On sparc, fork and clone syscalls have an unusual semantics of
returning the pid of the parent process to the child process.

Apparently, the implementation did not honor pid namespaces at all,
so the child used to get the pid of its parent in the init namespace.

This bug was found by strace test suite.

Reproducer:

  $ gcc -Wall -O2 -xc - <<'EOF'
  #define _GNU_SOURCE
  #include <err.h>
  #include <errno.h>
  #include <sched.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <sys/wait.h>
  #include <unistd.h>
  #include <asm/unistd.h>
  #include <linux/sched.h>
  int main(void)
  {
    if (unshare(CLONE_NEWPID | CLONE_NEWUSER) < 0)
      err(1, "unshare");
    int pid = syscall(__NR_fork);
    if (pid < 0)
      err(1, "fork");
    fprintf(stderr, "current: %d, parent: %d, fork returned: %d\n",
            getpid(), getppid(), pid);
    int status;
    if (wait(&status) < 0) {
      if (errno == ECHILD)
        _exit(0);
      err(1, "wait");
    }
    return !WIFEXITED(status) || WEXITSTATUS(status) != 0;
  }
  EOF
  $ sh -c ./a.out
  current: 10001, parent: 10000, fork returned: 10002
  current: 1, parent: 0, fork returned: 10001

Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
---

Although the fix seems to be obvious, I have no means to test it myself,
so any help with the testing is much appreciated.

 arch/sparc/kernel/process_32.c | 2 +-
 arch/sparc/kernel/process_64.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index a02363735915..7a89969befa8 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -368,7 +368,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
 #endif
 
 	/* Set the return value for the child. */
-	childregs->u_regs[UREG_I0] = current->pid;
+	childregs->u_regs[UREG_I0] = task_pid_nr_ns(current, task_active_pid_ns(p));
 	childregs->u_regs[UREG_I1] = 1;
 
 	/* Set the return value for the parent. */
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 6f8c7822fc06..ec97217ab970 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -629,7 +629,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
 		t->utraps[0]++;
 
 	/* Set the return value for the child. */
-	t->kregs->u_regs[UREG_I0] = current->pid;
+	t->kregs->u_regs[UREG_I0] = task_pid_nr_ns(current, task_active_pid_ns(p));
 	t->kregs->u_regs[UREG_I1] = 1;
 
 	/* Set the second return value for the parent. */
-- 
ldv

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH] sparc: make copy_thread honor pid namespaces
@ 2021-02-17  8:00 ` Dmitry V. Levin
  0 siblings, 0 replies; 11+ messages in thread
From: Dmitry V. Levin @ 2021-02-17  8:00 UTC (permalink / raw)
  To: David S. Miller
  Cc: Eric W. Biederman, Alexey Gladkov, Gleb Fotengauer-Malinovskiy,
	Anatoly Pugachev, sparclinux, linux-kernel

On sparc, fork and clone syscalls have an unusual semantics of
returning the pid of the parent process to the child process.

Apparently, the implementation did not honor pid namespaces at all,
so the child used to get the pid of its parent in the init namespace.

This bug was found by strace test suite.

Reproducer:

  $ gcc -Wall -O2 -xc - <<'EOF'
  #define _GNU_SOURCE
  #include <err.h>
  #include <errno.h>
  #include <sched.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <sys/wait.h>
  #include <unistd.h>
  #include <asm/unistd.h>
  #include <linux/sched.h>
  int main(void)
  {
    if (unshare(CLONE_NEWPID | CLONE_NEWUSER) < 0)
      err(1, "unshare");
    int pid = syscall(__NR_fork);
    if (pid < 0)
      err(1, "fork");
    fprintf(stderr, "current: %d, parent: %d, fork returned: %d\n",
            getpid(), getppid(), pid);
    int status;
    if (wait(&status) < 0) {
      if (errno = ECHILD)
        _exit(0);
      err(1, "wait");
    }
    return !WIFEXITED(status) || WEXITSTATUS(status) != 0;
  }
  EOF
  $ sh -c ./a.out
  current: 10001, parent: 10000, fork returned: 10002
  current: 1, parent: 0, fork returned: 10001

Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
---

Although the fix seems to be obvious, I have no means to test it myself,
so any help with the testing is much appreciated.

 arch/sparc/kernel/process_32.c | 2 +-
 arch/sparc/kernel/process_64.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index a02363735915..7a89969befa8 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -368,7 +368,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
 #endif
 
 	/* Set the return value for the child. */
-	childregs->u_regs[UREG_I0] = current->pid;
+	childregs->u_regs[UREG_I0] = task_pid_nr_ns(current, task_active_pid_ns(p));
 	childregs->u_regs[UREG_I1] = 1;
 
 	/* Set the return value for the parent. */
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 6f8c7822fc06..ec97217ab970 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -629,7 +629,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
 		t->utraps[0]++;
 
 	/* Set the return value for the child. */
-	t->kregs->u_regs[UREG_I0] = current->pid;
+	t->kregs->u_regs[UREG_I0] = task_pid_nr_ns(current, task_active_pid_ns(p));
 	t->kregs->u_regs[UREG_I1] = 1;
 
 	/* Set the second return value for the parent. */
-- 
ldv

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* RE: [PATCH] sparc: make copy_thread honor pid namespaces
  2021-02-17  8:00 ` Dmitry V. Levin
  (?)
@ 2021-02-18 15:28 ` David Laight
  -1 siblings, 0 replies; 11+ messages in thread
From: David Laight @ 2021-02-18 15:28 UTC (permalink / raw)
  To: 'Dmitry V. Levin', David S. Miller
  Cc: Eric W. Biederman, Alexey Gladkov, Gleb Fotengauer-Malinovskiy,
	Anatoly Pugachev, sparclinux, linux-kernel

From: Dmitry V. Levin
> Sent: 17 February 2021 08:00
> 
> On sparc, fork and clone syscalls have an unusual semantics of
> returning the pid of the parent process to the child process.

Isn't that just broken?
The application expects fork() to return 0 in the child.
libc would have to do horrid things to convert the result.

It could be comparing against the saved 'current pid' in
order to save a system call for the first ppid() call.
But that isn't ever going to work if it is possible to
create a child in a different pid namespace.

FWIW the test program ought to use syscall() to get the pid
and ppid - rather than relying on any optimisations in libc.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] sparc: make copy_thread honor pid namespaces
  2021-02-17  8:00 ` Dmitry V. Levin
@ 2021-02-18 18:21   ` Eric W. Biederman
  -1 siblings, 0 replies; 11+ messages in thread
From: Eric W. Biederman @ 2021-02-18 18:21 UTC (permalink / raw)
  To: Dmitry V. Levin
  Cc: David S. Miller, Alexey Gladkov, Gleb Fotengauer-Malinovskiy,
	Anatoly Pugachev, sparclinux, linux-kernel

"Dmitry V. Levin" <ldv@altlinux.org> writes:

> On sparc, fork and clone syscalls have an unusual semantics of
> returning the pid of the parent process to the child process.
>
> Apparently, the implementation did not honor pid namespaces at all,
> so the child used to get the pid of its parent in the init namespace.
>
> This bug was found by strace test suite.
>
> Reproducer:
>
>   $ gcc -Wall -O2 -xc - <<'EOF'
>   #define _GNU_SOURCE
>   #include <err.h>
>   #include <errno.h>
>   #include <sched.h>
>   #include <stdio.h>
>   #include <stdlib.h>
>   #include <sys/wait.h>
>   #include <unistd.h>
>   #include <asm/unistd.h>
>   #include <linux/sched.h>
>   int main(void)
>   {
>     if (unshare(CLONE_NEWPID | CLONE_NEWUSER) < 0)
>       err(1, "unshare");
>     int pid = syscall(__NR_fork);
>     if (pid < 0)
>       err(1, "fork");
>     fprintf(stderr, "current: %d, parent: %d, fork returned: %d\n",
>             getpid(), getppid(), pid);
>     int status;
>     if (wait(&status) < 0) {
>       if (errno == ECHILD)
>         _exit(0);
>       err(1, "wait");
>     }
>     return !WIFEXITED(status) || WEXITSTATUS(status) != 0;
>   }
>   EOF
>   $ sh -c ./a.out
>   current: 10001, parent: 10000, fork returned: 10002
>   current: 1, parent: 0, fork returned: 10001

From glibc's sysdeps/unix/sparc/fork.S:
> SYSCALL__ (fork, 0)
> 	/* %o1 is now 0 for the parent and 1 for the child.  Decrement it to
> 	   make it -1 (all bits set) for the parent, and 0 (no bits set)
> 	   for the child.  Then AND it with %o0, so the parent gets
> 	   %o0&-1==pid, and the child gets %o0&0==0.  */
> 	sub %o1, 1, %o1
> 	retl
> 	and %o0, %o1, %o0
> libc_hidden_def (__fork)
> 
> weak_alias (__fork, fork)

This needs to be shared to make the test case make sense.

The change below looks reasonable.  Unfortunately because copy_thread
comes before init_task_pid task_active_pid_ns(p) will not work
correctly.

The code below needs to use current->nsproxy->pid_ns_for_children
instead of task_active_pid_ns(p).

For sparc people.  Do we know of anyone who actually uses the parent pid
returned from fork to the child process?  If not the code can simply
return 0 and we don't have to do this.

Eric

> Cc: Eric W. Biederman <ebiederm@xmission.com>
> Cc: stable@vger.kernel.org
> Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
> ---
>
> Although the fix seems to be obvious, I have no means to test it myself,
> so any help with the testing is much appreciated.
>
>  arch/sparc/kernel/process_32.c | 2 +-
>  arch/sparc/kernel/process_64.c | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
> index a02363735915..7a89969befa8 100644
> --- a/arch/sparc/kernel/process_32.c
> +++ b/arch/sparc/kernel/process_32.c
> @@ -368,7 +368,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
>  #endif
>  
>  	/* Set the return value for the child. */
> -	childregs->u_regs[UREG_I0] = current->pid;
> +	childregs->u_regs[UREG_I0] = task_pid_nr_ns(current, task_active_pid_ns(p));
>  	childregs->u_regs[UREG_I1] = 1;
>  
>  	/* Set the return value for the parent. */
> diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
> index 6f8c7822fc06..ec97217ab970 100644
> --- a/arch/sparc/kernel/process_64.c
> +++ b/arch/sparc/kernel/process_64.c
> @@ -629,7 +629,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
>  		t->utraps[0]++;
>  
>  	/* Set the return value for the child. */
> -	t->kregs->u_regs[UREG_I0] = current->pid;
> +	t->kregs->u_regs[UREG_I0] = task_pid_nr_ns(current, task_active_pid_ns(p));
>  	t->kregs->u_regs[UREG_I1] = 1;
>  
>  	/* Set the second return value for the parent. */

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] sparc: make copy_thread honor pid namespaces
@ 2021-02-18 18:21   ` Eric W. Biederman
  0 siblings, 0 replies; 11+ messages in thread
From: Eric W. Biederman @ 2021-02-18 18:21 UTC (permalink / raw)
  To: Dmitry V. Levin
  Cc: David S. Miller, Alexey Gladkov, Gleb Fotengauer-Malinovskiy,
	Anatoly Pugachev, sparclinux, linux-kernel

"Dmitry V. Levin" <ldv@altlinux.org> writes:

> On sparc, fork and clone syscalls have an unusual semantics of
> returning the pid of the parent process to the child process.
>
> Apparently, the implementation did not honor pid namespaces at all,
> so the child used to get the pid of its parent in the init namespace.
>
> This bug was found by strace test suite.
>
> Reproducer:
>
>   $ gcc -Wall -O2 -xc - <<'EOF'
>   #define _GNU_SOURCE
>   #include <err.h>
>   #include <errno.h>
>   #include <sched.h>
>   #include <stdio.h>
>   #include <stdlib.h>
>   #include <sys/wait.h>
>   #include <unistd.h>
>   #include <asm/unistd.h>
>   #include <linux/sched.h>
>   int main(void)
>   {
>     if (unshare(CLONE_NEWPID | CLONE_NEWUSER) < 0)
>       err(1, "unshare");
>     int pid = syscall(__NR_fork);
>     if (pid < 0)
>       err(1, "fork");
>     fprintf(stderr, "current: %d, parent: %d, fork returned: %d\n",
>             getpid(), getppid(), pid);
>     int status;
>     if (wait(&status) < 0) {
>       if (errno = ECHILD)
>         _exit(0);
>       err(1, "wait");
>     }
>     return !WIFEXITED(status) || WEXITSTATUS(status) != 0;
>   }
>   EOF
>   $ sh -c ./a.out
>   current: 10001, parent: 10000, fork returned: 10002
>   current: 1, parent: 0, fork returned: 10001

From glibc's sysdeps/unix/sparc/fork.S:
> SYSCALL__ (fork, 0)
> 	/* %o1 is now 0 for the parent and 1 for the child.  Decrement it to
> 	   make it -1 (all bits set) for the parent, and 0 (no bits set)
> 	   for the child.  Then AND it with %o0, so the parent gets
> 	   %o0&-1=pid, and the child gets %o0&0=0.  */
> 	sub %o1, 1, %o1
> 	retl
> 	and %o0, %o1, %o0
> libc_hidden_def (__fork)
> 
> weak_alias (__fork, fork)

This needs to be shared to make the test case make sense.

The change below looks reasonable.  Unfortunately because copy_thread
comes before init_task_pid task_active_pid_ns(p) will not work
correctly.

The code below needs to use current->nsproxy->pid_ns_for_children
instead of task_active_pid_ns(p).

For sparc people.  Do we know of anyone who actually uses the parent pid
returned from fork to the child process?  If not the code can simply
return 0 and we don't have to do this.

Eric

> Cc: Eric W. Biederman <ebiederm@xmission.com>
> Cc: stable@vger.kernel.org
> Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
> ---
>
> Although the fix seems to be obvious, I have no means to test it myself,
> so any help with the testing is much appreciated.
>
>  arch/sparc/kernel/process_32.c | 2 +-
>  arch/sparc/kernel/process_64.c | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
> index a02363735915..7a89969befa8 100644
> --- a/arch/sparc/kernel/process_32.c
> +++ b/arch/sparc/kernel/process_32.c
> @@ -368,7 +368,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
>  #endif
>  
>  	/* Set the return value for the child. */
> -	childregs->u_regs[UREG_I0] = current->pid;
> +	childregs->u_regs[UREG_I0] = task_pid_nr_ns(current, task_active_pid_ns(p));
>  	childregs->u_regs[UREG_I1] = 1;
>  
>  	/* Set the return value for the parent. */
> diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
> index 6f8c7822fc06..ec97217ab970 100644
> --- a/arch/sparc/kernel/process_64.c
> +++ b/arch/sparc/kernel/process_64.c
> @@ -629,7 +629,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
>  		t->utraps[0]++;
>  
>  	/* Set the return value for the child. */
> -	t->kregs->u_regs[UREG_I0] = current->pid;
> +	t->kregs->u_regs[UREG_I0] = task_pid_nr_ns(current, task_active_pid_ns(p));
>  	t->kregs->u_regs[UREG_I1] = 1;
>  
>  	/* Set the second return value for the parent. */

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2] sparc: make copy_thread honor pid namespaces
  2021-02-18 18:21   ` Eric W. Biederman
@ 2021-02-19 22:50     ` Dmitry V. Levin
  -1 siblings, 0 replies; 11+ messages in thread
From: Dmitry V. Levin @ 2021-02-19 22:50 UTC (permalink / raw)
  To: David S. Miller
  Cc: Eric W. Biederman, Alexey Gladkov, Gleb Fotengauer-Malinovskiy,
	Anatoly Pugachev, sparclinux, linux-kernel

On sparc, fork and clone syscalls have an unusual semantics of
returning the pid of the parent process to the child process.

Apparently, the implementation did not honor pid namespaces at all,
so the child used to get the pid of its parent in the init namespace.

Fortunately, most users of these syscalls are not affected by this bug
because they use another register to distinguish the parent process
from its child, and the pid of the parent process is often discarded.

Reproducer:

  $ gcc -Wall -O2 -xc - <<'EOF'
  #define _GNU_SOURCE
  #include <err.h>
  #include <errno.h>
  #include <sched.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <sys/wait.h>
  #include <unistd.h>
  #include <asm/unistd.h>
  #include <linux/sched.h>
  static void test_fork(void)
  {
  	int pid = syscall(__NR_fork);
  	if (pid < 0)
  		err(1, "fork");
  	fprintf(stderr, "current: %d, parent: %d, fork returned: %d\n",
  		getpid(), getppid(), pid);
  	int status;
  	if (wait(&status) < 0) {
  		if (errno == ECHILD)
  			_exit(0);
  		err(1, "wait");
  	}
  	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
  		errx(1, "wait: %#x", status);
  }
  int main(void)
  {
  	test_fork();
  	if (unshare(CLONE_NEWPID | CLONE_NEWUSER) < 0)
  		err(1, "unshare");
  	test_fork();
  	return 0;
  }
  EOF
  $ sh -c ./a.out
  current: 10001, parent: 10000, fork returned: 10002
  current: 10002, parent: 10001, fork returned: 10001
  current: 10001, parent: 10000, fork returned: 10003
  current: 1, parent: 0, fork returned: 10001

This bug was found by strace test suite.

Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
---

v2: Replaced task_active_pid_ns(p) with current->nsproxy->pid_ns_for_children
    as suggested by Eric.

 arch/sparc/kernel/process_32.c | 3 ++-
 arch/sparc/kernel/process_64.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index a02363735915..3be653e40204 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -368,7 +368,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
 #endif
 
 	/* Set the return value for the child. */
-	childregs->u_regs[UREG_I0] = current->pid;
+	childregs->u_regs[UREG_I0] =
+		task_pid_nr_ns(current, current->nsproxy->pid_ns_for_children);
 	childregs->u_regs[UREG_I1] = 1;
 
 	/* Set the return value for the parent. */
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 6f8c7822fc06..f53ef5cff46a 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -629,7 +629,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
 		t->utraps[0]++;
 
 	/* Set the return value for the child. */
-	t->kregs->u_regs[UREG_I0] = current->pid;
+	t->kregs->u_regs[UREG_I0] =
+		task_pid_nr_ns(current, current->nsproxy->pid_ns_for_children);
 	t->kregs->u_regs[UREG_I1] = 1;
 
 	/* Set the second return value for the parent. */
-- 
ldv

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v2] sparc: make copy_thread honor pid namespaces
@ 2021-02-19 22:50     ` Dmitry V. Levin
  0 siblings, 0 replies; 11+ messages in thread
From: Dmitry V. Levin @ 2021-02-19 22:50 UTC (permalink / raw)
  To: David S. Miller
  Cc: Eric W. Biederman, Alexey Gladkov, Gleb Fotengauer-Malinovskiy,
	Anatoly Pugachev, sparclinux, linux-kernel

On sparc, fork and clone syscalls have an unusual semantics of
returning the pid of the parent process to the child process.

Apparently, the implementation did not honor pid namespaces at all,
so the child used to get the pid of its parent in the init namespace.

Fortunately, most users of these syscalls are not affected by this bug
because they use another register to distinguish the parent process
from its child, and the pid of the parent process is often discarded.

Reproducer:

  $ gcc -Wall -O2 -xc - <<'EOF'
  #define _GNU_SOURCE
  #include <err.h>
  #include <errno.h>
  #include <sched.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <sys/wait.h>
  #include <unistd.h>
  #include <asm/unistd.h>
  #include <linux/sched.h>
  static void test_fork(void)
  {
  	int pid = syscall(__NR_fork);
  	if (pid < 0)
  		err(1, "fork");
  	fprintf(stderr, "current: %d, parent: %d, fork returned: %d\n",
  		getpid(), getppid(), pid);
  	int status;
  	if (wait(&status) < 0) {
  		if (errno = ECHILD)
  			_exit(0);
  		err(1, "wait");
  	}
  	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
  		errx(1, "wait: %#x", status);
  }
  int main(void)
  {
  	test_fork();
  	if (unshare(CLONE_NEWPID | CLONE_NEWUSER) < 0)
  		err(1, "unshare");
  	test_fork();
  	return 0;
  }
  EOF
  $ sh -c ./a.out
  current: 10001, parent: 10000, fork returned: 10002
  current: 10002, parent: 10001, fork returned: 10001
  current: 10001, parent: 10000, fork returned: 10003
  current: 1, parent: 0, fork returned: 10001

This bug was found by strace test suite.

Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
---

v2: Replaced task_active_pid_ns(p) with current->nsproxy->pid_ns_for_children
    as suggested by Eric.

 arch/sparc/kernel/process_32.c | 3 ++-
 arch/sparc/kernel/process_64.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index a02363735915..3be653e40204 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -368,7 +368,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
 #endif
 
 	/* Set the return value for the child. */
-	childregs->u_regs[UREG_I0] = current->pid;
+	childregs->u_regs[UREG_I0] +		task_pid_nr_ns(current, current->nsproxy->pid_ns_for_children);
 	childregs->u_regs[UREG_I1] = 1;
 
 	/* Set the return value for the parent. */
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 6f8c7822fc06..f53ef5cff46a 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -629,7 +629,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
 		t->utraps[0]++;
 
 	/* Set the return value for the child. */
-	t->kregs->u_regs[UREG_I0] = current->pid;
+	t->kregs->u_regs[UREG_I0] +		task_pid_nr_ns(current, current->nsproxy->pid_ns_for_children);
 	t->kregs->u_regs[UREG_I1] = 1;
 
 	/* Set the second return value for the parent. */
-- 
ldv

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] sparc: make copy_thread honor pid namespaces
  2021-02-19 22:50     ` Dmitry V. Levin
@ 2021-02-22 21:30       ` Eric W. Biederman
  -1 siblings, 0 replies; 11+ messages in thread
From: Eric W. Biederman @ 2021-02-22 21:30 UTC (permalink / raw)
  To: Dmitry V. Levin
  Cc: David S. Miller, Alexey Gladkov, Gleb Fotengauer-Malinovskiy,
	Anatoly Pugachev, sparclinux, linux-kernel

"Dmitry V. Levin" <ldv@altlinux.org> writes:

> On sparc, fork and clone syscalls have an unusual semantics of
> returning the pid of the parent process to the child process.
>
> Apparently, the implementation did not honor pid namespaces at all,
> so the child used to get the pid of its parent in the init namespace.
>
> Fortunately, most users of these syscalls are not affected by this bug
> because they use another register to distinguish the parent process
> from its child, and the pid of the parent process is often discarded.
>
> Reproducer:
>
>   $ gcc -Wall -O2 -xc - <<'EOF'
>   #define _GNU_SOURCE
>   #include <err.h>
>   #include <errno.h>
>   #include <sched.h>
>   #include <stdio.h>
>   #include <stdlib.h>
>   #include <sys/wait.h>
>   #include <unistd.h>
>   #include <asm/unistd.h>
>   #include <linux/sched.h>
>   static void test_fork(void)
>   {
>   	int pid = syscall(__NR_fork);
>   	if (pid < 0)
>   		err(1, "fork");
>   	fprintf(stderr, "current: %d, parent: %d, fork returned: %d\n",
>   		getpid(), getppid(), pid);
>   	int status;
>   	if (wait(&status) < 0) {
>   		if (errno == ECHILD)
>   			_exit(0);
>   		err(1, "wait");
>   	}
>   	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
>   		errx(1, "wait: %#x", status);
>   }
>   int main(void)
>   {
>   	test_fork();
>   	if (unshare(CLONE_NEWPID | CLONE_NEWUSER) < 0)
>   		err(1, "unshare");
>   	test_fork();
>   	return 0;
>   }
>   EOF
>   $ sh -c ./a.out
>   current: 10001, parent: 10000, fork returned: 10002
>   current: 10002, parent: 10001, fork returned: 10001
>   current: 10001, parent: 10000, fork returned: 10003
>   current: 1, parent: 0, fork returned: 10001
>
> This bug was found by strace test suite.
>
> Cc: Eric W. Biederman <ebiederm@xmission.com>
> Cc: stable@vger.kernel.org
> Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
> ---
>
> v2: Replaced task_active_pid_ns(p) with current->nsproxy->pid_ns_for_children
>     as suggested by Eric.

I can't test this either.  But from just reading through the code earlier.
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>

>  arch/sparc/kernel/process_32.c | 3 ++-
>  arch/sparc/kernel/process_64.c | 3 ++-
>  2 files changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
> index a02363735915..3be653e40204 100644
> --- a/arch/sparc/kernel/process_32.c
> +++ b/arch/sparc/kernel/process_32.c
> @@ -368,7 +368,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
>  #endif
>  
>  	/* Set the return value for the child. */
> -	childregs->u_regs[UREG_I0] = current->pid;
> +	childregs->u_regs[UREG_I0] =
> +		task_pid_nr_ns(current, current->nsproxy->pid_ns_for_children);
>  	childregs->u_regs[UREG_I1] = 1;
>  
>  	/* Set the return value for the parent. */
> diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
> index 6f8c7822fc06..f53ef5cff46a 100644
> --- a/arch/sparc/kernel/process_64.c
> +++ b/arch/sparc/kernel/process_64.c
> @@ -629,7 +629,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
>  		t->utraps[0]++;
>  
>  	/* Set the return value for the child. */
> -	t->kregs->u_regs[UREG_I0] = current->pid;
> +	t->kregs->u_regs[UREG_I0] =
> +		task_pid_nr_ns(current, current->nsproxy->pid_ns_for_children);
>  	t->kregs->u_regs[UREG_I1] = 1;
>  
>  	/* Set the second return value for the parent. */

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] sparc: make copy_thread honor pid namespaces
@ 2021-02-22 21:30       ` Eric W. Biederman
  0 siblings, 0 replies; 11+ messages in thread
From: Eric W. Biederman @ 2021-02-22 21:30 UTC (permalink / raw)
  To: Dmitry V. Levin
  Cc: David S. Miller, Alexey Gladkov, Gleb Fotengauer-Malinovskiy,
	Anatoly Pugachev, sparclinux, linux-kernel

"Dmitry V. Levin" <ldv@altlinux.org> writes:

> On sparc, fork and clone syscalls have an unusual semantics of
> returning the pid of the parent process to the child process.
>
> Apparently, the implementation did not honor pid namespaces at all,
> so the child used to get the pid of its parent in the init namespace.
>
> Fortunately, most users of these syscalls are not affected by this bug
> because they use another register to distinguish the parent process
> from its child, and the pid of the parent process is often discarded.
>
> Reproducer:
>
>   $ gcc -Wall -O2 -xc - <<'EOF'
>   #define _GNU_SOURCE
>   #include <err.h>
>   #include <errno.h>
>   #include <sched.h>
>   #include <stdio.h>
>   #include <stdlib.h>
>   #include <sys/wait.h>
>   #include <unistd.h>
>   #include <asm/unistd.h>
>   #include <linux/sched.h>
>   static void test_fork(void)
>   {
>   	int pid = syscall(__NR_fork);
>   	if (pid < 0)
>   		err(1, "fork");
>   	fprintf(stderr, "current: %d, parent: %d, fork returned: %d\n",
>   		getpid(), getppid(), pid);
>   	int status;
>   	if (wait(&status) < 0) {
>   		if (errno = ECHILD)
>   			_exit(0);
>   		err(1, "wait");
>   	}
>   	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
>   		errx(1, "wait: %#x", status);
>   }
>   int main(void)
>   {
>   	test_fork();
>   	if (unshare(CLONE_NEWPID | CLONE_NEWUSER) < 0)
>   		err(1, "unshare");
>   	test_fork();
>   	return 0;
>   }
>   EOF
>   $ sh -c ./a.out
>   current: 10001, parent: 10000, fork returned: 10002
>   current: 10002, parent: 10001, fork returned: 10001
>   current: 10001, parent: 10000, fork returned: 10003
>   current: 1, parent: 0, fork returned: 10001
>
> This bug was found by strace test suite.
>
> Cc: Eric W. Biederman <ebiederm@xmission.com>
> Cc: stable@vger.kernel.org
> Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
> ---
>
> v2: Replaced task_active_pid_ns(p) with current->nsproxy->pid_ns_for_children
>     as suggested by Eric.

I can't test this either.  But from just reading through the code earlier.
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>

>  arch/sparc/kernel/process_32.c | 3 ++-
>  arch/sparc/kernel/process_64.c | 3 ++-
>  2 files changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
> index a02363735915..3be653e40204 100644
> --- a/arch/sparc/kernel/process_32.c
> +++ b/arch/sparc/kernel/process_32.c
> @@ -368,7 +368,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
>  #endif
>  
>  	/* Set the return value for the child. */
> -	childregs->u_regs[UREG_I0] = current->pid;
> +	childregs->u_regs[UREG_I0] > +		task_pid_nr_ns(current, current->nsproxy->pid_ns_for_children);
>  	childregs->u_regs[UREG_I1] = 1;
>  
>  	/* Set the return value for the parent. */
> diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
> index 6f8c7822fc06..f53ef5cff46a 100644
> --- a/arch/sparc/kernel/process_64.c
> +++ b/arch/sparc/kernel/process_64.c
> @@ -629,7 +629,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
>  		t->utraps[0]++;
>  
>  	/* Set the return value for the child. */
> -	t->kregs->u_regs[UREG_I0] = current->pid;
> +	t->kregs->u_regs[UREG_I0] > +		task_pid_nr_ns(current, current->nsproxy->pid_ns_for_children);
>  	t->kregs->u_regs[UREG_I1] = 1;
>  
>  	/* Set the second return value for the parent. */

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] sparc: make copy_thread honor pid namespaces
  2021-02-19 22:50     ` Dmitry V. Levin
@ 2021-02-23 11:14       ` Anatoly Pugachev
  -1 siblings, 0 replies; 11+ messages in thread
From: Anatoly Pugachev @ 2021-02-23 11:14 UTC (permalink / raw)
  To: Dmitry V. Levin
  Cc: David S. Miller, Eric W. Biederman, Alexey Gladkov,
	Gleb Fotengauer-Malinovskiy, Sparc kernel list,
	Linux Kernel list

On Sat, Feb 20, 2021 at 1:51 AM Dmitry V. Levin <ldv@altlinux.org> wrote:
>
> On sparc, fork and clone syscalls have an unusual semantics of
> returning the pid of the parent process to the child process.
>
> Apparently, the implementation did not honor pid namespaces at all,
> so the child used to get the pid of its parent in the init namespace.
>
> Fortunately, most users of these syscalls are not affected by this bug
> because they use another register to distinguish the parent process
> from its child, and the pid of the parent process is often discarded.
>
> Reproducer:

tested on my sparc64 ldom:

1. non-patched kernel (kernel 5.11.0-03615-g55f62bc87347), reproducer/test run:

$ gcc -Wall -O2 sparc-fork-namespace2.c
$ ./a.out
current: 72544, parent: 69045, fork returned: 72545
current: 72545, parent: 72544, fork returned: 72544
current: 72544, parent: 69045, fork returned: 72546
current: 1, parent: 0, fork returned: 72544

2. kernel patch applied, test run:

$ uname -a
Linux ttip 5.11.0-09279-g7c70f3a7488d-dirty #185 SMP Tue Feb 23
00:50:11 MSK 2021 sparc64 GNU/Linux

$ ./a.out
current: 939, parent: 918, fork returned: 940
current: 940, parent: 939, fork returned: 939
current: 939, parent: 918, fork returned: 941
a.out: wait: 0x9

and console/kernel logs:

[   72.204212] Kernel unaligned access at TPC[4060c8] ret_from_fork+0x1c/0x2c
[   72.204254] Unsupported unaligned load/store trap for kernel at
<00000000004060c8>.
[   72.204267]               \|/ ____ \|/
[   72.204267]               "@'/ .. \`@"
[   72.204267]               /_| \__/ |_\
[   72.204267]                  \__U_/
[   72.204284] a.out(941): Kernel does fpu/atomic unaligned load/store. [#3]
[   72.204298] CPU: 6 PID: 941 Comm: a.out Tainted: G      D     E
5.11.0-09279-g7c70f3a7488d-dirty #185
[   72.204315] TSTATE: 0000004411001600 TPC: 00000000004060c8 TNPC:
00000000004060cc Y: 000000cf    Tainted: G      D     E
[   72.204330] TPC: <ret_from_fork+0x1c/0x2c>
[   72.204340] g0: 0000000000000000 g1: ffffffffffffffff g2:
0000000000000006 g3: 0000000000000000
[   72.204349] g4: fff8000041482700 g5: fff80021a9890000 g6:
fff8000041038000 g7: 0308000108000004
[   72.204359] o0: 0000000000000000 o1: 000000001fffffff o2:
fff8000041482780 o3: 0000000000000000
[   72.204369] o4: 0000000000000001 o5: 0000000000000000 sp:
fff800004103b6a1 ret_pc: 00000000004060b0
[   72.204378] RPC: <ret_from_fork+0x4/0x2c>
[   72.204387] l0: 0308000108000000 l1: 0000000000000002 l2:
0000000000450cf4 l3: 0000000000000000
[   72.204396] l4: 0000000000000264 l5: 00000100001eb9f0 l6:
fff8000045018000 l7: 00000000006c7840
[   72.204406] i0: 0000000000efcd60 i1: 0000000000450df4 i2:
00000000000003dc i3: 00000000000003dc
[   72.204415] i4: 0000000000eebc78 i5: 0000000000000000 i6:
000007feffa56811 i7: 0000010000000ad8
[   72.204424] I7: <0x10000000ad8>
[   72.204432] Call Trace:
[   72.204439] Caller[0000010000000ad8]: 0x10000000ad8
[   72.204446] Instruction DUMP:
[   72.204449]  0aca0072
[   72.204456]  e059a008
[   72.204461]  e25ba8c7
[   72.204467] <9fc44000>
[   72.204472]  d05ba8cf
[   72.204477]  1068006d
[   72.204483]  90102000
[   72.204488]  0f0011a9
[   72.204493]  10680004
[   72.204499]


So something is actually wrong with the patch.

Thanks.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] sparc: make copy_thread honor pid namespaces
@ 2021-02-23 11:14       ` Anatoly Pugachev
  0 siblings, 0 replies; 11+ messages in thread
From: Anatoly Pugachev @ 2021-02-23 11:14 UTC (permalink / raw)
  To: Dmitry V. Levin
  Cc: David S. Miller, Eric W. Biederman, Alexey Gladkov,
	Gleb Fotengauer-Malinovskiy, Sparc kernel list,
	Linux Kernel list

On Sat, Feb 20, 2021 at 1:51 AM Dmitry V. Levin <ldv@altlinux.org> wrote:
>
> On sparc, fork and clone syscalls have an unusual semantics of
> returning the pid of the parent process to the child process.
>
> Apparently, the implementation did not honor pid namespaces at all,
> so the child used to get the pid of its parent in the init namespace.
>
> Fortunately, most users of these syscalls are not affected by this bug
> because they use another register to distinguish the parent process
> from its child, and the pid of the parent process is often discarded.
>
> Reproducer:

tested on my sparc64 ldom:

1. non-patched kernel (kernel 5.11.0-03615-g55f62bc87347), reproducer/test run:

$ gcc -Wall -O2 sparc-fork-namespace2.c
$ ./a.out
current: 72544, parent: 69045, fork returned: 72545
current: 72545, parent: 72544, fork returned: 72544
current: 72544, parent: 69045, fork returned: 72546
current: 1, parent: 0, fork returned: 72544

2. kernel patch applied, test run:

$ uname -a
Linux ttip 5.11.0-09279-g7c70f3a7488d-dirty #185 SMP Tue Feb 23
00:50:11 MSK 2021 sparc64 GNU/Linux

$ ./a.out
current: 939, parent: 918, fork returned: 940
current: 940, parent: 939, fork returned: 939
current: 939, parent: 918, fork returned: 941
a.out: wait: 0x9

and console/kernel logs:

[   72.204212] Kernel unaligned access at TPC[4060c8] ret_from_fork+0x1c/0x2c
[   72.204254] Unsupported unaligned load/store trap for kernel at
<00000000004060c8>.
[   72.204267]               \|/ ____ \|/
[   72.204267]               "@'/ .. \`@"
[   72.204267]               /_| \__/ |_\
[   72.204267]                  \__U_/
[   72.204284] a.out(941): Kernel does fpu/atomic unaligned load/store. [#3]
[   72.204298] CPU: 6 PID: 941 Comm: a.out Tainted: G      D     E
5.11.0-09279-g7c70f3a7488d-dirty #185
[   72.204315] TSTATE: 0000004411001600 TPC: 00000000004060c8 TNPC:
00000000004060cc Y: 000000cf    Tainted: G      D     E
[   72.204330] TPC: <ret_from_fork+0x1c/0x2c>
[   72.204340] g0: 0000000000000000 g1: ffffffffffffffff g2:
0000000000000006 g3: 0000000000000000
[   72.204349] g4: fff8000041482700 g5: fff80021a9890000 g6:
fff8000041038000 g7: 0308000108000004
[   72.204359] o0: 0000000000000000 o1: 000000001fffffff o2:
fff8000041482780 o3: 0000000000000000
[   72.204369] o4: 0000000000000001 o5: 0000000000000000 sp:
fff800004103b6a1 ret_pc: 00000000004060b0
[   72.204378] RPC: <ret_from_fork+0x4/0x2c>
[   72.204387] l0: 0308000108000000 l1: 0000000000000002 l2:
0000000000450cf4 l3: 0000000000000000
[   72.204396] l4: 0000000000000264 l5: 00000100001eb9f0 l6:
fff8000045018000 l7: 00000000006c7840
[   72.204406] i0: 0000000000efcd60 i1: 0000000000450df4 i2:
00000000000003dc i3: 00000000000003dc
[   72.204415] i4: 0000000000eebc78 i5: 0000000000000000 i6:
000007feffa56811 i7: 0000010000000ad8
[   72.204424] I7: <0x10000000ad8>
[   72.204432] Call Trace:
[   72.204439] Caller[0000010000000ad8]: 0x10000000ad8
[   72.204446] Instruction DUMP:
[   72.204449]  0aca0072
[   72.204456]  e059a008
[   72.204461]  e25ba8c7
[   72.204467] <9fc44000>
[   72.204472]  d05ba8cf
[   72.204477]  1068006d
[   72.204483]  90102000
[   72.204488]  0f0011a9
[   72.204493]  10680004
[   72.204499]


So something is actually wrong with the patch.

Thanks.

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-02-23 11:16 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-17  8:00 [PATCH] sparc: make copy_thread honor pid namespaces Dmitry V. Levin
2021-02-17  8:00 ` Dmitry V. Levin
2021-02-18 15:28 ` David Laight
2021-02-18 18:21 ` Eric W. Biederman
2021-02-18 18:21   ` Eric W. Biederman
2021-02-19 22:50   ` [PATCH v2] " Dmitry V. Levin
2021-02-19 22:50     ` Dmitry V. Levin
2021-02-22 21:30     ` Eric W. Biederman
2021-02-22 21:30       ` Eric W. Biederman
2021-02-23 11:14     ` Anatoly Pugachev
2021-02-23 11:14       ` Anatoly Pugachev

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.