All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/1][V4] Handle reboot in a child pid namespace
@ 2011-12-12  0:17 ` Daniel Lezcano
  0 siblings, 0 replies; 14+ messages in thread
From: Daniel Lezcano @ 2011-12-12  0:17 UTC (permalink / raw)
  To: akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	oleg-H+wXaHxf7aLQT0dZR+AlfA, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w



ChangeLog:
==========

 * V4
   - store the signal number the child pid namespace init should
     exit from. It is simpler, cleaner, and does not add more encoding
     bits to the exit code of the process.
 * V3
   - removed lock and serialization of pid_ns_reboot
 * V2
   - added a lock for the pid namespace to prevent racy call
     to the 'reboot' syscall
   - Moved 'reboot' command assigned in zap_pid_ns_processes
     instead of wait_task_zombie
   - added tasklist lock around force_sig
   - added do_exit in pid_ns_reboot
   - used task_active_pid_ns instead of declaring a new variable in sys_reboot
   - moved code up before POWER_OFF changed to HALT in sys_reboot


Test case:
==========

#include <alloca.h>
#include <stdio.h>
#include <sched.h>
#include <unistd.h>
#include <signal.h>
#include <sys/reboot.h>
#include <sys/types.h>
#include <sys/wait.h>

#include <linux/reboot.h>

/*
 * Entry point of the cloned child: issue reboot() with the command that
 * arg points to.
 *
 * arg: pointer to the int reboot command.
 *
 * Returns 1 after reporting a reboot() failure, 0 if reboot() returned
 * without error.  The value is returned explicitly so the function never
 * falls off the end of a non-void function.
 */
static int do_reboot(void *arg)
{
        const int *cmd = arg;

        if (reboot(*cmd)) {
                printf("failed to reboot(%d): %m\n", *cmd);
                return 1;
        }

        return 0;
}

/*
 * Run do_reboot(cmd) in a child created with CLONE_NEWPID and check how
 * that child terminated.
 *
 * cmd: reboot command handed to the child.
 * sig: signal number the child is expected to be terminated by.
 *
 * Returns 0 when the child was terminated by sig; -1 when clone() or
 * wait() fails, when the child was not terminated by a signal, or when the
 * terminating signal differs from sig.
 */
int test_reboot(int cmd, int sig)
{
        const size_t stack_size = 4096;
        /*
         * clone() is handed the address just past the end of the buffer.
         * Use a char pointer for the offset: arithmetic on void * is a GNU
         * extension, not standard C.
         */
        char *stack = (char *)alloca(stack_size) + stack_size;
        int status;
        pid_t ret;

        ret = clone(do_reboot, stack, CLONE_NEWPID | SIGCHLD, &cmd);
        if (ret < 0) {
                printf("failed to clone: %m\n");
                return -1;
        }

        if (wait(&status) < 0) {
                printf("unexpected wait error: %m\n");
                return -1;
        }

        /* A child that returned from do_reboot() exits normally. */
        if (!WIFSIGNALED(status)) {
                printf("child process exited but was not signaled\n");
                return -1;
        }

        if (WTERMSIG(status) != sig) {
                printf("signal termination is not the one expected\n");
                return -1;
        }

        return 0;
}

/*
 * Drive test_reboot() for each reboot command under test.
 *
 * RESTART and RESTART2 are expected to end the child with SIGHUP, HALT and
 * POWER_OFF with SIGINT.  CAD_ON is expected to make test_reboot() fail.
 *
 * Returns 0 when every expectation holds, 1 at the first mismatch.
 */
int main(int argc, char *argv[])
{
        int status;

        status = test_reboot(LINUX_REBOOT_CMD_RESTART, SIGHUP);
        if (status < 0)
                return 1;
        printf("reboot(LINUX_REBOOT_CMD_RESTART) succeed\n");

        status = test_reboot(LINUX_REBOOT_CMD_RESTART2, SIGHUP);
        if (status < 0)
                return 1;
        printf("reboot(LINUX_REBOOT_CMD_RESTART2) succeed\n");

        status = test_reboot(LINUX_REBOOT_CMD_HALT, SIGINT);
        if (status < 0)
                return 1;
        printf("reboot(LINUX_REBOOT_CMD_HALT) succeed\n");

        status = test_reboot(LINUX_REBOOT_CMD_POWER_OFF, SIGINT);
        if (status < 0)
                return 1;
        printf("reboot(LINUX_REBOOT_CMD_POWER_OFF) succeed\n");

        /* No signal can match -1, so success here would be a test failure. */
        status = test_reboot(LINUX_REBOOT_CMD_CAD_ON, -1);
        if (status >= 0) {
                printf("reboot(LINUX_REBOOT_CMD_CAD_ON) should have failed\n");
                return 1;
        }
        printf("reboot(LINUX_REBOOT_CMD_CAD_ON) has failed as expected\n");

        return 0;
}

Daniel Lezcano (1):
  Add reboot_pid_ns to handle the reboot syscall

 include/linux/pid_namespace.h |    8 +++++++-
 kernel/pid_namespace.c        |   33 +++++++++++++++++++++++++++++++++
 kernel/sys.c                  |    3 +++
 3 files changed, 43 insertions(+), 1 deletions(-)

-- 
1.7.5.4

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 0/1][V4] Handle reboot in a child pid namespace
@ 2011-12-12  0:17 ` Daniel Lezcano
  0 siblings, 0 replies; 14+ messages in thread
From: Daniel Lezcano @ 2011-12-12  0:17 UTC (permalink / raw)
  To: akpm; +Cc: serge.hallyn, oleg, containers, gkurz, linux-kernel, mtk.manpages



ChangeLog:
==========

 * V4
   - store the signal number the child pid namespace init should
     exit from. It is simpler, cleaner, and does not add more encoding
     bits to the exit code of the process.
 * V3
   - removed lock and serialization of pid_ns_reboot
 * V2
   - added a lock for the pid namespace to prevent racy call
     to the 'reboot' syscall
   - Moved 'reboot' command assigned in zap_pid_ns_processes
     instead of wait_task_zombie
   - added tasklist lock around force_sig
   - added do_exit in pid_ns_reboot
   - used task_active_pid_ns instead of declaring a new variable in sys_reboot
   - moved code up before POWER_OFF changed to HALT in sys_reboot


Test case:
==========

#include <alloca.h>
#include <stdio.h>
#include <sched.h>
#include <unistd.h>
#include <signal.h>
#include <sys/reboot.h>
#include <sys/types.h>
#include <sys/wait.h>

#include <linux/reboot.h>

/*
 * Entry point of the cloned child: issue reboot() with the command that
 * arg points to.
 *
 * arg: pointer to the int reboot command.
 *
 * Returns 1 after reporting a reboot() failure, 0 if reboot() returned
 * without error.  The value is returned explicitly so the function never
 * falls off the end of a non-void function.
 */
static int do_reboot(void *arg)
{
        const int *cmd = arg;

        if (reboot(*cmd)) {
                printf("failed to reboot(%d): %m\n", *cmd);
                return 1;
        }

        return 0;
}

/*
 * Run do_reboot(cmd) in a child created with CLONE_NEWPID and check how
 * that child terminated.
 *
 * cmd: reboot command handed to the child.
 * sig: signal number the child is expected to be terminated by.
 *
 * Returns 0 when the child was terminated by sig; -1 when clone() or
 * wait() fails, when the child was not terminated by a signal, or when the
 * terminating signal differs from sig.
 */
int test_reboot(int cmd, int sig)
{
        const size_t stack_size = 4096;
        /*
         * clone() is handed the address just past the end of the buffer.
         * Use a char pointer for the offset: arithmetic on void * is a GNU
         * extension, not standard C.
         */
        char *stack = (char *)alloca(stack_size) + stack_size;
        int status;
        pid_t ret;

        ret = clone(do_reboot, stack, CLONE_NEWPID | SIGCHLD, &cmd);
        if (ret < 0) {
                printf("failed to clone: %m\n");
                return -1;
        }

        if (wait(&status) < 0) {
                printf("unexpected wait error: %m\n");
                return -1;
        }

        /* A child that returned from do_reboot() exits normally. */
        if (!WIFSIGNALED(status)) {
                printf("child process exited but was not signaled\n");
                return -1;
        }

        if (WTERMSIG(status) != sig) {
                printf("signal termination is not the one expected\n");
                return -1;
        }

        return 0;
}

/*
 * Drive test_reboot() for each reboot command under test.
 *
 * RESTART and RESTART2 are expected to end the child with SIGHUP, HALT and
 * POWER_OFF with SIGINT.  CAD_ON is expected to make test_reboot() fail.
 *
 * Returns 0 when every expectation holds, 1 at the first mismatch.
 */
int main(int argc, char *argv[])
{
        int status;

        status = test_reboot(LINUX_REBOOT_CMD_RESTART, SIGHUP);
        if (status < 0)
                return 1;
        printf("reboot(LINUX_REBOOT_CMD_RESTART) succeed\n");

        status = test_reboot(LINUX_REBOOT_CMD_RESTART2, SIGHUP);
        if (status < 0)
                return 1;
        printf("reboot(LINUX_REBOOT_CMD_RESTART2) succeed\n");

        status = test_reboot(LINUX_REBOOT_CMD_HALT, SIGINT);
        if (status < 0)
                return 1;
        printf("reboot(LINUX_REBOOT_CMD_HALT) succeed\n");

        status = test_reboot(LINUX_REBOOT_CMD_POWER_OFF, SIGINT);
        if (status < 0)
                return 1;
        printf("reboot(LINUX_REBOOT_CMD_POWER_OFF) succeed\n");

        /* No signal can match -1, so success here would be a test failure. */
        status = test_reboot(LINUX_REBOOT_CMD_CAD_ON, -1);
        if (status >= 0) {
                printf("reboot(LINUX_REBOOT_CMD_CAD_ON) should have failed\n");
                return 1;
        }
        printf("reboot(LINUX_REBOOT_CMD_CAD_ON) has failed as expected\n");

        return 0;
}

Daniel Lezcano (1):
  Add reboot_pid_ns to handle the reboot syscall

 include/linux/pid_namespace.h |    8 +++++++-
 kernel/pid_namespace.c        |   33 +++++++++++++++++++++++++++++++++
 kernel/sys.c                  |    3 +++
 3 files changed, 43 insertions(+), 1 deletions(-)

-- 
1.7.5.4


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH][V4] Add reboot_pid_ns to handle the reboot syscall
  2011-12-12  0:17 ` Daniel Lezcano
@ 2011-12-12  0:17     ` Daniel Lezcano
  -1 siblings, 0 replies; 14+ messages in thread
From: Daniel Lezcano @ 2011-12-12  0:17 UTC (permalink / raw)
  To: akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	oleg-H+wXaHxf7aLQT0dZR+AlfA, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w

In the case of a child pid namespace, rebooting the system does not
really make sense. When the pid namespace is used in conjunction
with the other namespaces in order to create a linux container, the
reboot syscall leads to some problems.

A container can reboot the host. That can be fixed by dropping
the sys_reboot capability but we are unable to correctly poweroff/
halt/reboot a container and the container stays stuck at the shutdown
time with the container's init process waiting indefinitely.

After several attempts, no solution from userspace was found to reliably
handle the shutdown from a container.

This patch proposes to make the init process of the child pid namespace
exit with a signal status set to: SIGINT if the child pid namespace called
"halt/poweroff" and SIGHUP if the child pid namespace called "reboot".
When the reboot syscall is called and we are not in the initial
pid namespace, we kill the pid namespace for "HALT", "POWEROFF", "RESTART",
and "RESTART2". Otherwise we return EINVAL.

Returning EINVAL is also an easy way to check if this feature is supported
by the kernel when invoking another 'reboot' option like CAD.

This way the parent process of the child pid namespace knows whether
it rebooted or not and can make the right decision.

Signed-off-by: Daniel Lezcano <daniel.lezcano-GANU6spQydw@public.gmane.org>
Acked-by: Serge Hallyn <serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
---
 include/linux/pid_namespace.h |    8 +++++++-
 kernel/pid_namespace.c        |   30 ++++++++++++++++++++++++++++++
 kernel/sys.c                  |    3 +++
 3 files changed, 40 insertions(+), 1 deletions(-)

diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index e7cf666..3279596 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -32,6 +32,7 @@ struct pid_namespace {
 #endif
 	gid_t pid_gid;
 	int hide_pid;
+	int reboot;
 };
 
 extern struct pid_namespace init_pid_ns;
@@ -47,6 +48,7 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
 extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
 extern void free_pid_ns(struct kref *kref);
 extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
+extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd);
 
 static inline void put_pid_ns(struct pid_namespace *ns)
 {
@@ -74,11 +76,15 @@ static inline void put_pid_ns(struct pid_namespace *ns)
 {
 }
 
-
 static inline void zap_pid_ns_processes(struct pid_namespace *ns)
 {
 	BUG();
 }
+
+static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
+{
+	BUG();
+}
 #endif /* CONFIG_PID_NS */
 
 extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a896839..1e93a5a 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -15,6 +15,7 @@
 #include <linux/acct.h>
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
+#include <linux/reboot.h>
 
 #define BITS_PER_PAGE		(PAGE_SIZE*8)
 
@@ -187,6 +188,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 		rc = sys_wait4(-1, NULL, __WALL, NULL);
 	} while (rc != -ECHILD);
 
+	if (pid_ns->reboot)
+		current->signal->group_exit_code = pid_ns->reboot;
+
 	acct_exit_ns(pid_ns);
 	return;
 }
@@ -221,6 +225,32 @@ static struct ctl_table pid_ns_ctl_table[] = {
 
 static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
 
+int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
+{
+	switch(cmd) {
+	case LINUX_REBOOT_CMD_RESTART2:
+	case LINUX_REBOOT_CMD_RESTART:
+		pid_ns->reboot = SIGHUP;
+		break;
+
+	case LINUX_REBOOT_CMD_POWER_OFF:
+	case LINUX_REBOOT_CMD_HALT:
+		pid_ns->reboot = SIGINT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	read_lock(&tasklist_lock);
+	force_sig(SIGKILL, pid_ns->child_reaper);
+	read_unlock(&tasklist_lock);
+
+	do_exit(0);
+
+	/* Not reached */
+	return 0;
+}
+
 static __init int pid_namespaces_init(void)
 {
 	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
diff --git a/kernel/sys.c b/kernel/sys.c
index ddf8155..31acf63 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -444,6 +444,9 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
 	                magic2 != LINUX_REBOOT_MAGIC2C))
 		return -EINVAL;
 
+	if (task_active_pid_ns(current) != &init_pid_ns)
+		return reboot_pid_ns(task_active_pid_ns(current), cmd);
+
 	/* Instead of trying to make the power_off code look like
 	 * halt when pm_power_off is not set do it the easy way.
 	 */
-- 
1.7.5.4

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH][V4] Add reboot_pid_ns to handle the reboot syscall
@ 2011-12-12  0:17     ` Daniel Lezcano
  0 siblings, 0 replies; 14+ messages in thread
From: Daniel Lezcano @ 2011-12-12  0:17 UTC (permalink / raw)
  To: akpm; +Cc: serge.hallyn, oleg, containers, gkurz, linux-kernel, mtk.manpages

In the case of a child pid namespace, rebooting the system does not
really make sense. When the pid namespace is used in conjunction
with the other namespaces in order to create a linux container, the
reboot syscall leads to some problems.

A container can reboot the host. That can be fixed by dropping
the sys_reboot capability but we are unable to correctly poweroff/
halt/reboot a container and the container stays stuck at the shutdown
time with the container's init process waiting indefinitely.

After several attempts, no solution from userspace was found to reliably
handle the shutdown from a container.

This patch proposes to make the init process of the child pid namespace
exit with a signal status set to: SIGINT if the child pid namespace called
"halt/poweroff" and SIGHUP if the child pid namespace called "reboot".
When the reboot syscall is called and we are not in the initial
pid namespace, we kill the pid namespace for "HALT", "POWEROFF", "RESTART",
and "RESTART2". Otherwise we return EINVAL.

Returning EINVAL is also an easy way to check if this feature is supported
by the kernel when invoking another 'reboot' option like CAD.

This way the parent process of the child pid namespace knows whether
it rebooted or not and can make the right decision.

Signed-off-by: Daniel Lezcano <daniel.lezcano@free.fr>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
---
 include/linux/pid_namespace.h |    8 +++++++-
 kernel/pid_namespace.c        |   30 ++++++++++++++++++++++++++++++
 kernel/sys.c                  |    3 +++
 3 files changed, 40 insertions(+), 1 deletions(-)

diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index e7cf666..3279596 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -32,6 +32,7 @@ struct pid_namespace {
 #endif
 	gid_t pid_gid;
 	int hide_pid;
+	int reboot;
 };
 
 extern struct pid_namespace init_pid_ns;
@@ -47,6 +48,7 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
 extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
 extern void free_pid_ns(struct kref *kref);
 extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
+extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd);
 
 static inline void put_pid_ns(struct pid_namespace *ns)
 {
@@ -74,11 +76,15 @@ static inline void put_pid_ns(struct pid_namespace *ns)
 {
 }
 
-
 static inline void zap_pid_ns_processes(struct pid_namespace *ns)
 {
 	BUG();
 }
+
+static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
+{
+	BUG();
+}
 #endif /* CONFIG_PID_NS */
 
 extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a896839..1e93a5a 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -15,6 +15,7 @@
 #include <linux/acct.h>
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
+#include <linux/reboot.h>
 
 #define BITS_PER_PAGE		(PAGE_SIZE*8)
 
@@ -187,6 +188,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 		rc = sys_wait4(-1, NULL, __WALL, NULL);
 	} while (rc != -ECHILD);
 
+	if (pid_ns->reboot)
+		current->signal->group_exit_code = pid_ns->reboot;
+
 	acct_exit_ns(pid_ns);
 	return;
 }
@@ -221,6 +225,32 @@ static struct ctl_table pid_ns_ctl_table[] = {
 
 static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
 
+int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
+{
+	switch(cmd) {
+	case LINUX_REBOOT_CMD_RESTART2:
+	case LINUX_REBOOT_CMD_RESTART:
+		pid_ns->reboot = SIGHUP;
+		break;
+
+	case LINUX_REBOOT_CMD_POWER_OFF:
+	case LINUX_REBOOT_CMD_HALT:
+		pid_ns->reboot = SIGINT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	read_lock(&tasklist_lock);
+	force_sig(SIGKILL, pid_ns->child_reaper);
+	read_unlock(&tasklist_lock);
+
+	do_exit(0);
+
+	/* Not reached */
+	return 0;
+}
+
 static __init int pid_namespaces_init(void)
 {
 	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
diff --git a/kernel/sys.c b/kernel/sys.c
index ddf8155..31acf63 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -444,6 +444,9 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
 	                magic2 != LINUX_REBOOT_MAGIC2C))
 		return -EINVAL;
 
+	if (task_active_pid_ns(current) != &init_pid_ns)
+		return reboot_pid_ns(task_active_pid_ns(current), cmd);
+
 	/* Instead of trying to make the power_off code look like
 	 * halt when pm_power_off is not set do it the easy way.
 	 */
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH][V4] Add reboot_pid_ns to handle the reboot syscall
  2011-12-12  0:17     ` Daniel Lezcano
@ 2011-12-12 23:14         ` Serge E. Hallyn
  -1 siblings, 0 replies; 14+ messages in thread
From: Serge E. Hallyn @ 2011-12-12 23:14 UTC (permalink / raw)
  To: Daniel Lezcano
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b,
	mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	oleg-H+wXaHxf7aLQT0dZR+AlfA

Quoting Daniel Lezcano (daniel.lezcano-GANU6spQydw@public.gmane.org):
> In the case of a child pid namespace, rebooting the system does not
> really makes sense. When the pid namespace is used in conjunction
> with the other namespaces in order to create a linux container, the
> reboot syscall leads to some problems.
> 
> A container can reboot the host. That can be fixed by dropping
> the sys_reboot capability but we are unable to correctly poweroff/
> halt/reboot a container and the container stays stuck at the shutdown
> time with the container's init process waiting indefinitively.
> 
> After several attempts, no solution from userspace was found to reliabily
> handle the shutdown from a container.
> 
> This patch propose to make the init process of the child pid namespace to
> exit with a signal status set to : SIGINT if the child pid namespace called
> "halt/poweroff" and SIGHUP if the child pid namespace called "reboot".
> When the reboot syscall is called and we are not in the initial
> pid namespace, we kill the pid namespace for "HALT", "POWEROFF", "RESTART",
> and "RESTART2". Otherwise we return EINVAL.
> 
> Returning EINVAL is also an easy way to check if this feature is supported
> by the kernel when invoking another 'reboot' option like CAD.
> 
> By this way the parent process of the child pid namespace knows if
> it rebooted or not and can take the right decision.
> 
> Signed-off-by: Daniel Lezcano <daniel.lezcano-GANU6spQydw@public.gmane.org>
> Acked-by: Serge Hallyn <serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>

Thanks for pushing on this, Daniel.  It'll be really nice if this can get
in soon so we can get rid of the utmp watching hack, which at this point is
the last blocker to being able to use the same disk image for container and
bare metal installs.

> ---
>  include/linux/pid_namespace.h |    8 +++++++-
>  kernel/pid_namespace.c        |   30 ++++++++++++++++++++++++++++++
>  kernel/sys.c                  |    3 +++
>  3 files changed, 40 insertions(+), 1 deletions(-)
> 
> diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
> index e7cf666..3279596 100644
> --- a/include/linux/pid_namespace.h
> +++ b/include/linux/pid_namespace.h
> @@ -32,6 +32,7 @@ struct pid_namespace {
>  #endif
>  	gid_t pid_gid;
>  	int hide_pid;
> +	int reboot;
>  };
>  
>  extern struct pid_namespace init_pid_ns;
> @@ -47,6 +48,7 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
>  extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
>  extern void free_pid_ns(struct kref *kref);
>  extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
> +extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd);
>  
>  static inline void put_pid_ns(struct pid_namespace *ns)
>  {
> @@ -74,11 +76,15 @@ static inline void put_pid_ns(struct pid_namespace *ns)
>  {
>  }
>  
> -
>  static inline void zap_pid_ns_processes(struct pid_namespace *ns)
>  {
>  	BUG();
>  }
> +
> +static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
> +{
> +	BUG();
> +}
>  #endif /* CONFIG_PID_NS */
>  
>  extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
> diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
> index a896839..1e93a5a 100644
> --- a/kernel/pid_namespace.c
> +++ b/kernel/pid_namespace.c
> @@ -15,6 +15,7 @@
>  #include <linux/acct.h>
>  #include <linux/slab.h>
>  #include <linux/proc_fs.h>
> +#include <linux/reboot.h>
>  
>  #define BITS_PER_PAGE		(PAGE_SIZE*8)
>  
> @@ -187,6 +188,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
>  		rc = sys_wait4(-1, NULL, __WALL, NULL);
>  	} while (rc != -ECHILD);
>  
> +	if (pid_ns->reboot)
> +		current->signal->group_exit_code = pid_ns->reboot;
> +
>  	acct_exit_ns(pid_ns);
>  	return;
>  }
> @@ -221,6 +225,32 @@ static struct ctl_table pid_ns_ctl_table[] = {
>  
>  static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
>  
> +int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
> +{
> +	switch(cmd) {
> +	case LINUX_REBOOT_CMD_RESTART2:
> +	case LINUX_REBOOT_CMD_RESTART:
> +		pid_ns->reboot = SIGHUP;
> +		break;
> +
> +	case LINUX_REBOOT_CMD_POWER_OFF:
> +	case LINUX_REBOOT_CMD_HALT:
> +		pid_ns->reboot = SIGINT;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	read_lock(&tasklist_lock);
> +	force_sig(SIGKILL, pid_ns->child_reaper);
> +	read_unlock(&tasklist_lock);
> +
> +	do_exit(0);
> +
> +	/* Not reached */
> +	return 0;
> +}
> +
>  static __init int pid_namespaces_init(void)
>  {
>  	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
> diff --git a/kernel/sys.c b/kernel/sys.c
> index ddf8155..31acf63 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -444,6 +444,9 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
>  	                magic2 != LINUX_REBOOT_MAGIC2C))
>  		return -EINVAL;
>  
> +	if (task_active_pid_ns(current) != &init_pid_ns)
> +		return reboot_pid_ns(task_active_pid_ns(current), cmd);
> +
>  	/* Instead of trying to make the power_off code look like
>  	 * halt when pm_power_off is not set do it the easy way.
>  	 */
> -- 
> 1.7.5.4
> 
> _______________________________________________
> Containers mailing list
> Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
> https://lists.linuxfoundation.org/mailman/listinfo/containers

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH][V4] Add reboot_pid_ns to handle the reboot syscall
@ 2011-12-12 23:14         ` Serge E. Hallyn
  0 siblings, 0 replies; 14+ messages in thread
From: Serge E. Hallyn @ 2011-12-12 23:14 UTC (permalink / raw)
  To: Daniel Lezcano; +Cc: akpm, containers, oleg, linux-kernel, mtk.manpages

Quoting Daniel Lezcano (daniel.lezcano@free.fr):
> In the case of a child pid namespace, rebooting the system does not
> really makes sense. When the pid namespace is used in conjunction
> with the other namespaces in order to create a linux container, the
> reboot syscall leads to some problems.
> 
> A container can reboot the host. That can be fixed by dropping
> the sys_reboot capability but we are unable to correctly poweroff/
> halt/reboot a container and the container stays stuck at the shutdown
> time with the container's init process waiting indefinitively.
> 
> After several attempts, no solution from userspace was found to reliabily
> handle the shutdown from a container.
> 
> This patch propose to make the init process of the child pid namespace to
> exit with a signal status set to : SIGINT if the child pid namespace called
> "halt/poweroff" and SIGHUP if the child pid namespace called "reboot".
> When the reboot syscall is called and we are not in the initial
> pid namespace, we kill the pid namespace for "HALT", "POWEROFF", "RESTART",
> and "RESTART2". Otherwise we return EINVAL.
> 
> Returning EINVAL is also an easy way to check if this feature is supported
> by the kernel when invoking another 'reboot' option like CAD.
> 
> By this way the parent process of the child pid namespace knows if
> it rebooted or not and can take the right decision.
> 
> Signed-off-by: Daniel Lezcano <daniel.lezcano@free.fr>
> Acked-by: Serge Hallyn <serge.hallyn@canonical.com>

Thanks for pushing on this, Daniel.  It'll be really nice if this can get
in soon so we can get rid of the utmp watching hack, which at this point is
the last blocker to being able to use the same disk image for container and
bare metal installs.

> ---
>  include/linux/pid_namespace.h |    8 +++++++-
>  kernel/pid_namespace.c        |   30 ++++++++++++++++++++++++++++++
>  kernel/sys.c                  |    3 +++
>  3 files changed, 40 insertions(+), 1 deletions(-)
> 
> diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
> index e7cf666..3279596 100644
> --- a/include/linux/pid_namespace.h
> +++ b/include/linux/pid_namespace.h
> @@ -32,6 +32,7 @@ struct pid_namespace {
>  #endif
>  	gid_t pid_gid;
>  	int hide_pid;
> +	int reboot;
>  };
>  
>  extern struct pid_namespace init_pid_ns;
> @@ -47,6 +48,7 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
>  extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
>  extern void free_pid_ns(struct kref *kref);
>  extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
> +extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd);
>  
>  static inline void put_pid_ns(struct pid_namespace *ns)
>  {
> @@ -74,11 +76,15 @@ static inline void put_pid_ns(struct pid_namespace *ns)
>  {
>  }
>  
> -
>  static inline void zap_pid_ns_processes(struct pid_namespace *ns)
>  {
>  	BUG();
>  }
> +
> +static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
> +{
> +	BUG();
> +}
>  #endif /* CONFIG_PID_NS */
>  
>  extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
> diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
> index a896839..1e93a5a 100644
> --- a/kernel/pid_namespace.c
> +++ b/kernel/pid_namespace.c
> @@ -15,6 +15,7 @@
>  #include <linux/acct.h>
>  #include <linux/slab.h>
>  #include <linux/proc_fs.h>
> +#include <linux/reboot.h>
>  
>  #define BITS_PER_PAGE		(PAGE_SIZE*8)
>  
> @@ -187,6 +188,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
>  		rc = sys_wait4(-1, NULL, __WALL, NULL);
>  	} while (rc != -ECHILD);
>  
> +	if (pid_ns->reboot)
> +		current->signal->group_exit_code = pid_ns->reboot;
> +
>  	acct_exit_ns(pid_ns);
>  	return;
>  }
> @@ -221,6 +225,32 @@ static struct ctl_table pid_ns_ctl_table[] = {
>  
>  static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
>  
> +int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
> +{
> +	switch(cmd) {
> +	case LINUX_REBOOT_CMD_RESTART2:
> +	case LINUX_REBOOT_CMD_RESTART:
> +		pid_ns->reboot = SIGHUP;
> +		break;
> +
> +	case LINUX_REBOOT_CMD_POWER_OFF:
> +	case LINUX_REBOOT_CMD_HALT:
> +		pid_ns->reboot = SIGINT;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	read_lock(&tasklist_lock);
> +	force_sig(SIGKILL, pid_ns->child_reaper);
> +	read_unlock(&tasklist_lock);
> +
> +	do_exit(0);
> +
> +	/* Not reached */
> +	return 0;
> +}
> +
>  static __init int pid_namespaces_init(void)
>  {
>  	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
> diff --git a/kernel/sys.c b/kernel/sys.c
> index ddf8155..31acf63 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -444,6 +444,9 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
>  	                magic2 != LINUX_REBOOT_MAGIC2C))
>  		return -EINVAL;
>  
> +	if (task_active_pid_ns(current) != &init_pid_ns)
> +		return reboot_pid_ns(task_active_pid_ns(current), cmd);
> +
>  	/* Instead of trying to make the power_off code look like
>  	 * halt when pm_power_off is not set do it the easy way.
>  	 */
> -- 
> 1.7.5.4
> 
> _______________________________________________
> Containers mailing list
> Containers@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/containers

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/1][V4] Handle reboot in a child pid namespace
  2011-12-12  0:17 ` Daniel Lezcano
@ 2011-12-13 22:09     ` Daniel Lezcano
  -1 siblings, 0 replies; 14+ messages in thread
From: Daniel Lezcano @ 2011-12-13 22:09 UTC (permalink / raw)
  To: akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b
  Cc: mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	Bruno Prémont, oleg-H+wXaHxf7aLQT0dZR+AlfA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

On 12/12/2011 01:17 AM, Daniel Lezcano wrote:

Does someone have an opinion for this patch ?

I cc'ed Bruno who should be interested by this feature too (sorry for
not cc'ing you before).

Oleg, I did not add your signed-off-by because I changed the patch but I
guess the V4 is what you expected to see, right ?

Thanks
  -- Daniel

> ChangeLog:
> ==========
>
>  * V4
>    - store the signal number the child pid namespace init should
>      exit from. It is simpler, cleaner, and does not add more encoding
>      bits to the exit code of the process.
>  * V3
>    - removed lock and serialization of pid_ns_reboot
>  * V2
>    - added a lock for the pid namespace to prevent racy call
>      to the 'reboot' syscall
>    - Moved 'reboot' command assigned in zap_pid_ns_processes
>      instead of wait_task_zombie
>    - added tasklist lock around force_sig
>    - added do_exit in pid_ns_reboot
>    - used task_active_pid_ns instead of declaring a new variable in sys_reboot
>    - moved code up before POWER_OFF changed to HALT in sys_reboot
>
>
> Test case:
> ==========
>
> #include <alloca.h>
> #include <stdio.h>
> #include <sched.h>
> #include <unistd.h>
> #include <signal.h>
> #include <sys/reboot.h>
> #include <sys/types.h>
> #include <sys/wait.h>
>
> #include <linux/reboot.h>
>
> static int do_reboot(void *arg)
> {
>         int *cmd = arg;
>
>         if (reboot(*cmd))
>                 printf("failed to reboot(%d): %m\n", *cmd);
> }
>
> int test_reboot(int cmd, int sig)
> {
>         long stack_size = 4096;
>         void *stack = alloca(stack_size) + stack_size;
>         int status;
>         pid_t ret;
>
>         ret = clone(do_reboot, stack, CLONE_NEWPID | SIGCHLD, &cmd);
>         if (ret < 0) {
>                 printf("failed to clone: %m\n");
>                 return -1;
>         }
>
>         if (wait(&status) < 0) {
>                 printf("unexpected wait error: %m\n");
>                 return -1;
>         }
>
>         if (!WIFSIGNALED(status)) {
>                 printf("child process exited but was not signaled\n");
>                 return -1;
>         }
>
>         if (WTERMSIG(status) != sig) {
>                 printf("signal termination is not the one expected\n");
>                 return -1;
>         }
>
>         return 0;
> }
>
> int main(int argc, char *argv[])
> {
>         int status;
>
>         status = test_reboot(LINUX_REBOOT_CMD_RESTART, SIGHUP);
>         if (status < 0)
>                 return 1;
>         printf("reboot(LINUX_REBOOT_CMD_RESTART) succeed\n");
>
>         status = test_reboot(LINUX_REBOOT_CMD_RESTART2, SIGHUP);
>         if (status < 0)
>                 return 1;
>         printf("reboot(LINUX_REBOOT_CMD_RESTART2) succeed\n");
>
>         status = test_reboot(LINUX_REBOOT_CMD_HALT, SIGINT);
>         if (status < 0)
>                 return 1;
>         printf("reboot(LINUX_REBOOT_CMD_HALT) succeed\n");
>
>         status = test_reboot(LINUX_REBOOT_CMD_POWER_OFF, SIGINT);
>         if (status < 0)
>                 return 1;
>         printf("reboot(LINUX_REBOOT_CMD_POWERR_OFF) succeed\n");
>
>         status = test_reboot(LINUX_REBOOT_CMD_CAD_ON, -1);
>         if (status >= 0) {
>                 printf("reboot(LINUX_REBOOT_CMD_CAD_ON) should have failed\n");
>                 return 1;
>         }
>         printf("reboot(LINUX_REBOOT_CMD_CAD_ON) has failed as expected\n");
>
>         return 0;
> }
>
> Daniel Lezcano (1):
>   Add reboot_pid_ns to handle the reboot syscall
>
>  include/linux/pid_namespace.h |    8 +++++++-
>  kernel/pid_namespace.c        |   33 +++++++++++++++++++++++++++++++++
>  kernel/sys.c                  |    3 +++
>  3 files changed, 43 insertions(+), 1 deletions(-)
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/1][V4] Handle reboot in a child pid namespace
@ 2011-12-13 22:09     ` Daniel Lezcano
  0 siblings, 0 replies; 14+ messages in thread
From: Daniel Lezcano @ 2011-12-13 22:09 UTC (permalink / raw)
  To: akpm; +Cc: containers, oleg, linux-kernel, mtk.manpages, Bruno Prémont

On 12/12/2011 01:17 AM, Daniel Lezcano wrote:

Does anyone have an opinion on this patch?

I cc'ed Bruno, who should be interested in this feature too (sorry for
not cc'ing you before).

Oleg, I did not add your signed-off-by because I changed the patch but I
guess the V4 is what you expected to see, right ?

Thanks
  -- Daniel

> ChangeLog:
> ==========
>
>  * V4
>    - store the signal number the child pid namespace init should
>      exit from. It is simpler, cleaner, and does not add more encoding
>      bits to the exit code of the process.
>  * V3
>    - removed lock and serialization of pid_ns_reboot
>  * V2
>    - added a lock for the pid namespace to prevent racy call
>      to the 'reboot' syscall
>    - Moved 'reboot' command assigned in zap_pid_ns_processes
>      instead of wait_task_zombie
>    - added tasklist lock around force_sig
>    - added do_exit in pid_ns_reboot
>    - used task_active_pid_ns instead of declaring a new variable in sys_reboot
>    - moved code up before POWER_OFF changed to HALT in sys_reboot
>
>
> Test case:
> ==========
>
> #include <alloca.h>
> #include <stdio.h>
> #include <sched.h>
> #include <unistd.h>
> #include <signal.h>
> #include <sys/reboot.h>
> #include <sys/types.h>
> #include <sys/wait.h>
>
> #include <linux/reboot.h>
>
> static int do_reboot(void *arg)
> {
>         int *cmd = arg;
>
>         if (reboot(*cmd))
>                 printf("failed to reboot(%d): %m\n", *cmd);
> }
>
> int test_reboot(int cmd, int sig)
> {
>         long stack_size = 4096;
>         void *stack = alloca(stack_size) + stack_size;
>         int status;
>         pid_t ret;
>
>         ret = clone(do_reboot, stack, CLONE_NEWPID | SIGCHLD, &cmd);
>         if (ret < 0) {
>                 printf("failed to clone: %m\n");
>                 return -1;
>         }
>
>         if (wait(&status) < 0) {
>                 printf("unexpected wait error: %m\n");
>                 return -1;
>         }
>
>         if (!WIFSIGNALED(status)) {
>                 printf("child process exited but was not signaled\n");
>                 return -1;
>         }
>
>         if (WTERMSIG(status) != sig) {
>                 printf("signal termination is not the one expected\n");
>                 return -1;
>         }
>
>         return 0;
> }
>
> int main(int argc, char *argv[])
> {
>         int status;
>
>         status = test_reboot(LINUX_REBOOT_CMD_RESTART, SIGHUP);
>         if (status < 0)
>                 return 1;
>         printf("reboot(LINUX_REBOOT_CMD_RESTART) succeed\n");
>
>         status = test_reboot(LINUX_REBOOT_CMD_RESTART2, SIGHUP);
>         if (status < 0)
>                 return 1;
>         printf("reboot(LINUX_REBOOT_CMD_RESTART2) succeed\n");
>
>         status = test_reboot(LINUX_REBOOT_CMD_HALT, SIGINT);
>         if (status < 0)
>                 return 1;
>         printf("reboot(LINUX_REBOOT_CMD_HALT) succeed\n");
>
>         status = test_reboot(LINUX_REBOOT_CMD_POWER_OFF, SIGINT);
>         if (status < 0)
>                 return 1;
>         printf("reboot(LINUX_REBOOT_CMD_POWERR_OFF) succeed\n");
>
>         status = test_reboot(LINUX_REBOOT_CMD_CAD_ON, -1);
>         if (status >= 0) {
>                 printf("reboot(LINUX_REBOOT_CMD_CAD_ON) should have failed\n");
>                 return 1;
>         }
>         printf("reboot(LINUX_REBOOT_CMD_CAD_ON) has failed as expected\n");
>
>         return 0;
> }
>
> Daniel Lezcano (1):
>   Add reboot_pid_ns to handle the reboot syscall
>
>  include/linux/pid_namespace.h |    8 +++++++-
>  kernel/pid_namespace.c        |   33 +++++++++++++++++++++++++++++++++
>  kernel/sys.c                  |    3 +++
>  3 files changed, 43 insertions(+), 1 deletions(-)
>


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH][V4] Add reboot_pid_ns to handle the reboot syscall
  2011-12-12  0:17     ` Daniel Lezcano
@ 2011-12-14  0:22         ` Andrew Morton
  -1 siblings, 0 replies; 14+ messages in thread
From: Andrew Morton @ 2011-12-14  0:22 UTC (permalink / raw)
  To: Daniel Lezcano
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	oleg-H+wXaHxf7aLQT0dZR+AlfA, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w

On Mon, 12 Dec 2011 01:17:44 +0100
Daniel Lezcano <daniel.lezcano-GANU6spQydw@public.gmane.org> wrote:

> In the case of a child pid namespace, rebooting the system does not
> really make sense. When the pid namespace is used in conjunction
> with the other namespaces in order to create a linux container, the
> reboot syscall leads to some problems.
> 
> A container can reboot the host. That can be fixed by dropping
> the sys_reboot capability but we are unable to correctly poweroff/
> halt/reboot a container and the container stays stuck at the shutdown
> time with the container's init process waiting indefinitely.
> 
> After several attempts, no solution from userspace was found to reliably
> handle the shutdown from a container.
> 
> This patch proposes to make the init process of the child pid namespace
> exit with a signal status set to: SIGINT if the child pid namespace called
> "halt/poweroff" and SIGHUP if the child pid namespace called "reboot".
> When the reboot syscall is called and we are not in the initial
> pid namespace, we kill the pid namespace for "HALT", "POWEROFF", "RESTART",
> and "RESTART2". Otherwise we return EINVAL.
> 
> Returning EINVAL is also an easy way to check if this feature is supported
> by the kernel when invoking another 'reboot' option like CAD.
> 
> This way, the parent process of the child pid namespace knows whether
> it rebooted or not and can make the right decision.
>
> ...
>
> +static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
> +{
> +	BUG();
> +}
>  #endif /* CONFIG_PID_NS */

I'd recommend compile-testing this...

> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -444,6 +444,9 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
>  	                magic2 != LINUX_REBOOT_MAGIC2C))
>  		return -EINVAL;
>  
> +	if (task_active_pid_ns(current) != &init_pid_ns)
> +		return reboot_pid_ns(task_active_pid_ns(current), cmd);
> +
>  	/* Instead of trying to make the power_off code look like
> 	 * halt when pm_power_off is not set do it the easy way.
> 	 */

I'll repeat my cruelly-ignored review comment for v3:

This adds a bunch of useless code if CONFIG_PID_NS=n.  It would be
better to do

#ifdef CONFIG_PID_NS
extern void pidns_handle_reboot(int cmd);
#else
static inline void pidns_handle_reboot(int cmd)
{
}
#endif

(And thereby move the additional code into pid_namespace.c)

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH][V4] Add reboot_pid_ns to handle the reboot syscall
@ 2011-12-14  0:22         ` Andrew Morton
  0 siblings, 0 replies; 14+ messages in thread
From: Andrew Morton @ 2011-12-14  0:22 UTC (permalink / raw)
  To: Daniel Lezcano
  Cc: serge.hallyn, oleg, containers, gkurz, linux-kernel, mtk.manpages

On Mon, 12 Dec 2011 01:17:44 +0100
Daniel Lezcano <daniel.lezcano@free.fr> wrote:

> In the case of a child pid namespace, rebooting the system does not
> really make sense. When the pid namespace is used in conjunction
> with the other namespaces in order to create a linux container, the
> reboot syscall leads to some problems.
> 
> A container can reboot the host. That can be fixed by dropping
> the sys_reboot capability but we are unable to correctly poweroff/
> halt/reboot a container and the container stays stuck at the shutdown
> time with the container's init process waiting indefinitely.
> 
> After several attempts, no solution from userspace was found to reliably
> handle the shutdown from a container.
> 
> This patch proposes to make the init process of the child pid namespace
> exit with a signal status set to: SIGINT if the child pid namespace called
> "halt/poweroff" and SIGHUP if the child pid namespace called "reboot".
> When the reboot syscall is called and we are not in the initial
> pid namespace, we kill the pid namespace for "HALT", "POWEROFF", "RESTART",
> and "RESTART2". Otherwise we return EINVAL.
> 
> Returning EINVAL is also an easy way to check if this feature is supported
> by the kernel when invoking another 'reboot' option like CAD.
> 
> This way, the parent process of the child pid namespace knows whether
> it rebooted or not and can make the right decision.
>
> ...
>
> +static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
> +{
> +	BUG();
> +}
>  #endif /* CONFIG_PID_NS */

I'd recommend compile-testing this...

> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -444,6 +444,9 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
>  	                magic2 != LINUX_REBOOT_MAGIC2C))
>  		return -EINVAL;
>  
> +	if (task_active_pid_ns(current) != &init_pid_ns)
> +		return reboot_pid_ns(task_active_pid_ns(current), cmd);
> +
>  	/* Instead of trying to make the power_off code look like
> 	 * halt when pm_power_off is not set do it the easy way.
> 	 */

I'll repeat my cruelly-ignored review comment for v3:

This adds a bunch of useless code if CONFIG_PID_NS=n.  It would be
better to do

#ifdef CONFIG_PID_NS
extern void pidns_handle_reboot(int cmd);
#else
static inline void pidns_handle_reboot(int cmd)
{
}
#endif

(And thereby move the additional code into pid_namespace.c)

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH][V4] Add reboot_pid_ns to handle the reboot syscall
  2011-12-14  0:22         ` Andrew Morton
@ 2011-12-14 19:17             ` Oleg Nesterov
  -1 siblings, 0 replies; 14+ messages in thread
From: Oleg Nesterov @ 2011-12-14 19:17 UTC (permalink / raw)
  To: Andrew Morton
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w

On 12/13, Andrew Morton wrote:
>
> > --- a/kernel/sys.c
> > +++ b/kernel/sys.c
> > @@ -444,6 +444,9 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
> >  	                magic2 != LINUX_REBOOT_MAGIC2C))
> >  		return -EINVAL;
> >
> > +	if (task_active_pid_ns(current) != &init_pid_ns)
> > +		return reboot_pid_ns(task_active_pid_ns(current), cmd);
> > +
> >  	/* Instead of trying to make the power_off code look like
> > 	 * halt when pm_power_off is not set do it the easy way.
> > 	 */
>
> I'll repeat my cruelly-ignored review comment for v3:
>
> This adds a bunch of useless code if CONFIG_PID_NS=n.

Agreed.

> It would be
> better to do
>
> #ifdef CONFIG_PID_NS
> extern void pidns_handle_reboot(int cmd);
> #else
> static inline void pidns_handle_reboot(int cmd)
> {
> }
> #endif

Can't resist.

Why does the kernel always prefer to do it this way, adding the ugly
do-nothing inlines?

Isn't it better to simply call pidns_handle_reboot(cmd) under
CONFIG_PID_NS in sys_reboot() ?

	#ifdef CONFIG_PID_NS
	if (task_active_pid_ns(current) != &init_pid_ns)
		return reboot_pid_ns(cmd);
	#endif

This way, if you look at sys_reboot() you can immediately see what
happens, no need to inspect the !CONFIG_PID_NS definition. Plus this
doesn't add the "unnecessary" entry into the tag file.

OK, </flame> ;)

Otherwise,

Reviewed-by: Oleg Nesterov <oleg-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH][V4] Add reboot_pid_ns to handle the reboot syscall
@ 2011-12-14 19:17             ` Oleg Nesterov
  0 siblings, 0 replies; 14+ messages in thread
From: Oleg Nesterov @ 2011-12-14 19:17 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Daniel Lezcano, serge.hallyn, containers, gkurz, linux-kernel,
	mtk.manpages

On 12/13, Andrew Morton wrote:
>
> > --- a/kernel/sys.c
> > +++ b/kernel/sys.c
> > @@ -444,6 +444,9 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
> >  	                magic2 != LINUX_REBOOT_MAGIC2C))
> >  		return -EINVAL;
> >
> > +	if (task_active_pid_ns(current) != &init_pid_ns)
> > +		return reboot_pid_ns(task_active_pid_ns(current), cmd);
> > +
> >  	/* Instead of trying to make the power_off code look like
> > 	 * halt when pm_power_off is not set do it the easy way.
> > 	 */
>
> I'll repeat my cruelly-ignored review comment for v3:
>
> This adds a bunch of useless code if CONFIG_PID_NS=n.

Agreed.

> It would be
> better to do
>
> #ifdef CONFIG_PID_NS
> extern void pidns_handle_reboot(int cmd);
> #else
> static inline void pidns_handle_reboot(int cmd)
> {
> }
> #endif

Can't resist.

Why does the kernel always prefer to do it this way, adding the ugly
do-nothing inlines?

Isn't it better to simply call pidns_handle_reboot(cmd) under
CONFIG_PID_NS in sys_reboot() ?

	#ifdef CONFIG_PID_NS
	if (task_active_pid_ns(current) != &init_pid_ns)
		return reboot_pid_ns(cmd);
	#endif

This way, if you look at sys_reboot() you can immediately see what
happens, no need to inspect the !CONFIG_PID_NS definition. Plus this
doesn't add the "unnecessary" entry into the tag file.

OK, </flame> ;)

Otherwise,

Reviewed-by: Oleg Nesterov <oleg@redhat.com>


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH][V4] Add reboot_pid_ns to handle the reboot syscall
  2011-12-14 19:17             ` Oleg Nesterov
@ 2011-12-15 22:00                 ` Andrew Morton
  -1 siblings, 0 replies; 14+ messages in thread
From: Andrew Morton @ 2011-12-15 22:00 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w

On Wed, 14 Dec 2011 20:17:39 +0100
Oleg Nesterov <oleg-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> wrote:

> > It would be
> > better to do
> >
> > #ifdef CONFIG_PID_NS
> > extern void pidns_handle_reboot(int cmd);
> > #else
> > static inline void pidns_handle_reboot(int cmd)
> > {
> > }
> > #endif
> 
> Can't resist.
> 
> Why does the kernel always prefer to do it this way, adding the ugly
> do-nothing inlines?
> 
> Isn't it better to simply call pidns_handle_reboot(cmd) under
> CONFIG_PID_NS in sys_reboot() ?
> 
> 	#ifdef CONFIG_PID_NS
> 	if (task_active_pid_ns(current) != &init_pid_ns)
> 		return reboot_pid_ns(cmd);
> 	#endif

Imagine what the code would look like if we took all the existing empty
inline stubs and replaced them with #if/#else/#endif.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH][V4] Add reboot_pid_ns to handle the reboot syscall
@ 2011-12-15 22:00                 ` Andrew Morton
  0 siblings, 0 replies; 14+ messages in thread
From: Andrew Morton @ 2011-12-15 22:00 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Daniel Lezcano, serge.hallyn, containers, gkurz, linux-kernel,
	mtk.manpages

On Wed, 14 Dec 2011 20:17:39 +0100
Oleg Nesterov <oleg@redhat.com> wrote:

> > It would be
> > better to do
> >
> > #ifdef CONFIG_PID_NS
> > extern void pidns_handle_reboot(int cmd);
> > #else
> > static inline void pidns_handle_reboot(int cmd)
> > {
> > }
> > #endif
> 
> Can't resist.
> 
> Why does the kernel always prefer to do it this way, adding the ugly
> do-nothing inlines?
> 
> Isn't it better to simply call pidns_handle_reboot(cmd) under
> CONFIG_PID_NS in sys_reboot() ?
> 
> 	#ifdef CONFIG_PID_NS
> 	if (task_active_pid_ns(current) != &init_pid_ns)
> 		return reboot_pid_ns(cmd);
> 	#endif

Imagine what the code would look like if we took all the existing empty
inline stubs and replaced them with #if/#else/#endif.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2011-12-15 22:00 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-12-12  0:17 [PATCH 0/1][V4] Handle reboot in a child pid namespace Daniel Lezcano
2011-12-12  0:17 ` Daniel Lezcano
     [not found] ` <1323649064-7960-1-git-send-email-daniel.lezcano-GANU6spQydw@public.gmane.org>
2011-12-12  0:17   ` [PATCH][V4] Add reboot_pid_ns to handle the reboot syscall Daniel Lezcano
2011-12-12  0:17     ` Daniel Lezcano
     [not found]     ` <1323649064-7960-2-git-send-email-daniel.lezcano-GANU6spQydw@public.gmane.org>
2011-12-12 23:14       ` Serge E. Hallyn
2011-12-12 23:14         ` Serge E. Hallyn
2011-12-14  0:22       ` Andrew Morton
2011-12-14  0:22         ` Andrew Morton
     [not found]         ` <20111213162242.1ab3cb1a.akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
2011-12-14 19:17           ` Oleg Nesterov
2011-12-14 19:17             ` Oleg Nesterov
     [not found]             ` <20111214191739.GA14693-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2011-12-15 22:00               ` Andrew Morton
2011-12-15 22:00                 ` Andrew Morton
2011-12-13 22:09   ` [PATCH 0/1][V4] Handle reboot in a child pid namespace Daniel Lezcano
2011-12-13 22:09     ` Daniel Lezcano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.