All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] [RFC] Limit dump_pipe program's permission to init for container
@ 2016-07-08 11:08 ` Zhao Lei
  0 siblings, 0 replies; 6+ messages in thread
From: Zhao Lei @ 2016-07-08 11:08 UTC (permalink / raw)
  To: linux-kernel-u79uwXL29TY76Z2rM5mHXA
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	Eric W. Biederman

Currently when we set core_pattern to a pipe, the pipe program is
forked by kthread running with root's permission, and write dumpfile
into host's filesystem.
Same thing happened for container, the dumper and dumpfile are also
in host(not in container).

It have following program:
1: Not consistent with file_type core_pattern
   When we set core_pattern to a file, the container will write dump
   into container's filesystem instead of host.
2: Not safe for privileged container
   In a privileged container, user can destroy host system by following
   command:
   # # In a container
   # echo "|/bin/dd of=/boot/vmlinuz" >/proc/sys/kernel/core_pattern
   # make_dump

This patch switch dumper program's permission to init task, it is to say,
for container, dumper program have same permission with init task in
container, which make dumper program put in container's filesystem, and
write coredump into container's filesystem.
The dumper's permission is also limited into subset of container's init
process.

See following discussion for detail:
http://www.gossamer-threads.com/lists/linux/kernel/2455363
http://www.gossamer-threads.com/lists/linux/kernel/2397602

Todo:
1: Set different core_pattern to host and each container.
2: Keep compatibility with current design
All above are done in previous version of this patch,
need some restruct.

Any suggestion is welcome for this patch.

Suggested-by: Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
Suggested-by: KOSAKI Motohiro <kosaki.motohiro-+CUm20s59erQFUHtdCDX3A@public.gmane.org>

Signed-off-by: Zhao Lei <zhaolei-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
---
 fs/coredump.c           | 84 ++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/binfmts.h |  1 +
 2 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index 281b768..92dc343 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -516,6 +516,8 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
 {
 	struct file *files[2];
 	struct coredump_params *cp = (struct coredump_params *)info->data;
+	struct task_struct *base_task;
+
 	int err = create_pipe_files(files, 0);
 	if (err)
 		return err;
@@ -524,10 +526,78 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
 
 	err = replace_fd(0, files[0], 0);
 	fput(files[0]);
+	if (err)
+		return err;
+
 	/* and disallow core files too */
 	current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
 
-	return err;
+	base_task = cp->base_task;
+	if (base_task) {
+		const struct cred *base_cred;
+
+		/* Set fs_root to base_task */
+		spin_lock(&base_task->fs->lock);
+		set_fs_root(current->fs, &base_task->fs->root);
+		spin_unlock(&base_task->fs->lock);
+
+		/* Set namespaces to base_task */
+		switch_task_namespaces(current, base_task->nsproxy);
+
+		/* Set cgroup to base_task */
+		current->flags &= ~PF_NO_SETAFFINITY;
+		err = cgroup_attach_task_all(base_task, current);
+		if (err < 0)
+			return err;
+
+		/* Set cred to base_task */
+		base_cred = get_task_cred(base_task);
+
+		new->uid   = base_cred->uid;
+		new->gid   = base_cred->gid;
+		new->suid  = base_cred->suid;
+		new->sgid  = base_cred->sgid;
+		new->euid  = base_cred->euid;
+		new->egid  = base_cred->egid;
+		new->fsuid = base_cred->fsuid;
+		new->fsgid = base_cred->fsgid;
+
+		new->securebits = base_cred->securebits;
+
+		new->cap_inheritable = base_cred->cap_inheritable;
+		new->cap_permitted   = base_cred->cap_permitted;
+		new->cap_effective   = base_cred->cap_effective;
+		new->cap_bset        = base_cred->cap_bset;
+		new->cap_ambient     = base_cred->cap_ambient;
+
+		security_cred_free(new);
+#ifdef CONFIG_SECURITY
+		new->security = NULL;
+#endif
+		err = security_prepare_creds(new, base_cred, GFP_KERNEL);
+		if (err < 0) {
+			put_cred(base_cred);
+			return err;
+		}
+
+		free_uid(new->user);
+		new->user = base_cred->user;
+		get_uid(new->user);
+
+		put_user_ns(new->user_ns);
+		new->user_ns = base_cred->user_ns;
+		get_user_ns(new->user_ns);
+
+		put_group_info(new->group_info);
+		new->group_info = base_cred->group_info;
+		get_group_info(new->group_info);
+
+		put_cred(base_cred);
+
+		validate_creds(new);
+	}
+
+	return 0;
 }
 
 void do_coredump(const siginfo_t *siginfo)
@@ -590,6 +660,7 @@ void do_coredump(const siginfo_t *siginfo)
 
 	if (ispipe) {
 		int dump_count;
+		struct task_struct *vinit_task;
 		char **helper_argv;
 		struct subprocess_info *sub_info;
 
@@ -631,6 +702,12 @@ void do_coredump(const siginfo_t *siginfo)
 			goto fail_dropcount;
 		}
 
+		vinit_task = find_task_by_vpid(1);
+		if (!vinit_task) {
+			printk(KERN_WARNING "failed getting init task info, skipping core dump\n");
+			goto fail_dropcount;
+		}
+
 		helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL);
 		if (!helper_argv) {
 			printk(KERN_WARNING "%s failed to allocate memory\n",
@@ -638,6 +715,10 @@ void do_coredump(const siginfo_t *siginfo)
 			goto fail_dropcount;
 		}
 
+		get_task_struct(vinit_task);
+
+		cprm.base_task = vinit_task;
+
 		retval = -ENOMEM;
 		sub_info = call_usermodehelper_setup(helper_argv[0],
 						helper_argv, NULL, GFP_KERNEL,
@@ -646,6 +727,7 @@ void do_coredump(const siginfo_t *siginfo)
 			retval = call_usermodehelper_exec(sub_info,
 							  UMH_WAIT_EXEC);
 
+		put_task_struct(vinit_task);
 		argv_free(helper_argv);
 		if (retval) {
 			printk(KERN_INFO "Core dump to |%s pipe failed\n",
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 314b3ca..0c9a72c 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -59,6 +59,7 @@ struct linux_binprm {
 
 /* Function parameter for binfmt->coredump */
 struct coredump_params {
+	struct task_struct *base_task;
 	const siginfo_t *siginfo;
 	struct pt_regs *regs;
 	struct file *file;
-- 
1.8.5.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH] [RFC] Limit dump_pipe program's permission to init for container
@ 2016-07-08 11:08 ` Zhao Lei
  0 siblings, 0 replies; 6+ messages in thread
From: Zhao Lei @ 2016-07-08 11:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: containers, Eric W. Biederman, Mateusz Guzik, Kamezawa Hiroyuki,
	Zhao Lei

Currently when we set core_pattern to a pipe, the pipe program is
forked by kthread running with root's permission, and write dumpfile
into host's filesystem.
Same thing happened for container, the dumper and dumpfile are also
in host(not in container).

It have following program:
1: Not consistent with file_type core_pattern
   When we set core_pattern to a file, the container will write dump
   into container's filesystem instead of host.
2: Not safe for privileged container
   In a privileged container, user can destroy host system by following
   command:
   # # In a container
   # echo "|/bin/dd of=/boot/vmlinuz" >/proc/sys/kernel/core_pattern
   # make_dump

This patch switch dumper program's permission to init task, it is to say,
for container, dumper program have same permission with init task in
container, which make dumper program put in container's filesystem, and
write coredump into container's filesystem.
The dumper's permission is also limited into subset of container's init
process.

See following discussion for detail:
http://www.gossamer-threads.com/lists/linux/kernel/2455363
http://www.gossamer-threads.com/lists/linux/kernel/2397602

Todo:
1: Set different core_pattern to host and each container.
2: Keep compatibility with current design
All above are done in previous version of this patch,
need some restruct.

Any suggestion is welcome for this patch.

Suggested-by: Eric W. Biederman <ebiederm@xmission.com>
Suggested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
---
 fs/coredump.c           | 84 ++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/binfmts.h |  1 +
 2 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index 281b768..92dc343 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -516,6 +516,8 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
 {
 	struct file *files[2];
 	struct coredump_params *cp = (struct coredump_params *)info->data;
+	struct task_struct *base_task;
+
 	int err = create_pipe_files(files, 0);
 	if (err)
 		return err;
@@ -524,10 +526,78 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
 
 	err = replace_fd(0, files[0], 0);
 	fput(files[0]);
+	if (err)
+		return err;
+
 	/* and disallow core files too */
 	current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
 
-	return err;
+	base_task = cp->base_task;
+	if (base_task) {
+		const struct cred *base_cred;
+
+		/* Set fs_root to base_task */
+		spin_lock(&base_task->fs->lock);
+		set_fs_root(current->fs, &base_task->fs->root);
+		spin_unlock(&base_task->fs->lock);
+
+		/* Set namespaces to base_task */
+		switch_task_namespaces(current, base_task->nsproxy);
+
+		/* Set cgroup to base_task */
+		current->flags &= ~PF_NO_SETAFFINITY;
+		err = cgroup_attach_task_all(base_task, current);
+		if (err < 0)
+			return err;
+
+		/* Set cred to base_task */
+		base_cred = get_task_cred(base_task);
+
+		new->uid   = base_cred->uid;
+		new->gid   = base_cred->gid;
+		new->suid  = base_cred->suid;
+		new->sgid  = base_cred->sgid;
+		new->euid  = base_cred->euid;
+		new->egid  = base_cred->egid;
+		new->fsuid = base_cred->fsuid;
+		new->fsgid = base_cred->fsgid;
+
+		new->securebits = base_cred->securebits;
+
+		new->cap_inheritable = base_cred->cap_inheritable;
+		new->cap_permitted   = base_cred->cap_permitted;
+		new->cap_effective   = base_cred->cap_effective;
+		new->cap_bset        = base_cred->cap_bset;
+		new->cap_ambient     = base_cred->cap_ambient;
+
+		security_cred_free(new);
+#ifdef CONFIG_SECURITY
+		new->security = NULL;
+#endif
+		err = security_prepare_creds(new, base_cred, GFP_KERNEL);
+		if (err < 0) {
+			put_cred(base_cred);
+			return err;
+		}
+
+		free_uid(new->user);
+		new->user = base_cred->user;
+		get_uid(new->user);
+
+		put_user_ns(new->user_ns);
+		new->user_ns = base_cred->user_ns;
+		get_user_ns(new->user_ns);
+
+		put_group_info(new->group_info);
+		new->group_info = base_cred->group_info;
+		get_group_info(new->group_info);
+
+		put_cred(base_cred);
+
+		validate_creds(new);
+	}
+
+	return 0;
 }
 
 void do_coredump(const siginfo_t *siginfo)
@@ -590,6 +660,7 @@ void do_coredump(const siginfo_t *siginfo)
 
 	if (ispipe) {
 		int dump_count;
+		struct task_struct *vinit_task;
 		char **helper_argv;
 		struct subprocess_info *sub_info;
 
@@ -631,6 +702,12 @@ void do_coredump(const siginfo_t *siginfo)
 			goto fail_dropcount;
 		}
 
+		vinit_task = find_task_by_vpid(1);
+		if (!vinit_task) {
+			printk(KERN_WARNING "failed getting init task info, skipping core dump\n");
+			goto fail_dropcount;
+		}
+
 		helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL);
 		if (!helper_argv) {
 			printk(KERN_WARNING "%s failed to allocate memory\n",
@@ -638,6 +715,10 @@ void do_coredump(const siginfo_t *siginfo)
 			goto fail_dropcount;
 		}
 
+		get_task_struct(vinit_task);
+
+		cprm.base_task = vinit_task;
+
 		retval = -ENOMEM;
 		sub_info = call_usermodehelper_setup(helper_argv[0],
 						helper_argv, NULL, GFP_KERNEL,
@@ -646,6 +727,7 @@ void do_coredump(const siginfo_t *siginfo)
 			retval = call_usermodehelper_exec(sub_info,
 							  UMH_WAIT_EXEC);
 
+		put_task_struct(vinit_task);
 		argv_free(helper_argv);
 		if (retval) {
 			printk(KERN_INFO "Core dump to |%s pipe failed\n",
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 314b3ca..0c9a72c 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -59,6 +59,7 @@ struct linux_binprm {
 
 /* Function parameter for binfmt->coredump */
 struct coredump_params {
+	struct task_struct *base_task;
 	const siginfo_t *siginfo;
 	struct pt_regs *regs;
 	struct file *file;
-- 
1.8.5.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] [RFC] Limit dump_pipe program's permission to init for container
  2016-07-08 11:08 ` Zhao Lei
@ 2016-07-08 15:26     ` Stéphane Graber
  -1 siblings, 0 replies; 6+ messages in thread
From: Stéphane Graber @ 2016-07-08 15:26 UTC (permalink / raw)
  To: Zhao Lei
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Eric W. Biederman


[-- Attachment #1.1: Type: text/plain, Size: 7832 bytes --]

On Fri, Jul 08, 2016 at 07:08:10PM +0800, Zhao Lei wrote:
> Currently when we set core_pattern to a pipe, the pipe program is
> forked by kthread running with root's permission, and write dumpfile
> into host's filesystem.
> Same thing happened for container, the dumper and dumpfile are also
> in host(not in container).
> 
> It have following program:
> 1: Not consistent with file_type core_pattern
>    When we set core_pattern to a file, the container will write dump
>    into container's filesystem instead of host.
> 2: Not safe for privileged container
>    In a privileged container, user can destroy host system by following
>    command:
>    # # In a container
>    # echo "|/bin/dd of=/boot/vmlinuz" >/proc/sys/kernel/core_pattern
>    # make_dump
> 
> This patch switch dumper program's permission to init task, it is to say,
> for container, dumper program have same permission with init task in
> container, which make dumper program put in container's filesystem, and
> write coredump into container's filesystem.
> The dumper's permission is also limited into subset of container's init
> process.
> 
> See following discussion for detail:
> http://www.gossamer-threads.com/lists/linux/kernel/2455363
> http://www.gossamer-threads.com/lists/linux/kernel/2397602
> 
> Todo:
> 1: Set different core_pattern to host and each container.
> 2: Keep compatibility with current design
> All above are done in previous version of this patch,
> need some restruct.
> 
> Any suggestion is welcome for this patch.


Ok, so those two todo items are I think absolute musts before we can
consider any of this.

Changing the default behavior that we have had ever since pipe in
core_pattern and namespaces have existed would cause serious problem for
existing users.


Speaking for myself, LXC does setup limitations on privileged containers
which prevent them from changing the core_pattern so that it can't be
abused for privileged containers.
The core_pattern on Ubuntu then sends all crashes to apport, as root, on
the host. Apport is then container aware and handles forwarding the
crash (through a unix socket) to the apport process inside the
container.

Your suggested change would likely make this all fail as apport on the
host must be called as root to be able to access the container's
filesystem (through /proc/<PID>/root).


With namespacing in place, then we wouldn't need the apport relay trick
on the host, so that'd be fine.

And otherwise, making this behavior optional (different pattern or
something) would be fine too (not breaking existing users).

> 
> Suggested-by: Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
> Suggested-by: KOSAKI Motohiro <kosaki.motohiro-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
> 
> Signed-off-by: Zhao Lei <zhaolei-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
> ---
>  fs/coredump.c           | 84 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  include/linux/binfmts.h |  1 +
>  2 files changed, 84 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/coredump.c b/fs/coredump.c
> index 281b768..92dc343 100644
> --- a/fs/coredump.c
> +++ b/fs/coredump.c
> @@ -516,6 +516,8 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
>  {
>  	struct file *files[2];
>  	struct coredump_params *cp = (struct coredump_params *)info->data;
> +	struct task_struct *base_task;
> +
>  	int err = create_pipe_files(files, 0);
>  	if (err)
>  		return err;
> @@ -524,10 +526,78 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
>  
>  	err = replace_fd(0, files[0], 0);
>  	fput(files[0]);
> +	if (err)
> +		return err;
> +
>  	/* and disallow core files too */
>  	current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
>  
> -	return err;
> +	base_task = cp->base_task;
> +	if (base_task) {
> +		const struct cred *base_cred;
> +
> +		/* Set fs_root to base_task */
> +		spin_lock(&base_task->fs->lock);
> +		set_fs_root(current->fs, &base_task->fs->root);
> +		spin_unlock(&base_task->fs->lock);
> +
> +		/* Set namespaces to base_task */
> +		switch_task_namespaces(current, base_task->nsproxy);
> +
> +		/* Set cgroup to base_task */
> +		current->flags &= ~PF_NO_SETAFFINITY;
> +		err = cgroup_attach_task_all(base_task, current);
> +		if (err < 0)
> +			return err;
> +
> +		/* Set cred to base_task */
> +		base_cred = get_task_cred(base_task);
> +
> +		new->uid   = base_cred->uid;
> +		new->gid   = base_cred->gid;
> +		new->suid  = base_cred->suid;
> +		new->sgid  = base_cred->sgid;
> +		new->euid  = base_cred->euid;
> +		new->egid  = base_cred->egid;
> +		new->fsuid = base_cred->fsuid;
> +		new->fsgid = base_cred->fsgid;
> +
> +		new->securebits = base_cred->securebits;
> +
> +		new->cap_inheritable = base_cred->cap_inheritable;
> +		new->cap_permitted   = base_cred->cap_permitted;
> +		new->cap_effective   = base_cred->cap_effective;
> +		new->cap_bset        = base_cred->cap_bset;
> +		new->cap_ambient     = base_cred->cap_ambient;
> +
> +		security_cred_free(new);
> +#ifdef CONFIG_SECURITY
> +		new->security = NULL;
> +#endif
> +		err = security_prepare_creds(new, base_cred, GFP_KERNEL);
> +		if (err < 0) {
> +			put_cred(base_cred);
> +			return err;
> +		}
> +
> +		free_uid(new->user);
> +		new->user = base_cred->user;
> +		get_uid(new->user);
> +
> +		put_user_ns(new->user_ns);
> +		new->user_ns = base_cred->user_ns;
> +		get_user_ns(new->user_ns);
> +
> +		put_group_info(new->group_info);
> +		new->group_info = base_cred->group_info;
> +		get_group_info(new->group_info);
> +
> +		put_cred(base_cred);
> +
> +		validate_creds(new);
> +	}
> +
> +	return 0;
>  }
>  
>  void do_coredump(const siginfo_t *siginfo)
> @@ -590,6 +660,7 @@ void do_coredump(const siginfo_t *siginfo)
>  
>  	if (ispipe) {
>  		int dump_count;
> +		struct task_struct *vinit_task;
>  		char **helper_argv;
>  		struct subprocess_info *sub_info;
>  
> @@ -631,6 +702,12 @@ void do_coredump(const siginfo_t *siginfo)
>  			goto fail_dropcount;
>  		}
>  
> +		vinit_task = find_task_by_vpid(1);
> +		if (!vinit_task) {
> +			printk(KERN_WARNING "failed getting init task info, skipping core dump\n");
> +			goto fail_dropcount;
> +		}
> +
>  		helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL);
>  		if (!helper_argv) {
>  			printk(KERN_WARNING "%s failed to allocate memory\n",
> @@ -638,6 +715,10 @@ void do_coredump(const siginfo_t *siginfo)
>  			goto fail_dropcount;
>  		}
>  
> +		get_task_struct(vinit_task);
> +
> +		cprm.base_task = vinit_task;
> +
>  		retval = -ENOMEM;
>  		sub_info = call_usermodehelper_setup(helper_argv[0],
>  						helper_argv, NULL, GFP_KERNEL,
> @@ -646,6 +727,7 @@ void do_coredump(const siginfo_t *siginfo)
>  			retval = call_usermodehelper_exec(sub_info,
>  							  UMH_WAIT_EXEC);
>  
> +		put_task_struct(vinit_task);
>  		argv_free(helper_argv);
>  		if (retval) {
>  			printk(KERN_INFO "Core dump to |%s pipe failed\n",
> diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
> index 314b3ca..0c9a72c 100644
> --- a/include/linux/binfmts.h
> +++ b/include/linux/binfmts.h
> @@ -59,6 +59,7 @@ struct linux_binprm {
>  
>  /* Function parameter for binfmt->coredump */
>  struct coredump_params {
> +	struct task_struct *base_task;
>  	const siginfo_t *siginfo;
>  	struct pt_regs *regs;
>  	struct file *file;
> -- 
> 1.8.5.1
> 
> 
> 
> _______________________________________________
> Containers mailing list
> Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
> https://lists.linuxfoundation.org/mailman/listinfo/containers

-- 
Stéphane Graber
Ubuntu developer
http://www.ubuntu.com

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

[-- Attachment #2: Type: text/plain, Size: 205 bytes --]

_______________________________________________
Containers mailing list
Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
https://lists.linuxfoundation.org/mailman/listinfo/containers

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] [RFC] Limit dump_pipe program's permission to init for container
@ 2016-07-08 15:26     ` Stéphane Graber
  0 siblings, 0 replies; 6+ messages in thread
From: Stéphane Graber @ 2016-07-08 15:26 UTC (permalink / raw)
  To: Zhao Lei; +Cc: linux-kernel, containers, Eric W. Biederman

[-- Attachment #1: Type: text/plain, Size: 7721 bytes --]

On Fri, Jul 08, 2016 at 07:08:10PM +0800, Zhao Lei wrote:
> Currently when we set core_pattern to a pipe, the pipe program is
> forked by kthread running with root's permission, and write dumpfile
> into host's filesystem.
> Same thing happened for container, the dumper and dumpfile are also
> in host(not in container).
> 
> It have following program:
> 1: Not consistent with file_type core_pattern
>    When we set core_pattern to a file, the container will write dump
>    into container's filesystem instead of host.
> 2: Not safe for privileged container
>    In a privileged container, user can destroy host system by following
>    command:
>    # # In a container
>    # echo "|/bin/dd of=/boot/vmlinuz" >/proc/sys/kernel/core_pattern
>    # make_dump
> 
> This patch switch dumper program's permission to init task, it is to say,
> for container, dumper program have same permission with init task in
> container, which make dumper program put in container's filesystem, and
> write coredump into container's filesystem.
> The dumper's permission is also limited into subset of container's init
> process.
> 
> See following discussion for detail:
> http://www.gossamer-threads.com/lists/linux/kernel/2455363
> http://www.gossamer-threads.com/lists/linux/kernel/2397602
> 
> Todo:
> 1: Set different core_pattern to host and each container.
> 2: Keep compatibility with current design
> All above are done in previous version of this patch,
> need some restruct.
> 
> Any suggestion is welcome for this patch.


Ok, so those two todo items are I think absolute musts before we can
consider any of this.

Changing the default behavior that we have had ever since pipe in
core_pattern and namespaces have existed would cause serious problem for
existing users.


Speaking for myself, LXC does setup limitations on privileged containers
which prevent them from changing the core_pattern so that it can't be
abused for privileged containers.
The core_pattern on Ubuntu then sends all crashes to apport, as root, on
the host. Apport is then container aware and handles forwarding the
crash (through a unix socket) to the apport process inside the
container.

Your suggested change would likely make this all fail as apport on the
host must be called as root to be able to access the container's
filesystem (through /proc/<PID>/root).


With namespacing in place, then we wouldn't need the apport relay trick
on the host, so that'd be fine.

And otherwise, making this behavior optional (different pattern or
something) would be fine too (not breaking existing users).

> 
> Suggested-by: Eric W. Biederman <ebiederm@xmission.com>
> Suggested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> 
> Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
> ---
>  fs/coredump.c           | 84 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  include/linux/binfmts.h |  1 +
>  2 files changed, 84 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/coredump.c b/fs/coredump.c
> index 281b768..92dc343 100644
> --- a/fs/coredump.c
> +++ b/fs/coredump.c
> @@ -516,6 +516,8 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
>  {
>  	struct file *files[2];
>  	struct coredump_params *cp = (struct coredump_params *)info->data;
> +	struct task_struct *base_task;
> +
>  	int err = create_pipe_files(files, 0);
>  	if (err)
>  		return err;
> @@ -524,10 +526,78 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
>  
>  	err = replace_fd(0, files[0], 0);
>  	fput(files[0]);
> +	if (err)
> +		return err;
> +
>  	/* and disallow core files too */
>  	current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
>  
> -	return err;
> +	base_task = cp->base_task;
> +	if (base_task) {
> +		const struct cred *base_cred;
> +
> +		/* Set fs_root to base_task */
> +		spin_lock(&base_task->fs->lock);
> +		set_fs_root(current->fs, &base_task->fs->root);
> +		spin_unlock(&base_task->fs->lock);
> +
> +		/* Set namespaces to base_task */
> +		switch_task_namespaces(current, base_task->nsproxy);
> +
> +		/* Set cgroup to base_task */
> +		current->flags &= ~PF_NO_SETAFFINITY;
> +		err = cgroup_attach_task_all(base_task, current);
> +		if (err < 0)
> +			return err;
> +
> +		/* Set cred to base_task */
> +		base_cred = get_task_cred(base_task);
> +
> +		new->uid   = base_cred->uid;
> +		new->gid   = base_cred->gid;
> +		new->suid  = base_cred->suid;
> +		new->sgid  = base_cred->sgid;
> +		new->euid  = base_cred->euid;
> +		new->egid  = base_cred->egid;
> +		new->fsuid = base_cred->fsuid;
> +		new->fsgid = base_cred->fsgid;
> +
> +		new->securebits = base_cred->securebits;
> +
> +		new->cap_inheritable = base_cred->cap_inheritable;
> +		new->cap_permitted   = base_cred->cap_permitted;
> +		new->cap_effective   = base_cred->cap_effective;
> +		new->cap_bset        = base_cred->cap_bset;
> +		new->cap_ambient     = base_cred->cap_ambient;
> +
> +		security_cred_free(new);
> +#ifdef CONFIG_SECURITY
> +		new->security = NULL;
> +#endif
> +		err = security_prepare_creds(new, base_cred, GFP_KERNEL);
> +		if (err < 0) {
> +			put_cred(base_cred);
> +			return err;
> +		}
> +
> +		free_uid(new->user);
> +		new->user = base_cred->user;
> +		get_uid(new->user);
> +
> +		put_user_ns(new->user_ns);
> +		new->user_ns = base_cred->user_ns;
> +		get_user_ns(new->user_ns);
> +
> +		put_group_info(new->group_info);
> +		new->group_info = base_cred->group_info;
> +		get_group_info(new->group_info);
> +
> +		put_cred(base_cred);
> +
> +		validate_creds(new);
> +	}
> +
> +	return 0;
>  }
>  
>  void do_coredump(const siginfo_t *siginfo)
> @@ -590,6 +660,7 @@ void do_coredump(const siginfo_t *siginfo)
>  
>  	if (ispipe) {
>  		int dump_count;
> +		struct task_struct *vinit_task;
>  		char **helper_argv;
>  		struct subprocess_info *sub_info;
>  
> @@ -631,6 +702,12 @@ void do_coredump(const siginfo_t *siginfo)
>  			goto fail_dropcount;
>  		}
>  
> +		vinit_task = find_task_by_vpid(1);
> +		if (!vinit_task) {
> +			printk(KERN_WARNING "failed getting init task info, skipping core dump\n");
> +			goto fail_dropcount;
> +		}
> +
>  		helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL);
>  		if (!helper_argv) {
>  			printk(KERN_WARNING "%s failed to allocate memory\n",
> @@ -638,6 +715,10 @@ void do_coredump(const siginfo_t *siginfo)
>  			goto fail_dropcount;
>  		}
>  
> +		get_task_struct(vinit_task);
> +
> +		cprm.base_task = vinit_task;
> +
>  		retval = -ENOMEM;
>  		sub_info = call_usermodehelper_setup(helper_argv[0],
>  						helper_argv, NULL, GFP_KERNEL,
> @@ -646,6 +727,7 @@ void do_coredump(const siginfo_t *siginfo)
>  			retval = call_usermodehelper_exec(sub_info,
>  							  UMH_WAIT_EXEC);
>  
> +		put_task_struct(vinit_task);
>  		argv_free(helper_argv);
>  		if (retval) {
>  			printk(KERN_INFO "Core dump to |%s pipe failed\n",
> diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
> index 314b3ca..0c9a72c 100644
> --- a/include/linux/binfmts.h
> +++ b/include/linux/binfmts.h
> @@ -59,6 +59,7 @@ struct linux_binprm {
>  
>  /* Function parameter for binfmt->coredump */
>  struct coredump_params {
> +	struct task_struct *base_task;
>  	const siginfo_t *siginfo;
>  	struct pt_regs *regs;
>  	struct file *file;
> -- 
> 1.8.5.1
> 
> 
> 
> _______________________________________________
> Containers mailing list
> Containers@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/containers

-- 
Stéphane Graber
Ubuntu developer
http://www.ubuntu.com

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH] [RFC] Limit dump_pipe program's permission to init for container
  2016-07-08 15:26     ` Stéphane Graber
@ 2016-07-12 14:18       ` Zhao Lei
  -1 siblings, 0 replies; 6+ messages in thread
From: Zhao Lei @ 2016-07-12 14:18 UTC (permalink / raw)
  To: 'Stéphane Graber'
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, 'Eric W. Biederman'

Hi, Stéphane Graber

Many thanks for your review!

> -----Original Message-----
> From: Stéphane Graber [mailto:stgraber@ubuntu.com]
> Sent: Friday, July 08, 2016 11:26 PM
> To: Zhao Lei <zhaolei@cn.fujitsu.com>
> Cc: linux-kernel@vger.kernel.org; containers@lists.linux-foundation.org; Eric W.
> Biederman <ebiederm@xmission.com>
> Subject: Re: [PATCH] [RFC] Limit dump_pipe program's permission to init for
> container
> 
> On Fri, Jul 08, 2016 at 07:08:10PM +0800, Zhao Lei wrote:
> > Currently when we set core_pattern to a pipe, the pipe program is
> > forked by kthread running with root's permission, and write dumpfile
> > into host's filesystem.
> > Same thing happened for container, the dumper and dumpfile are also
> > in host(not in container).
> >
> > It have following program:
> > 1: Not consistent with file_type core_pattern
> >    When we set core_pattern to a file, the container will write dump
> >    into container's filesystem instead of host.
> > 2: Not safe for privileged container
> >    In a privileged container, user can destroy host system by following
> >    command:
> >    # # In a container
> >    # echo "|/bin/dd of=/boot/vmlinuz" >/proc/sys/kernel/core_pattern
> >    # make_dump
> >
> > This patch switch dumper program's permission to init task, it is to say,
> > for container, dumper program have same permission with init task in
> > container, which make dumper program put in container's filesystem, and
> > write coredump into container's filesystem.
> > The dumper's permission is also limited into subset of container's init
> > process.
> >
> > See following discussion for detail:
> > http://www.gossamer-threads.com/lists/linux/kernel/2455363
> > http://www.gossamer-threads.com/lists/linux/kernel/2397602
> >
> > Todo:
> > 1: Set different core_pattern to host and each container.
> > 2: Keep compatibility with current design
> > All above are done in previous version of this patch,
> > need some restruct.
> >
> > Any suggestion is welcome for this patch.
> 
> 
> Ok, so those two todo items are I think absolute musts before we can
> consider any of this.
> 
> Changing the default behavior that we have had ever since pipe in
> core_pattern and namespaces have existed would cause serious problem for
> existing users.
> 
> 
> Speaking for myself, LXC does setup limitations on privileged containers
> which prevent them from changing the core_pattern so that it can't be
> abused for privileged containers.
> The core_pattern on Ubuntu then sends all crashes to apport, as root, on
> the host. Apport is then container aware and handles forwarding the
> crash (through a unix socket) to the apport process inside the
> container.
> 
> Your suggested change would likely make this all fail as apport on the
> host must be called as root to be able to access the container's
> filesystem (through /proc/<PID>/root).
> 
> 
> With namespacing in place, then we wouldn't need the apport relay trick
> on the host, so that'd be fine.
> 
Yes, keeping compatibility with existed container program
is important, we can not break these tools.

The compatibility problem is discussed in previous impliment of this
patch before, from:
http://www.gossamer-threads.com/lists/linux/kernel/2399347#2399347
to
http://www.gossamer-threads.com/lists/linux/kernel/2400776#2400776

We get this way in discusstion: Use new behavior(dump into container)
only if user re-set core_pattern in container.
And for lxc, because core_pattern is set by host, the dump will
keeping old behavior.

It is already done in the previous patch, and it can be move to
this patch, I'll start it.

> And otherwise, making this behavior optional (different pattern or
> something) would be fine too (not breaking existing users).
>
Yes, it is a good idea, it give us another selection for
compatibility problem.

Thanks
Zhaolei

> >
> > Suggested-by: Eric W. Biederman <ebiederm@xmission.com>
> > Suggested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> >
> > Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
> > ---
> >  fs/coredump.c           | 84
> ++++++++++++++++++++++++++++++++++++++++++++++++-
> >  include/linux/binfmts.h |  1 +
> >  2 files changed, 84 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/coredump.c b/fs/coredump.c
> > index 281b768..92dc343 100644
> > --- a/fs/coredump.c
> > +++ b/fs/coredump.c
> > @@ -516,6 +516,8 @@ static int umh_pipe_setup(struct subprocess_info
> *info, struct cred *new)
> >  {
> >  	struct file *files[2];
> >  	struct coredump_params *cp = (struct coredump_params *)info->data;
> > +	struct task_struct *base_task;
> > +
> >  	int err = create_pipe_files(files, 0);
> >  	if (err)
> >  		return err;
> > @@ -524,10 +526,78 @@ static int umh_pipe_setup(struct subprocess_info
> *info, struct cred *new)
> >
> >  	err = replace_fd(0, files[0], 0);
> >  	fput(files[0]);
> > +	if (err)
> > +		return err;
> > +
> >  	/* and disallow core files too */
> >  	current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
> >
> > -	return err;
> > +	base_task = cp->base_task;
> > +	if (base_task) {
> > +		const struct cred *base_cred;
> > +
> > +		/* Set fs_root to base_task */
> > +		spin_lock(&base_task->fs->lock);
> > +		set_fs_root(current->fs, &base_task->fs->root);
> > +		spin_unlock(&base_task->fs->lock);
> > +
> > +		/* Set namespaces to base_task */
> > +		switch_task_namespaces(current, base_task->nsproxy);
> > +
> > +		/* Set cgroup to base_task */
> > +		current->flags &= ~PF_NO_SETAFFINITY;
> > +		err = cgroup_attach_task_all(base_task, current);
> > +		if (err < 0)
> > +			return err;
> > +
> > +		/* Set cred to base_task */
> > +		base_cred = get_task_cred(base_task);
> > +
> > +		new->uid   = base_cred->uid;
> > +		new->gid   = base_cred->gid;
> > +		new->suid  = base_cred->suid;
> > +		new->sgid  = base_cred->sgid;
> > +		new->euid  = base_cred->euid;
> > +		new->egid  = base_cred->egid;
> > +		new->fsuid = base_cred->fsuid;
> > +		new->fsgid = base_cred->fsgid;
> > +
> > +		new->securebits = base_cred->securebits;
> > +
> > +		new->cap_inheritable = base_cred->cap_inheritable;
> > +		new->cap_permitted   = base_cred->cap_permitted;
> > +		new->cap_effective   = base_cred->cap_effective;
> > +		new->cap_bset        = base_cred->cap_bset;
> > +		new->cap_ambient     = base_cred->cap_ambient;
> > +
> > +		security_cred_free(new);
> > +#ifdef CONFIG_SECURITY
> > +		new->security = NULL;
> > +#endif
> > +		err = security_prepare_creds(new, base_cred, GFP_KERNEL);
> > +		if (err < 0) {
> > +			put_cred(base_cred);
> > +			return err;
> > +		}
> > +
> > +		free_uid(new->user);
> > +		new->user = base_cred->user;
> > +		get_uid(new->user);
> > +
> > +		put_user_ns(new->user_ns);
> > +		new->user_ns = base_cred->user_ns;
> > +		get_user_ns(new->user_ns);
> > +
> > +		put_group_info(new->group_info);
> > +		new->group_info = base_cred->group_info;
> > +		get_group_info(new->group_info);
> > +
> > +		put_cred(base_cred);
> > +
> > +		validate_creds(new);
> > +	}
> > +
> > +	return 0;
> >  }
> >
> >  void do_coredump(const siginfo_t *siginfo)
> > @@ -590,6 +660,7 @@ void do_coredump(const siginfo_t *siginfo)
> >
> >  	if (ispipe) {
> >  		int dump_count;
> > +		struct task_struct *vinit_task;
> >  		char **helper_argv;
> >  		struct subprocess_info *sub_info;
> >
> > @@ -631,6 +702,12 @@ void do_coredump(const siginfo_t *siginfo)
> >  			goto fail_dropcount;
> >  		}
> >
> > +		vinit_task = find_task_by_vpid(1);
> > +		if (!vinit_task) {
> > +			printk(KERN_WARNING "failed getting init task info, skipping
> core dump\n");
> > +			goto fail_dropcount;
> > +		}
> > +
> >  		helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL);
> >  		if (!helper_argv) {
> >  			printk(KERN_WARNING "%s failed to allocate memory\n",
> > @@ -638,6 +715,10 @@ void do_coredump(const siginfo_t *siginfo)
> >  			goto fail_dropcount;
> >  		}
> >
> > +		get_task_struct(vinit_task);
> > +
> > +		cprm.base_task = vinit_task;
> > +
> >  		retval = -ENOMEM;
> >  		sub_info = call_usermodehelper_setup(helper_argv[0],
> >  						helper_argv, NULL, GFP_KERNEL,
> > @@ -646,6 +727,7 @@ void do_coredump(const siginfo_t *siginfo)
> >  			retval = call_usermodehelper_exec(sub_info,
> >  							  UMH_WAIT_EXEC);
> >
> > +		put_task_struct(vinit_task);
> >  		argv_free(helper_argv);
> >  		if (retval) {
> >  			printk(KERN_INFO "Core dump to |%s pipe failed\n",
> > diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
> > index 314b3ca..0c9a72c 100644
> > --- a/include/linux/binfmts.h
> > +++ b/include/linux/binfmts.h
> > @@ -59,6 +59,7 @@ struct linux_binprm {
> >
> >  /* Function parameter for binfmt->coredump */
> >  struct coredump_params {
> > +	struct task_struct *base_task;
> >  	const siginfo_t *siginfo;
> >  	struct pt_regs *regs;
> >  	struct file *file;
> > --
> > 1.8.5.1
> >
> >
> >
> > _______________________________________________
> > Containers mailing list
> > Containers@lists.linux-foundation.org
> > https://lists.linuxfoundation.org/mailman/listinfo/containers
> 
> --
> Stéphane Graber
> Ubuntu developer
> http://www.ubuntu.com



_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/containers

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH] [RFC] Limit dump_pipe program's permission to init for container
@ 2016-07-12 14:18       ` Zhao Lei
  0 siblings, 0 replies; 6+ messages in thread
From: Zhao Lei @ 2016-07-12 14:18 UTC (permalink / raw)
  To: 'Stéphane Graber'
  Cc: linux-kernel, containers, 'Eric W. Biederman'

Hi, Stéphane Graber

Many thanks for your review!

> -----Original Message-----
> From: Stéphane Graber [mailto:stgraber@ubuntu.com]
> Sent: Friday, July 08, 2016 11:26 PM
> To: Zhao Lei <zhaolei@cn.fujitsu.com>
> Cc: linux-kernel@vger.kernel.org; containers@lists.linux-foundation.org; Eric W.
> Biederman <ebiederm@xmission.com>
> Subject: Re: [PATCH] [RFC] Limit dump_pipe program's permission to init for
> container
> 
> On Fri, Jul 08, 2016 at 07:08:10PM +0800, Zhao Lei wrote:
> > Currently when we set core_pattern to a pipe, the pipe program is
> > forked by kthread running with root's permission, and write dumpfile
> > into host's filesystem.
> > Same thing happened for container, the dumper and dumpfile are also
> > in host(not in container).
> >
> > It have following program:
> > 1: Not consistent with file_type core_pattern
> >    When we set core_pattern to a file, the container will write dump
> >    into container's filesystem instead of host.
> > 2: Not safe for privileged container
> >    In a privileged container, user can destroy host system by following
> >    command:
> >    # # In a container
> >    # echo "|/bin/dd of=/boot/vmlinuz" >/proc/sys/kernel/core_pattern
> >    # make_dump
> >
> > This patch switch dumper program's permission to init task, it is to say,
> > for container, dumper program have same permission with init task in
> > container, which make dumper program put in container's filesystem, and
> > write coredump into container's filesystem.
> > The dumper's permission is also limited into subset of container's init
> > process.
> >
> > See following discussion for detail:
> > http://www.gossamer-threads.com/lists/linux/kernel/2455363
> > http://www.gossamer-threads.com/lists/linux/kernel/2397602
> >
> > Todo:
> > 1: Set different core_pattern to host and each container.
> > 2: Keep compatibility with current design
> > All above are done in previous version of this patch,
> > need some restruct.
> >
> > Any suggestion is welcome for this patch.
> 
> 
> Ok, so those two todo items are I think absolute musts before we can
> consider any of this.
> 
> Changing the default behavior that we have had ever since pipe in
> core_pattern and namespaces have existed would cause serious problem for
> existing users.
> 
> 
> Speaking for myself, LXC does setup limitations on privileged containers
> which prevent them from changing the core_pattern so that it can't be
> abused for privileged containers.
> The core_pattern on Ubuntu then sends all crashes to apport, as root, on
> the host. Apport is then container aware and handles forwarding the
> crash (through a unix socket) to the apport process inside the
> container.
> 
> Your suggested change would likely make this all fail as apport on the
> host must be called as root to be able to access the container's
> filesystem (through /proc/<PID>/root).
> 
> 
> With namespacing in place, then we wouldn't need the apport relay trick
> on the host, so that'd be fine.
> 
Yes, keeping compatibility with existed container program
is important, we can not break these tools.

The compatibility problem is discussed in previous impliment of this
patch before, from:
http://www.gossamer-threads.com/lists/linux/kernel/2399347#2399347
to
http://www.gossamer-threads.com/lists/linux/kernel/2400776#2400776

We get this way in discusstion: Use new behavior(dump into container)
only if user re-set core_pattern in container.
And for lxc, because core_pattern is set by host, the dump will
keeping old behavior.

It is already done in the previous patch, and it can be move to
this patch, I'll start it.

> And otherwise, making this behavior optional (different pattern or
> something) would be fine too (not breaking existing users).
>
Yes, it is a good idea, it give us another selection for
compatibility problem.

Thanks
Zhaolei

> >
> > Suggested-by: Eric W. Biederman <ebiederm@xmission.com>
> > Suggested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> >
> > Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
> > ---
> >  fs/coredump.c           | 84
> ++++++++++++++++++++++++++++++++++++++++++++++++-
> >  include/linux/binfmts.h |  1 +
> >  2 files changed, 84 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/coredump.c b/fs/coredump.c
> > index 281b768..92dc343 100644
> > --- a/fs/coredump.c
> > +++ b/fs/coredump.c
> > @@ -516,6 +516,8 @@ static int umh_pipe_setup(struct subprocess_info
> *info, struct cred *new)
> >  {
> >  	struct file *files[2];
> >  	struct coredump_params *cp = (struct coredump_params *)info->data;
> > +	struct task_struct *base_task;
> > +
> >  	int err = create_pipe_files(files, 0);
> >  	if (err)
> >  		return err;
> > @@ -524,10 +526,78 @@ static int umh_pipe_setup(struct subprocess_info
> *info, struct cred *new)
> >
> >  	err = replace_fd(0, files[0], 0);
> >  	fput(files[0]);
> > +	if (err)
> > +		return err;
> > +
> >  	/* and disallow core files too */
> >  	current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
> >
> > -	return err;
> > +	base_task = cp->base_task;
> > +	if (base_task) {
> > +		const struct cred *base_cred;
> > +
> > +		/* Set fs_root to base_task */
> > +		spin_lock(&base_task->fs->lock);
> > +		set_fs_root(current->fs, &base_task->fs->root);
> > +		spin_unlock(&base_task->fs->lock);
> > +
> > +		/* Set namespaces to base_task */
> > +		switch_task_namespaces(current, base_task->nsproxy);
> > +
> > +		/* Set cgroup to base_task */
> > +		current->flags &= ~PF_NO_SETAFFINITY;
> > +		err = cgroup_attach_task_all(base_task, current);
> > +		if (err < 0)
> > +			return err;
> > +
> > +		/* Set cred to base_task */
> > +		base_cred = get_task_cred(base_task);
> > +
> > +		new->uid   = base_cred->uid;
> > +		new->gid   = base_cred->gid;
> > +		new->suid  = base_cred->suid;
> > +		new->sgid  = base_cred->sgid;
> > +		new->euid  = base_cred->euid;
> > +		new->egid  = base_cred->egid;
> > +		new->fsuid = base_cred->fsuid;
> > +		new->fsgid = base_cred->fsgid;
> > +
> > +		new->securebits = base_cred->securebits;
> > +
> > +		new->cap_inheritable = base_cred->cap_inheritable;
> > +		new->cap_permitted   = base_cred->cap_permitted;
> > +		new->cap_effective   = base_cred->cap_effective;
> > +		new->cap_bset        = base_cred->cap_bset;
> > +		new->cap_ambient     = base_cred->cap_ambient;
> > +
> > +		security_cred_free(new);
> > +#ifdef CONFIG_SECURITY
> > +		new->security = NULL;
> > +#endif
> > +		err = security_prepare_creds(new, base_cred, GFP_KERNEL);
> > +		if (err < 0) {
> > +			put_cred(base_cred);
> > +			return err;
> > +		}
> > +
> > +		free_uid(new->user);
> > +		new->user = base_cred->user;
> > +		get_uid(new->user);
> > +
> > +		put_user_ns(new->user_ns);
> > +		new->user_ns = base_cred->user_ns;
> > +		get_user_ns(new->user_ns);
> > +
> > +		put_group_info(new->group_info);
> > +		new->group_info = base_cred->group_info;
> > +		get_group_info(new->group_info);
> > +
> > +		put_cred(base_cred);
> > +
> > +		validate_creds(new);
> > +	}
> > +
> > +	return 0;
> >  }
> >
> >  void do_coredump(const siginfo_t *siginfo)
> > @@ -590,6 +660,7 @@ void do_coredump(const siginfo_t *siginfo)
> >
> >  	if (ispipe) {
> >  		int dump_count;
> > +		struct task_struct *vinit_task;
> >  		char **helper_argv;
> >  		struct subprocess_info *sub_info;
> >
> > @@ -631,6 +702,12 @@ void do_coredump(const siginfo_t *siginfo)
> >  			goto fail_dropcount;
> >  		}
> >
> > +		vinit_task = find_task_by_vpid(1);
> > +		if (!vinit_task) {
> > +			printk(KERN_WARNING "failed getting init task info, skipping
> core dump\n");
> > +			goto fail_dropcount;
> > +		}
> > +
> >  		helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL);
> >  		if (!helper_argv) {
> >  			printk(KERN_WARNING "%s failed to allocate memory\n",
> > @@ -638,6 +715,10 @@ void do_coredump(const siginfo_t *siginfo)
> >  			goto fail_dropcount;
> >  		}
> >
> > +		get_task_struct(vinit_task);
> > +
> > +		cprm.base_task = vinit_task;
> > +
> >  		retval = -ENOMEM;
> >  		sub_info = call_usermodehelper_setup(helper_argv[0],
> >  						helper_argv, NULL, GFP_KERNEL,
> > @@ -646,6 +727,7 @@ void do_coredump(const siginfo_t *siginfo)
> >  			retval = call_usermodehelper_exec(sub_info,
> >  							  UMH_WAIT_EXEC);
> >
> > +		put_task_struct(vinit_task);
> >  		argv_free(helper_argv);
> >  		if (retval) {
> >  			printk(KERN_INFO "Core dump to |%s pipe failed\n",
> > diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
> > index 314b3ca..0c9a72c 100644
> > --- a/include/linux/binfmts.h
> > +++ b/include/linux/binfmts.h
> > @@ -59,6 +59,7 @@ struct linux_binprm {
> >
> >  /* Function parameter for binfmt->coredump */
> >  struct coredump_params {
> > +	struct task_struct *base_task;
> >  	const siginfo_t *siginfo;
> >  	struct pt_regs *regs;
> >  	struct file *file;
> > --
> > 1.8.5.1
> >
> >
> >
> > _______________________________________________
> > Containers mailing list
> > Containers@lists.linux-foundation.org
> > https://lists.linuxfoundation.org/mailman/listinfo/containers
> 
> --
> Stéphane Graber
> Ubuntu developer
> http://www.ubuntu.com

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2016-07-12 14:18 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-07-08 11:08 [PATCH] [RFC] Limit dump_pipe program's permission to init for container Zhao Lei
2016-07-08 11:08 ` Zhao Lei
     [not found] ` <ebe572465ba7c6d12004b62ed752cc5804bf13b4.1467975799.git.zhaolei-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2016-07-08 15:26   ` Stéphane Graber
2016-07-08 15:26     ` Stéphane Graber
2016-07-12 14:18     ` Zhao Lei
2016-07-12 14:18       ` Zhao Lei

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.