From: Andrei Vagin <avagin@virtuozzo.com>
To: David Howells <dhowells@redhat.com>
Cc: viro@zeniv.linux.org.uk, linux-nfs@vger.kernel.org,
linux-kernel@vger.kernel.org,
linux-security-module@vger.kernel.org,
linux-fsdevel@vger.kernel.org, linux-afs@lists.infradead.org
Subject: Re: [12/24] proc: Add fs_context support to procfs [ver #7]
Date: Mon, 18 Jun 2018 20:34:51 -0700 [thread overview]
Message-ID: <20180619033450.GA11639@outlook.office365.com> (raw)
In-Reply-To: <152414474815.23902.6952548431423168966.stgit@warthog.procyon.org.uk>
[-- Attachment #1: Type: text/plain, Size: 10050 bytes --]
Hi David,
We run CRIU tests for vfs/for-next, and today a few of these tests failed. I
found that the problem appears after this patch.
https://travis-ci.org/avagin/linux/jobs/393766778
The reproducer is attached. It creates a process in a new set of namespaces
(user, mount, etc) and then this process fails to mount procfs, the mount
syscall returns EBUSY.
666 pipe([3, 4]) = 0
666 clone(child_stack=0x7ffc23a89400, flags=CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWNET|SIGCHLD) = 667
666 openat(AT_FDCWD, "/proc/667/uid_map", O_WRONLY <unfinished ...>
667 close(4 <unfinished ...>
666 <... openat resumed> ) = 5
666 write(5, "0 100000 100000\n100000 200000 50"..., 36 <unfinished ...>
667 <... close resumed> ) = 0
666 <... write resumed> ) = 36
666 close(5 <unfinished ...>
667 read(3, <unfinished ...>
666 <... close resumed> ) = 0
666 openat(AT_FDCWD, "/proc/667/gid_map", O_WRONLY) = 5
666 write(5, "0 400000 50000\n50000 500000 1000"..., 35) = 35
666 close(5) = 0
666 write(4, " \225\250#", 4) = 4
667 <... read resumed> " \225\250#", 4) = 4
666 wait4(667, <unfinished ...>
667 setsid() = 1
667 setuid(0) = 0
667 setgid(0) = 0
667 setgroups(0, NULL) = 0
667 mount("proc", "/mnt", "proc", MS_MGC_VAL|MS_NOSUID|MS_NODEV|MS_NOEXEC, NULL) = -1 EBUSY (Device or resource busy)
Thanks,
Andrei
On Thu, Apr 19, 2018 at 02:32:28PM +0100, David Howells wrote:
> Add fs_context support to procfs.
>
> Signed-off-by: David Howells <dhowells@redhat.com>
> ---
>
> fs/proc/inode.c | 2 -
> fs/proc/internal.h | 2 -
> fs/proc/root.c | 169 ++++++++++++++++++++++++++++++++++------------------
> 3 files changed, 113 insertions(+), 60 deletions(-)
>
> diff --git a/fs/proc/inode.c b/fs/proc/inode.c
> index 0b13cf6eb6d7..7aa86dd65ba8 100644
> --- a/fs/proc/inode.c
> +++ b/fs/proc/inode.c
> @@ -128,7 +128,7 @@ const struct super_operations proc_sops = {
> .drop_inode = generic_delete_inode,
> .evict_inode = proc_evict_inode,
> .statfs = simple_statfs,
> - .remount_fs = proc_remount,
> + .reconfigure = proc_reconfigure,
> .show_options = proc_show_options,
> };
>
> diff --git a/fs/proc/internal.h b/fs/proc/internal.h
> index 3182e1b636d3..a5ab9504768a 100644
> --- a/fs/proc/internal.h
> +++ b/fs/proc/internal.h
> @@ -254,7 +254,7 @@ static inline void proc_tty_init(void) {}
> extern struct proc_dir_entry proc_root;
>
> extern void proc_self_init(void);
> -extern int proc_remount(struct super_block *, int *, char *, size_t);
> +extern int proc_reconfigure(struct super_block *, struct fs_context *);
>
> /*
> * task_[no]mmu.c
> diff --git a/fs/proc/root.c b/fs/proc/root.c
> index 2fbc177f37a8..e6bd31fbc714 100644
> --- a/fs/proc/root.c
> +++ b/fs/proc/root.c
> @@ -19,14 +19,24 @@
> #include <linux/module.h>
> #include <linux/bitops.h>
> #include <linux/user_namespace.h>
> +#include <linux/fs_context.h>
> #include <linux/mount.h>
> #include <linux/pid_namespace.h>
> #include <linux/parser.h>
> #include <linux/cred.h>
> #include <linux/magic.h>
> +#include <linux/slab.h>
>
> #include "internal.h"
>
> +struct proc_fs_context {
> + struct fs_context fc;
> + struct pid_namespace *pid_ns;
> + unsigned long mask;
> + int hidepid;
> + int gid;
> +};
> +
> enum {
> Opt_gid, Opt_hidepid, Opt_err,
> };
> @@ -37,56 +47,60 @@ static const match_table_t tokens = {
> {Opt_err, NULL},
> };
>
> -static int proc_parse_options(char *options, struct pid_namespace *pid)
> +static int proc_parse_option(struct fs_context *fc, char *opt, size_t len)
> {
> - char *p;
> + struct proc_fs_context *ctx = container_of(fc, struct proc_fs_context, fc);
> substring_t args[MAX_OPT_ARGS];
> - int option;
> -
> - if (!options)
> - return 1;
> -
> - while ((p = strsep(&options, ",")) != NULL) {
> - int token;
> - if (!*p)
> - continue;
> -
> - args[0].to = args[0].from = NULL;
> - token = match_token(p, tokens, args);
> - switch (token) {
> - case Opt_gid:
> - if (match_int(&args[0], &option))
> - return 0;
> - pid->pid_gid = make_kgid(current_user_ns(), option);
> - break;
> - case Opt_hidepid:
> - if (match_int(&args[0], &option))
> - return 0;
> - if (option < HIDEPID_OFF ||
> - option > HIDEPID_INVISIBLE) {
> - pr_err("proc: hidepid value must be between 0 and 2.\n");
> - return 0;
> - }
> - pid->hide_pid = option;
> - break;
> - default:
> - pr_err("proc: unrecognized mount option \"%s\" "
> - "or missing value\n", p);
> - return 0;
> + int token;
> +
> + args[0].to = args[0].from = NULL;
> + token = match_token(opt, tokens, args);
> + switch (token) {
> + case Opt_gid:
> + if (match_int(&args[0], &ctx->gid))
> + return -EINVAL;
> + break;
> +
> + case Opt_hidepid:
> + if (match_int(&args[0], &ctx->hidepid))
> + return -EINVAL;
> + if (ctx->hidepid < HIDEPID_OFF ||
> + ctx->hidepid > HIDEPID_INVISIBLE) {
> + pr_err("proc: hidepid value must be between 0 and 2.\n");
> + return -EINVAL;
> }
> + break;
> +
> + default:
> + pr_err("proc: unrecognized mount option \"%s\" or missing value\n",
> + opt);
> + return -EINVAL;
> }
>
> - return 1;
> + ctx->mask |= 1 << token;
> + return 0;
> +}
> +
> +static void proc_set_options(struct super_block *s,
> + struct fs_context *fc,
> + struct pid_namespace *pid_ns,
> + struct user_namespace *user_ns)
> +{
> + struct proc_fs_context *ctx = container_of(fc, struct proc_fs_context, fc);
> +
> + if (ctx->mask & (1 << Opt_gid))
> + pid_ns->pid_gid = make_kgid(user_ns, ctx->gid);
> + if (ctx->mask & (1 << Opt_hidepid))
> + pid_ns->hide_pid = ctx->hidepid;
> }
>
> -static int proc_fill_super(struct super_block *s, void *data, size_t data_size, int silent)
> +static int proc_fill_super(struct super_block *s, struct fs_context *fc)
> {
> - struct pid_namespace *ns = get_pid_ns(s->s_fs_info);
> + struct pid_namespace *pid_ns = get_pid_ns(s->s_fs_info);
> struct inode *root_inode;
> int ret;
>
> - if (!proc_parse_options(data, ns))
> - return -EINVAL;
> + proc_set_options(s, fc, pid_ns, current_user_ns());
>
> /* User space would break if executables or devices appear on proc */
> s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
> @@ -103,7 +117,7 @@ static int proc_fill_super(struct super_block *s, void *data, size_t data_size,
> * top of it
> */
> s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
> -
> +
> pde_get(&proc_root);
> root_inode = proc_get_inode(s, &proc_root);
> if (!root_inode) {
> @@ -124,30 +138,46 @@ static int proc_fill_super(struct super_block *s, void *data, size_t data_size,
> return proc_setup_thread_self(s);
> }
>
> -int proc_remount(struct super_block *sb, int *flags,
> - char *data, size_t data_size)
> +int proc_reconfigure(struct super_block *sb, struct fs_context *fc)
> {
> struct pid_namespace *pid = sb->s_fs_info;
>
> sync_filesystem(sb);
> - return !proc_parse_options(data, pid);
> +
> + if (fc)
> + proc_set_options(sb, fc, pid, current_user_ns());
> + return 0;
> }
>
> -static struct dentry *proc_mount(struct file_system_type *fs_type,
> - int flags, const char *dev_name,
> - void *data, size_t data_size)
> +static int proc_get_tree(struct fs_context *fc)
> {
> - struct pid_namespace *ns;
> + struct proc_fs_context *ctx = container_of(fc, struct proc_fs_context, fc);
>
> - if (flags & SB_KERNMOUNT) {
> - ns = data;
> - data = NULL;
> - } else {
> - ns = task_active_pid_ns(current);
> - }
> + ctx->fc.s_fs_info = ctx->pid_ns;
> + return vfs_get_super(fc, vfs_get_keyed_super, proc_fill_super);
> +}
>
> - return mount_ns(fs_type, flags, data, data_size, ns, ns->user_ns,
> - proc_fill_super);
> +static void proc_fs_context_free(struct fs_context *fc)
> +{
> + struct proc_fs_context *ctx = container_of(fc, struct proc_fs_context, fc);
> +
> + if (ctx->pid_ns)
> + put_pid_ns(ctx->pid_ns);
> +}
> +
> +static const struct fs_context_operations proc_fs_context_ops = {
> + .free = proc_fs_context_free,
> + .parse_option = proc_parse_option,
> + .get_tree = proc_get_tree,
> +};
> +
> +static int proc_init_fs_context(struct fs_context *fc, struct super_block *src_sb)
> +{
> + struct proc_fs_context *ctx = container_of(fc, struct proc_fs_context, fc);
> +
> + ctx->pid_ns = get_pid_ns(task_active_pid_ns(current));
> + ctx->fc.ops = &proc_fs_context_ops;
> + return 0;
> }
>
> static void proc_kill_sb(struct super_block *sb)
> @@ -165,7 +195,8 @@ static void proc_kill_sb(struct super_block *sb)
>
> static struct file_system_type proc_fs_type = {
> .name = "proc",
> - .mount = proc_mount,
> + .fs_context_size = sizeof(struct proc_fs_context),
> + .init_fs_context = proc_init_fs_context,
> .kill_sb = proc_kill_sb,
> .fs_flags = FS_USERNS_MOUNT,
> };
> @@ -205,7 +236,7 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr
> {
> if (!proc_pid_lookup(dir, dentry, flags))
> return NULL;
> -
> +
> return proc_lookup(dir, dentry, flags);
> }
>
> @@ -259,9 +290,31 @@ struct proc_dir_entry proc_root = {
>
> int pid_ns_prepare_proc(struct pid_namespace *ns)
> {
> + struct proc_fs_context *ctx;
> + struct fs_context *fc;
> struct vfsmount *mnt;
> + int ret;
> +
> + fc = vfs_new_fs_context(&proc_fs_type, NULL, 0,
> + FS_CONTEXT_FOR_KERNEL_MOUNT);
> + if (IS_ERR(fc))
> + return PTR_ERR(fc);
> +
> + ctx = container_of(fc, struct proc_fs_context, fc);
> + if (ctx->pid_ns != ns) {
> + put_pid_ns(ctx->pid_ns);
> + get_pid_ns(ns);
> + ctx->pid_ns = ns;
> + }
> +
> + ret = vfs_get_tree(fc);
> + if (ret < 0) {
> + put_fs_context(fc);
> + return ret;
> + }
>
> - mnt = kern_mount_data(&proc_fs_type, ns, 0);
> + mnt = vfs_create_mount(fc);
> + put_fs_context(fc);
> if (IS_ERR(mnt))
> return PTR_ERR(mnt);
>
[-- Attachment #2: test.c --]
[-- Type: text/plain, Size: 2265 bytes --]
#define _GNU_SOURCE
#include <sys/types.h>
#include <sched.h>
#include <unistd.h>
#include <stdio.h>
#include <sys/mount.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <grp.h>
#include <linux/limits.h>
#define NS_STACK_SIZE 4096
#define __stack_aligned__ __attribute__((aligned(16)))
/*
 * Stack and arguments handed to the clone()d child.
 *
 * All arguments should be above stack, because it grows down: the
 * address of stack_ptr (the end of stack[]) is what main() passes to
 * clone() as the child's stack, and pfd[] is laid out after it.
 * The member order therefore must not be changed.
 */
struct ns_exec_args {
char stack[NS_STACK_SIZE] __stack_aligned__; /* child's stack buffer */
char stack_ptr[0]; /* zero-length marker: first byte past stack[] */
int pfd[2]; /* copy of the pipe fds created by main(): [0] read, [1] write */
};
/*
 * Entry point of the clone()d child.
 *
 * Closes the write end of the pipe, blocks until one int has been read
 * from the read end, starts a new session, switches uid/gid to 0 and
 * drops supplementary groups, then tries to mount procfs on /mnt.
 *
 * _arg points to a struct ns_exec_args.
 * Returns 0 on success, -1 if the read, the id switch or the mount fails.
 */
static int ns_exec(void *_arg)
{
	struct ns_exec_args *ctx = _arg;
	ssize_t got;
	int token;

	/* Only the read side of the pipe is used in this function. */
	close(ctx->pfd[1]);

	/* The value itself is ignored; only a complete read matters. */
	got = read(ctx->pfd[0], &token, sizeof(token));
	if (got != sizeof(token))
		return -1;

	setsid();

	if (setuid(0) != 0 || setgid(0) != 0 || setgroups(0, NULL) != 0) {
		fprintf(stderr, "set*id failed: %m\n");
		return -1;
	}

	if (mount("proc", "/mnt", "proc",
		  MS_MGC_VAL | MS_NOSUID | MS_NOEXEC | MS_NODEV, NULL) != 0) {
		fprintf(stderr, "mount(/proc) failed: %m\n");
		return -1;
	}

	return 0;
}
#define UID_MAP "0 100000 100000\n100000 200000 50000"
#define GID_MAP "0 400000 50000\n50000 500000 100000"
/*
 * Write one id mapping for process pid.
 *
 * file is the name under /proc/<pid>/ ("uid_map" or "gid_map"), map is
 * the mapping text and len the number of bytes of it to write.
 * Prints a diagnostic to stderr and returns -1 if the file cannot be
 * opened or written; returns 0 otherwise.
 */
static int write_id_map(pid_t pid, const char *file, const char *map, size_t len)
{
	char pname[PATH_MAX];
	int fd;

	snprintf(pname, sizeof(pname), "/proc/%d/%s", pid, file);
	fd = open(pname, O_WRONLY);
	if (fd < 0) {
		fprintf(stderr, "open(%s): %m\n", pname);
		return -1;
	}
	if (write(fd, map, len) < 0) {
		/* Report before close() so that %m still sees write()'s errno. */
		fprintf(stderr, "write(%s): %m\n", map);
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}

/*
 * Reproducer driver.
 *
 * Clones a child (ns_exec) into new pid, mount, uts, net, ipc and user
 * namespaces, writes the child's uid_map and gid_map, releases the child
 * through the pipe and waits for it.
 *
 * Returns 0 if the child exited with status 0, 1 otherwise.
 */
int main(void)
{
	struct ns_exec_args args;
	/*
	 * Token sent over the pipe to release the child. The child ignores
	 * its value, but it is initialised so that no indeterminate stack
	 * bytes are passed to write().
	 */
	int ret = 0;
	int status, flags, pfd[2];
	pid_t pid;

	if (pipe(pfd))
		return 1;
	args.pfd[0] = pfd[0];
	args.pfd[1] = pfd[1];

	flags = CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWUTS |
		CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUSER | SIGCHLD;
	/* stack_ptr is the top of args.stack; the stack grows down from it. */
	pid = clone(ns_exec, args.stack_ptr, flags, &args);
	if (pid < 0) {
		fprintf(stderr, "clone() failed: %m\n");
		exit(1);
	}

	/*
	 * sizeof (not strlen) is kept on purpose: the byte counts match the
	 * original reproducer, which also writes the terminating NUL.
	 */
	if (write_id_map(pid, "uid_map", UID_MAP, sizeof(UID_MAP)))
		exit(1);
	if (write_id_map(pid, "gid_map", GID_MAP, sizeof(GID_MAP)))
		exit(1);

	/* Mappings are in place: let the child proceed. */
	if (write(pfd[1], &ret, sizeof(ret)) != sizeof(ret))
		return 1;

	if (waitpid(pid, &status, 0) != pid)
		return 1;
	if (status)
		return 1;
	return 0;
}
WARNING: multiple messages have this Message-ID (diff)
From: avagin@virtuozzo.com (Andrei Vagin)
To: linux-security-module@vger.kernel.org
Subject: [12/24] proc: Add fs_context support to procfs [ver #7]
Date: Mon, 18 Jun 2018 20:34:51 -0700 [thread overview]
Message-ID: <20180619033450.GA11639@outlook.office365.com> (raw)
In-Reply-To: <152414474815.23902.6952548431423168966.stgit@warthog.procyon.org.uk>
Hi David,
We run CRIU tests for vfs/for-next, and today a few of these tests failed. I
found that the problem appears after this patch.
https://travis-ci.org/avagin/linux/jobs/393766778
The reproducer is attached. It creates a process in a new set of namespaces
(user, mount, etc) and then this process fails to mount procfs, the mount
syscall returns EBUSY.
666 pipe([3, 4]) = 0
666 clone(child_stack=0x7ffc23a89400, flags=CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWNET|SIGCHLD) = 667
666 openat(AT_FDCWD, "/proc/667/uid_map", O_WRONLY <unfinished ...>
667 close(4 <unfinished ...>
666 <... openat resumed> ) = 5
666 write(5, "0 100000 100000\n100000 200000 50"..., 36 <unfinished ...>
667 <... close resumed> ) = 0
666 <... write resumed> ) = 36
666 close(5 <unfinished ...>
667 read(3, <unfinished ...>
666 <... close resumed> ) = 0
666 openat(AT_FDCWD, "/proc/667/gid_map", O_WRONLY) = 5
666 write(5, "0 400000 50000\n50000 500000 1000"..., 35) = 35
666 close(5) = 0
666 write(4, " \225\250#", 4) = 4
667 <... read resumed> " \225\250#", 4) = 4
666 wait4(667, <unfinished ...>
667 setsid() = 1
667 setuid(0) = 0
667 setgid(0) = 0
667 setgroups(0, NULL) = 0
667 mount("proc", "/mnt", "proc", MS_MGC_VAL|MS_NOSUID|MS_NODEV|MS_NOEXEC, NULL) = -1 EBUSY (Device or resource busy)
Thanks,
Andrei
On Thu, Apr 19, 2018 at 02:32:28PM +0100, David Howells wrote:
> Add fs_context support to procfs.
>
> Signed-off-by: David Howells <dhowells@redhat.com>
> ---
>
> fs/proc/inode.c | 2 -
> fs/proc/internal.h | 2 -
> fs/proc/root.c | 169 ++++++++++++++++++++++++++++++++++------------------
> 3 files changed, 113 insertions(+), 60 deletions(-)
>
> diff --git a/fs/proc/inode.c b/fs/proc/inode.c
> index 0b13cf6eb6d7..7aa86dd65ba8 100644
> --- a/fs/proc/inode.c
> +++ b/fs/proc/inode.c
> @@ -128,7 +128,7 @@ const struct super_operations proc_sops = {
> .drop_inode = generic_delete_inode,
> .evict_inode = proc_evict_inode,
> .statfs = simple_statfs,
> - .remount_fs = proc_remount,
> + .reconfigure = proc_reconfigure,
> .show_options = proc_show_options,
> };
>
> diff --git a/fs/proc/internal.h b/fs/proc/internal.h
> index 3182e1b636d3..a5ab9504768a 100644
> --- a/fs/proc/internal.h
> +++ b/fs/proc/internal.h
> @@ -254,7 +254,7 @@ static inline void proc_tty_init(void) {}
> extern struct proc_dir_entry proc_root;
>
> extern void proc_self_init(void);
> -extern int proc_remount(struct super_block *, int *, char *, size_t);
> +extern int proc_reconfigure(struct super_block *, struct fs_context *);
>
> /*
> * task_[no]mmu.c
> diff --git a/fs/proc/root.c b/fs/proc/root.c
> index 2fbc177f37a8..e6bd31fbc714 100644
> --- a/fs/proc/root.c
> +++ b/fs/proc/root.c
> @@ -19,14 +19,24 @@
> #include <linux/module.h>
> #include <linux/bitops.h>
> #include <linux/user_namespace.h>
> +#include <linux/fs_context.h>
> #include <linux/mount.h>
> #include <linux/pid_namespace.h>
> #include <linux/parser.h>
> #include <linux/cred.h>
> #include <linux/magic.h>
> +#include <linux/slab.h>
>
> #include "internal.h"
>
> +struct proc_fs_context {
> + struct fs_context fc;
> + struct pid_namespace *pid_ns;
> + unsigned long mask;
> + int hidepid;
> + int gid;
> +};
> +
> enum {
> Opt_gid, Opt_hidepid, Opt_err,
> };
> @@ -37,56 +47,60 @@ static const match_table_t tokens = {
> {Opt_err, NULL},
> };
>
> -static int proc_parse_options(char *options, struct pid_namespace *pid)
> +static int proc_parse_option(struct fs_context *fc, char *opt, size_t len)
> {
> - char *p;
> + struct proc_fs_context *ctx = container_of(fc, struct proc_fs_context, fc);
> substring_t args[MAX_OPT_ARGS];
> - int option;
> -
> - if (!options)
> - return 1;
> -
> - while ((p = strsep(&options, ",")) != NULL) {
> - int token;
> - if (!*p)
> - continue;
> -
> - args[0].to = args[0].from = NULL;
> - token = match_token(p, tokens, args);
> - switch (token) {
> - case Opt_gid:
> - if (match_int(&args[0], &option))
> - return 0;
> - pid->pid_gid = make_kgid(current_user_ns(), option);
> - break;
> - case Opt_hidepid:
> - if (match_int(&args[0], &option))
> - return 0;
> - if (option < HIDEPID_OFF ||
> - option > HIDEPID_INVISIBLE) {
> - pr_err("proc: hidepid value must be between 0 and 2.\n");
> - return 0;
> - }
> - pid->hide_pid = option;
> - break;
> - default:
> - pr_err("proc: unrecognized mount option \"%s\" "
> - "or missing value\n", p);
> - return 0;
> + int token;
> +
> + args[0].to = args[0].from = NULL;
> + token = match_token(opt, tokens, args);
> + switch (token) {
> + case Opt_gid:
> + if (match_int(&args[0], &ctx->gid))
> + return -EINVAL;
> + break;
> +
> + case Opt_hidepid:
> + if (match_int(&args[0], &ctx->hidepid))
> + return -EINVAL;
> + if (ctx->hidepid < HIDEPID_OFF ||
> + ctx->hidepid > HIDEPID_INVISIBLE) {
> + pr_err("proc: hidepid value must be between 0 and 2.\n");
> + return -EINVAL;
> }
> + break;
> +
> + default:
> + pr_err("proc: unrecognized mount option \"%s\" or missing value\n",
> + opt);
> + return -EINVAL;
> }
>
> - return 1;
> + ctx->mask |= 1 << token;
> + return 0;
> +}
> +
> +static void proc_set_options(struct super_block *s,
> + struct fs_context *fc,
> + struct pid_namespace *pid_ns,
> + struct user_namespace *user_ns)
> +{
> + struct proc_fs_context *ctx = container_of(fc, struct proc_fs_context, fc);
> +
> + if (ctx->mask & (1 << Opt_gid))
> + pid_ns->pid_gid = make_kgid(user_ns, ctx->gid);
> + if (ctx->mask & (1 << Opt_hidepid))
> + pid_ns->hide_pid = ctx->hidepid;
> }
>
> -static int proc_fill_super(struct super_block *s, void *data, size_t data_size, int silent)
> +static int proc_fill_super(struct super_block *s, struct fs_context *fc)
> {
> - struct pid_namespace *ns = get_pid_ns(s->s_fs_info);
> + struct pid_namespace *pid_ns = get_pid_ns(s->s_fs_info);
> struct inode *root_inode;
> int ret;
>
> - if (!proc_parse_options(data, ns))
> - return -EINVAL;
> + proc_set_options(s, fc, pid_ns, current_user_ns());
>
> /* User space would break if executables or devices appear on proc */
> s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
> @@ -103,7 +117,7 @@ static int proc_fill_super(struct super_block *s, void *data, size_t data_size,
> * top of it
> */
> s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
> -
> +
> pde_get(&proc_root);
> root_inode = proc_get_inode(s, &proc_root);
> if (!root_inode) {
> @@ -124,30 +138,46 @@ static int proc_fill_super(struct super_block *s, void *data, size_t data_size,
> return proc_setup_thread_self(s);
> }
>
> -int proc_remount(struct super_block *sb, int *flags,
> - char *data, size_t data_size)
> +int proc_reconfigure(struct super_block *sb, struct fs_context *fc)
> {
> struct pid_namespace *pid = sb->s_fs_info;
>
> sync_filesystem(sb);
> - return !proc_parse_options(data, pid);
> +
> + if (fc)
> + proc_set_options(sb, fc, pid, current_user_ns());
> + return 0;
> }
>
> -static struct dentry *proc_mount(struct file_system_type *fs_type,
> - int flags, const char *dev_name,
> - void *data, size_t data_size)
> +static int proc_get_tree(struct fs_context *fc)
> {
> - struct pid_namespace *ns;
> + struct proc_fs_context *ctx = container_of(fc, struct proc_fs_context, fc);
>
> - if (flags & SB_KERNMOUNT) {
> - ns = data;
> - data = NULL;
> - } else {
> - ns = task_active_pid_ns(current);
> - }
> + ctx->fc.s_fs_info = ctx->pid_ns;
> + return vfs_get_super(fc, vfs_get_keyed_super, proc_fill_super);
> +}
>
> - return mount_ns(fs_type, flags, data, data_size, ns, ns->user_ns,
> - proc_fill_super);
> +static void proc_fs_context_free(struct fs_context *fc)
> +{
> + struct proc_fs_context *ctx = container_of(fc, struct proc_fs_context, fc);
> +
> + if (ctx->pid_ns)
> + put_pid_ns(ctx->pid_ns);
> +}
> +
> +static const struct fs_context_operations proc_fs_context_ops = {
> + .free = proc_fs_context_free,
> + .parse_option = proc_parse_option,
> + .get_tree = proc_get_tree,
> +};
> +
> +static int proc_init_fs_context(struct fs_context *fc, struct super_block *src_sb)
> +{
> + struct proc_fs_context *ctx = container_of(fc, struct proc_fs_context, fc);
> +
> + ctx->pid_ns = get_pid_ns(task_active_pid_ns(current));
> + ctx->fc.ops = &proc_fs_context_ops;
> + return 0;
> }
>
> static void proc_kill_sb(struct super_block *sb)
> @@ -165,7 +195,8 @@ static void proc_kill_sb(struct super_block *sb)
>
> static struct file_system_type proc_fs_type = {
> .name = "proc",
> - .mount = proc_mount,
> + .fs_context_size = sizeof(struct proc_fs_context),
> + .init_fs_context = proc_init_fs_context,
> .kill_sb = proc_kill_sb,
> .fs_flags = FS_USERNS_MOUNT,
> };
> @@ -205,7 +236,7 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr
> {
> if (!proc_pid_lookup(dir, dentry, flags))
> return NULL;
> -
> +
> return proc_lookup(dir, dentry, flags);
> }
>
> @@ -259,9 +290,31 @@ struct proc_dir_entry proc_root = {
>
> int pid_ns_prepare_proc(struct pid_namespace *ns)
> {
> + struct proc_fs_context *ctx;
> + struct fs_context *fc;
> struct vfsmount *mnt;
> + int ret;
> +
> + fc = vfs_new_fs_context(&proc_fs_type, NULL, 0,
> + FS_CONTEXT_FOR_KERNEL_MOUNT);
> + if (IS_ERR(fc))
> + return PTR_ERR(fc);
> +
> + ctx = container_of(fc, struct proc_fs_context, fc);
> + if (ctx->pid_ns != ns) {
> + put_pid_ns(ctx->pid_ns);
> + get_pid_ns(ns);
> + ctx->pid_ns = ns;
> + }
> +
> + ret = vfs_get_tree(fc);
> + if (ret < 0) {
> + put_fs_context(fc);
> + return ret;
> + }
>
> - mnt = kern_mount_data(&proc_fs_type, ns, 0);
> + mnt = vfs_create_mount(fc);
> + put_fs_context(fc);
> if (IS_ERR(mnt))
> return PTR_ERR(mnt);
>
-------------- next part --------------
#define _GNU_SOURCE
#include <sys/types.h>
#include <sched.h>
#include <unistd.h>
#include <stdio.h>
#include <sys/mount.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <grp.h>
#include <linux/limits.h>
#define NS_STACK_SIZE 4096
#define __stack_aligned__ __attribute__((aligned(16)))
/*
 * Stack and arguments handed to the clone()d child.
 *
 * All arguments should be above stack, because it grows down: the
 * address of stack_ptr (the end of stack[]) is what main() passes to
 * clone() as the child's stack, and pfd[] is laid out after it.
 * The member order therefore must not be changed.
 */
struct ns_exec_args {
char stack[NS_STACK_SIZE] __stack_aligned__; /* child's stack buffer */
char stack_ptr[0]; /* zero-length marker: first byte past stack[] */
int pfd[2]; /* copy of the pipe fds created by main(): [0] read, [1] write */
};
/*
 * Entry point of the clone()d child.
 *
 * Closes the write end of the pipe, blocks until one int has been read
 * from the read end, starts a new session, switches uid/gid to 0 and
 * drops supplementary groups, then tries to mount procfs on /mnt.
 *
 * _arg points to a struct ns_exec_args.
 * Returns 0 on success, -1 if the read, the id switch or the mount fails.
 */
static int ns_exec(void *_arg)
{
	struct ns_exec_args *ctx = _arg;
	ssize_t got;
	int token;

	/* Only the read side of the pipe is used in this function. */
	close(ctx->pfd[1]);

	/* The value itself is ignored; only a complete read matters. */
	got = read(ctx->pfd[0], &token, sizeof(token));
	if (got != sizeof(token))
		return -1;

	setsid();

	if (setuid(0) != 0 || setgid(0) != 0 || setgroups(0, NULL) != 0) {
		fprintf(stderr, "set*id failed: %m\n");
		return -1;
	}

	if (mount("proc", "/mnt", "proc",
		  MS_MGC_VAL | MS_NOSUID | MS_NOEXEC | MS_NODEV, NULL) != 0) {
		fprintf(stderr, "mount(/proc) failed: %m\n");
		return -1;
	}

	return 0;
}
#define UID_MAP "0 100000 100000\n100000 200000 50000"
#define GID_MAP "0 400000 50000\n50000 500000 100000"
/*
 * Write one id mapping for process pid.
 *
 * file is the name under /proc/<pid>/ ("uid_map" or "gid_map"), map is
 * the mapping text and len the number of bytes of it to write.
 * Prints a diagnostic to stderr and returns -1 if the file cannot be
 * opened or written; returns 0 otherwise.
 */
static int write_id_map(pid_t pid, const char *file, const char *map, size_t len)
{
	char pname[PATH_MAX];
	int fd;

	snprintf(pname, sizeof(pname), "/proc/%d/%s", pid, file);
	fd = open(pname, O_WRONLY);
	if (fd < 0) {
		fprintf(stderr, "open(%s): %m\n", pname);
		return -1;
	}
	if (write(fd, map, len) < 0) {
		/* Report before close() so that %m still sees write()'s errno. */
		fprintf(stderr, "write(%s): %m\n", map);
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}

/*
 * Reproducer driver.
 *
 * Clones a child (ns_exec) into new pid, mount, uts, net, ipc and user
 * namespaces, writes the child's uid_map and gid_map, releases the child
 * through the pipe and waits for it.
 *
 * Returns 0 if the child exited with status 0, 1 otherwise.
 */
int main(void)
{
	struct ns_exec_args args;
	/*
	 * Token sent over the pipe to release the child. The child ignores
	 * its value, but it is initialised so that no indeterminate stack
	 * bytes are passed to write().
	 */
	int ret = 0;
	int status, flags, pfd[2];
	pid_t pid;

	if (pipe(pfd))
		return 1;
	args.pfd[0] = pfd[0];
	args.pfd[1] = pfd[1];

	flags = CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWUTS |
		CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUSER | SIGCHLD;
	/* stack_ptr is the top of args.stack; the stack grows down from it. */
	pid = clone(ns_exec, args.stack_ptr, flags, &args);
	if (pid < 0) {
		fprintf(stderr, "clone() failed: %m\n");
		exit(1);
	}

	/*
	 * sizeof (not strlen) is kept on purpose: the byte counts match the
	 * original reproducer, which also writes the terminating NUL.
	 */
	if (write_id_map(pid, "uid_map", UID_MAP, sizeof(UID_MAP)))
		exit(1);
	if (write_id_map(pid, "gid_map", GID_MAP, sizeof(GID_MAP)))
		exit(1);

	/* Mappings are in place: let the child proceed. */
	if (write(pfd[1], &ret, sizeof(ret)) != sizeof(ret))
		return 1;

	if (waitpid(pid, &status, 0) != pid)
		return 1;
	if (status)
		return 1;
	return 0;
}
next prev parent reply other threads:[~2018-06-19 3:35 UTC|newest]
Thread overview: 82+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-04-19 13:31 [PATCH 00/24] VFS: Introduce filesystem context [ver #7] David Howells
2018-04-19 13:31 ` David Howells
2018-04-19 13:31 ` [PATCH 01/24] vfs: Undo an overly zealous MS_RDONLY -> SB_RDONLY conversion " David Howells
2018-04-19 13:31 ` David Howells
2018-04-19 13:31 ` [PATCH 02/24] VFS: Suppress MS_* flag defs within the kernel unless explicitly enabled " David Howells
2018-04-19 13:31 ` David Howells
2018-04-19 13:31 ` [PATCH 03/24] VFS: Introduce the structs and doc for a filesystem context " David Howells
2018-04-19 13:31 ` David Howells
2018-04-23 3:36 ` Randy Dunlap
2018-04-23 3:36 ` Randy Dunlap
2018-05-01 14:29 ` David Howells
2018-05-01 14:29 ` David Howells
2018-05-01 15:31 ` Randy Dunlap
2018-05-01 15:31 ` Randy Dunlap
2018-04-19 13:31 ` [PATCH 04/24] VFS: Add LSM hooks for " David Howells
2018-04-19 13:31 ` David Howells
2018-04-19 20:32 ` Paul Moore
2018-04-19 20:32 ` Paul Moore
2018-04-20 15:35 ` David Howells
2018-04-20 15:35 ` David Howells
2018-04-23 13:25 ` Stephen Smalley
2018-04-23 13:25 ` Stephen Smalley
2018-04-24 15:22 ` David Howells
2018-04-24 15:22 ` David Howells
2018-04-25 14:07 ` Stephen Smalley
2018-04-25 14:07 ` Stephen Smalley
2018-04-19 13:31 ` [PATCH 05/24] apparmor: Implement security hooks for the new mount API " David Howells
2018-04-19 13:31 ` David Howells
2018-05-04 0:10 ` John Johansen
2018-05-04 0:10 ` John Johansen
2018-05-11 12:20 ` David Howells
2018-05-11 12:20 ` David Howells
2018-05-11 12:20 ` David Howells
2018-04-19 13:31 ` [PATCH 06/24] tomoyo: " David Howells
2018-04-19 13:31 ` David Howells
2018-04-19 13:31 ` [PATCH 07/24] smack: Implement filesystem context security hooks " David Howells
2018-04-19 13:31 ` David Howells
2018-04-19 13:31 ` [PATCH 08/24] VFS: Require specification of size of mount data for internal mounts " David Howells
2018-04-19 13:32 ` [PATCH 09/24] VFS: Implement a filesystem superblock creation/configuration context " David Howells
2018-04-19 13:32 ` David Howells
2018-04-19 13:32 ` [PATCH 10/24] VFS: Remove unused code after filesystem context changes " David Howells
2018-04-19 13:32 ` David Howells
2018-04-19 13:32 ` [PATCH 11/24] procfs: Move proc_fill_super() to fs/proc/root.c " David Howells
2018-04-19 13:32 ` David Howells
2018-04-19 13:32 ` [PATCH 12/24] proc: Add fs_context support to procfs " David Howells
2018-04-19 13:32 ` David Howells
2018-06-19 3:34 ` Andrei Vagin [this message]
2018-06-19 3:34 ` [12/24] " Andrei Vagin
2018-06-26 6:13 ` Andrei Vagin
2018-06-26 6:13 ` Andrei Vagin
2018-06-26 7:27 ` Andrei Vagin
2018-06-26 7:27 ` Andrei Vagin
2018-06-26 8:57 ` David Howells
2018-06-26 8:57 ` David Howells
2018-06-28 5:50 ` Andrei Vagin
2018-06-28 5:50 ` Andrei Vagin
2018-06-28 5:50 ` Andrei Vagin
2018-06-28 5:50 ` Andrei Vagin
2018-04-19 13:32 ` [PATCH 13/24] ipc: Convert mqueue fs to fs_context " David Howells
2018-04-19 13:32 ` David Howells
2018-04-19 13:32 ` [PATCH 14/24] cpuset: Use " David Howells
2018-04-19 13:32 ` David Howells
2018-04-19 13:32 ` [PATCH 15/24] kernfs, sysfs, cgroup, intel_rdt: Support " David Howells
2018-04-19 13:32 ` David Howells
2018-04-19 13:33 ` [PATCH 16/24] hugetlbfs: Convert to " David Howells
2018-04-19 13:33 ` David Howells
2018-04-19 13:33 ` [PATCH 17/24] VFS: Remove kern_mount_data() " David Howells
2018-04-19 13:33 ` David Howells
2018-04-19 13:33 ` [PATCH 18/24] VFS: Implement fsopen() to prepare for a mount " David Howells
2018-04-19 13:33 ` David Howells
2018-04-19 13:33 ` [PATCH 19/24] VFS: Implement fsmount() to effect a pre-configured " David Howells
2018-04-19 13:33 ` David Howells
2018-04-19 13:33 ` [PATCH 20/24] afs: Fix server record deletion " David Howells
2018-04-19 13:33 ` David Howells
2018-04-19 13:33 ` [PATCH 21/24] net: Export get_proc_net() " David Howells
2018-04-19 13:33 ` David Howells
2018-04-19 13:33 ` [PATCH 22/24] afs: Add fs_context support " David Howells
2018-04-19 13:33 ` David Howells
2018-04-19 13:33 ` [PATCH 23/24] afs: Implement namespacing " David Howells
2018-04-19 13:33 ` David Howells
2018-04-19 13:33 ` [PATCH 24/24] afs: Use fs_context to pass parameters over automount " David Howells
2018-04-19 13:33 ` David Howells
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180619033450.GA11639@outlook.office365.com \
--to=avagin@virtuozzo.com \
--cc=dhowells@redhat.com \
--cc=linux-afs@lists.infradead.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-nfs@vger.kernel.org \
--cc=linux-security-module@vger.kernel.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.