linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: viro@zeniv.linux.org.uk
Cc: dhowells@redhat.com, linux-api@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, torvalds@linux-foundation.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH 25/32] vfs: syscall: Add fsmount() to create a mount for a superblock [ver #9]
Date: Tue, 10 Jul 2018 23:44:16 +0100	[thread overview]
Message-ID: <153126265618.14533.14836767135315368046.stgit@warthog.procyon.org.uk> (raw)
In-Reply-To: <153126248868.14533.9751473662727327569.stgit@warthog.procyon.org.uk>

Provide a system call by which a filesystem opened with fsopen() and
configured by a series of writes can be mounted:

	int ret = fsmount(int fsfd, unsigned int flags,
			  unsigned int ms_flags);

where fsfd is the file descriptor returned by fsopen().  flags can be 0 or
FSMOUNT_CLOEXEC.  ms_flags is a bitwise-OR of the following flags:

	MS_RDONLY
	MS_NOSUID
	MS_NODEV
	MS_NOEXEC
	MS_NOATIME
	MS_NODIRATIME
	MS_RELATIME
	MS_STRICTATIME

	MS_UNBINDABLE
	MS_PRIVATE
	MS_SLAVE
	MS_SHARED

In the event that fsmount() fails, it may be possible to get an error
message by calling read() on fsfd.  If no message is available, ENODATA
will be reported.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: linux-api@vger.kernel.org
---

 arch/x86/entry/syscalls/syscall_32.tbl |    1 
 arch/x86/entry/syscalls/syscall_64.tbl |    1 
 fs/namespace.c                         |  140 +++++++++++++++++++++++++++++++-
 include/linux/fs_context.h             |    2 
 include/linux/syscalls.h               |    1 
 include/uapi/linux/fs.h                |    2 
 6 files changed, 143 insertions(+), 4 deletions(-)

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 1647fefd2969..537572098032 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -401,3 +401,4 @@
 387	i386	open_tree		sys_open_tree			__ia32_sys_open_tree
 388	i386	move_mount		sys_move_mount			__ia32_sys_move_mount
 389	i386	fsopen			sys_fsopen			__ia32_sys_fsopen
+390	i386	fsmount			sys_fsmount			__ia32_sys_fsmount
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 235d33dbccb2..47abbc2a2bbe 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -346,6 +346,7 @@
 335	common	open_tree		__x64_sys_open_tree
 336	common	move_mount		__x64_sys_move_mount
 337	common	fsopen			__x64_sys_fsopen
+338	common	fsmount			__x64_sys_fsmount
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/fs/namespace.c b/fs/namespace.c
index d5a4d9351a17..a6fbfba8e448 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2503,7 +2503,7 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
 
 	attached = mnt_has_parent(old);
 	/*
-	 * We need to allow open_tree(OPEN_TREE_CLONE) followed by
+	 * We need to allow open_tree(OPEN_TREE_CLONE) or fsmount() followed by
 	 * move_mount(), but mustn't allow "/" to be moved.
 	 */
 	if (old->mnt_ns && !attached)
@@ -3347,9 +3347,141 @@ struct vfsmount *kern_mount(struct file_system_type *type)
 EXPORT_SYMBOL_GPL(kern_mount);
 
 /*
- * Move a mount from one place to another.
- * In combination with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be
- * used to copy a mount subtree.
+ * Create a kernel mount representation for a new, prepared superblock
+ * (specified by fs_fd) and attach to an open_tree-like file descriptor.
+ */
+SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags, unsigned int, ms_flags)
+{
+	struct fs_context *fc;
+	struct file *file;
+	struct path newmount;
+	struct fd f;
+	unsigned int mnt_flags = 0;
+	long ret;
+
+	if (!may_mount())
+		return -EPERM;
+
+	if ((flags & ~(FSMOUNT_CLOEXEC)) != 0)
+		return -EINVAL;
+
+	if (ms_flags & ~(MS_RDONLY | MS_NOSUID | MS_NODEV | MS_NOEXEC |
+			 MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
+			 MS_STRICTATIME))
+		return -EINVAL;
+
+	if (ms_flags & MS_RDONLY)
+		mnt_flags |= MNT_READONLY;
+	if (ms_flags & MS_NOSUID)
+		mnt_flags |= MNT_NOSUID;
+	if (ms_flags & MS_NODEV)
+		mnt_flags |= MNT_NODEV;
+	if (ms_flags & MS_NOEXEC)
+		mnt_flags |= MNT_NOEXEC;
+	if (ms_flags & MS_NODIRATIME)
+		mnt_flags |= MNT_NODIRATIME;
+
+	if (ms_flags & MS_STRICTATIME) {
+		if (ms_flags & MS_NOATIME)
+			return -EINVAL;
+	} else if (ms_flags & MS_NOATIME) {
+		mnt_flags |= MNT_NOATIME;
+	} else {
+		mnt_flags |= MNT_RELATIME;
+	}
+
+	f = fdget(fs_fd);
+	if (!f.file)
+		return -EBADF;
+
+	ret = -EINVAL;
+	if (f.file->f_op != &fscontext_fs_fops)
+		goto err_fsfd;
+
+	fc = f.file->private_data;
+
+	/* There must be a valid superblock or we can't mount it */
+	ret = -EINVAL;
+	if (!fc->root)
+		goto err_fsfd;
+
+	ret = -EPERM;
+	if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) {
+		pr_warn("VFS: Mount too revealing\n");
+		goto err_fsfd;
+	}
+
+	ret = mutex_lock_interruptible(&fc->uapi_mutex);
+	if (ret < 0)
+		goto err_fsfd;
+
+	ret = -EBUSY;
+	if (fc->phase != FS_CONTEXT_AWAITING_MOUNT)
+		goto err_unlock;
+
+	ret = -EPERM;
+	if ((fc->sb_flags & SB_MANDLOCK) && !may_mandlock())
+		goto err_unlock;
+
+	newmount.mnt = vfs_create_mount(fc, mnt_flags);
+	if (IS_ERR(newmount.mnt)) {
+		ret = PTR_ERR(newmount.mnt);
+		goto err_unlock;
+	}
+	newmount.dentry = dget(fc->root);
+
+	/* We've done the mount bit - now move the file context into more or
+	 * less the same state as if we'd done an fspick().  We don't want to
+	 * do any memory allocation or anything like that at this point as we
+	 * don't want to have to handle any errors incurred.
+	 */
+	if (fc->ops && fc->ops->free)
+		fc->ops->free(fc);
+	fc->fs_private = NULL;
+	fc->s_fs_info = NULL;
+	fc->sb_flags = 0;
+	fc->sloppy = false;
+	fc->silent = false;
+	security_fs_context_free(fc);
+	fc->security = NULL;
+	kfree(fc->subtype);
+	fc->subtype = NULL;
+	kfree(fc->source);
+	fc->source = NULL;
+
+	fc->purpose = FS_CONTEXT_FOR_RECONFIGURE;
+	fc->phase = FS_CONTEXT_AWAITING_RECONF;
+
+	/* Attach to an apparent O_PATH fd with a note that we need to unmount
+	 * it, not just simply put it.
+	 */
+	file = dentry_open(&newmount, O_PATH, fc->cred);
+	if (IS_ERR(file)) {
+		ret = PTR_ERR(file);
+		goto err_path;
+	}
+	file->f_mode |= FMODE_NEED_UNMOUNT;
+
+	ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0);
+	if (ret >= 0)
+		fd_install(ret, file);
+	else
+		fput(file);
+
+err_path:
+	path_put(&newmount);
+err_unlock:
+	mutex_unlock(&fc->uapi_mutex);
+err_fsfd:
+	fdput(f);
+	return ret;
+}
+
+/*
+ * Move a mount from one place to another.  In combination with
+ * fsopen()/fsmount() this is used to install a new mount and in combination
+ * with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be used to copy
+ * a mount subtree.
  *
  * Note the flags value is a combination of MOVE_MOUNT_* flags.
  */
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index 387f25d7acc4..2cde97490c6f 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -115,4 +115,6 @@ extern int vfs_get_super(struct fs_context *fc,
 			 int (*fill_super)(struct super_block *sb,
 					   struct fs_context *fc));
 
+extern const struct file_operations fscontext_fs_fops;
+
 #endif /* _LINUX_FS_CONTEXT_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index ad6c7ff33c01..917fe10e1030 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -905,6 +905,7 @@ asmlinkage long sys_move_mount(int from_dfd, const char __user *from_path,
 			       int to_dfd, const char __user *to_path,
 			       unsigned int ms_flags);
 asmlinkage long sys_fsopen(const char __user *fs_name, unsigned int flags);
+asmlinkage long sys_fsmount(int fs_fd, unsigned int flags, unsigned int ms_flags);
 
 /*
  * Architecture-specific system calls
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index f8818e6cddd6..30a2fb85c4b7 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -349,4 +349,6 @@ typedef int __bitwise __kernel_rwf_t;
  */
 #define FSOPEN_CLOEXEC		0x00000001
 
+#define FSMOUNT_CLOEXEC		0x00000001
+
 #endif /* _UAPI_LINUX_FS_H */


  parent reply	other threads:[~2018-07-10 22:44 UTC|newest]

Thread overview: 113+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-07-10 22:41 [PATCH 00/32] VFS: Introduce filesystem context [ver #9] David Howells
2018-07-10 22:41 ` [PATCH 01/32] vfs: syscall: Add open_tree(2) to reference or clone a mount " David Howells
2018-07-10 22:41 ` [PATCH 02/32] vfs: syscall: Add move_mount(2) to move mounts around " David Howells
2018-07-10 22:41 ` [PATCH 03/32] teach move_mount(2) to work with OPEN_TREE_CLONE " David Howells
2018-07-10 22:41 ` [PATCH 04/32] vfs: Suppress MS_* flag defs within the kernel unless explicitly enabled " David Howells
2018-07-10 22:42 ` [PATCH 05/32] vfs: Introduce the basic header for the new mount API's filesystem context " David Howells
2018-07-10 22:42 ` [PATCH 06/32] vfs: Add LSM hooks for the new mount API " David Howells
2018-07-10 22:42 ` [PATCH 07/32] selinux: Implement the new mount API LSM hooks " David Howells
2018-07-11 14:08   ` Stephen Smalley
2018-07-10 22:42 ` [PATCH 08/32] smack: Implement filesystem context security " David Howells
2018-07-10 23:13   ` Casey Schaufler
2018-07-10 23:19   ` David Howells
2018-07-10 23:28     ` Casey Schaufler
2018-07-10 22:42 ` [PATCH 09/32] apparmor: Implement security hooks for the new mount API " David Howells
2018-07-10 22:42 ` [PATCH 10/32] tomoyo: " David Howells
2018-07-10 23:34   ` Tetsuo Handa
2018-07-10 22:42 ` [PATCH 11/32] vfs: Require specification of size of mount data for internal mounts " David Howells
2018-07-10 22:51   ` Linus Torvalds
2018-07-10 22:42 ` [PATCH 12/32] vfs: Separate changing mount flags full remount " David Howells
2018-07-10 22:42 ` [PATCH 13/32] vfs: Implement a filesystem superblock creation/configuration context " David Howells
2018-07-10 22:43 ` [PATCH 14/32] vfs: Remove unused code after filesystem context changes " David Howells
2018-07-10 22:43 ` [PATCH 15/32] procfs: Move proc_fill_super() to fs/proc/root.c " David Howells
2018-07-10 22:43 ` [PATCH 16/32] proc: Add fs_context support to procfs " David Howells
2018-07-10 22:43 ` [PATCH 17/32] ipc: Convert mqueue fs to fs_context " David Howells
2018-07-10 22:43 ` [PATCH 18/32] cpuset: Use " David Howells
2018-07-10 22:43 ` [PATCH 19/32] kernfs, sysfs, cgroup, intel_rdt: Support " David Howells
2018-07-10 22:43 ` [PATCH 20/32] hugetlbfs: Convert to " David Howells
2018-07-10 22:43 ` [PATCH 21/32] vfs: Remove kern_mount_data() " David Howells
2018-07-10 22:43 ` [PATCH 22/32] vfs: Provide documentation for new mount API " David Howells
2018-07-13  1:37   ` Randy Dunlap
2018-07-13  9:45   ` David Howells
2018-07-10 22:44 ` [PATCH 23/32] Make anon_inodes unconditional " David Howells
2018-07-10 22:44 ` [PATCH 24/32] vfs: syscall: Add fsopen() to prepare for superblock creation " David Howells
2018-07-10 23:59   ` Andy Lutomirski
2018-07-11  1:05     ` Linus Torvalds
2018-07-11  1:15       ` Al Viro
2018-07-11  1:33         ` Andy Lutomirski
2018-07-11  1:48         ` Linus Torvalds
2018-07-11  8:43         ` David Howells
2018-07-11  1:14     ` Jann Horn
2018-07-11  1:16       ` Al Viro
2018-07-11  8:42     ` David Howells
2018-07-11 16:03       ` Linus Torvalds
2018-07-11  7:22   ` David Howells
2018-07-11 16:38     ` Eric Biggers
2018-07-11 17:06     ` Andy Lutomirski
2018-07-12 14:54     ` David Howells
2018-07-12 15:50       ` Linus Torvalds
2018-07-12 16:00         ` Al Viro
2018-07-12 16:07           ` Linus Torvalds
2018-07-12 16:31             ` Al Viro
2018-07-12 16:39               ` Linus Torvalds
2018-07-12 17:14                 ` Linus Torvalds
2018-07-12 17:44                   ` Al Viro
2018-07-12 17:54                     ` Linus Torvalds
2018-07-12 17:52                 ` Al Viro
2018-07-12 16:23       ` Andy Lutomirski
2018-07-12 16:31         ` Linus Torvalds
2018-07-12 16:41         ` Al Viro
2018-07-12 16:58         ` Al Viro
2018-07-12 17:54           ` Andy Lutomirski
2018-07-12 20:23       ` David Howells
2018-07-12 20:25         ` Andy Lutomirski
2018-07-12 20:34         ` Linus Torvalds
2018-07-12 20:36           ` Linus Torvalds
2018-07-12 21:26         ` David Howells
2018-07-12 21:40           ` Linus Torvalds
2018-07-12 22:32           ` Theodore Y. Ts'o
2018-07-12 22:54           ` David Howells
2018-07-12 23:21             ` Andy Lutomirski
2018-07-12 23:23             ` Jann Horn
2018-07-12 23:33               ` Jann Horn
2018-07-12 23:35             ` David Howells
2018-07-12 23:50               ` Andy Lutomirski
2018-07-13  0:03               ` David Howells
2018-07-13  0:24                 ` Andy Lutomirski
2018-07-13  7:30                 ` David Howells
2018-07-19  1:30                   ` Eric W. Biederman
2018-07-13  2:35             ` Theodore Y. Ts'o
2018-07-12 21:00       ` David Howells
2018-07-12 21:29         ` Linus Torvalds
2018-07-13 13:27         ` David Howells
2018-07-13 15:01           ` Andy Lutomirski
2018-07-13 15:40           ` David Howells
2018-07-13 17:14             ` Andy Lutomirski
2018-07-17  9:40           ` David Howells
2018-07-11 15:51   ` Jonathan Corbet
2018-07-11 16:18   ` David Howells
2018-07-12 17:15   ` Greg KH
2018-07-12 17:20     ` Al Viro
2018-07-12 18:03       ` Greg KH
2018-07-12 18:30         ` Andy Lutomirski
2018-07-12 18:34           ` Al Viro
2018-07-12 18:35             ` Al Viro
2018-07-12 19:08           ` Greg KH
2018-07-10 22:44 ` David Howells [this message]
2018-07-10 22:44 ` [PATCH 26/32] vfs: syscall: Add fspick() to select a superblock for reconfiguration " David Howells
2018-07-10 22:44 ` [PATCH 27/32] vfs: Implement logging through fs_context " David Howells
2018-07-10 22:44 ` [PATCH 28/32] vfs: Add some logging to the core users of the fs_context log " David Howells
2018-07-10 22:44 ` [PATCH 29/32] afs: Add fs_context support " David Howells
2018-07-10 22:44 ` [PATCH 30/32] afs: Use fs_context to pass parameters over automount " David Howells
2018-07-10 22:44 ` [PATCH 31/32] vfs: syscall: Add fsinfo() to query filesystem information " David Howells
2018-07-10 22:45 ` [PATCH 32/32] afs: Add fsinfo support " David Howells
2018-07-10 22:52 ` [MANPAGE PATCH] Add manpages for move_mount(2) and open_tree(2) David Howells
2019-10-09  9:51   ` Michael Kerrisk (man-pages)
2018-07-10 22:54 ` [MANPAGE PATCH] Add manpage for fsopen(2), fspick(2) and fsmount(2) David Howells
2019-10-09  9:52   ` Michael Kerrisk (man-pages)
2018-07-10 22:55 ` [MANPAGE PATCH] Add manpage for fsinfo(2) David Howells
2019-10-09  9:52   ` Michael Kerrisk (man-pages)
2019-10-09 12:02   ` David Howells
2018-07-10 23:01 ` [PATCH 00/32] VFS: Introduce filesystem context [ver #9] Linus Torvalds
2018-07-12  0:46 ` David Howells
2018-07-18 21:29 ` Getting rid of the usage of write() -- was " David Howells

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=153126265618.14533.14836767135315368046.stgit@warthog.procyon.org.uk \
    --to=dhowells@redhat.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).