linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 1/2] fs: make all new mount api fds cloexec by default
@ 2019-05-09 15:58 Christian Brauner
  2019-05-09 15:58 ` [PATCH v2 2/2] fsopen: use square brackets around "fscontext" Christian Brauner
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Christian Brauner @ 2019-05-09 15:58 UTC (permalink / raw)
  To: viro, dhowells, linux-fsdevel, linux-kernel; +Cc: Christian Brauner

This makes all file descriptors returned from new syscalls of the new mount
api cloexec by default.

From a userspace perspective it is rarely the case that fds are supposed to
be inherited across exec. Having them not cloexec by default forces
userspace to remember to pass the <SPECIFIC>_CLOEXEC flag along or to
invoke fcntl() on the fd to prevent leaking it. And leaking the fd is a
much bigger issue than forgetting to remove the cloexec flag and failing to
inherit the fd.
For old fd types we can't break userspace. But for new ones we should
whenever reasonable make them cloexec by default (Examples of this policy
are the new seccomp notify fds and also pidfds.). If userspace wants to
inherit fds across exec they can remove the O_CLOEXEC flag and so opt in to
inheritance explicitly.

This patch also has the advantage that we can get rid of all the special
flags per file descriptor type for the new mount api. In total this lets us
remove 4 flags:
- FSMOUNT_CLOEXEC
- FSOPEN_CLOEXEC
- FSPICK_CLOEXEC
- OPEN_TREE_CLOEXEC

Signed-off-by: Christian Brauner <christian@brauner.io>
---
v1:
- David Howells <dhowells@redhat.com>:
  - ensure that only O_CLOEXEC is passed so that fd allocation doesn't
    break when new flags are added to a syscall
v2:
- reworded commit message
---
 fs/fsopen.c                | 13 ++++++-------
 fs/namespace.c             | 11 ++++-------
 include/uapi/linux/mount.h | 18 +++---------------
 3 files changed, 13 insertions(+), 29 deletions(-)

diff --git a/fs/fsopen.c b/fs/fsopen.c
index 3bb9c0c8cbcc..a38fa8c616cf 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -88,12 +88,12 @@ const struct file_operations fscontext_fops = {
 /*
  * Attach a filesystem context to a file and an fd.
  */
-static int fscontext_create_fd(struct fs_context *fc, unsigned int o_flags)
+static int fscontext_create_fd(struct fs_context *fc)
 {
 	int fd;
 
 	fd = anon_inode_getfd("fscontext", &fscontext_fops, fc,
-			      O_RDWR | o_flags);
+			      O_RDWR | O_CLOEXEC);
 	if (fd < 0)
 		put_fs_context(fc);
 	return fd;
@@ -126,7 +126,7 @@ SYSCALL_DEFINE2(fsopen, const char __user *, _fs_name, unsigned int, flags)
 	if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (flags & ~FSOPEN_CLOEXEC)
+	if (flags)
 		return -EINVAL;
 
 	fs_name = strndup_user(_fs_name, PAGE_SIZE);
@@ -149,7 +149,7 @@ SYSCALL_DEFINE2(fsopen, const char __user *, _fs_name, unsigned int, flags)
 	if (ret < 0)
 		goto err_fc;
 
-	return fscontext_create_fd(fc, flags & FSOPEN_CLOEXEC ? O_CLOEXEC : 0);
+	return fscontext_create_fd(fc);
 
 err_fc:
 	put_fs_context(fc);
@@ -169,8 +169,7 @@ SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags
 	if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if ((flags & ~(FSPICK_CLOEXEC |
-		       FSPICK_SYMLINK_NOFOLLOW |
+	if ((flags & ~(FSPICK_SYMLINK_NOFOLLOW |
 		       FSPICK_NO_AUTOMOUNT |
 		       FSPICK_EMPTY_PATH)) != 0)
 		return -EINVAL;
@@ -203,7 +202,7 @@ SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags
 		goto err_fc;
 
 	path_put(&target);
-	return fscontext_create_fd(fc, flags & FSPICK_CLOEXEC ? O_CLOEXEC : 0);
+	return fscontext_create_fd(fc);
 
 err_fc:
 	put_fs_context(fc);
diff --git a/fs/namespace.c b/fs/namespace.c
index 3357c3d65475..b024e2a05384 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2369,11 +2369,8 @@ SYSCALL_DEFINE3(open_tree, int, dfd, const char *, filename, unsigned, flags)
 	int error;
 	int fd;
 
-	BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC);
-
 	if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE |
-		      AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE |
-		      OPEN_TREE_CLOEXEC))
+		      AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE))
 		return -EINVAL;
 
 	if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE)
@@ -2389,7 +2386,7 @@ SYSCALL_DEFINE3(open_tree, int, dfd, const char *, filename, unsigned, flags)
 	if (detached && !may_mount())
 		return -EPERM;
 
-	fd = get_unused_fd_flags(flags & O_CLOEXEC);
+	fd = get_unused_fd_flags(O_CLOEXEC);
 	if (fd < 0)
 		return fd;
 
@@ -3352,7 +3349,7 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
 	if (!may_mount())
 		return -EPERM;
 
-	if ((flags & ~(FSMOUNT_CLOEXEC)) != 0)
+	if (flags)
 		return -EINVAL;
 
 	if (attr_flags & ~(MOUNT_ATTR_RDONLY |
@@ -3457,7 +3454,7 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
 	}
 	file->f_mode |= FMODE_NEED_UNMOUNT;
 
-	ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0);
+	ret = get_unused_fd_flags(O_CLOEXEC);
 	if (ret >= 0)
 		fd_install(ret, file);
 	else
diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index 96a0240f23fe..c688e4ac843b 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -59,7 +59,6 @@
  * open_tree() flags.
  */
 #define OPEN_TREE_CLONE		1		/* Clone the target tree and attach the clone */
-#define OPEN_TREE_CLOEXEC	O_CLOEXEC	/* Close the file on execve() */
 
 /*
  * move_mount() flags.
@@ -72,18 +71,12 @@
 #define MOVE_MOUNT_T_EMPTY_PATH		0x00000040 /* Empty to path permitted */
 #define MOVE_MOUNT__MASK		0x00000077
 
-/*
- * fsopen() flags.
- */
-#define FSOPEN_CLOEXEC		0x00000001
-
 /*
  * fspick() flags.
  */
-#define FSPICK_CLOEXEC		0x00000001
-#define FSPICK_SYMLINK_NOFOLLOW	0x00000002
-#define FSPICK_NO_AUTOMOUNT	0x00000004
-#define FSPICK_EMPTY_PATH	0x00000008
+#define FSPICK_SYMLINK_NOFOLLOW	0x00000001
+#define FSPICK_NO_AUTOMOUNT	0x00000002
+#define FSPICK_EMPTY_PATH	0x00000004
 
 /*
  * The type of fsconfig() call made.
@@ -99,11 +92,6 @@ enum fsconfig_command {
 	FSCONFIG_CMD_RECONFIGURE = 7,	/* Invoke superblock reconfiguration */
 };
 
-/*
- * fsmount() flags.
- */
-#define FSMOUNT_CLOEXEC		0x00000001
-
 /*
  * Mount attributes.
  */
-- 
2.21.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v2 2/2] fsopen: use square brackets around "fscontext"
  2019-05-09 15:58 [PATCH v2 1/2] fs: make all new mount api fds cloexec by default Christian Brauner
@ 2019-05-09 15:58 ` Christian Brauner
  2019-05-11  8:40 ` [PATCH v2 1/2] fs: make all new mount api fds cloexec by default David Howells
  2019-05-11  8:40 ` [PATCH v2 2/2] fsopen: use square brackets around "fscontext" David Howells
  2 siblings, 0 replies; 4+ messages in thread
From: Christian Brauner @ 2019-05-09 15:58 UTC (permalink / raw)
  To: viro, dhowells, linux-fsdevel, linux-kernel; +Cc: Christian Brauner

Make the name of the anon inode fd "[fscontext]" instead of "fscontext".
This is minor but most core-kernel anon inode fds already carry square
brackets around their name:

[eventfd]
[eventpoll]
[fanotify]
[io_uring]
[pidfd]
[signalfd]
[timerfd]
[userfaultfd]

For the sake of consistency lets do the same for the fscontext anon inode
fd that comes with the new mount api.

Signed-off-by: Christian Brauner <christian@brauner.io>
---
v1: patch not present
v2:
- David Howells <dhowells@redhat.com>:
  - remove unneeded reference from commit message and split paragraph to
    place list of anon inode fds in between
---
 fs/fsopen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/fsopen.c b/fs/fsopen.c
index a38fa8c616cf..83d0d2001bb2 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -92,7 +92,7 @@ static int fscontext_create_fd(struct fs_context *fc)
 {
 	int fd;
 
-	fd = anon_inode_getfd("fscontext", &fscontext_fops, fc,
+	fd = anon_inode_getfd("[fscontext]", &fscontext_fops, fc,
 			      O_RDWR | O_CLOEXEC);
 	if (fd < 0)
 		put_fs_context(fc);
-- 
2.21.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v2 1/2] fs: make all new mount api fds cloexec by default
  2019-05-09 15:58 [PATCH v2 1/2] fs: make all new mount api fds cloexec by default Christian Brauner
  2019-05-09 15:58 ` [PATCH v2 2/2] fsopen: use square brackets around "fscontext" Christian Brauner
@ 2019-05-11  8:40 ` David Howells
  2019-05-11  8:40 ` [PATCH v2 2/2] fsopen: use square brackets around "fscontext" David Howells
  2 siblings, 0 replies; 4+ messages in thread
From: David Howells @ 2019-05-11  8:40 UTC (permalink / raw)
  To: Christian Brauner; +Cc: dhowells, viro, linux-fsdevel, linux-kernel

Christian Brauner <christian@brauner.io> wrote:

> This makes all file descriptors returned from new syscalls of the new mount
> api cloexec by default.
> 
> From a userspace perspective it is rarely the case that fds are supposed to
> be inherited across exec. Having them not cloexec by default forces
> userspace to remember to pass the <SPECIFIC>_CLOEXEC flag along or to
> invoke fcntl() on the fd to prevent leaking it. And leaking the fd is a
> much bigger issue than forgetting to remove the cloexec flag and failing to
> inherit the fd.
> For old fd types we can't break userspace. But for new ones we should
> whenever reasonable make them cloexec by default (Examples of this policy
> are the new seccomp notify fds and also pidfds.). If userspace wants to
> inherit fds across exec they can remove the O_CLOEXEC flag and so opt in to
> inheritance explicitly.
> 
> This patch also has the advantage that we can get rid of all the special
> flags per file descriptor type for the new mount api. In total this lets us
> remove 4 flags:
> - FSMOUNT_CLOEXEC
> - FSOPEN_CLOEXEC
> - FSPICK_CLOEXEC
> - OPEN_TREE_CLOEXEC
> 
> Signed-off-by: Christian Brauner <christian@brauner.io>

Fine by me.

Reviewed-by: David Howells <dhowells@redhat.com>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2 2/2] fsopen: use square brackets around "fscontext"
  2019-05-09 15:58 [PATCH v2 1/2] fs: make all new mount api fds cloexec by default Christian Brauner
  2019-05-09 15:58 ` [PATCH v2 2/2] fsopen: use square brackets around "fscontext" Christian Brauner
  2019-05-11  8:40 ` [PATCH v2 1/2] fs: make all new mount api fds cloexec by default David Howells
@ 2019-05-11  8:40 ` David Howells
  2 siblings, 0 replies; 4+ messages in thread
From: David Howells @ 2019-05-11  8:40 UTC (permalink / raw)
  To: Christian Brauner; +Cc: dhowells, viro, linux-fsdevel, linux-kernel

Christian Brauner <christian@brauner.io> wrote:

> Make the name of the anon inode fd "[fscontext]" instead of "fscontext".
> This is minor but most core-kernel anon inode fds already carry square
> brackets around their name:
> 
> [eventfd]
> [eventpoll]
> [fanotify]
> [io_uring]
> [pidfd]
> [signalfd]
> [timerfd]
> [userfaultfd]
> 
> For the sake of consistency lets do the same for the fscontext anon inode
> fd that comes with the new mount api.
> 
> Signed-off-by: Christian Brauner <christian@brauner.io>

Reviewed-by: David Howells <dhowells@redhat.com>

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-05-11  8:40 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-09 15:58 [PATCH v2 1/2] fs: make all new mount api fds cloexec by default Christian Brauner
2019-05-09 15:58 ` [PATCH v2 2/2] fsopen: use square brackets around "fscontext" Christian Brauner
2019-05-11  8:40 ` [PATCH v2 1/2] fs: make all new mount api fds cloexec by default David Howells
2019-05-11  8:40 ` [PATCH v2 2/2] fsopen: use square brackets around "fscontext" David Howells

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).