Linux-MIPS Archive on lore.kernel.org
 help / color / Atom feed
From: Aleksa Sarai <cyphar@cyphar.com>
To: Al Viro <viro@zeniv.linux.org.uk>,
	Jeff Layton <jlayton@kernel.org>,
	"J. Bruce Fields" <bfields@fieldses.org>,
	Arnd Bergmann <arnd@arndb.de>,
	David Howells <dhowells@redhat.com>,
	Shuah Khan <shuah@kernel.org>,
	Shuah Khan <skhan@linuxfoundation.org>,
	Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Christian Brauner <christian@brauner.io>
Cc: Aleksa Sarai <cyphar@cyphar.com>,
	Eric Biederman <ebiederm@xmission.com>,
	Andy Lutomirski <luto@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Alexei Starovoitov <ast@kernel.org>,
	Kees Cook <keescook@chromium.org>, Jann Horn <jannh@google.com>,
	Tycho Andersen <tycho@tycho.ws>,
	David Drysdale <drysdale@google.com>,
	Chanho Min <chanho.min@lge.com>, Oleg Nesterov <oleg@redhat.com>,
	Rasmus Villemoes <linux@rasmusvillemoes.dk>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Jiri Olsa <jolsa@redhat.com>, Namhyung Kim <namhyung@kernel.org>,
	Aleksa Sarai <asarai@suse.de>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	containers@lists.linux-foundation.org,
	linux-alpha@vger.kernel.org, linux-api@vger.kernel.org,
	linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org,
	linux-fsdevel@vger.kernel.org, linux-ia64@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org,
	linux-m68k@lists.linux-m68k.org, linux-mips@vger.kernel.org,
	linux-parisc@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	linux-s390@vger.kernel.org, linux-sh@vger.kernel.org,
	linux-xtensa@linux-xtensa.org, sparclinux@vger.kernel.org
Subject: [PATCH v12 09/12] namei: LOOKUP_IN_ROOT: chroot-like path resolution
Date: Thu,  5 Sep 2019 06:19:30 +1000
Message-ID: <20190904201933.10736-10-cyphar@cyphar.com> (raw)
In-Reply-To: <20190904201933.10736-1-cyphar@cyphar.com>

The primary motivation for the need for this flag is container runtimes
which have to interact with malicious root filesystems in the host
namespaces. One of the first requirements for a container runtime to be
secure against a malicious rootfs is that they correctly scope symlinks
(that is, they should be scoped as though they are chroot(2)ed into the
container's rootfs) and ".."-style paths[*]. The already-existing
LOOKUP_NO_XDEV and LOOKUP_NO_MAGICLINKS help defend against other
potential attacks in a malicious rootfs scenario.

Currently most container runtimes try to do this resolution in
userspace[1], causing many potential race conditions. In addition, the
"obvious" alternative (actually performing a {ch,pivot_}root(2))
requires a fork+exec (for some runtimes) which is *very* costly if
necessary for every filesystem operation involving a container.

[*] At the moment, ".." and magic-link jumping are disallowed for the
    same reason it is disabled for LOOKUP_BENEATH -- currently it is not
    safe to allow it. Future patches may enable it unconditionally once
    we have resolved the possible races (for "..") and semantics (for
    magic-link jumping).

The most significant *at(2) semantic change with LOOKUP_IN_ROOT is that
absolute pathnames no longer cause the dirfd to be ignored completely.

The rationale is that LOOKUP_IN_ROOT must necessarily chroot-scope
symlinks with absolute paths to dirfd, and so doing it for the base path
seems to be the most consistent behaviour (and also avoids foot-gunning
users who want to scope paths that are absolute).

[1]: https://github.com/cyphar/filepath-securejoin

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
---
 fs/namei.c            | 41 +++++++++++++++++++++++++++++++----------
 include/linux/namei.h |  1 +
 2 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 2e18ce5a313e..0352d275bd13 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -676,7 +676,7 @@ static int unlazy_walk(struct nameidata *nd)
 		goto out1;
 	if (!nd->root.mnt) {
 		/* Restart from path_init() if nd->root was cleared. */
-		if (nd->flags & LOOKUP_BENEATH)
+		if (nd->flags & (LOOKUP_BENEATH | LOOKUP_IN_ROOT))
 			goto out;
 	} else if (!(nd->flags & LOOKUP_ROOT)) {
 		if (unlikely(!legitimize_path(nd, &nd->root, nd->root_seq)))
@@ -809,10 +809,18 @@ static int complete_walk(struct nameidata *nd)
 	return status;
 }
 
-static void set_root(struct nameidata *nd)
+static int set_root(struct nameidata *nd)
 {
 	struct fs_struct *fs = current->fs;
 
+	/*
+	 * Jumping to the real root as part of LOOKUP_IN_ROOT is a BUG in namei,
+	 * but we still have to ensure it doesn't happen because it will cause a
+	 * breakout from the dirfd.
+	 */
+	if (WARN_ON(nd->flags & LOOKUP_IN_ROOT))
+		return -ENOTRECOVERABLE;
+
 	if (nd->flags & LOOKUP_RCU) {
 		unsigned seq;
 
@@ -824,6 +832,7 @@ static void set_root(struct nameidata *nd)
 	} else {
 		get_fs_root(fs, &nd->root);
 	}
+	return 0;
 }
 
 static void path_put_conditional(struct path *path, struct nameidata *nd)
@@ -854,6 +863,11 @@ static int nd_jump_root(struct nameidata *nd)
 		if (nd->path.mnt != NULL && nd->path.mnt != nd->root.mnt)
 			return -EXDEV;
 	}
+	if (!nd->root.mnt) {
+		int error = set_root(nd);
+		if (error)
+			return error;
+	}
 	if (nd->flags & LOOKUP_RCU) {
 		struct dentry *d;
 		nd->path = nd->root;
@@ -1100,15 +1114,13 @@ const char *get_link(struct nameidata *nd)
 			if (unlikely(nd->flags & LOOKUP_NO_MAGICLINKS))
 				return ERR_PTR(-ELOOP);
 			/* Not currently safe. */
-			if (unlikely(nd->flags & LOOKUP_BENEATH))
+			if (unlikely(nd->flags & (LOOKUP_BENEATH | LOOKUP_IN_ROOT)))
 				return ERR_PTR(-EXDEV);
 		}
 		if (IS_ERR_OR_NULL(res))
 			return res;
 	}
 	if (*res == '/') {
-		if (!nd->root.mnt)
-			set_root(nd);
 		error = nd_jump_root(nd);
 		if (unlikely(error))
 			return ERR_PTR(error);
@@ -1744,15 +1756,20 @@ static inline int may_lookup(struct nameidata *nd)
 static inline int handle_dots(struct nameidata *nd, int type)
 {
 	if (type == LAST_DOTDOT) {
+		int error = 0;
+
 		/*
 		 * LOOKUP_BENEATH resolving ".." is not currently safe -- races
 		 * can cause our parent to have moved outside of the root and
 		 * us to skip over it.
 		 */
-		if (unlikely(nd->flags & LOOKUP_BENEATH))
+		if (unlikely(nd->flags & (LOOKUP_BENEATH | LOOKUP_IN_ROOT)))
 			return -EXDEV;
-		if (!nd->root.mnt)
-			set_root(nd);
+		if (!nd->root.mnt) {
+			error = set_root(nd);
+			if (error)
+				return error;
+		}
 		if (nd->flags & LOOKUP_RCU) {
 			return follow_dotdot_rcu(nd);
 		} else
@@ -2251,9 +2268,13 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
 
 	nd->m_seq = read_seqbegin(&mount_lock);
 
+	/* LOOKUP_IN_ROOT treats absolute paths as being relative-to-dirfd. */
+	if (flags & LOOKUP_IN_ROOT)
+		while (*s == '/')
+			s++;
+
 	/* Figure out the starting path and root (if needed). */
 	if (*s == '/') {
-		set_root(nd);
 		error = nd_jump_root(nd);
 		if (unlikely(error))
 			return ERR_PTR(error);
@@ -2298,7 +2319,7 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
 		fdput(f);
 	}
 	/* For scoped-lookups we need to set the root to the dirfd as well. */
-	if (flags & LOOKUP_BENEATH) {
+	if (flags & (LOOKUP_BENEATH | LOOKUP_IN_ROOT)) {
 		nd->root = nd->path;
 		if (flags & LOOKUP_RCU)
 			nd->root_seq = nd->seq;
diff --git a/include/linux/namei.h b/include/linux/namei.h
index be407415c28a..ec2c6c588ea7 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -57,6 +57,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
 #define LOOKUP_NO_MAGICLINKS	0x080000 /* No /proc/$pid/fd/ "symlink" crossing. */
 #define LOOKUP_NO_SYMLINKS	0x100000 /* No symlink crossing *at all*.
 					    Implies LOOKUP_NO_MAGICLINKS. */
+#define LOOKUP_IN_ROOT		0x200000 /* Treat dirfd as %current->fs->root. */
 
 extern int path_pts(struct path *path);
 
-- 
2.23.0


  parent reply index

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-04 20:19 [PATCH v12 00/12] namei: openat2(2) path resolution restrictions Aleksa Sarai
2019-09-04 20:19 ` [PATCH v12 01/12] lib: introduce copy_struct_{to,from}_user helpers Aleksa Sarai
2019-09-04 20:48   ` Linus Torvalds
2019-09-04 21:00   ` Randy Dunlap
2019-09-05  7:32   ` Peter Zijlstra
2019-09-05  9:26     ` Aleksa Sarai
2019-09-05  9:43       ` Peter Zijlstra
2019-09-05 10:57         ` Peter Zijlstra
2019-09-11 10:37           ` Aleksa Sarai
2019-09-05 13:35         ` Aleksa Sarai
2019-09-05 17:01         ` Aleksa Sarai
2019-09-05  8:43   ` Rasmus Villemoes
2019-09-05  9:50     ` Aleksa Sarai
2019-09-05 10:45       ` Christian Brauner
2019-09-05  9:09   ` Andreas Schwab
2019-09-05 10:13     ` [PATCH v12 01/12] lib: introduce copy_struct_{to, from}_user helpers Gabriel Paubert
2019-09-05 11:05   ` [PATCH v12 01/12] lib: introduce copy_struct_{to,from}_user helpers Christian Brauner
2019-09-05 11:17     ` Rasmus Villemoes
2019-09-05 11:29       ` Christian Brauner
2019-09-05 13:40     ` Aleksa Sarai
2019-09-05 11:09   ` Christian Brauner
2019-09-05 11:27     ` Aleksa Sarai
2019-09-05 11:40       ` Christian Brauner
2019-09-05 18:07   ` Al Viro
2019-09-05 18:23     ` Christian Brauner
2019-09-05 18:28       ` Al Viro
2019-09-05 18:35         ` Christian Brauner
2019-09-05 19:56         ` Aleksa Sarai
2019-09-05 22:31           ` Al Viro
2019-09-06  7:00           ` Christian Brauner
2019-09-05 23:00     ` Aleksa Sarai
2019-09-05 23:49       ` Al Viro
2019-09-06  0:09         ` Aleksa Sarai
2019-09-06  0:14         ` Al Viro
2019-09-04 20:19 ` [PATCH v12 02/12] clone3: switch to copy_struct_from_user() Aleksa Sarai
2019-09-04 20:19 ` [PATCH v12 03/12] sched_setattr: switch to copy_struct_{to,from}_user() Aleksa Sarai
2019-09-04 20:19 ` [PATCH v12 04/12] perf_event_open: switch to copy_struct_from_user() Aleksa Sarai
2019-09-04 20:19 ` [PATCH v12 05/12] namei: obey trailing magic-link DAC permissions Aleksa Sarai
2019-09-17 21:30   ` Jann Horn
2019-09-18 13:51     ` Aleksa Sarai
2019-09-18 15:46       ` Aleksa Sarai
2019-09-04 20:19 ` [PATCH v12 06/12] procfs: switch magic-link modes to be more sane Aleksa Sarai
2019-09-04 20:19 ` [PATCH v12 07/12] open: O_EMPTYPATH: procfs-less file descriptor re-opening Aleksa Sarai
2019-09-04 20:19 ` [PATCH v12 08/12] namei: O_BENEATH-style path resolution flags Aleksa Sarai
2019-09-04 20:19 ` Aleksa Sarai [this message]
2019-09-04 20:19 ` [PATCH v12 10/12] namei: aggressively check for nd->root escape on ".." resolution Aleksa Sarai
2019-09-04 21:09   ` Linus Torvalds
2019-09-04 21:35     ` Linus Torvalds
2019-09-04 21:36       ` Linus Torvalds
2019-09-04 21:48     ` Aleksa Sarai
2019-09-04 22:16       ` Linus Torvalds
2019-09-04 22:31       ` David Howells
2019-09-04 22:38         ` Linus Torvalds
2019-09-04 23:29           ` Al Viro
2019-09-04 23:44             ` Linus Torvalds
2019-09-04 20:19 ` [PATCH v12 11/12] open: openat2(2) syscall Aleksa Sarai
2019-09-04 21:00   ` Randy Dunlap
2019-09-07 12:40   ` Jeff Layton
2019-09-07 16:58     ` Linus Torvalds
2019-09-07 17:42       ` Andy Lutomirski
2019-09-07 17:45         ` Linus Torvalds
2019-09-07 18:15           ` Andy Lutomirski
2019-09-10  6:35           ` Ingo Molnar
2019-09-08 16:24     ` Aleksa Sarai
2019-09-04 20:19 ` [PATCH v12 12/12] selftests: add openat2(2) selftests Aleksa Sarai

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190904201933.10736-10-cyphar@cyphar.com \
    --to=cyphar@cyphar.com \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=arnd@arndb.de \
    --cc=asarai@suse.de \
    --cc=ast@kernel.org \
    --cc=bfields@fieldses.org \
    --cc=chanho.min@lge.com \
    --cc=christian@brauner.io \
    --cc=containers@lists.linux-foundation.org \
    --cc=dhowells@redhat.com \
    --cc=drysdale@google.com \
    --cc=ebiederm@xmission.com \
    --cc=jannh@google.com \
    --cc=jlayton@kernel.org \
    --cc=jolsa@redhat.com \
    --cc=keescook@chromium.org \
    --cc=linux-alpha@vger.kernel.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-ia64@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=linux-m68k@lists.linux-m68k.org \
    --cc=linux-mips@vger.kernel.org \
    --cc=linux-parisc@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=linux-sh@vger.kernel.org \
    --cc=linux-xtensa@linux-xtensa.org \
    --cc=linux@rasmusvillemoes.dk \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=oleg@redhat.com \
    --cc=peterz@infradead.org \
    --cc=shuah@kernel.org \
    --cc=skhan@linuxfoundation.org \
    --cc=sparclinux@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=tycho@tycho.ws \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-MIPS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-mips/0 linux-mips/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-mips linux-mips/ https://lore.kernel.org/linux-mips \
		linux-mips@vger.kernel.org
	public-inbox-index linux-mips

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-mips


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git