All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Dr. David Alan Gilbert (git)" <dgilbert@redhat.com>
To: qemu-devel@nongnu.org, renzhen@linux.alibaba.com,
	eguan@linux.alibaba.com, ganesh.mahalingam@intel.com,
	m.mizuma@jp.fujitsu.com, mszeredi@redhat.com,
	misono.tomohiro@jp.fujitsu.com, tao.peng@linux.alibaba.com,
	piaojun@huawei.com, stefanha@redhat.com, vgoyal@redhat.com,
	mst@redhat.com, berrange@redhat.com
Subject: [PATCH 06/25] virtiofsd: passthrough_ll: add fallback for racy ops
Date: Thu, 24 Oct 2019 12:26:59 +0100	[thread overview]
Message-ID: <20191024112718.34657-7-dgilbert@redhat.com> (raw)
In-Reply-To: <20191024112718.34657-1-dgilbert@redhat.com>

From: Miklos Szeredi <mszeredi@redhat.com>

We have two operations that cannot be done race-free on a symlink in
certain cases: utimes and link.

Add racy fallback for these if the race-free method doesn't work.  We do
our best to avoid races even in this case:

  - get absolute path by reading /proc/self/fd/NN symlink

  - lookup parent directory: after this we are safe against renames in
    ancestors

  - lookup name in parent directory, and verify that we got to the original
    inode; if not, retry the whole thing

Both utimes(2) and link(2) hold i_lock on the inode across the operation,
so a racing rename/delete by this fuse instance is not possible; a race can
only come from other entities changing the filesystem.

If the "norace" option is given, then disable the racy fallbacks.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 contrib/virtiofsd/passthrough_ll.c | 149 +++++++++++++++++++++++++----
 1 file changed, 131 insertions(+), 18 deletions(-)

diff --git a/contrib/virtiofsd/passthrough_ll.c b/contrib/virtiofsd/passthrough_ll.c
index a71fbff143..9f84419816 100644
--- a/contrib/virtiofsd/passthrough_ll.c
+++ b/contrib/virtiofsd/passthrough_ll.c
@@ -98,6 +98,7 @@ enum {
 struct lo_data {
 	pthread_mutex_t mutex;
 	int debug;
+	int norace;
 	int writeback;
 	int flock;
 	int xattr;
@@ -136,10 +137,16 @@ static const struct fuse_opt lo_opts[] = {
 	  offsetof(struct lo_data, cache), CACHE_NORMAL },
 	{ "cache=always",
 	  offsetof(struct lo_data, cache), CACHE_ALWAYS },
-
+	{ "norace",
+	  offsetof(struct lo_data, norace), 1 },
 	FUSE_OPT_END
 };
 
+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n);
+
+static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st);
+
+
 static struct lo_data *lo_data(fuse_req_t req)
 {
 	return (struct lo_data *) fuse_req_userdata(req);
@@ -345,24 +352,116 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
 	fuse_reply_attr(req, &buf, lo->timeout);
 }
 
-static int utimensat_empty_nofollow(struct lo_inode *inode,
-				    const struct timespec *tv)
+static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
+			      char path[PATH_MAX], struct lo_inode **parent)
 {
-	int res;
 	char procname[64];
+	char *last;
+	struct stat stat;
+	struct lo_inode *p;
+	int retries = 2;
+	int res;
+
+retry:
+	sprintf(procname, "/proc/self/fd/%i", inode->fd);
+
+	res = readlink(procname, path, PATH_MAX);
+	if (res < 0) {
+		fuse_log(FUSE_LOG_WARNING, "lo_parent_and_name: readlink failed: %m\n");
+		goto fail_noretry;
+	}
+
+	if (res >= PATH_MAX) {
+		fuse_log(FUSE_LOG_WARNING, "lo_parent_and_name: readlink overflowed\n");
+		goto fail_noretry;
+	}
+	path[res] = '\0';
+
+	last = strrchr(path, '/');
+	if (last == NULL) {
+		/* Shouldn't happen */
+		fuse_log(FUSE_LOG_WARNING, "lo_parent_and_name: INTERNAL ERROR: bad path read from proc\n");
+		goto fail_noretry;
+	}
+	if (last == path) {
+		p = &lo->root;
+		pthread_mutex_lock(&lo->mutex);
+		p->refcount++;
+		pthread_mutex_unlock(&lo->mutex);
+	} else {
+		*last = '\0';
+		res = fstatat(AT_FDCWD, last == path ? "/" : path, &stat, 0);
+		if (res == -1) {
+			if (!retries)
+				fuse_log(FUSE_LOG_WARNING, "lo_parent_and_name: failed to stat parent: %m\n");
+			goto fail;
+		}
+		p = lo_find(lo, &stat);
+		if (p == NULL) {
+			if (!retries)
+				fuse_log(FUSE_LOG_WARNING, "lo_parent_and_name: failed to find parent\n");
+			goto fail;
+		}
+	}
+	last++;
+	res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW);
+	if (res == -1) {
+		if (!retries)
+			fuse_log(FUSE_LOG_WARNING, "lo_parent_and_name: failed to stat last\n");
+		goto fail_unref;
+	}
+	if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) {
+		if (!retries)
+			fuse_log(FUSE_LOG_WARNING, "lo_parent_and_name: failed to match last\n");
+		goto fail_unref;
+	}
+	*parent = p;
+	memmove(path, last, strlen(last) + 1);
+
+	return 0;
+
+fail_unref:
+	unref_inode(lo, p, 1);
+fail:
+	if (retries) {
+		retries--;
+		goto retry;
+	}
+fail_noretry:
+	errno = EIO;
+	return -1;
+}
+
+static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode,
+			   const struct timespec *tv)
+{
+	int res;
+	struct lo_inode *parent;
+	char path[PATH_MAX];
 
 	if (inode->is_symlink) {
-		res = utimensat(inode->fd, "", tv,
-				AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
+		res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH);
 		if (res == -1 && errno == EINVAL) {
 			/* Sorry, no race free way to set times on symlink. */
-			errno = EPERM;
+			if (lo->norace)
+				errno = EPERM;
+			else
+				goto fallback;
 		}
 		return res;
 	}
-	sprintf(procname, "/proc/self/fd/%i", inode->fd);
+	sprintf(path, "/proc/self/fd/%i", inode->fd);
+
+	return utimensat(AT_FDCWD, path, tv, 0);
+
+fallback:
+	res = lo_parent_and_name(lo, inode, path, &parent);
+	if (res != -1) {
+		res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW);
+		unref_inode(lo, parent, 1);
+	}
 
-	return utimensat(AT_FDCWD, procname, tv, 0);
+	return res;
 }
 
 static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi)
@@ -385,6 +484,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
 {
 	int saverr;
 	char procname[64];
+	struct lo_data *lo = lo_data(req);
 	struct lo_inode *inode;
 	int ifd;
 	int res;
@@ -454,7 +554,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
 		if (fi)
 			res = futimens(fd, tv);
 		else
-			res = utimensat_empty_nofollow(inode, tv);
+			res = utimensat_empty(lo, inode, tv);
 		if (res == -1)
 			goto out_err;
 	}
@@ -673,24 +773,37 @@ static void lo_symlink(fuse_req_t req, const char *link,
 	lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link);
 }
 
-static int linkat_empty_nofollow(struct lo_inode *inode, int dfd,
-				 const char *name)
+static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode,
+				 int dfd, const char *name)
 {
 	int res;
-	char procname[64];
+	struct lo_inode *parent;
+	char path[PATH_MAX];
 
 	if (inode->is_symlink) {
 		res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH);
 		if (res == -1 && (errno == ENOENT || errno == EINVAL)) {
 			/* Sorry, no race free way to hard-link a symlink. */
-			errno = EPERM;
+			if (lo->norace)
+				errno = EPERM;
+			else
+				goto fallback;
 		}
 		return res;
 	}
 
-	sprintf(procname, "/proc/self/fd/%i", inode->fd);
+	sprintf(path, "/proc/self/fd/%i", inode->fd);
+
+	return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW);
 
-	return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW);
+fallback:
+	res = lo_parent_and_name(lo, inode, path, &parent);
+	if (res != -1) {
+		res = linkat(parent->fd, path, dfd, name, 0);
+		unref_inode(lo, parent, 1);
+	}
+
+	return res;
 }
 
 static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
@@ -712,7 +825,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
 	e.attr_timeout = lo->timeout;
 	e.entry_timeout = lo->timeout;
 
-	res = linkat_empty_nofollow(inode, lo_fd(req, parent), name);
+	res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name);
 	if (res == -1)
 		goto out_err;
 
@@ -1466,7 +1579,7 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
 	}
 
 	if (inode->is_symlink) {
-		/* Sorry, no race free way to setxattr on symlink. */
+		/* Sorry, no race free way to removexattr on symlink. */
 		saverr = EPERM;
 		goto out;
 	}
-- 
2.23.0



  parent reply	other threads:[~2019-10-24 13:12 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-24 11:26 [PATCH 00/25] virtiofs daemon (security) Dr. David Alan Gilbert (git)
2019-10-24 11:26 ` [PATCH 01/25] virtiofsd: passthrough_ll: create new files in caller's context Dr. David Alan Gilbert (git)
2019-10-24 11:26 ` [PATCH 02/25] virtiofsd: passthrough_ll: add lo_map for ino/fh indirection Dr. David Alan Gilbert (git)
2019-10-24 11:26 ` [PATCH 03/25] virtiofsd: passthrough_ll: add ino_map to hide lo_inode pointers Dr. David Alan Gilbert (git)
2019-10-24 11:26 ` [PATCH 04/25] virtiofsd: passthrough_ll: add dirp_map to hide lo_dirp pointers Dr. David Alan Gilbert (git)
2019-10-24 11:26 ` [PATCH 05/25] virtiofsd: passthrough_ll: add fd_map to hide file descriptors Dr. David Alan Gilbert (git)
2019-10-24 11:26 ` Dr. David Alan Gilbert (git) [this message]
2019-10-24 11:27 ` [PATCH 07/25] virtiofsd: validate path components Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 08/25] virtiofsd: Plumb fuse_bufvec through to do_write_buf Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 09/25] virtiofsd: Pass write iov's all the way through Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 10/25] virtiofsd: add fuse_mbuf_iter API Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 11/25] virtiofsd: validate input buffer sizes in do_write_buf() Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 12/25] virtiofsd: check input buffer size in fuse_lowlevel.c ops Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 13/25] virtiofsd: prevent ".." escape in lo_do_lookup() Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 14/25] virtiofsd: prevent ".." escape in lo_do_readdir() Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 15/25] virtiofsd: use /proc/self/fd/ O_PATH file descriptor Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 16/25] virtiofsd: sandbox mount namespace Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 17/25] virtiofsd: move to an empty network namespace Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 18/25] virtiofsd: move to a new pid namespace Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 19/25] virtiofsd: add seccomp whitelist Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 20/25] virtiofsd: Parse flag FUSE_WRITE_KILL_PRIV Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 21/25] virtiofsd: Drop CAP_FSETID if client asked for it Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 22/25] virtiofsd: set maximum RLIMIT_NOFILE limit Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 23/25] virtiofsd: add security guide document Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 24/25] virtiofsd: add --syslog command-line option Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 25/25] virtiofsd: print log only when priority is high enough Dr. David Alan Gilbert (git)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191024112718.34657-7-dgilbert@redhat.com \
    --to=dgilbert@redhat.com \
    --cc=berrange@redhat.com \
    --cc=eguan@linux.alibaba.com \
    --cc=ganesh.mahalingam@intel.com \
    --cc=m.mizuma@jp.fujitsu.com \
    --cc=misono.tomohiro@jp.fujitsu.com \
    --cc=mst@redhat.com \
    --cc=mszeredi@redhat.com \
    --cc=piaojun@huawei.com \
    --cc=qemu-devel@nongnu.org \
    --cc=renzhen@linux.alibaba.com \
    --cc=stefanha@redhat.com \
    --cc=tao.peng@linux.alibaba.com \
    --cc=vgoyal@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.