All of lore.kernel.org
 help / color / mirror / Atom feed
From: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
To: <virtio-fs@redhat.com>
Cc: qemu-devel@nongnu.org, misono.tomohiro@jp.fujitsu.com
Subject: [PATCH] virtiofsd: Fix data corruption with O_APPEND write in writeback mode
Date: Wed, 23 Oct 2019 21:25:23 +0900	[thread overview]
Message-ID: <20191023122523.1816-1-misono.tomohiro@jp.fujitsu.com> (raw)

When writeback mode is enabled (-o writeback), O_APPEND handling is
done in the kernel. Therefore virtiofsd clears the O_APPEND flag when
opening a file. Otherwise the O_APPEND flag takes precedence over the
offset given to pwrite() and the written data may be corrupted.

Currently, clearing the O_APPEND flag is done in lo_open(), but we also
need the same operation in lo_create(). So, factor out the flag
update operation from lo_open() into update_open_flags() and call it
in both lo_open() and lo_create().

This fixes the failure of xfstest generic/069 in writeback mode
(which tests O_APPEND write data integrity).

Signed-off-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
---
 contrib/virtiofsd/passthrough_ll.c | 56 +++++++++++++++---------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/contrib/virtiofsd/passthrough_ll.c b/contrib/virtiofsd/passthrough_ll.c
index e8892c3c32..79fb78ecce 100644
--- a/contrib/virtiofsd/passthrough_ll.c
+++ b/contrib/virtiofsd/passthrough_ll.c
@@ -1733,6 +1733,32 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info
 	fuse_reply_err(req, 0);
 }
 
+static void update_open_flags(int writeback, struct fuse_file_info *fi)
+{
+	/* With writeback cache, kernel may send read requests even
+	   when userspace opened write-only */
+	if (writeback && (fi->flags & O_ACCMODE) == O_WRONLY) {
+		fi->flags &= ~O_ACCMODE;
+		fi->flags |= O_RDWR;
+	}
+
+	/* With writeback cache, O_APPEND is handled by the kernel.
+	   This breaks atomicity (since the file may change in the
+	   underlying filesystem, so that the kernel's idea of the
+	   end of the file isn't accurate anymore). In this example,
+	   we just accept that. A more rigorous filesystem may want
+	   to return an error here */
+	if (writeback && (fi->flags & O_APPEND))
+		fi->flags &= ~O_APPEND;
+
+	/*
+	 * O_DIRECT in guest should not necessarily mean bypassing page
+	 * cache on host as well. If somebody needs that behavior, it
+	 * probably should be a configuration knob in daemon.
+	 */
+	fi->flags &= ~O_DIRECT;
+}
+
 static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
 		      mode_t mode, struct fuse_file_info *fi)
 {
@@ -1760,12 +1786,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
 	if (err)
 		goto out;
 
-	/*
-	 * O_DIRECT in guest should not necessarily mean bypassing page
-	 * cache on host as well. If somebody needs that behavior, it
-	 * probably should be a configuration knob in daemon.
-	 */
-	fi->flags &= ~O_DIRECT;
+	update_open_flags(lo->writeback, fi);
 
 	fd = openat(parent_inode->fd, name,
 		    (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode);
@@ -1966,28 +1987,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
 
 	fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, fi->flags);
 
-	/* With writeback cache, kernel may send read requests even
-	   when userspace opened write-only */
-	if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) {
-		fi->flags &= ~O_ACCMODE;
-		fi->flags |= O_RDWR;
-	}
-
-	/* With writeback cache, O_APPEND is handled by the kernel.
-	   This breaks atomicity (since the file may change in the
-	   underlying filesystem, so that the kernel's idea of the
-	   end of the file isn't accurate anymore). In this example,
-	   we just accept that. A more rigorous filesystem may want
-	   to return an error here */
-	if (lo->writeback && (fi->flags & O_APPEND))
-		fi->flags &= ~O_APPEND;
-
-	/*
-	 * O_DIRECT in guest should not necessarily mean bypassing page
-	 * cache on host as well. If somebody needs that behavior, it
-	 * probably should be a configuration knob in daemon.
-	 */
-	fi->flags &= ~O_DIRECT;
+	update_open_flags(lo->writeback, fi);
 
 	sprintf(buf, "%i", lo_fd(req, ino));
 	fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW);
-- 
2.21.0



WARNING: multiple messages have this Message-ID (diff)
From: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
To: virtio-fs@redhat.com
Cc: qemu-devel@nongnu.org
Subject: [Virtio-fs] [PATCH] virtiofsd: Fix data corruption with O_APPEND write in writeback mode
Date: Wed, 23 Oct 2019 21:25:23 +0900	[thread overview]
Message-ID: <20191023122523.1816-1-misono.tomohiro@jp.fujitsu.com> (raw)

When writeback mode is enabled (-o writeback), O_APPEND handling is
done in the kernel. Therefore virtiofsd clears the O_APPEND flag when
opening a file. Otherwise the O_APPEND flag takes precedence over the
offset given to pwrite() and the written data may be corrupted.

Currently, clearing the O_APPEND flag is done in lo_open(), but we also
need the same operation in lo_create(). So, factor out the flag
update operation from lo_open() into update_open_flags() and call it
in both lo_open() and lo_create().

This fixes the failure of xfstest generic/069 in writeback mode
(which tests O_APPEND write data integrity).

Signed-off-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
---
 contrib/virtiofsd/passthrough_ll.c | 56 +++++++++++++++---------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/contrib/virtiofsd/passthrough_ll.c b/contrib/virtiofsd/passthrough_ll.c
index e8892c3c32..79fb78ecce 100644
--- a/contrib/virtiofsd/passthrough_ll.c
+++ b/contrib/virtiofsd/passthrough_ll.c
@@ -1733,6 +1733,32 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info
 	fuse_reply_err(req, 0);
 }
 
+static void update_open_flags(int writeback, struct fuse_file_info *fi)
+{
+	/* With writeback cache, kernel may send read requests even
+	   when userspace opened write-only */
+	if (writeback && (fi->flags & O_ACCMODE) == O_WRONLY) {
+		fi->flags &= ~O_ACCMODE;
+		fi->flags |= O_RDWR;
+	}
+
+	/* With writeback cache, O_APPEND is handled by the kernel.
+	   This breaks atomicity (since the file may change in the
+	   underlying filesystem, so that the kernel's idea of the
+	   end of the file isn't accurate anymore). In this example,
+	   we just accept that. A more rigorous filesystem may want
+	   to return an error here */
+	if (writeback && (fi->flags & O_APPEND))
+		fi->flags &= ~O_APPEND;
+
+	/*
+	 * O_DIRECT in guest should not necessarily mean bypassing page
+	 * cache on host as well. If somebody needs that behavior, it
+	 * probably should be a configuration knob in daemon.
+	 */
+	fi->flags &= ~O_DIRECT;
+}
+
 static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
 		      mode_t mode, struct fuse_file_info *fi)
 {
@@ -1760,12 +1786,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
 	if (err)
 		goto out;
 
-	/*
-	 * O_DIRECT in guest should not necessarily mean bypassing page
-	 * cache on host as well. If somebody needs that behavior, it
-	 * probably should be a configuration knob in daemon.
-	 */
-	fi->flags &= ~O_DIRECT;
+	update_open_flags(lo->writeback, fi);
 
 	fd = openat(parent_inode->fd, name,
 		    (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode);
@@ -1966,28 +1987,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
 
 	fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, fi->flags);
 
-	/* With writeback cache, kernel may send read requests even
-	   when userspace opened write-only */
-	if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) {
-		fi->flags &= ~O_ACCMODE;
-		fi->flags |= O_RDWR;
-	}
-
-	/* With writeback cache, O_APPEND is handled by the kernel.
-	   This breaks atomicity (since the file may change in the
-	   underlying filesystem, so that the kernel's idea of the
-	   end of the file isn't accurate anymore). In this example,
-	   we just accept that. A more rigorous filesystem may want
-	   to return an error here */
-	if (lo->writeback && (fi->flags & O_APPEND))
-		fi->flags &= ~O_APPEND;
-
-	/*
-	 * O_DIRECT in guest should not necessarily mean bypassing page
-	 * cache on host as well. If somebody needs that behavior, it
-	 * probably should be a configuration knob in daemon.
-	 */
-	fi->flags &= ~O_DIRECT;
+	update_open_flags(lo->writeback, fi);
 
 	sprintf(buf, "%i", lo_fd(req, ino));
 	fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW);
-- 
2.21.0


             reply	other threads:[~2019-10-23 12:22 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-23 12:25 Misono Tomohiro [this message]
2019-10-23 12:25 ` [Virtio-fs] [PATCH] virtiofsd: Fix data corruption with O_APPEND write in writeback mode Misono Tomohiro
2019-10-23 20:07 ` Vivek Goyal
2019-10-24 15:02   ` Vivek Goyal
2019-10-24 15:02     ` Vivek Goyal
2019-10-25 10:02     ` misono.tomohiro
2019-10-25 10:02       ` misono.tomohiro
2019-10-29 10:07       ` misono.tomohiro
2019-10-29 10:07         ` misono.tomohiro
2019-10-31  9:39         ` misono.tomohiro
2019-10-31  9:39           ` misono.tomohiro
2019-10-31 15:47           ` Vivek Goyal
2019-10-31 15:47 ` Vivek Goyal
2019-11-01  8:08   ` Dr. David Alan Gilbert
2019-11-01  8:08     ` Dr. David Alan Gilbert

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191023122523.1816-1-misono.tomohiro@jp.fujitsu.com \
    --to=misono.tomohiro@jp.fujitsu.com \
    --cc=qemu-devel@nongnu.org \
    --cc=virtio-fs@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.