All of lore.kernel.org
 help / color / mirror / Atom feed
From: Maxim Patlasov <MPatlasov@parallels.com>
To: miklos@szeredi.hu
Cc: riel@redhat.com, dev@parallels.com, xemul@parallels.com,
	fuse-devel@lists.sourceforge.net, bfoster@redhat.com,
	linux-kernel@vger.kernel.org, jbottomley@parallels.com,
	linux-mm@kvack.org, viro@zeniv.linux.org.uk,
	linux-fsdevel@vger.kernel.org, akpm@linux-foundation.org,
	fengguang.wu@intel.com, devel@openvz.org, mgorman@suse.de
Subject: [PATCH 06/16] fuse: Trust kernel i_size only - v4
Date: Sat, 29 Jun 2013 21:44:40 +0400	[thread overview]
Message-ID: <20130629174304.20175.84433.stgit@maximpc.sw.ru> (raw)
In-Reply-To: <20130629172211.20175.70154.stgit@maximpc.sw.ru>

From: Pavel Emelyanov <xemul@openvz.org>

Make fuse think that when writeback is on the inode's i_size is always
up-to-date and not update it with the value received from the userspace.
This is done because the page cache code may update i_size without letting
the FS know.

This assumption implies fixing the previously introduced short-read helper --
when a short read occurs the 'hole' is filled with zeroes.

fuse_file_fallocate() is also fixed because now we should keep i_size up to
date, so it must be updated if FUSE_FALLOCATE request succeeded.

Changed in v2:
 - improved comment in fuse_short_read()
 - fixed fuse_file_fallocate() for KEEP_SIZE mode

Changed in v3:
 - fixed fuse_fillattr() not to use local i_size if writeback-cache is off
 - added a comment explaining why we cannot trust attr.size from server

Changed in v4:
 - do not change fuse_file_fallocate() because what we need is already
   implemented in commits a200a2d and 14c1441

Signed-off-by: Maxim V. Patlasov <MPatlasov@parallels.com>
---
 fs/fuse/dir.c   |   13 +++++++++++--
 fs/fuse/file.c  |   26 ++++++++++++++++++++++++--
 fs/fuse/inode.c |   11 +++++++++--
 3 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index f3f783d..b755884 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -851,6 +851,11 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
 			  struct kstat *stat)
 {
 	unsigned int blkbits;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+
+	/* see the comment in fuse_change_attributes() */
+	if (fc->writeback_cache && S_ISREG(inode->i_mode))
+		attr->size = i_size_read(inode);
 
 	stat->dev = inode->i_sb->s_dev;
 	stat->ino = attr->ino;
@@ -1576,6 +1581,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
 	struct fuse_setattr_in inarg;
 	struct fuse_attr_out outarg;
 	bool is_truncate = false;
+	bool is_wb = fc->writeback_cache;
 	loff_t oldsize;
 	int err;
 
@@ -1645,7 +1651,9 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
 	fuse_change_attributes_common(inode, &outarg.attr,
 				      attr_timeout(&outarg));
 	oldsize = inode->i_size;
-	i_size_write(inode, outarg.attr.size);
+	/* see the comment in fuse_change_attributes() */
+	if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
+		i_size_write(inode, outarg.attr.size);
 
 	if (is_truncate) {
 		/* NOTE: this may release/reacquire fc->lock */
@@ -1657,7 +1665,8 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
 	 * Only call invalidate_inode_pages2() after removing
 	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
 	 */
-	if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
+	if ((is_truncate || !is_wb) &&
+	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
 		truncate_pagecache(inode, oldsize, outarg.attr.size);
 		invalidate_inode_pages2(inode->i_mapping);
 	}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 90fb235..98bc0d0 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -658,9 +658,31 @@ static void fuse_short_read(struct fuse_req *req, struct inode *inode,
 			    u64 attr_ver)
 {
 	size_t num_read = req->out.args[0].size;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+
+	if (fc->writeback_cache) {
+		/*
+		 * A hole in a file. Some data after the hole are in page cache,
+		 * but have not reached the client fs yet. So, the hole is not
+		 * present there.
+		 */
+		int i;
+		int start_idx = num_read >> PAGE_CACHE_SHIFT;
+		size_t off = num_read & (PAGE_CACHE_SIZE - 1);
+
+		for (i = start_idx; i < req->num_pages; i++) {
+			struct page *page = req->pages[i];
+			void *mapaddr = kmap_atomic(page);
 
-	loff_t pos = page_offset(req->pages[0]) + num_read;
-	fuse_read_update_size(inode, pos, attr_ver);
+			memset(mapaddr + off, 0, PAGE_CACHE_SIZE - off);
+
+			kunmap_atomic(mapaddr);
+			off = 0;
+		}
+	} else {
+		loff_t pos = page_offset(req->pages[0]) + num_read;
+		fuse_read_update_size(inode, pos, attr_ver);
+	}
 }
 
 static int fuse_readpage(struct file *file, struct page *page)
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9a0cdde..121638d 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -197,6 +197,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_inode *fi = get_fuse_inode(inode);
+	bool is_wb = fc->writeback_cache;
 	loff_t oldsize;
 	struct timespec old_mtime;
 
@@ -210,10 +211,16 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
 	fuse_change_attributes_common(inode, attr, attr_valid);
 
 	oldsize = inode->i_size;
-	i_size_write(inode, attr->size);
+	/*
+	 * In case of writeback_cache enabled, the cached writes beyond EOF
+	 * extend local i_size without keeping userspace server in sync. So,
+	 * attr->size coming from server can be stale. We cannot trust it.
+	 */
+	if (!is_wb || !S_ISREG(inode->i_mode))
+		i_size_write(inode, attr->size);
 	spin_unlock(&fc->lock);
 
-	if (S_ISREG(inode->i_mode)) {
+	if (!is_wb && S_ISREG(inode->i_mode)) {
 		bool inval = false;
 
 		if (oldsize != attr->size) {


WARNING: multiple messages have this Message-ID (diff)
From: Maxim Patlasov <MPatlasov@parallels.com>
To: miklos@szeredi.hu
Cc: riel@redhat.com, dev@parallels.com, xemul@parallels.com,
	fuse-devel@lists.sourceforge.net, bfoster@redhat.com,
	linux-kernel@vger.kernel.org, jbottomley@parallels.com,
	linux-mm@kvack.org, viro@zeniv.linux.org.uk,
	linux-fsdevel@vger.kernel.org, akpm@linux-foundation.org,
	fengguang.wu@intel.com, devel@openvz.org, mgorman@suse.de
Subject: [PATCH 06/16] fuse: Trust kernel i_size only - v4
Date: Sat, 29 Jun 2013 21:44:40 +0400	[thread overview]
Message-ID: <20130629174304.20175.84433.stgit@maximpc.sw.ru> (raw)
In-Reply-To: <20130629172211.20175.70154.stgit@maximpc.sw.ru>

From: Pavel Emelyanov <xemul@openvz.org>

Make fuse think that when writeback is on the inode's i_size is always
up-to-date and not update it with the value received from the userspace.
This is done because the page cache code may update i_size without letting
the FS know.

This assumption implies fixing the previously introduced short-read helper --
when a short read occurs the 'hole' is filled with zeroes.

fuse_file_fallocate() is also fixed because now we should keep i_size up to
date, so it must be updated if FUSE_FALLOCATE request succeeded.

Changed in v2:
 - improved comment in fuse_short_read()
 - fixed fuse_file_fallocate() for KEEP_SIZE mode

Changed in v3:
 - fixed fuse_fillattr() not to use local i_size if writeback-cache is off
 - added a comment explaining why we cannot trust attr.size from server

Changed in v4:
 - do not change fuse_file_fallocate() because what we need is already
   implemented in commits a200a2d and 14c1441

Signed-off-by: Maxim V. Patlasov <MPatlasov@parallels.com>
---
 fs/fuse/dir.c   |   13 +++++++++++--
 fs/fuse/file.c  |   26 ++++++++++++++++++++++++--
 fs/fuse/inode.c |   11 +++++++++--
 3 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index f3f783d..b755884 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -851,6 +851,11 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
 			  struct kstat *stat)
 {
 	unsigned int blkbits;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+
+	/* see the comment in fuse_change_attributes() */
+	if (fc->writeback_cache && S_ISREG(inode->i_mode))
+		attr->size = i_size_read(inode);
 
 	stat->dev = inode->i_sb->s_dev;
 	stat->ino = attr->ino;
@@ -1576,6 +1581,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
 	struct fuse_setattr_in inarg;
 	struct fuse_attr_out outarg;
 	bool is_truncate = false;
+	bool is_wb = fc->writeback_cache;
 	loff_t oldsize;
 	int err;
 
@@ -1645,7 +1651,9 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
 	fuse_change_attributes_common(inode, &outarg.attr,
 				      attr_timeout(&outarg));
 	oldsize = inode->i_size;
-	i_size_write(inode, outarg.attr.size);
+	/* see the comment in fuse_change_attributes() */
+	if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
+		i_size_write(inode, outarg.attr.size);
 
 	if (is_truncate) {
 		/* NOTE: this may release/reacquire fc->lock */
@@ -1657,7 +1665,8 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
 	 * Only call invalidate_inode_pages2() after removing
 	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
 	 */
-	if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
+	if ((is_truncate || !is_wb) &&
+	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
 		truncate_pagecache(inode, oldsize, outarg.attr.size);
 		invalidate_inode_pages2(inode->i_mapping);
 	}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 90fb235..98bc0d0 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -658,9 +658,31 @@ static void fuse_short_read(struct fuse_req *req, struct inode *inode,
 			    u64 attr_ver)
 {
 	size_t num_read = req->out.args[0].size;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+
+	if (fc->writeback_cache) {
+		/*
+		 * A hole in a file. Some data after the hole are in page cache,
+		 * but have not reached the client fs yet. So, the hole is not
+		 * present there.
+		 */
+		int i;
+		int start_idx = num_read >> PAGE_CACHE_SHIFT;
+		size_t off = num_read & (PAGE_CACHE_SIZE - 1);
+
+		for (i = start_idx; i < req->num_pages; i++) {
+			struct page *page = req->pages[i];
+			void *mapaddr = kmap_atomic(page);
 
-	loff_t pos = page_offset(req->pages[0]) + num_read;
-	fuse_read_update_size(inode, pos, attr_ver);
+			memset(mapaddr + off, 0, PAGE_CACHE_SIZE - off);
+
+			kunmap_atomic(mapaddr);
+			off = 0;
+		}
+	} else {
+		loff_t pos = page_offset(req->pages[0]) + num_read;
+		fuse_read_update_size(inode, pos, attr_ver);
+	}
 }
 
 static int fuse_readpage(struct file *file, struct page *page)
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9a0cdde..121638d 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -197,6 +197,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_inode *fi = get_fuse_inode(inode);
+	bool is_wb = fc->writeback_cache;
 	loff_t oldsize;
 	struct timespec old_mtime;
 
@@ -210,10 +211,16 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
 	fuse_change_attributes_common(inode, attr, attr_valid);
 
 	oldsize = inode->i_size;
-	i_size_write(inode, attr->size);
+	/*
+	 * In case of writeback_cache enabled, the cached writes beyond EOF
+	 * extend local i_size without keeping userspace server in sync. So,
+	 * attr->size coming from server can be stale. We cannot trust it.
+	 */
+	if (!is_wb || !S_ISREG(inode->i_mode))
+		i_size_write(inode, attr->size);
 	spin_unlock(&fc->lock);
 
-	if (S_ISREG(inode->i_mode)) {
+	if (!is_wb && S_ISREG(inode->i_mode)) {
 		bool inval = false;
 
 		if (oldsize != attr->size) {

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2013-06-29 17:44 UTC|newest]

Thread overview: 79+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-06-29 17:41 [PATCH v5 00/16] fuse: An attempt to implement a write-back cache policy Maxim Patlasov
2013-06-29 17:41 ` Maxim Patlasov
2013-06-29 17:42 ` [PATCH 01/16] fuse: Linking file to inode helper Maxim Patlasov
2013-06-29 17:42   ` Maxim Patlasov
2013-06-29 17:42 ` [PATCH 02/16] fuse: Getting file for writeback helper Maxim Patlasov
2013-06-29 17:42   ` Maxim Patlasov
2013-06-29 17:42 ` [PATCH 03/16] fuse: Prepare to handle short reads Maxim Patlasov
2013-06-29 17:42   ` Maxim Patlasov
2013-06-29 17:42   ` Maxim Patlasov
2013-06-29 17:42 ` [PATCH 04/16] fuse: Prepare to handle multiple pages in writeback Maxim Patlasov
2013-06-29 17:42   ` Maxim Patlasov
2013-06-29 17:42 ` [PATCH 05/16] fuse: Connection bit for enabling writeback Maxim Patlasov
2013-06-29 17:42   ` Maxim Patlasov
2013-06-29 17:44 ` Maxim Patlasov [this message]
2013-06-29 17:44   ` [PATCH 06/16] fuse: Trust kernel i_size only - v4 Maxim Patlasov
2013-06-29 17:44 ` [PATCH 07/16] fuse: Trust kernel i_mtime only Maxim Patlasov
2013-06-29 17:44   ` Maxim Patlasov
2013-07-11 16:14   ` [PATCH 07/16] fuse: Trust kernel i_mtime only -v2 Maxim Patlasov
2013-06-29 17:45 ` [PATCH 08/16] fuse: Flush files on wb close Maxim Patlasov
2013-06-29 17:45   ` Maxim Patlasov
2013-07-11 16:18   ` [PATCH 08/16] fuse: Flush files on wb close -v2 Maxim Patlasov
2013-06-29 17:45 ` [PATCH 09/16] fuse: restructure fuse_readpage() Maxim Patlasov
2013-06-29 17:45   ` Maxim Patlasov
2013-06-29 17:45   ` Maxim Patlasov
2013-06-29 17:45 ` [PATCH 10/16] fuse: Implement writepages callback Maxim Patlasov
2013-06-29 17:45   ` Maxim Patlasov
2013-07-19 16:50   ` Miklos Szeredi
2013-07-19 16:50     ` Miklos Szeredi
2013-08-02 15:40     ` Maxim Patlasov
2013-08-02 15:40       ` Maxim Patlasov
2013-08-02 15:40       ` Maxim Patlasov
2013-08-06 16:25       ` Miklos Szeredi
2013-08-06 16:25         ` Miklos Szeredi
2013-08-06 16:26         ` Eric Boxer
2013-08-09 15:02         ` Maxim Patlasov
2013-08-09 15:02           ` Maxim Patlasov
2013-08-09 15:02           ` Maxim Patlasov
2013-08-30 10:12           ` Miklos Szeredi
2013-08-30 10:12             ` Miklos Szeredi
2013-08-30 10:12             ` Miklos Szeredi
2013-08-30 14:50             ` Maxim Patlasov
2013-08-30 14:50               ` Maxim Patlasov
2013-08-30 14:50               ` Maxim Patlasov
2013-09-03 10:31               ` Miklos Szeredi
2013-09-03 10:31                 ` Miklos Szeredi
2013-09-03 10:31                 ` Miklos Szeredi
2013-09-03 16:02                 ` Maxim Patlasov
2013-09-03 16:02                   ` Maxim Patlasov
2013-09-03 16:02                   ` Maxim Patlasov
2013-06-29 17:45 ` [PATCH 11/16] fuse: Implement write_begin/write_end callbacks Maxim Patlasov
2013-06-29 17:45   ` Maxim Patlasov
2013-06-29 17:46 ` [PATCH 12/16] fuse: fuse_writepage_locked() should wait on writeback Maxim Patlasov
2013-06-29 17:46   ` Maxim Patlasov
2013-06-29 17:46 ` [PATCH 13/16] fuse: fuse_flush() " Maxim Patlasov
2013-06-29 17:46   ` Maxim Patlasov
2013-06-29 17:46 ` [PATCH 14/16] fuse: Fix O_DIRECT operations vs cached writeback misorder - v2 Maxim Patlasov
2013-06-29 17:46   ` Maxim Patlasov
2013-06-29 17:47 ` [PATCH 15/16] fuse: Turn writeback cache on Maxim Patlasov
2013-06-29 17:47   ` Maxim Patlasov
2013-06-29 17:48 ` [PATCH 16/16] mm: strictlimit feature Maxim Patlasov
2013-06-29 17:48   ` Maxim Patlasov
2013-07-01 21:16   ` Andrew Morton
2013-07-01 21:16     ` Andrew Morton
2013-07-02  8:33     ` Maxim Patlasov
2013-07-02  8:33       ` Maxim Patlasov
2013-07-02  8:33       ` Maxim Patlasov
2013-07-02 17:44   ` [PATCH] mm: strictlimit feature -v2 Maxim Patlasov
2013-07-02 17:44     ` Maxim Patlasov
2013-07-02 19:38     ` Andrew Morton
2013-07-02 19:38       ` Andrew Morton
2013-07-03 11:01       ` Maxim Patlasov
2013-07-03 11:01         ` Maxim Patlasov
2013-07-03 11:01         ` Maxim Patlasov
2013-07-03 23:16         ` Jan Kara
2013-07-03 23:16           ` Jan Kara
2013-07-03 23:16           ` Jan Kara
2013-07-05 13:14           ` Maxim Patlasov
2013-07-05 13:14             ` Maxim Patlasov
2013-07-05 13:14             ` Maxim Patlasov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130629174304.20175.84433.stgit@maximpc.sw.ru \
    --to=mpatlasov@parallels.com \
    --cc=akpm@linux-foundation.org \
    --cc=bfoster@redhat.com \
    --cc=dev@parallels.com \
    --cc=devel@openvz.org \
    --cc=fengguang.wu@intel.com \
    --cc=fuse-devel@lists.sourceforge.net \
    --cc=jbottomley@parallels.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@suse.de \
    --cc=miklos@szeredi.hu \
    --cc=riel@redhat.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=xemul@parallels.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.