All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 01/13] NFS: Don't flush caches for a getattr that races with writeback
@ 2016-06-22 20:47 Trond Myklebust
  2016-06-22 20:47 ` [PATCH v3 02/13] NFS: Cache access checks more aggressively Trond Myklebust
  0 siblings, 1 reply; 13+ messages in thread
From: Trond Myklebust @ 2016-06-22 20:47 UTC (permalink / raw)
  To: linux-nfs

If there were outstanding writes then chalk up the unexpected change
attribute on the server to them.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/inode.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 52e7d6869e3b..60051e62d3f1 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1729,12 +1729,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		if (inode->i_version != fattr->change_attr) {
 			dprintk("NFS: change_attr change on server for file %s/%ld\n",
 					inode->i_sb->s_id, inode->i_ino);
-			invalid |= NFS_INO_INVALID_ATTR
-				| NFS_INO_INVALID_DATA
-				| NFS_INO_INVALID_ACCESS
-				| NFS_INO_INVALID_ACL;
-			if (S_ISDIR(inode->i_mode))
-				nfs_force_lookup_revalidate(inode);
+			/* Could it be a race with writeback? */
+			if (nfsi->nrequests == 0) {
+				invalid |= NFS_INO_INVALID_ATTR
+					| NFS_INO_INVALID_DATA
+					| NFS_INO_INVALID_ACCESS
+					| NFS_INO_INVALID_ACL;
+				if (S_ISDIR(inode->i_mode))
+					nfs_force_lookup_revalidate(inode);
+			}
 			inode->i_version = fattr->change_attr;
 		}
 	} else {
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 02/13] NFS: Cache access checks more aggressively
  2016-06-22 20:47 [PATCH v3 01/13] NFS: Don't flush caches for a getattr that races with writeback Trond Myklebust
@ 2016-06-22 20:47 ` Trond Myklebust
  2016-06-22 20:47   ` [PATCH v3 03/13] NFS: Cache aggressively when file is open for writing Trond Myklebust
  0 siblings, 1 reply; 13+ messages in thread
From: Trond Myklebust @ 2016-06-22 20:47 UTC (permalink / raw)
  To: linux-nfs

If an attribute revalidation fails, then we already know that we'll
zap the access cache. If, OTOH, the inode isn't changing, there should
be no need to eject access calls just because they are old.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/dir.c | 52 +++++++++++++++++++++++++++++++---------------------
 1 file changed, 31 insertions(+), 21 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index aaf7bd0cbae2..210b33636fe4 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2228,21 +2228,37 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, st
 	return NULL;
 }
 
-static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res, bool may_block)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_access_entry *cache;
-	int err = -ENOENT;
+	bool retry = true;
+	int err;
 
 	spin_lock(&inode->i_lock);
-	if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
-		goto out_zap;
-	cache = nfs_access_search_rbtree(inode, cred);
-	if (cache == NULL)
-		goto out;
-	if (!nfs_have_delegated_attributes(inode) &&
-	    !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
-		goto out_stale;
+	for(;;) {
+		if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
+			goto out_zap;
+		cache = nfs_access_search_rbtree(inode, cred);
+		err = -ENOENT;
+		if (cache == NULL)
+			goto out;
+		/* Found an entry, is our attribute cache valid? */
+		if (!nfs_attribute_cache_expired(inode) &&
+		    !(nfsi->cache_validity & NFS_INO_INVALID_ATTR))
+			break;
+		err = -ECHILD;
+		if (!may_block)
+			goto out;
+		if (!retry)
+			goto out_zap;
+		spin_unlock(&inode->i_lock);
+		err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+		if (err)
+			return err;
+		spin_lock(&inode->i_lock);
+		retry = false;
+	}
 	res->jiffies = cache->jiffies;
 	res->cred = cache->cred;
 	res->mask = cache->mask;
@@ -2251,12 +2267,6 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str
 out:
 	spin_unlock(&inode->i_lock);
 	return err;
-out_stale:
-	rb_erase(&cache->rb_node, &nfsi->access_cache);
-	list_del(&cache->lru);
-	spin_unlock(&inode->i_lock);
-	nfs_access_free_entry(cache);
-	return -ENOENT;
 out_zap:
 	spin_unlock(&inode->i_lock);
 	nfs_access_zap_cache(inode);
@@ -2283,13 +2293,12 @@ static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred,
 		cache = NULL;
 	if (cache == NULL)
 		goto out;
-	if (!nfs_have_delegated_attributes(inode) &&
-	    !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
+	err = nfs_revalidate_inode_rcu(NFS_SERVER(inode), inode);
+	if (err)
 		goto out;
 	res->jiffies = cache->jiffies;
 	res->cred = cache->cred;
 	res->mask = cache->mask;
-	err = 0;
 out:
 	rcu_read_unlock();
 	return err;
@@ -2378,18 +2387,19 @@ EXPORT_SYMBOL_GPL(nfs_access_set_mask);
 static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
 {
 	struct nfs_access_entry cache;
+	bool may_block = (mask & MAY_NOT_BLOCK) == 0;
 	int status;
 
 	trace_nfs_access_enter(inode);
 
 	status = nfs_access_get_cached_rcu(inode, cred, &cache);
 	if (status != 0)
-		status = nfs_access_get_cached(inode, cred, &cache);
+		status = nfs_access_get_cached(inode, cred, &cache, may_block);
 	if (status == 0)
 		goto out_cached;
 
 	status = -ECHILD;
-	if (mask & MAY_NOT_BLOCK)
+	if (!may_block)
 		goto out;
 
 	/* Be clever: ask server to check for all possible rights */
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 03/13] NFS: Cache aggressively when file is open for writing
  2016-06-22 20:47 ` [PATCH v3 02/13] NFS: Cache access checks more aggressively Trond Myklebust
@ 2016-06-22 20:47   ` Trond Myklebust
  2016-06-22 20:47     ` [PATCH v3 04/13] NFS: Kill NFS_INO_NFS_INO_FLUSHING: it is a performance killer Trond Myklebust
  0 siblings, 1 reply; 13+ messages in thread
From: Trond Myklebust @ 2016-06-22 20:47 UTC (permalink / raw)
  To: linux-nfs

Unless the user is using file locking, we must assume close-to-open
cache consistency when the file is open for writing. Adjust the
caching algorithm so that it does not clear the cache on out-of-order
writes and/or attribute revalidations.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/file.c  | 13 ++----------
 fs/nfs/inode.c | 62 +++++++++++++++++++++++++++++++++++++++++-----------------
 2 files changed, 46 insertions(+), 29 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 717a8d6af52d..2d39d9f9da7d 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -780,11 +780,6 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 }
 
 static int
-is_time_granular(struct timespec *ts) {
-	return ((ts->tv_sec == 0) && (ts->tv_nsec <= 1000));
-}
-
-static int
 do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 {
 	struct inode *inode = filp->f_mapping->host;
@@ -817,12 +812,8 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 	 * This makes locking act as a cache coherency point.
 	 */
 	nfs_sync_mapping(filp->f_mapping);
-	if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) {
-		if (is_time_granular(&NFS_SERVER(inode)->time_delta))
-			__nfs_revalidate_inode(NFS_SERVER(inode), inode);
-		else
-			nfs_zap_caches(inode);
-	}
+	if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+		nfs_zap_mapping(inode, filp->f_mapping);
 out:
 	return status;
 }
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 60051e62d3f1..4e65a5a8a01b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -878,7 +878,10 @@ void nfs_inode_attach_open_context(struct nfs_open_context *ctx)
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	spin_lock(&inode->i_lock);
-	list_add(&ctx->list, &nfsi->open_files);
+	if (ctx->mode & FMODE_WRITE)
+		list_add(&ctx->list, &nfsi->open_files);
+	else
+		list_add_tail(&ctx->list, &nfsi->open_files);
 	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context);
@@ -1215,6 +1218,25 @@ int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *
 	return __nfs_revalidate_mapping(inode, mapping, true);
 }
 
+static bool nfs_file_has_writers(struct nfs_inode *nfsi)
+{
+	struct inode *inode = &nfsi->vfs_inode;
+
+	assert_spin_locked(&inode->i_lock);
+
+	if (!S_ISREG(inode->i_mode))
+		return false;
+	if (list_empty(&nfsi->open_files))
+		return false;
+	/* Note: This relies on nfsi->open_files being ordered with writers
+	 *       being placed at the head of the list.
+	 *       See nfs_inode_attach_open_context()
+	 */
+	return (list_first_entry(&nfsi->open_files,
+			struct nfs_open_context,
+			list)->mode & FMODE_WRITE) == FMODE_WRITE;
+}
+
 static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
@@ -1279,22 +1301,24 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 	if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
 		return -EIO;
 
-	if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
-			inode->i_version != fattr->change_attr)
-		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+	if (!nfs_file_has_writers(nfsi)) {
+		/* Verify a few of the more important attributes */
+		if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && inode->i_version != fattr->change_attr)
+			invalid |= NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE;
 
-	/* Verify a few of the more important attributes */
-	if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
-		invalid |= NFS_INO_INVALID_ATTR;
+		if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
+			invalid |= NFS_INO_INVALID_ATTR;
 
-	if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
-		cur_size = i_size_read(inode);
-		new_isize = nfs_size_to_loff_t(fattr->size);
-		if (cur_size != new_isize)
-			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+		if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec_equal(&inode->i_ctime, &fattr->ctime))
+			invalid |= NFS_INO_INVALID_ATTR;
+
+		if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
+			cur_size = i_size_read(inode);
+			new_isize = nfs_size_to_loff_t(fattr->size);
+			if (cur_size != new_isize)
+				invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+		}
 	}
-	if (nfsi->nrequests != 0)
-		invalid &= ~NFS_INO_REVAL_PAGECACHE;
 
 	/* Have any file permissions changed? */
 	if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
@@ -1526,7 +1550,7 @@ EXPORT_SYMBOL_GPL(nfs_refresh_inode);
 
 static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
 {
-	unsigned long invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+	unsigned long invalid = NFS_INO_INVALID_ATTR;
 
 	/*
 	 * Don't revalidate the pagecache if we hold a delegation, but do
@@ -1675,6 +1699,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 	unsigned long invalid = 0;
 	unsigned long now = jiffies;
 	unsigned long save_cache_validity;
+	bool have_writers = nfs_file_has_writers(nfsi);
 	bool cache_revalidated = true;
 
 	dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n",
@@ -1730,7 +1755,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 			dprintk("NFS: change_attr change on server for file %s/%ld\n",
 					inode->i_sb->s_id, inode->i_ino);
 			/* Could it be a race with writeback? */
-			if (nfsi->nrequests == 0) {
+			if (!have_writers) {
 				invalid |= NFS_INO_INVALID_ATTR
 					| NFS_INO_INVALID_DATA
 					| NFS_INO_INVALID_ACCESS
@@ -1770,9 +1795,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		if (new_isize != cur_isize) {
 			/* Do we perhaps have any outstanding writes, or has
 			 * the file grown beyond our last write? */
-			if ((nfsi->nrequests == 0) || new_isize > cur_isize) {
+			if (nfsi->nrequests == 0 || new_isize > cur_isize) {
 				i_size_write(inode, new_isize);
-				invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+				if (!have_writers)
+					invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
 			}
 			dprintk("NFS: isize change on server for file %s/%ld "
 					"(%Ld to %Ld)\n",
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 04/13] NFS: Kill NFS_INO_NFS_INO_FLUSHING: it is a performance killer
  2016-06-22 20:47   ` [PATCH v3 03/13] NFS: Cache aggressively when file is open for writing Trond Myklebust
@ 2016-06-22 20:47     ` Trond Myklebust
  2016-06-22 20:47       ` [PATCH v3 05/13] NFS: writepage of a single page should not be synchronous Trond Myklebust
  0 siblings, 1 reply; 13+ messages in thread
From: Trond Myklebust @ 2016-06-22 20:47 UTC (permalink / raw)
  To: linux-nfs

filemap_datawrite() and friends already deal just fine with livelock.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/file.c          |  8 --------
 fs/nfs/nfstrace.h      |  1 -
 fs/nfs/write.c         | 11 -----------
 include/linux/nfs_fs.h |  1 -
 4 files changed, 21 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 2d39d9f9da7d..29d7477a62e8 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -360,14 +360,6 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
 
 start:
 	/*
-	 * Prevent starvation issues if someone is doing a consistency
-	 * sync-to-disk
-	 */
-	ret = wait_on_bit_action(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
-				 nfs_wait_bit_killable, TASK_KILLABLE);
-	if (ret)
-		return ret;
-	/*
 	 * Wait for O_DIRECT to complete
 	 */
 	inode_dio_wait(mapping->host);
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 0b9e5cc9a747..fe80a1c26340 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -37,7 +37,6 @@
 			{ 1 << NFS_INO_ADVISE_RDPLUS, "ADVISE_RDPLUS" }, \
 			{ 1 << NFS_INO_STALE, "STALE" }, \
 			{ 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \
-			{ 1 << NFS_INO_FLUSHING, "FLUSHING" }, \
 			{ 1 << NFS_INO_FSCACHE, "FSCACHE" }, \
 			{ 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \
 			{ 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" })
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e1c74d3db64d..980d44f3a84c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -657,16 +657,9 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
 int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
 	struct inode *inode = mapping->host;
-	unsigned long *bitlock = &NFS_I(inode)->flags;
 	struct nfs_pageio_descriptor pgio;
 	int err;
 
-	/* Stop dirtying of new pages while we sync */
-	err = wait_on_bit_lock_action(bitlock, NFS_INO_FLUSHING,
-			nfs_wait_bit_killable, TASK_KILLABLE);
-	if (err)
-		goto out_err;
-
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
 
 	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
@@ -674,10 +667,6 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
 	nfs_pageio_complete(&pgio);
 
-	clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
-	smp_mb__after_atomic();
-	wake_up_bit(bitlock, NFS_INO_FLUSHING);
-
 	if (err < 0)
 		goto out_err;
 	err = pgio.pg_error;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index d71278c3c5bd..120dd04b553c 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -205,7 +205,6 @@ struct nfs_inode {
 #define NFS_INO_STALE		(1)		/* possible stale inode */
 #define NFS_INO_ACL_LRU_SET	(2)		/* Inode is on the LRU list */
 #define NFS_INO_INVALIDATING	(3)		/* inode is being invalidated */
-#define NFS_INO_FLUSHING	(4)		/* inode is flushing out data */
 #define NFS_INO_FSCACHE		(5)		/* inode can be cached by FS-Cache */
 #define NFS_INO_FSCACHE_LOCK	(6)		/* FS-Cache cookie management lock */
 #define NFS_INO_LAYOUTCOMMIT	(9)		/* layoutcommit required */
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 05/13] NFS: writepage of a single page should not be synchronous
  2016-06-22 20:47     ` [PATCH v3 04/13] NFS: Kill NFS_INO_NFS_INO_FLUSHING: it is a performance killer Trond Myklebust
@ 2016-06-22 20:47       ` Trond Myklebust
  2016-06-22 20:47         ` [PATCH v3 06/13] NFS: Don't hold the inode lock across fsync() Trond Myklebust
  0 siblings, 1 reply; 13+ messages in thread
From: Trond Myklebust @ 2016-06-22 20:47 UTC (permalink / raw)
  To: linux-nfs

It is almost always better to wait for more so that we can issue a
bulk commit.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/write.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 980d44f3a84c..b13d48881d3a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -625,7 +625,7 @@ static int nfs_writepage_locked(struct page *page,
 	int err;
 
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
-	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
+	nfs_pageio_init_write(&pgio, inode, 0,
 				false, &nfs_async_write_completion_ops);
 	err = nfs_do_writepage(page, wbc, &pgio, launder);
 	nfs_pageio_complete(&pgio);
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 06/13] NFS: Don't hold the inode lock across fsync()
  2016-06-22 20:47       ` [PATCH v3 05/13] NFS: writepage of a single page should not be synchronous Trond Myklebust
@ 2016-06-22 20:47         ` Trond Myklebust
  2016-06-22 20:47           ` [PATCH v3 07/13] NFS: Don't call COMMIT in ->releasepage() Trond Myklebust
  0 siblings, 1 reply; 13+ messages in thread
From: Trond Myklebust @ 2016-06-22 20:47 UTC (permalink / raw)
  To: linux-nfs

Commits are no longer required to be serialised.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/file.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 29d7477a62e8..249262b6bcbe 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -277,11 +277,9 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 		ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
 		if (ret != 0)
 			break;
-		inode_lock(inode);
 		ret = nfs_file_fsync_commit(file, start, end, datasync);
 		if (!ret)
 			ret = pnfs_sync_inode(inode, !!datasync);
-		inode_unlock(inode);
 		/*
 		 * If nfs_file_fsync_commit detected a server reboot, then
 		 * resend all dirty pages that might have been covered by
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 07/13] NFS: Don't call COMMIT in ->releasepage()
  2016-06-22 20:47         ` [PATCH v3 06/13] NFS: Don't hold the inode lock across fsync() Trond Myklebust
@ 2016-06-22 20:47           ` Trond Myklebust
  2016-06-22 20:47             ` [PATCH v3 08/13] NFS: Fix O_DIRECT verifier problems Trond Myklebust
  0 siblings, 1 reply; 13+ messages in thread
From: Trond Myklebust @ 2016-06-22 20:47 UTC (permalink / raw)
  To: linux-nfs

While COMMIT has the potential to free up a lot of memory that is being
taken by unstable writes, it isn't guaranteed to free up this particular
page. Also, calling fsync() on the server is expensive and so we want to
do it in a more controlled fashion, rather than have it triggered at
random by the VM.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/file.c | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 249262b6bcbe..df4dd8e7e62e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -460,31 +460,8 @@ static void nfs_invalidate_page(struct page *page, unsigned int offset,
  */
 static int nfs_release_page(struct page *page, gfp_t gfp)
 {
-	struct address_space *mapping = page->mapping;
-
 	dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
 
-	/* Always try to initiate a 'commit' if relevant, but only
-	 * wait for it if the caller allows blocking.  Even then,
-	 * only wait 1 second and only if the 'bdi' is not congested.
-	 * Waiting indefinitely can cause deadlocks when the NFS
-	 * server is on this machine, when a new TCP connection is
-	 * needed and in other rare cases.  There is no particular
-	 * need to wait extensively here.  A short wait has the
-	 * benefit that someone else can worry about the freezer.
-	 */
-	if (mapping) {
-		struct nfs_server *nfss = NFS_SERVER(mapping->host);
-		nfs_commit_inode(mapping->host, 0);
-		if (gfpflags_allow_blocking(gfp) &&
-		    !bdi_write_congested(&nfss->backing_dev_info)) {
-			wait_on_page_bit_killable_timeout(page, PG_private,
-							  HZ);
-			if (PagePrivate(page))
-				set_bdi_congested(&nfss->backing_dev_info,
-						  BLK_RW_ASYNC);
-		}
-	}
 	/* If PagePrivate() is set, then the page is not freeable */
 	if (PagePrivate(page))
 		return 0;
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 08/13] NFS: Fix O_DIRECT verifier problems
  2016-06-22 20:47           ` [PATCH v3 07/13] NFS: Don't call COMMIT in ->releasepage() Trond Myklebust
@ 2016-06-22 20:47             ` Trond Myklebust
  2016-06-22 20:47               ` [PATCH v3 09/13] NFS: Ensure we reset the write verifier 'committed' value on resend Trond Myklebust
  0 siblings, 1 reply; 13+ messages in thread
From: Trond Myklebust @ 2016-06-22 20:47 UTC (permalink / raw)
  To: linux-nfs

We should not be interested in looking at the value of the stable field,
since that could take any value.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c   | 10 ++++++++--
 fs/nfs/internal.h |  7 +++++++
 fs/nfs/write.c    |  2 +-
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 979b3c4dee6a..d6d43b5eafb3 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -196,6 +196,12 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
 	WARN_ON_ONCE(verfp->committed < 0);
 }
 
+static int nfs_direct_cmp_verf(const struct nfs_writeverf *v1,
+		const struct nfs_writeverf *v2)
+{
+	return nfs_write_verifier_cmp(&v1->verifier, &v2->verifier);
+}
+
 /*
  * nfs_direct_cmp_hdr_verf - compare verifier for pgio header
  * @dreq - direct request possibly spanning multiple servers
@@ -215,7 +221,7 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
 		nfs_direct_set_hdr_verf(dreq, hdr);
 		return 0;
 	}
-	return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+	return nfs_direct_cmp_verf(verfp, &hdr->verf);
 }
 
 /*
@@ -238,7 +244,7 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
 	if (verfp->committed < 0)
 		return 1;
 
-	return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf));
+	return nfs_direct_cmp_verf(verfp, &data->verf);
 }
 
 /**
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 5154fa65a2f2..150a8eb0f323 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -506,6 +506,13 @@ extern int nfs_migrate_page(struct address_space *,
 #define nfs_migrate_page NULL
 #endif
 
+static inline int
+nfs_write_verifier_cmp(const struct nfs_write_verifier *v1,
+		const struct nfs_write_verifier *v2)
+{
+	return memcmp(v1->data, v2->data, sizeof(v1->data));
+}
+
 /* unlink.c */
 extern struct rpc_task *
 nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b13d48881d3a..3087fb6f1983 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1789,7 +1789,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
 
 		/* Okay, COMMIT succeeded, apparently. Check the verifier
 		 * returned by the server against all stored verfs. */
-		if (!memcmp(&req->wb_verf, &data->verf.verifier, sizeof(req->wb_verf))) {
+		if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) {
 			/* We have a match */
 			nfs_inode_remove_request(req);
 			dprintk(" OK\n");
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 09/13] NFS: Ensure we reset the write verifier 'committed' value on resend.
  2016-06-22 20:47             ` [PATCH v3 08/13] NFS: Fix O_DIRECT verifier problems Trond Myklebust
@ 2016-06-22 20:47               ` Trond Myklebust
  2016-06-22 20:47                 ` [PATCH v3 10/13] NFS: Do not serialise O_DIRECT reads and writes Trond Myklebust
  0 siblings, 1 reply; 13+ messages in thread
From: Trond Myklebust @ 2016-06-22 20:47 UTC (permalink / raw)
  To: linux-nfs

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c   |  2 ++
 fs/nfs/internal.h | 17 +++++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index d6d43b5eafb3..fb659bb50678 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -661,6 +661,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 	nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
 
 	dreq->count = 0;
+	dreq->verf.committed = NFS_INVALID_STABLE_HOW;
+	nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);
 	for (i = 0; i < dreq->mirror_count; i++)
 		dreq->mirrors[i].count = 0;
 	get_dreq(dreq);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 150a8eb0f323..0eb5c924886d 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -499,6 +499,23 @@ int nfs_key_timeout_notify(struct file *filp, struct inode *inode);
 bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx);
 void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio);
 
+#ifdef CONFIG_NFS_V4_1
+static inline
+void nfs_clear_pnfs_ds_commit_verifiers(struct pnfs_ds_commit_info *cinfo)
+{
+	int i;
+
+	for (i = 0; i < cinfo->nbuckets; i++)
+		cinfo->buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW;
+}
+#else
+static inline
+void nfs_clear_pnfs_ds_commit_verifiers(struct pnfs_ds_commit_info *cinfo)
+{
+}
+#endif
+
+
 #ifdef CONFIG_MIGRATION
 extern int nfs_migrate_page(struct address_space *,
 		struct page *, struct page *, enum migrate_mode);
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 10/13] NFS: Do not serialise O_DIRECT reads and writes
  2016-06-22 20:47               ` [PATCH v3 09/13] NFS: Ensure we reset the write verifier 'committed' value on resend Trond Myklebust
@ 2016-06-22 20:47                 ` Trond Myklebust
  2016-06-22 20:47                   ` [PATCH v3 11/13] NFS: Remove racy size manipulations in O_DIRECT Trond Myklebust
  0 siblings, 1 reply; 13+ messages in thread
From: Trond Myklebust @ 2016-06-22 20:47 UTC (permalink / raw)
  To: linux-nfs

Allow dio requests to be scheduled in parallel, but ensuring that they
do not conflict with buffered I/O.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/Makefile        |   2 +-
 fs/nfs/direct.c        |  12 ++---
 fs/nfs/file.c          |  39 ++++++++++-----
 fs/nfs/internal.h      |   8 ++++
 fs/nfs/io.c            | 128 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfs_fs.h |   1 +
 6 files changed, 170 insertions(+), 20 deletions(-)
 create mode 100644 fs/nfs/io.c

diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 8664417955a2..6abdda209642 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_NFS_FS) += nfs.o
 
 CFLAGS_nfstrace.o += -I$(src)
 nfs-y 			:= client.o dir.o file.o getroot.o inode.o super.o \
-			   direct.o pagelist.o read.o symlink.o unlink.o \
+			   io.o direct.o pagelist.o read.o symlink.o unlink.o \
 			   write.o namespace.o mount_clnt.o nfstrace.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o
 nfs-$(CONFIG_SYSCTL)	+= sysctl.o
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index fb659bb50678..2333e9973802 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -587,7 +587,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
 	if (!count)
 		goto out;
 
-	inode_lock(inode);
+	nfs_start_io_direct(inode);
 	result = nfs_sync_mapping(mapping);
 	if (result)
 		goto out_unlock;
@@ -615,7 +615,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
 	NFS_I(inode)->read_io += count;
 	result = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);
 
-	inode_unlock(inode);
+	nfs_end_io_direct(inode);
 
 	if (!result) {
 		result = nfs_direct_wait(dreq);
@@ -629,7 +629,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
 out_release:
 	nfs_direct_req_release(dreq);
 out_unlock:
-	inode_unlock(inode);
+	nfs_end_io_direct(inode);
 out:
 	return result;
 }
@@ -1013,7 +1013,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
 	pos = iocb->ki_pos;
 	end = (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT;
 
-	inode_lock(inode);
+	nfs_start_io_direct(inode);
 
 	result = nfs_sync_mapping(mapping);
 	if (result)
@@ -1053,7 +1053,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
 					      pos >> PAGE_SHIFT, end);
 	}
 
-	inode_unlock(inode);
+	nfs_end_io_direct(inode);
 
 	if (!result) {
 		result = nfs_direct_wait(dreq);
@@ -1076,7 +1076,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
 out_release:
 	nfs_direct_req_release(dreq);
 out_unlock:
-	inode_unlock(inode);
+	nfs_end_io_direct(inode);
 	return result;
 }
 
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index df4dd8e7e62e..b5772e5a5961 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -170,12 +170,14 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
 		iocb->ki_filp,
 		iov_iter_count(to), (unsigned long) iocb->ki_pos);
 
-	result = nfs_revalidate_mapping_protected(inode, iocb->ki_filp->f_mapping);
+	nfs_start_io_read(inode);
+	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
 	if (!result) {
 		result = generic_file_read_iter(iocb, to);
 		if (result > 0)
 			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
 	}
+	nfs_end_io_read(inode);
 	return result;
 }
 EXPORT_SYMBOL_GPL(nfs_file_read);
@@ -191,12 +193,14 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos,
 	dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
 		filp, (unsigned long) count, (unsigned long long) *ppos);
 
-	res = nfs_revalidate_mapping_protected(inode, filp->f_mapping);
+	nfs_start_io_read(inode);
+	res = nfs_revalidate_mapping(inode, filp->f_mapping);
 	if (!res) {
 		res = generic_file_splice_read(filp, ppos, pipe, count, flags);
 		if (res > 0)
 			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res);
 	}
+	nfs_end_io_read(inode);
 	return res;
 }
 EXPORT_SYMBOL_GPL(nfs_file_splice_read);
@@ -639,40 +643,49 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 	dprintk("NFS: write(%pD2, %zu@%Ld)\n",
 		file, count, (long long) iocb->ki_pos);
 
-	result = -EBUSY;
 	if (IS_SWAPFILE(inode))
 		goto out_swapfile;
+
+	nfs_start_io_write(inode);
 	/*
 	 * O_APPEND implies that we must revalidate the file length.
 	 */
 	if (iocb->ki_flags & IOCB_APPEND) {
 		result = nfs_revalidate_file_size(inode, file);
 		if (result)
-			goto out;
+			goto out_unlock;
 	}
 
-	result = count;
-	if (!count)
+	result = generic_write_checks(iocb, from);
+	if (result <= 0)
+		goto out_unlock;
+
+	current->backing_dev_info = inode_to_bdi(inode);
+	result = generic_perform_write(file, from, iocb->ki_pos);
+	current->backing_dev_info = NULL;
+	nfs_end_io_write(inode);
+	if (result <= 0)
 		goto out;
 
-	result = generic_file_write_iter(iocb, from);
-	if (result > 0)
-		written = result;
+	written = generic_write_sync(iocb, result);
+	iocb->ki_pos += written;
 
 	/* Return error values */
-	if (result >= 0 && nfs_need_check_write(file, inode)) {
+	if (nfs_need_check_write(file, inode)) {
 		int err = vfs_fsync(file, 0);
 		if (err < 0)
 			result = err;
 	}
-	if (result > 0)
-		nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
+	nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
 out:
 	return result;
+out_unlock:
+	nfs_end_io_write(inode);
+	return result;
 
 out_swapfile:
 	printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
-	goto out;
+	return -EBUSY;
 }
 EXPORT_SYMBOL_GPL(nfs_file_write);
 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 0eb5c924886d..159b64ede82a 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -411,6 +411,14 @@ extern void __exit unregister_nfs_fs(void);
 extern bool nfs_sb_active(struct super_block *sb);
 extern void nfs_sb_deactive(struct super_block *sb);
 
+/* io.c */
+extern void nfs_start_io_read(struct inode *inode);
+extern void nfs_end_io_read(struct inode *inode);
+extern void nfs_start_io_write(struct inode *inode);
+extern void nfs_end_io_write(struct inode *inode);
+extern void nfs_start_io_direct(struct inode *inode);
+extern void nfs_end_io_direct(struct inode *inode);
+
 /* namespace.c */
 #define NFS_PATH_CANONICAL 1
 extern char *nfs_path(char **p, struct dentry *dentry,
diff --git a/fs/nfs/io.c b/fs/nfs/io.c
new file mode 100644
index 000000000000..013ccb67c106
--- /dev/null
+++ b/fs/nfs/io.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2016 Trond Myklebust
+ *
+ * I/O and data path helper functionality.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/rwsem.h>
+#include <linux/fs.h>
+#include <linux/nfs_fs.h>
+
+#include "internal.h"
+
+/**
+ * nfs_start_io_read - declare the file is being used for buffered reads
+ * @inode - file inode
+ *
+ * Declare that a buffered read operation is about to start, and ensure
+ * that we block all direct I/O.
+ * On exit, the function ensures that the NFS_INO_ODIRECT flag is unset,
+ * and holds a shared lock on inode->i_rwsem to ensure that the flag
+ * cannot be changed.
+ * In practice, this means that buffered read operations are allowed to
+ * execute in parallel, thanks to the shared lock, whereas direct I/O
+ * operations need to wait to grab an exclusive lock in order to set
+ * NFS_INO_ODIRECT.
+ * Note that buffered writes and truncates both take a write lock on
+ * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
+ */
+void
+nfs_start_io_read(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	/* Be an optimist! */
+	down_read(&inode->i_rwsem);
+	if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0)
+		return;
+	up_read(&inode->i_rwsem);
+	/* Slow path.... */
+	down_write(&inode->i_rwsem);
+	clear_bit(NFS_INO_ODIRECT, &nfsi->flags);
+	downgrade_write(&inode->i_rwsem);
+}
+
+/**
+ * nfs_end_io_read - declare that the buffered read operation is done
+ * @inode - file inode
+ *
+ * Declare that a buffered read operation is done, and release the shared
+ * lock on inode->i_rwsem.
+ */
+void
+nfs_end_io_read(struct inode *inode)
+{
+	up_read(&inode->i_rwsem);
+}
+
+/**
+ * nfs_start_io_write - declare the file is being used for buffered writes
+ * @inode - file inode
+ *
+ * Declare that a buffered read operation is about to start, and ensure
+ * that we block all direct I/O.
+ */
+void
+nfs_start_io_write(struct inode *inode)
+{
+	down_write(&inode->i_rwsem);
+}
+
+/**
+ * nfs_end_io_write - declare that the buffered write operation is done
+ * @inode - file inode
+ *
+ * Declare that a buffered write operation is done, and release the
+ * lock on inode->i_rwsem.
+ */
+void
+nfs_end_io_write(struct inode *inode)
+{
+	up_write(&inode->i_rwsem);
+}
+
+/**
+ * nfs_end_io_direct - declare the file is being used for direct i/o
+ * @inode - file inode
+ *
+ * Declare that a direct I/O operation is about to start, and ensure
+ * that we block all buffered I/O.
+ * On exit, the function ensures that the NFS_INO_ODIRECT flag is set,
+ * and holds a shared lock on inode->i_rwsem to ensure that the flag
+ * cannot be changed.
+ * In practice, this means that direct I/O operations are allowed to
+ * execute in parallel, thanks to the shared lock, whereas buffered I/O
+ * operations need to wait to grab an exclusive lock in order to clear
+ * NFS_INO_ODIRECT.
+ * Note that buffered writes and truncates both take a write lock on
+ * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
+ */
+void
+nfs_start_io_direct(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	/* Be an optimist! */
+	down_read(&inode->i_rwsem);
+	if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) != 0)
+		return;
+	up_read(&inode->i_rwsem);
+	/* Slow path.... */
+	down_write(&inode->i_rwsem);
+	set_bit(NFS_INO_ODIRECT, &nfsi->flags);
+	downgrade_write(&inode->i_rwsem);
+}
+
+/**
+ * nfs_end_io_direct - declare that the direct i/o operation is done
+ * @inode - file inode
+ *
+ * Declare that a direct I/O operation is done, and release the shared
+ * lock on inode->i_rwsem.
+ */
+void
+nfs_end_io_direct(struct inode *inode)
+{
+	up_read(&inode->i_rwsem);
+}
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 120dd04b553c..225d17d35277 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -210,6 +210,7 @@ struct nfs_inode {
 #define NFS_INO_LAYOUTCOMMIT	(9)		/* layoutcommit required */
 #define NFS_INO_LAYOUTCOMMITTING (10)		/* layoutcommit inflight */
 #define NFS_INO_LAYOUTSTATS	(11)		/* layoutstats inflight */
+#define NFS_INO_ODIRECT		(12)		/* I/O setting is O_DIRECT */
 
 static inline struct nfs_inode *NFS_I(const struct inode *inode)
 {
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 11/13] NFS: Remove racy size manipulations in O_DIRECT
  2016-06-22 20:47                 ` [PATCH v3 10/13] NFS: Do not serialise O_DIRECT reads and writes Trond Myklebust
@ 2016-06-22 20:47                   ` Trond Myklebust
  2016-06-22 20:47                     ` [PATCH v3 12/13] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code Trond Myklebust
  0 siblings, 1 reply; 13+ messages in thread
From: Trond Myklebust @ 2016-06-22 20:47 UTC (permalink / raw)
  To: linux-nfs

On success, the RPC callbacks will ensure that we make the appropriate calls
to nfs_writeback_update_inode()

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 2333e9973802..d96a4b7aa5c0 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -376,15 +376,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
 {
 	struct inode *inode = dreq->inode;
 
-	if (dreq->iocb && write) {
-		loff_t pos = dreq->iocb->ki_pos + dreq->count;
-
-		spin_lock(&inode->i_lock);
-		if (i_size_read(inode) < pos)
-			i_size_write(inode, pos);
-		spin_unlock(&inode->i_lock);
-	}
-
 	if (write)
 		nfs_zap_mapping(inode, inode->i_mapping);
 
@@ -1058,14 +1049,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
 	if (!result) {
 		result = nfs_direct_wait(dreq);
 		if (result > 0) {
-			struct inode *inode = mapping->host;
-
 			iocb->ki_pos = pos + result;
-			spin_lock(&inode->i_lock);
-			if (i_size_read(inode) < iocb->ki_pos)
-				i_size_write(inode, iocb->ki_pos);
-			spin_unlock(&inode->i_lock);
-
 			/* XXX: should check the generic_write_sync retval */
 			generic_write_sync(iocb, result);
 		}
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 12/13] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code
  2016-06-22 20:47                   ` [PATCH v3 11/13] NFS: Remove racy size manipulations in O_DIRECT Trond Myklebust
@ 2016-06-22 20:47                     ` Trond Myklebust
  2016-06-22 20:47                       ` [PATCH v3 13/13] NFS: Remove unused function nfs_revalidate_mapping_protected() Trond Myklebust
  0 siblings, 1 reply; 13+ messages in thread
From: Trond Myklebust @ 2016-06-22 20:47 UTC (permalink / raw)
  To: linux-nfs

Now that we can serialise O_DIRECT and write/truncate using the
inode->i_rwsem, we no longer need inode->i_dio_count.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c | 22 +++++++---------------
 fs/nfs/file.c   |  2 --
 fs/nfs/inode.c  |  1 -
 3 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index d96a4b7aa5c0..742cc34c3456 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -372,15 +372,8 @@ out:
  * Synchronous I/O uses a stack-allocated iocb.  Thus we can't trust
  * the iocb is still valid here if this is a synchronous request.
  */
-static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
+static void nfs_direct_complete(struct nfs_direct_req *dreq)
 {
-	struct inode *inode = dreq->inode;
-
-	if (write)
-		nfs_zap_mapping(inode, inode->i_mapping);
-
-	inode_dio_end(inode);
-
 	if (dreq->iocb) {
 		long res = (long) dreq->error;
 		if (!res)
@@ -431,7 +424,7 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
 	}
 out_put:
 	if (put_dreq(dreq))
-		nfs_direct_complete(dreq, false);
+		nfs_direct_complete(dreq);
 	hdr->release(hdr);
 }
 
@@ -479,7 +472,6 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 			     &nfs_direct_read_completion_ops);
 	get_dreq(dreq);
 	desc.pg_dreq = dreq;
-	inode_dio_begin(inode);
 
 	while (iov_iter_count(iter)) {
 		struct page **pagevec;
@@ -531,13 +523,12 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 	 * generic layer handle the completion.
 	 */
 	if (requested_bytes == 0) {
-		inode_dio_end(inode);
 		nfs_direct_req_release(dreq);
 		return result < 0 ? result : -EIO;
 	}
 
 	if (put_dreq(dreq))
-		nfs_direct_complete(dreq, false);
+		nfs_direct_complete(dreq);
 	return 0;
 }
 
@@ -761,6 +752,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 static void nfs_direct_write_schedule_work(struct work_struct *work)
 {
 	struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work);
+	struct inode *inode = dreq->inode;
 	int flags = dreq->flags;
 
 	dreq->flags = 0;
@@ -772,7 +764,9 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
 			nfs_direct_write_reschedule(dreq);
 			break;
 		default:
-			nfs_direct_complete(dreq, true);
+			nfs_zap_mapping(inode, inode->i_mapping);
+
+			nfs_direct_complete(dreq);
 	}
 }
 
@@ -894,7 +888,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 			      &nfs_direct_write_completion_ops);
 	desc.pg_dreq = dreq;
 	get_dreq(dreq);
-	inode_dio_begin(inode);
 
 	NFS_I(inode)->write_io += iov_iter_count(iter);
 	while (iov_iter_count(iter)) {
@@ -955,7 +948,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 	 * generic layer handle the completion.
 	 */
 	if (requested_bytes == 0) {
-		inode_dio_end(inode);
 		nfs_direct_req_release(dreq);
 		return result < 0 ? result : -EIO;
 	}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index b5772e5a5961..8581e348c09b 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -276,7 +276,6 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 
 	trace_nfs_fsync_enter(inode);
 
-	inode_dio_wait(inode);
 	do {
 		ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
 		if (ret != 0)
@@ -364,7 +363,6 @@ start:
 	/*
 	 * Wait for O_DIRECT to complete
 	 */
-	inode_dio_wait(mapping->host);
 
 	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 4e65a5a8a01b..9521fa6154c8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -141,7 +141,6 @@ void nfs_evict_inode(struct inode *inode)
 
 int nfs_sync_inode(struct inode *inode)
 {
-	inode_dio_wait(inode);
 	return nfs_wb_all(inode);
 }
 EXPORT_SYMBOL_GPL(nfs_sync_inode);
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v3 13/13] NFS: Remove unused function nfs_revalidate_mapping_protected()
  2016-06-22 20:47                     ` [PATCH v3 12/13] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code Trond Myklebust
@ 2016-06-22 20:47                       ` Trond Myklebust
  0 siblings, 0 replies; 13+ messages in thread
From: Trond Myklebust @ 2016-06-22 20:47 UTC (permalink / raw)
  To: linux-nfs

Clean up...

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/inode.c         | 38 ++++----------------------------------
 include/linux/nfs_fs.h |  1 -
 2 files changed, 4 insertions(+), 35 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 9521fa6154c8..3f683ae85d43 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1123,14 +1123,12 @@ out:
 }
 
 /**
- * __nfs_revalidate_mapping - Revalidate the pagecache
+ * nfs_revalidate_mapping - Revalidate the pagecache
  * @inode - pointer to host inode
  * @mapping - pointer to mapping
- * @may_lock - take inode->i_mutex?
  */
-static int __nfs_revalidate_mapping(struct inode *inode,
-		struct address_space *mapping,
-		bool may_lock)
+int nfs_revalidate_mapping(struct inode *inode,
+		struct address_space *mapping)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	unsigned long *bitlock = &nfsi->flags;
@@ -1179,12 +1177,7 @@ static int __nfs_revalidate_mapping(struct inode *inode,
 	nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
 	spin_unlock(&inode->i_lock);
 	trace_nfs_invalidate_mapping_enter(inode);
-	if (may_lock) {
-		inode_lock(inode);
-		ret = nfs_invalidate_mapping(inode, mapping);
-		inode_unlock(inode);
-	} else
-		ret = nfs_invalidate_mapping(inode, mapping);
+	ret = nfs_invalidate_mapping(inode, mapping);
 	trace_nfs_invalidate_mapping_exit(inode, ret);
 
 	clear_bit_unlock(NFS_INO_INVALIDATING, bitlock);
@@ -1194,29 +1187,6 @@ out:
 	return ret;
 }
 
-/**
- * nfs_revalidate_mapping - Revalidate the pagecache
- * @inode - pointer to host inode
- * @mapping - pointer to mapping
- */
-int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
-{
-	return __nfs_revalidate_mapping(inode, mapping, false);
-}
-
-/**
- * nfs_revalidate_mapping_protected - Revalidate the pagecache
- * @inode - pointer to host inode
- * @mapping - pointer to mapping
- *
- * Differs from nfs_revalidate_mapping() in that it grabs the inode->i_mutex
- * while invalidating the mapping.
- */
-int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping)
-{
-	return __nfs_revalidate_mapping(inode, mapping, true);
-}
-
 static bool nfs_file_has_writers(struct nfs_inode *nfsi)
 {
 	struct inode *inode = &nfsi->vfs_inode;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 225d17d35277..810124b33327 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -351,7 +351,6 @@ extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *ino
 extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
 extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
 extern int nfs_revalidate_mapping_rcu(struct inode *inode);
-extern int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping);
 extern int nfs_setattr(struct dentry *, struct iattr *);
 extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *);
 extern void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2016-06-22 20:48 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-22 20:47 [PATCH v3 01/13] NFS: Don't flush caches for a getattr that races with writeback Trond Myklebust
2016-06-22 20:47 ` [PATCH v3 02/13] NFS: Cache access checks more aggressively Trond Myklebust
2016-06-22 20:47   ` [PATCH v3 03/13] NFS: Cache aggressively when file is open for writing Trond Myklebust
2016-06-22 20:47     ` [PATCH v3 04/13] NFS: Kill NFS_INO_NFS_INO_FLUSHING: it is a performance killer Trond Myklebust
2016-06-22 20:47       ` [PATCH v3 05/13] NFS: writepage of a single page should not be synchronous Trond Myklebust
2016-06-22 20:47         ` [PATCH v3 06/13] NFS: Don't hold the inode lock across fsync() Trond Myklebust
2016-06-22 20:47           ` [PATCH v3 07/13] NFS: Don't call COMMIT in ->releasepage() Trond Myklebust
2016-06-22 20:47             ` [PATCH v3 08/13] NFS: Fix O_DIRECT verifier problems Trond Myklebust
2016-06-22 20:47               ` [PATCH v3 09/13] NFS: Ensure we reset the write verifier 'committed' value on resend Trond Myklebust
2016-06-22 20:47                 ` [PATCH v3 10/13] NFS: Do not serialise O_DIRECT reads and writes Trond Myklebust
2016-06-22 20:47                   ` [PATCH v3 11/13] NFS: Remove racy size manipulations in O_DIRECT Trond Myklebust
2016-06-22 20:47                     ` [PATCH v3 12/13] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code Trond Myklebust
2016-06-22 20:47                       ` [PATCH v3 13/13] NFS: Remove unused function nfs_revalidate_mapping_protected() Trond Myklebust

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.