All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 01/12] NFS: Don't flush caches for a getattr that races with writeback
@ 2016-06-21 21:34 Trond Myklebust
  2016-06-21 21:34 ` [PATCH v2 02/12] NFS: Cache access checks more aggressively Trond Myklebust
  2016-06-22 15:48 ` [PATCH v2 01/12] NFS: Don't flush caches for a getattr that races with writeback Christoph Hellwig
  0 siblings, 2 replies; 29+ messages in thread
From: Trond Myklebust @ 2016-06-21 21:34 UTC (permalink / raw)
  To: linux-nfs

If there were outstanding writes then chalk up the unexpected change
attribute on the server to them.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/inode.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 52e7d6869e3b..60051e62d3f1 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1729,12 +1729,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		if (inode->i_version != fattr->change_attr) {
 			dprintk("NFS: change_attr change on server for file %s/%ld\n",
 					inode->i_sb->s_id, inode->i_ino);
-			invalid |= NFS_INO_INVALID_ATTR
-				| NFS_INO_INVALID_DATA
-				| NFS_INO_INVALID_ACCESS
-				| NFS_INO_INVALID_ACL;
-			if (S_ISDIR(inode->i_mode))
-				nfs_force_lookup_revalidate(inode);
+			/* Could it be a race with writeback? */
+			if (nfsi->nrequests == 0) {
+				invalid |= NFS_INO_INVALID_ATTR
+					| NFS_INO_INVALID_DATA
+					| NFS_INO_INVALID_ACCESS
+					| NFS_INO_INVALID_ACL;
+				if (S_ISDIR(inode->i_mode))
+					nfs_force_lookup_revalidate(inode);
+			}
 			inode->i_version = fattr->change_attr;
 		}
 	} else {
-- 
2.5.5


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 02/12] NFS: Cache access checks more aggressively
  2016-06-21 21:34 [PATCH v2 01/12] NFS: Don't flush caches for a getattr that races with writeback Trond Myklebust
@ 2016-06-21 21:34 ` Trond Myklebust
  2016-06-21 21:34   ` [PATCH v2 03/12] NFS: Cache aggressively when file is open for writing Trond Myklebust
  2016-06-22 15:48 ` [PATCH v2 01/12] NFS: Don't flush caches for a getattr that races with writeback Christoph Hellwig
  1 sibling, 1 reply; 29+ messages in thread
From: Trond Myklebust @ 2016-06-21 21:34 UTC (permalink / raw)
  To: linux-nfs

If an attribute revalidation fails, then we already know that we'll
zap the access cache. If, OTOH, the inode isn't changing, there should
be no need to eject access calls just because they are old.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/dir.c | 52 +++++++++++++++++++++++++++++++---------------------
 1 file changed, 31 insertions(+), 21 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index aaf7bd0cbae2..210b33636fe4 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2228,21 +2228,37 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, st
 	return NULL;
 }
 
-static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res, bool may_block)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_access_entry *cache;
-	int err = -ENOENT;
+	bool retry = true;
+	int err;
 
 	spin_lock(&inode->i_lock);
-	if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
-		goto out_zap;
-	cache = nfs_access_search_rbtree(inode, cred);
-	if (cache == NULL)
-		goto out;
-	if (!nfs_have_delegated_attributes(inode) &&
-	    !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
-		goto out_stale;
+	for(;;) {
+		if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
+			goto out_zap;
+		cache = nfs_access_search_rbtree(inode, cred);
+		err = -ENOENT;
+		if (cache == NULL)
+			goto out;
+		/* Found an entry, is our attribute cache valid? */
+		if (!nfs_attribute_cache_expired(inode) &&
+		    !(nfsi->cache_validity & NFS_INO_INVALID_ATTR))
+			break;
+		err = -ECHILD;
+		if (!may_block)
+			goto out;
+		if (!retry)
+			goto out_zap;
+		spin_unlock(&inode->i_lock);
+		err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+		if (err)
+			return err;
+		spin_lock(&inode->i_lock);
+		retry = false;
+	}
 	res->jiffies = cache->jiffies;
 	res->cred = cache->cred;
 	res->mask = cache->mask;
@@ -2251,12 +2267,6 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str
 out:
 	spin_unlock(&inode->i_lock);
 	return err;
-out_stale:
-	rb_erase(&cache->rb_node, &nfsi->access_cache);
-	list_del(&cache->lru);
-	spin_unlock(&inode->i_lock);
-	nfs_access_free_entry(cache);
-	return -ENOENT;
 out_zap:
 	spin_unlock(&inode->i_lock);
 	nfs_access_zap_cache(inode);
@@ -2283,13 +2293,12 @@ static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred,
 		cache = NULL;
 	if (cache == NULL)
 		goto out;
-	if (!nfs_have_delegated_attributes(inode) &&
-	    !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
+	err = nfs_revalidate_inode_rcu(NFS_SERVER(inode), inode);
+	if (err)
 		goto out;
 	res->jiffies = cache->jiffies;
 	res->cred = cache->cred;
 	res->mask = cache->mask;
-	err = 0;
 out:
 	rcu_read_unlock();
 	return err;
@@ -2378,18 +2387,19 @@ EXPORT_SYMBOL_GPL(nfs_access_set_mask);
 static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
 {
 	struct nfs_access_entry cache;
+	bool may_block = (mask & MAY_NOT_BLOCK) == 0;
 	int status;
 
 	trace_nfs_access_enter(inode);
 
 	status = nfs_access_get_cached_rcu(inode, cred, &cache);
 	if (status != 0)
-		status = nfs_access_get_cached(inode, cred, &cache);
+		status = nfs_access_get_cached(inode, cred, &cache, may_block);
 	if (status == 0)
 		goto out_cached;
 
 	status = -ECHILD;
-	if (mask & MAY_NOT_BLOCK)
+	if (!may_block)
 		goto out;
 
 	/* Be clever: ask server to check for all possible rights */
-- 
2.5.5


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 03/12] NFS: Cache aggressively when file is open for writing
  2016-06-21 21:34 ` [PATCH v2 02/12] NFS: Cache access checks more aggressively Trond Myklebust
@ 2016-06-21 21:34   ` Trond Myklebust
  2016-06-21 21:34     ` [PATCH v2 04/12] NFS: Kill NFS_INO_NFS_INO_FLUSHING: it is a performance killer Trond Myklebust
  2016-06-21 22:25     ` [PATCH v2 03/12] NFS: Cache aggressively when file is open for writing Oleg Drokin
  0 siblings, 2 replies; 29+ messages in thread
From: Trond Myklebust @ 2016-06-21 21:34 UTC (permalink / raw)
  To: linux-nfs

Unless the user is using file locking, we must assume close-to-open
cache consistency when the file is open for writing. Adjust the
caching algorithm so that it does not clear the cache on out-of-order
writes and/or attribute revalidations.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/file.c  | 13 ++-----------
 fs/nfs/inode.c | 58 ++++++++++++++++++++++++++++++++++++++++------------------
 2 files changed, 42 insertions(+), 29 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 717a8d6af52d..2d39d9f9da7d 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -780,11 +780,6 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 }
 
 static int
-is_time_granular(struct timespec *ts) {
-	return ((ts->tv_sec == 0) && (ts->tv_nsec <= 1000));
-}
-
-static int
 do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 {
 	struct inode *inode = filp->f_mapping->host;
@@ -817,12 +812,8 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 	 * This makes locking act as a cache coherency point.
 	 */
 	nfs_sync_mapping(filp->f_mapping);
-	if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) {
-		if (is_time_granular(&NFS_SERVER(inode)->time_delta))
-			__nfs_revalidate_inode(NFS_SERVER(inode), inode);
-		else
-			nfs_zap_caches(inode);
-	}
+	if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+		nfs_zap_mapping(inode, filp->f_mapping);
 out:
 	return status;
 }
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 60051e62d3f1..bd0390da3344 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -878,7 +878,10 @@ void nfs_inode_attach_open_context(struct nfs_open_context *ctx)
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	spin_lock(&inode->i_lock);
-	list_add(&ctx->list, &nfsi->open_files);
+	if (ctx->mode & FMODE_WRITE)
+		list_add(&ctx->list, &nfsi->open_files);
+	else
+		list_add_tail(&ctx->list, &nfsi->open_files);
 	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context);
@@ -1215,6 +1218,21 @@ int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *
 	return __nfs_revalidate_mapping(inode, mapping, true);
 }
 
+static bool nfs_file_has_writers(struct nfs_inode *nfsi)
+{
+	assert_spin_locked(&nfsi->vfs_inode.i_lock);
+
+	if (list_empty(&nfsi->open_files))
+		return false;
+	/* Note: This relies on nfsi->open_files being ordered with writers
+	 *       being placed at the head of the list.
+	 *       See nfs_inode_attach_open_context()
+	 */
+	return (list_first_entry(&nfsi->open_files,
+			struct nfs_open_context,
+			list)->mode & FMODE_WRITE) == FMODE_WRITE;
+}
+
 static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
@@ -1279,22 +1297,24 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 	if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
 		return -EIO;
 
-	if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
-			inode->i_version != fattr->change_attr)
-		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+	if (!nfs_file_has_writers(nfsi)) {
+		/* Verify a few of the more important attributes */
+		if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && inode->i_version != fattr->change_attr)
+			invalid |= NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE;
 
-	/* Verify a few of the more important attributes */
-	if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
-		invalid |= NFS_INO_INVALID_ATTR;
+		if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
+			invalid |= NFS_INO_INVALID_ATTR;
 
-	if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
-		cur_size = i_size_read(inode);
-		new_isize = nfs_size_to_loff_t(fattr->size);
-		if (cur_size != new_isize)
-			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+		if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec_equal(&inode->i_ctime, &fattr->ctime))
+			invalid |= NFS_INO_INVALID_ATTR;
+
+		if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
+			cur_size = i_size_read(inode);
+			new_isize = nfs_size_to_loff_t(fattr->size);
+			if (cur_size != new_isize)
+				invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+		}
 	}
-	if (nfsi->nrequests != 0)
-		invalid &= ~NFS_INO_REVAL_PAGECACHE;
 
 	/* Have any file permissions changed? */
 	if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
@@ -1526,7 +1546,7 @@ EXPORT_SYMBOL_GPL(nfs_refresh_inode);
 
 static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
 {
-	unsigned long invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+	unsigned long invalid = NFS_INO_INVALID_ATTR;
 
 	/*
 	 * Don't revalidate the pagecache if we hold a delegation, but do
@@ -1675,6 +1695,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 	unsigned long invalid = 0;
 	unsigned long now = jiffies;
 	unsigned long save_cache_validity;
+	bool have_writers = nfs_file_has_writers(nfsi);
 	bool cache_revalidated = true;
 
 	dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n",
@@ -1730,7 +1751,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 			dprintk("NFS: change_attr change on server for file %s/%ld\n",
 					inode->i_sb->s_id, inode->i_ino);
 			/* Could it be a race with writeback? */
-			if (nfsi->nrequests == 0) {
+			if (!have_writers) {
 				invalid |= NFS_INO_INVALID_ATTR
 					| NFS_INO_INVALID_DATA
 					| NFS_INO_INVALID_ACCESS
@@ -1770,9 +1791,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		if (new_isize != cur_isize) {
 			/* Do we perhaps have any outstanding writes, or has
 			 * the file grown beyond our last write? */
-			if ((nfsi->nrequests == 0) || new_isize > cur_isize) {
+			if (nfsi->nrequests == 0 || new_isize > cur_isize) {
 				i_size_write(inode, new_isize);
-				invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+				if (!have_writers)
+					invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
 			}
 			dprintk("NFS: isize change on server for file %s/%ld "
 					"(%Ld to %Ld)\n",
-- 
2.5.5


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 04/12] NFS: Kill NFS_INO_NFS_INO_FLUSHING: it is a performance killer
  2016-06-21 21:34   ` [PATCH v2 03/12] NFS: Cache aggressively when file is open for writing Trond Myklebust
@ 2016-06-21 21:34     ` Trond Myklebust
  2016-06-21 21:34       ` [PATCH v2 05/12] NFS: writepage of a single page should not be synchronous Trond Myklebust
  2016-06-21 22:25     ` [PATCH v2 03/12] NFS: Cache aggressively when file is open for writing Oleg Drokin
  1 sibling, 1 reply; 29+ messages in thread
From: Trond Myklebust @ 2016-06-21 21:34 UTC (permalink / raw)
  To: linux-nfs

filemap_datawrite() and friends already deal just fine with livelock.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/file.c          |  8 --------
 fs/nfs/nfstrace.h      |  1 -
 fs/nfs/write.c         | 11 -----------
 include/linux/nfs_fs.h |  1 -
 4 files changed, 21 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 2d39d9f9da7d..29d7477a62e8 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -360,14 +360,6 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
 
 start:
 	/*
-	 * Prevent starvation issues if someone is doing a consistency
-	 * sync-to-disk
-	 */
-	ret = wait_on_bit_action(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
-				 nfs_wait_bit_killable, TASK_KILLABLE);
-	if (ret)
-		return ret;
-	/*
 	 * Wait for O_DIRECT to complete
 	 */
 	inode_dio_wait(mapping->host);
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 0b9e5cc9a747..fe80a1c26340 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -37,7 +37,6 @@
 			{ 1 << NFS_INO_ADVISE_RDPLUS, "ADVISE_RDPLUS" }, \
 			{ 1 << NFS_INO_STALE, "STALE" }, \
 			{ 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \
-			{ 1 << NFS_INO_FLUSHING, "FLUSHING" }, \
 			{ 1 << NFS_INO_FSCACHE, "FSCACHE" }, \
 			{ 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \
 			{ 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" })
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e1c74d3db64d..980d44f3a84c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -657,16 +657,9 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
 int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
 	struct inode *inode = mapping->host;
-	unsigned long *bitlock = &NFS_I(inode)->flags;
 	struct nfs_pageio_descriptor pgio;
 	int err;
 
-	/* Stop dirtying of new pages while we sync */
-	err = wait_on_bit_lock_action(bitlock, NFS_INO_FLUSHING,
-			nfs_wait_bit_killable, TASK_KILLABLE);
-	if (err)
-		goto out_err;
-
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
 
 	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
@@ -674,10 +667,6 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
 	nfs_pageio_complete(&pgio);
 
-	clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
-	smp_mb__after_atomic();
-	wake_up_bit(bitlock, NFS_INO_FLUSHING);
-
 	if (err < 0)
 		goto out_err;
 	err = pgio.pg_error;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index d71278c3c5bd..120dd04b553c 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -205,7 +205,6 @@ struct nfs_inode {
 #define NFS_INO_STALE		(1)		/* possible stale inode */
 #define NFS_INO_ACL_LRU_SET	(2)		/* Inode is on the LRU list */
 #define NFS_INO_INVALIDATING	(3)		/* inode is being invalidated */
-#define NFS_INO_FLUSHING	(4)		/* inode is flushing out data */
 #define NFS_INO_FSCACHE		(5)		/* inode can be cached by FS-Cache */
 #define NFS_INO_FSCACHE_LOCK	(6)		/* FS-Cache cookie management lock */
 #define NFS_INO_LAYOUTCOMMIT	(9)		/* layoutcommit required */
-- 
2.5.5


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 05/12] NFS: writepage of a single page should not be synchronous
  2016-06-21 21:34     ` [PATCH v2 04/12] NFS: Kill NFS_INO_NFS_INO_FLUSHING: it is a performance killer Trond Myklebust
@ 2016-06-21 21:34       ` Trond Myklebust
  2016-06-21 21:34         ` [PATCH v2 06/12] NFS: Don't hold the inode lock across fsync() Trond Myklebust
  0 siblings, 1 reply; 29+ messages in thread
From: Trond Myklebust @ 2016-06-21 21:34 UTC (permalink / raw)
  To: linux-nfs

It is almost always better to wait for more so that we can issue a
bulk commit.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/write.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 980d44f3a84c..b13d48881d3a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -625,7 +625,7 @@ static int nfs_writepage_locked(struct page *page,
 	int err;
 
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
-	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
+	nfs_pageio_init_write(&pgio, inode, 0,
 				false, &nfs_async_write_completion_ops);
 	err = nfs_do_writepage(page, wbc, &pgio, launder);
 	nfs_pageio_complete(&pgio);
-- 
2.5.5


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 06/12] NFS: Don't hold the inode lock across fsync()
  2016-06-21 21:34       ` [PATCH v2 05/12] NFS: writepage of a single page should not be synchronous Trond Myklebust
@ 2016-06-21 21:34         ` Trond Myklebust
  2016-06-21 21:34           ` [PATCH v2 07/12] NFS: Don't call COMMIT in ->releasepage() Trond Myklebust
  0 siblings, 1 reply; 29+ messages in thread
From: Trond Myklebust @ 2016-06-21 21:34 UTC (permalink / raw)
  To: linux-nfs

Commits are no longer required to be serialised.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/file.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 29d7477a62e8..249262b6bcbe 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -277,11 +277,9 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 		ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
 		if (ret != 0)
 			break;
-		inode_lock(inode);
 		ret = nfs_file_fsync_commit(file, start, end, datasync);
 		if (!ret)
 			ret = pnfs_sync_inode(inode, !!datasync);
-		inode_unlock(inode);
 		/*
 		 * If nfs_file_fsync_commit detected a server reboot, then
 		 * resend all dirty pages that might have been covered by
-- 
2.5.5


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 07/12] NFS: Don't call COMMIT in ->releasepage()
  2016-06-21 21:34         ` [PATCH v2 06/12] NFS: Don't hold the inode lock across fsync() Trond Myklebust
@ 2016-06-21 21:34           ` Trond Myklebust
  2016-06-21 21:34             ` [PATCH v2 08/12] NFS: Fix O_DIRECT verifier problems Trond Myklebust
  0 siblings, 1 reply; 29+ messages in thread
From: Trond Myklebust @ 2016-06-21 21:34 UTC (permalink / raw)
  To: linux-nfs

While COMMIT has the potential to free up a lot of memory that is being
taken by unstable writes, it isn't guaranteed to free up this particular
page. Also, calling fsync() on the server is expensive and so we want to
do it in a more controlled fashion, rather than have it triggered at
random by the VM.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/file.c | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 249262b6bcbe..df4dd8e7e62e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -460,31 +460,8 @@ static void nfs_invalidate_page(struct page *page, unsigned int offset,
  */
 static int nfs_release_page(struct page *page, gfp_t gfp)
 {
-	struct address_space *mapping = page->mapping;
-
 	dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
 
-	/* Always try to initiate a 'commit' if relevant, but only
-	 * wait for it if the caller allows blocking.  Even then,
-	 * only wait 1 second and only if the 'bdi' is not congested.
-	 * Waiting indefinitely can cause deadlocks when the NFS
-	 * server is on this machine, when a new TCP connection is
-	 * needed and in other rare cases.  There is no particular
-	 * need to wait extensively here.  A short wait has the
-	 * benefit that someone else can worry about the freezer.
-	 */
-	if (mapping) {
-		struct nfs_server *nfss = NFS_SERVER(mapping->host);
-		nfs_commit_inode(mapping->host, 0);
-		if (gfpflags_allow_blocking(gfp) &&
-		    !bdi_write_congested(&nfss->backing_dev_info)) {
-			wait_on_page_bit_killable_timeout(page, PG_private,
-							  HZ);
-			if (PagePrivate(page))
-				set_bdi_congested(&nfss->backing_dev_info,
-						  BLK_RW_ASYNC);
-		}
-	}
 	/* If PagePrivate() is set, then the page is not freeable */
 	if (PagePrivate(page))
 		return 0;
-- 
2.5.5


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 08/12] NFS: Fix O_DIRECT verifier problems
  2016-06-21 21:34           ` [PATCH v2 07/12] NFS: Don't call COMMIT in ->releasepage() Trond Myklebust
@ 2016-06-21 21:34             ` Trond Myklebust
  2016-06-21 21:34               ` [PATCH v2 09/12] NFS: Ensure we reset the write verifier 'committed' value on resend Trond Myklebust
  0 siblings, 1 reply; 29+ messages in thread
From: Trond Myklebust @ 2016-06-21 21:34 UTC (permalink / raw)
  To: linux-nfs

We should not be interested in looking at the value of the stable field,
since that could take any value.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c   | 10 ++++++++--
 fs/nfs/internal.h |  7 +++++++
 fs/nfs/write.c    |  2 +-
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 979b3c4dee6a..d6d43b5eafb3 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -196,6 +196,12 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
 	WARN_ON_ONCE(verfp->committed < 0);
 }
 
+static int nfs_direct_cmp_verf(const struct nfs_writeverf *v1,
+		const struct nfs_writeverf *v2)
+{
+	return nfs_write_verifier_cmp(&v1->verifier, &v2->verifier);
+}
+
 /*
  * nfs_direct_cmp_hdr_verf - compare verifier for pgio header
  * @dreq - direct request possibly spanning multiple servers
@@ -215,7 +221,7 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
 		nfs_direct_set_hdr_verf(dreq, hdr);
 		return 0;
 	}
-	return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+	return nfs_direct_cmp_verf(verfp, &hdr->verf);
 }
 
 /*
@@ -238,7 +244,7 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
 	if (verfp->committed < 0)
 		return 1;
 
-	return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf));
+	return nfs_direct_cmp_verf(verfp, &data->verf);
 }
 
 /**
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 5154fa65a2f2..150a8eb0f323 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -506,6 +506,13 @@ extern int nfs_migrate_page(struct address_space *,
 #define nfs_migrate_page NULL
 #endif
 
+static inline int
+nfs_write_verifier_cmp(const struct nfs_write_verifier *v1,
+		const struct nfs_write_verifier *v2)
+{
+	return memcmp(v1->data, v2->data, sizeof(v1->data));
+}
+
 /* unlink.c */
 extern struct rpc_task *
 nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b13d48881d3a..3087fb6f1983 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1789,7 +1789,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
 
 		/* Okay, COMMIT succeeded, apparently. Check the verifier
 		 * returned by the server against all stored verfs. */
-		if (!memcmp(&req->wb_verf, &data->verf.verifier, sizeof(req->wb_verf))) {
+		if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) {
 			/* We have a match */
 			nfs_inode_remove_request(req);
 			dprintk(" OK\n");
-- 
2.5.5


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 09/12] NFS: Ensure we reset the write verifier 'committed' value on resend.
  2016-06-21 21:34             ` [PATCH v2 08/12] NFS: Fix O_DIRECT verifier problems Trond Myklebust
@ 2016-06-21 21:34               ` Trond Myklebust
  2016-06-21 21:34                 ` [PATCH v2 10/12] NFS: Do not serialise O_DIRECT reads and writes Trond Myklebust
  0 siblings, 1 reply; 29+ messages in thread
From: Trond Myklebust @ 2016-06-21 21:34 UTC (permalink / raw)
  To: linux-nfs

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c   |  2 ++
 fs/nfs/internal.h | 17 +++++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index d6d43b5eafb3..fb659bb50678 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -661,6 +661,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 	nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
 
 	dreq->count = 0;
+	dreq->verf.committed = NFS_INVALID_STABLE_HOW;
+	nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);
 	for (i = 0; i < dreq->mirror_count; i++)
 		dreq->mirrors[i].count = 0;
 	get_dreq(dreq);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 150a8eb0f323..0eb5c924886d 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -499,6 +499,23 @@ int nfs_key_timeout_notify(struct file *filp, struct inode *inode);
 bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx);
 void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio);
 
+#ifdef CONFIG_NFS_V4_1
+static inline
+void nfs_clear_pnfs_ds_commit_verifiers(struct pnfs_ds_commit_info *cinfo)
+{
+	int i;
+
+	for (i = 0; i < cinfo->nbuckets; i++)
+		cinfo->buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW;
+}
+#else
+static inline
+void nfs_clear_pnfs_ds_commit_verifiers(struct pnfs_ds_commit_info *cinfo)
+{
+}
+#endif
+
+
 #ifdef CONFIG_MIGRATION
 extern int nfs_migrate_page(struct address_space *,
 		struct page *, struct page *, enum migrate_mode);
-- 
2.5.5


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 10/12] NFS: Do not serialise O_DIRECT reads and writes
  2016-06-21 21:34               ` [PATCH v2 09/12] NFS: Ensure we reset the write verifier 'committed' value on resend Trond Myklebust
@ 2016-06-21 21:34                 ` Trond Myklebust
  2016-06-21 21:34                   ` [PATCH v2 11/12] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code Trond Myklebust
  2016-06-22 16:47                   ` [PATCH v2 10/12] NFS: Do not serialise O_DIRECT reads and writes Christoph Hellwig
  0 siblings, 2 replies; 29+ messages in thread
From: Trond Myklebust @ 2016-06-21 21:34 UTC (permalink / raw)
  To: linux-nfs

Allow dio requests to be scheduled in parallel, but ensuring that they
do not conflict with buffered I/O.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/Makefile        |   2 +-
 fs/nfs/direct.c        |  12 ++---
 fs/nfs/file.c          |  21 ++++++--
 fs/nfs/internal.h      |   8 ++++
 fs/nfs/io.c            | 128 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfs_fs.h |   1 +
 6 files changed, 161 insertions(+), 11 deletions(-)
 create mode 100644 fs/nfs/io.c

diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 8664417955a2..6abdda209642 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_NFS_FS) += nfs.o
 
 CFLAGS_nfstrace.o += -I$(src)
 nfs-y 			:= client.o dir.o file.o getroot.o inode.o super.o \
-			   direct.o pagelist.o read.o symlink.o unlink.o \
+			   io.o direct.o pagelist.o read.o symlink.o unlink.o \
 			   write.o namespace.o mount_clnt.o nfstrace.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o
 nfs-$(CONFIG_SYSCTL)	+= sysctl.o
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index fb659bb50678..2333e9973802 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -587,7 +587,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
 	if (!count)
 		goto out;
 
-	inode_lock(inode);
+	nfs_start_io_direct(inode);
 	result = nfs_sync_mapping(mapping);
 	if (result)
 		goto out_unlock;
@@ -615,7 +615,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
 	NFS_I(inode)->read_io += count;
 	result = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);
 
-	inode_unlock(inode);
+	nfs_end_io_direct(inode);
 
 	if (!result) {
 		result = nfs_direct_wait(dreq);
@@ -629,7 +629,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
 out_release:
 	nfs_direct_req_release(dreq);
 out_unlock:
-	inode_unlock(inode);
+	nfs_end_io_direct(inode);
 out:
 	return result;
 }
@@ -1013,7 +1013,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
 	pos = iocb->ki_pos;
 	end = (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT;
 
-	inode_lock(inode);
+	nfs_start_io_direct(inode);
 
 	result = nfs_sync_mapping(mapping);
 	if (result)
@@ -1053,7 +1053,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
 					      pos >> PAGE_SHIFT, end);
 	}
 
-	inode_unlock(inode);
+	nfs_end_io_direct(inode);
 
 	if (!result) {
 		result = nfs_direct_wait(dreq);
@@ -1076,7 +1076,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
 out_release:
 	nfs_direct_req_release(dreq);
 out_unlock:
-	inode_unlock(inode);
+	nfs_end_io_direct(inode);
 	return result;
 }
 
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index df4dd8e7e62e..b1dd7f2c64f4 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -170,12 +170,14 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
 		iocb->ki_filp,
 		iov_iter_count(to), (unsigned long) iocb->ki_pos);
 
+	nfs_start_io_read(inode);
 	result = nfs_revalidate_mapping_protected(inode, iocb->ki_filp->f_mapping);
 	if (!result) {
 		result = generic_file_read_iter(iocb, to);
 		if (result > 0)
 			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
 	}
+	nfs_end_io_read(inode);
 	return result;
 }
 EXPORT_SYMBOL_GPL(nfs_file_read);
@@ -191,12 +193,14 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos,
 	dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
 		filp, (unsigned long) count, (unsigned long long) *ppos);
 
+	nfs_start_io_read(inode);
 	res = nfs_revalidate_mapping_protected(inode, filp->f_mapping);
 	if (!res) {
 		res = generic_file_splice_read(filp, ppos, pipe, count, flags);
 		if (res > 0)
 			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res);
 	}
+	nfs_end_io_read(inode);
 	return res;
 }
 EXPORT_SYMBOL_GPL(nfs_file_splice_read);
@@ -639,9 +643,13 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 	dprintk("NFS: write(%pD2, %zu@%Ld)\n",
 		file, count, (long long) iocb->ki_pos);
 
-	result = -EBUSY;
 	if (IS_SWAPFILE(inode))
 		goto out_swapfile;
+
+	nfs_start_io_write(inode);
+	result = generic_write_checks(iocb, from);
+	if (result <= 0)
+		goto out;
 	/*
 	 * O_APPEND implies that we must revalidate the file length.
 	 */
@@ -655,9 +663,13 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 	if (!count)
 		goto out;
 
-	result = generic_file_write_iter(iocb, from);
-	if (result > 0)
+	current->backing_dev_info = inode_to_bdi(inode);
+	result = generic_perform_write(file, from, iocb->ki_pos);
+	current->backing_dev_info = NULL;
+	if (result > 0) {
+		iocb->ki_pos += written;
 		written = result;
+	}
 
 	/* Return error values */
 	if (result >= 0 && nfs_need_check_write(file, inode)) {
@@ -668,11 +680,12 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 	if (result > 0)
 		nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
 out:
+	nfs_end_io_write(inode);
 	return result;
 
 out_swapfile:
 	printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
-	goto out;
+	return -EBUSY;
 }
 EXPORT_SYMBOL_GPL(nfs_file_write);
 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 0eb5c924886d..159b64ede82a 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -411,6 +411,14 @@ extern void __exit unregister_nfs_fs(void);
 extern bool nfs_sb_active(struct super_block *sb);
 extern void nfs_sb_deactive(struct super_block *sb);
 
+/* io.c */
+extern void nfs_start_io_read(struct inode *inode);
+extern void nfs_end_io_read(struct inode *inode);
+extern void nfs_start_io_write(struct inode *inode);
+extern void nfs_end_io_write(struct inode *inode);
+extern void nfs_start_io_direct(struct inode *inode);
+extern void nfs_end_io_direct(struct inode *inode);
+
 /* namespace.c */
 #define NFS_PATH_CANONICAL 1
 extern char *nfs_path(char **p, struct dentry *dentry,
diff --git a/fs/nfs/io.c b/fs/nfs/io.c
new file mode 100644
index 000000000000..013ccb67c106
--- /dev/null
+++ b/fs/nfs/io.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2016 Trond Myklebust
+ *
+ * I/O and data path helper functionality.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/rwsem.h>
+#include <linux/fs.h>
+#include <linux/nfs_fs.h>
+
+#include "internal.h"
+
+/**
+ * nfs_start_io_read - declare the file is being used for buffered reads
+ * @inode - file inode
+ *
+ * Declare that a buffered read operation is about to start, and ensure
+ * that we block all direct I/O.
+ * On exit, the function ensures that the NFS_INO_ODIRECT flag is unset,
+ * and holds a shared lock on inode->i_rwsem to ensure that the flag
+ * cannot be changed.
+ * In practice, this means that buffered read operations are allowed to
+ * execute in parallel, thanks to the shared lock, whereas direct I/O
+ * operations need to wait to grab an exclusive lock in order to set
+ * NFS_INO_ODIRECT.
+ * Note that buffered writes and truncates both take a write lock on
+ * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
+ */
+void
+nfs_start_io_read(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	/* Be an optimist! */
+	down_read(&inode->i_rwsem);
+	if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0)
+		return;
+	up_read(&inode->i_rwsem);
+	/* Slow path.... */
+	down_write(&inode->i_rwsem);
+	clear_bit(NFS_INO_ODIRECT, &nfsi->flags);
+	downgrade_write(&inode->i_rwsem);
+}
+
+/**
+ * nfs_end_io_read - declare that the buffered read operation is done
+ * @inode - file inode
+ *
+ * Declare that a buffered read operation is done, and release the shared
+ * lock on inode->i_rwsem.
+ */
+void
+nfs_end_io_read(struct inode *inode)
+{
+	up_read(&inode->i_rwsem);
+}
+
+/**
+ * nfs_start_io_write - declare the file is being used for buffered writes
+ * @inode - file inode
+ *
+ * Declare that a buffered read operation is about to start, and ensure
+ * that we block all direct I/O.
+ */
+void
+nfs_start_io_write(struct inode *inode)
+{
+	down_write(&inode->i_rwsem);
+}
+
+/**
+ * nfs_end_io_write - declare that the buffered write operation is done
+ * @inode - file inode
+ *
+ * Declare that a buffered write operation is done, and release the
+ * lock on inode->i_rwsem.
+ */
+void
+nfs_end_io_write(struct inode *inode)
+{
+	up_write(&inode->i_rwsem);
+}
+
+/**
+ * nfs_end_io_direct - declare the file is being used for direct i/o
+ * @inode - file inode
+ *
+ * Declare that a direct I/O operation is about to start, and ensure
+ * that we block all buffered I/O.
+ * On exit, the function ensures that the NFS_INO_ODIRECT flag is set,
+ * and holds a shared lock on inode->i_rwsem to ensure that the flag
+ * cannot be changed.
+ * In practice, this means that direct I/O operations are allowed to
+ * execute in parallel, thanks to the shared lock, whereas buffered I/O
+ * operations need to wait to grab an exclusive lock in order to clear
+ * NFS_INO_ODIRECT.
+ * Note that buffered writes and truncates both take a write lock on
+ * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
+ */
+void
+nfs_start_io_direct(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	/* Be an optimist! */
+	down_read(&inode->i_rwsem);
+	if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) != 0)
+		return;
+	up_read(&inode->i_rwsem);
+	/* Slow path.... */
+	down_write(&inode->i_rwsem);
+	set_bit(NFS_INO_ODIRECT, &nfsi->flags);
+	downgrade_write(&inode->i_rwsem);
+}
+
+/**
+ * nfs_end_io_direct - declare that the direct i/o operation is done
+ * @inode - file inode
+ *
+ * Declare that a direct I/O operation is done, and release the shared
+ * lock on inode->i_rwsem.
+ */
+void
+nfs_end_io_direct(struct inode *inode)
+{
+	up_read(&inode->i_rwsem);
+}
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 120dd04b553c..225d17d35277 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -210,6 +210,7 @@ struct nfs_inode {
 #define NFS_INO_LAYOUTCOMMIT	(9)		/* layoutcommit required */
 #define NFS_INO_LAYOUTCOMMITTING (10)		/* layoutcommit inflight */
 #define NFS_INO_LAYOUTSTATS	(11)		/* layoutstats inflight */
+#define NFS_INO_ODIRECT		(12)		/* I/O setting is O_DIRECT */
 
 static inline struct nfs_inode *NFS_I(const struct inode *inode)
 {
-- 
2.5.5


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 11/12] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code
  2016-06-21 21:34                 ` [PATCH v2 10/12] NFS: Do not serialise O_DIRECT reads and writes Trond Myklebust
@ 2016-06-21 21:34                   ` Trond Myklebust
  2016-06-21 21:34                     ` [PATCH v2 12/12] NFS: Clean up nfs_direct_complete() Trond Myklebust
                                       ` (2 more replies)
  2016-06-22 16:47                   ` [PATCH v2 10/12] NFS: Do not serialise O_DIRECT reads and writes Christoph Hellwig
  1 sibling, 3 replies; 29+ messages in thread
From: Trond Myklebust @ 2016-06-21 21:34 UTC (permalink / raw)
  To: linux-nfs

Now that we can serialise O_DIRECT and write/truncate using the
inode->i_rwsem, we no longer need inode->i_dio_count.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c | 12 +++---------
 fs/nfs/file.c   |  2 --
 fs/nfs/inode.c  |  1 -
 3 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 2333e9973802..92267316290a 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -385,11 +385,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
 		spin_unlock(&inode->i_lock);
 	}
 
-	if (write)
-		nfs_zap_mapping(inode, inode->i_mapping);
-
-	inode_dio_end(inode);
-
 	if (dreq->iocb) {
 		long res = (long) dreq->error;
 		if (!res)
@@ -488,7 +483,6 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 			     &nfs_direct_read_completion_ops);
 	get_dreq(dreq);
 	desc.pg_dreq = dreq;
-	inode_dio_begin(inode);
 
 	while (iov_iter_count(iter)) {
 		struct page **pagevec;
@@ -540,7 +534,6 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 	 * generic layer handle the completion.
 	 */
 	if (requested_bytes == 0) {
-		inode_dio_end(inode);
 		nfs_direct_req_release(dreq);
 		return result < 0 ? result : -EIO;
 	}
@@ -770,6 +763,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 static void nfs_direct_write_schedule_work(struct work_struct *work)
 {
 	struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work);
+	struct inode *inode = dreq->inode;
 	int flags = dreq->flags;
 
 	dreq->flags = 0;
@@ -781,6 +775,8 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
 			nfs_direct_write_reschedule(dreq);
 			break;
 		default:
+			nfs_zap_mapping(inode, inode->i_mapping);
+
 			nfs_direct_complete(dreq, true);
 	}
 }
@@ -903,7 +899,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 			      &nfs_direct_write_completion_ops);
 	desc.pg_dreq = dreq;
 	get_dreq(dreq);
-	inode_dio_begin(inode);
 
 	NFS_I(inode)->write_io += iov_iter_count(iter);
 	while (iov_iter_count(iter)) {
@@ -964,7 +959,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 	 * generic layer handle the completion.
 	 */
 	if (requested_bytes == 0) {
-		inode_dio_end(inode);
 		nfs_direct_req_release(dreq);
 		return result < 0 ? result : -EIO;
 	}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index b1dd7f2c64f4..fb7a1b0b6a20 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -276,7 +276,6 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 
 	trace_nfs_fsync_enter(inode);
 
-	inode_dio_wait(inode);
 	do {
 		ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
 		if (ret != 0)
@@ -364,7 +363,6 @@ start:
 	/*
 	 * Wait for O_DIRECT to complete
 	 */
-	inode_dio_wait(mapping->host);
 
 	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bd0390da3344..6bc65ffc3c6c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -141,7 +141,6 @@ void nfs_evict_inode(struct inode *inode)
 
 int nfs_sync_inode(struct inode *inode)
 {
-	inode_dio_wait(inode);
 	return nfs_wb_all(inode);
 }
 EXPORT_SYMBOL_GPL(nfs_sync_inode);
-- 
2.5.5


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 12/12] NFS: Clean up nfs_direct_complete()
  2016-06-21 21:34                   ` [PATCH v2 11/12] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code Trond Myklebust
@ 2016-06-21 21:34                     ` Trond Myklebust
  2016-06-22 16:43                       ` Christoph Hellwig
  2016-06-22 16:42                     ` [PATCH v2 11/12] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code Christoph Hellwig
  2016-06-22 17:58                     ` Anna Schumaker
  2 siblings, 1 reply; 29+ messages in thread
From: Trond Myklebust @ 2016-06-21 21:34 UTC (permalink / raw)
  To: linux-nfs

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 92267316290a..598585eac14b 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -372,19 +372,8 @@ out:
  * Synchronous I/O uses a stack-allocated iocb.  Thus we can't trust
  * the iocb is still valid here if this is a synchronous request.
  */
-static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
+static void nfs_direct_complete(struct nfs_direct_req *dreq)
 {
-	struct inode *inode = dreq->inode;
-
-	if (dreq->iocb && write) {
-		loff_t pos = dreq->iocb->ki_pos + dreq->count;
-
-		spin_lock(&inode->i_lock);
-		if (i_size_read(inode) < pos)
-			i_size_write(inode, pos);
-		spin_unlock(&inode->i_lock);
-	}
-
 	if (dreq->iocb) {
 		long res = (long) dreq->error;
 		if (!res)
@@ -435,7 +424,7 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
 	}
 out_put:
 	if (put_dreq(dreq))
-		nfs_direct_complete(dreq, false);
+		nfs_direct_complete(dreq);
 	hdr->release(hdr);
 }
 
@@ -539,7 +528,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 	}
 
 	if (put_dreq(dreq))
-		nfs_direct_complete(dreq, false);
+		nfs_direct_complete(dreq);
 	return 0;
 }
 
@@ -775,9 +764,17 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
 			nfs_direct_write_reschedule(dreq);
 			break;
 		default:
+			if (dreq->iocb) {
+				loff_t pos = dreq->iocb->ki_pos + dreq->count;
+				spin_lock(&inode->i_lock);
+				if (i_size_read(inode) < pos)
+					i_size_write(inode, pos);
+				spin_unlock(&inode->i_lock);
+			}
+
 			nfs_zap_mapping(inode, inode->i_mapping);
 
-			nfs_direct_complete(dreq, true);
+			nfs_direct_complete(dreq);
 	}
 }
 
-- 
2.5.5


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 03/12] NFS: Cache aggressively when file is open for writing
  2016-06-21 21:34   ` [PATCH v2 03/12] NFS: Cache aggressively when file is open for writing Trond Myklebust
  2016-06-21 21:34     ` [PATCH v2 04/12] NFS: Kill NFS_INO_NFS_INO_FLUSHING: it is a performance killer Trond Myklebust
@ 2016-06-21 22:25     ` Oleg Drokin
  2016-06-22 13:06       ` Trond Myklebust
  1 sibling, 1 reply; 29+ messages in thread
From: Oleg Drokin @ 2016-06-21 22:25 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs

This patch still crashes for me.

[  183.814855] BUG: unable to handle kernel paging request at ffff88001b18cfe8
[  183.815403] IP: [<ffffffff81381b63>] nfs_update_inode+0x53/0xa30
[  183.815868] PGD 3580067 PUD 3581067 PMD 77f69067 PTE 800000001b18c060
[  183.816640] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC
[  183.817252] Modules linked in: loop rpcsec_gss_krb5 joydev acpi_cpufreq tpm_tis virtio_console tpm i2c_piix4 pcspkr nfsd ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops virtio_blk serio_raw drm floppy
[  183.820203] CPU: 6 PID: 40935 Comm: rm Not tainted 4.7.0-rc3-vm-nfst+ #6
[  183.820818] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[  183.821329] task: ffff88001b430c80 ti: ffff88001cf2c000 task.ti: ffff88001cf2c000
[  183.822223] RIP: 0010:[<ffffffff81381b63>]  [<ffffffff81381b63>] nfs_update_inode+0x53/0xa30
[  183.823007] RSP: 0018:ffff88001cf2fb08  EFLAGS: 00010283
[  183.823417] RAX: ffff88001b18d000 RBX: ffff880021b582a8 RCX: 0000000000000000
[  183.823845] RDX: ffff88001b18d000 RSI: ffff88001c3cdf00 RDI: ffff880021b582a8
[  183.824276] RBP: ffff88001cf2fb58 R08: 0000000000000000 R09: 0000000000000001
[  183.824701] R10: ffff88001b430c80 R11: 00000000fffe3818 R12: ffff88001c3cdf00
[  183.825190] R13: ffff88001c3cdf00 R14: ffff88001c3cdf00 R15: 0000000000000001
[  183.825651] FS:  00007fa0e9c5b700(0000) GS:ffff880075200000(0000) knlGS:0000000000000000
[  183.826396] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  183.826884] CR2: ffff88001b18cfe8 CR3: 000000001bc1f000 CR4: 00000000000006e0
[  183.827460] Stack:
[  183.827825]  ffff880000000000 ffff88001cf2fb58 0000000000000246 ffffffff81382f33
[  183.828734]  0000000000000246 ffff880021b582a8 ffff880021b582a8 ffff88001c3cdf00
[  183.829634]  ffff88001c3cdf00 0000000000000001 ffff88001cf2fb80 ffffffff81382aa1
[  183.830562] Call Trace:
[  183.830938]  [<ffffffff81382f33>] ? nfs_refresh_inode.part.24+0x23/0x50
[  183.831368]  [<ffffffff81382aa1>] nfs_refresh_inode_locked+0x71/0x4e0
[  183.831785]  [<ffffffff81382f3e>] nfs_refresh_inode.part.24+0x2e/0x50
[  183.832209]  [<ffffffff81384024>] __nfs_revalidate_inode+0x314/0x4b0
[  183.832632]  [<ffffffff8137c6fe>] nfs_do_access+0x47e/0x620
[  183.833036]  [<ffffffff8137c2d1>] ? nfs_do_access+0x51/0x620
[  183.833456]  [<ffffffff818507ea>] ? generic_lookup_cred+0x1a/0x20
[  183.833872]  [<ffffffff8184ef7e>] ? rpcauth_lookupcred+0x8e/0xd0
[  183.834288]  [<ffffffff8137cb69>] nfs_permission+0x289/0x2e0
[  183.834699]  [<ffffffff8137c943>] ? nfs_permission+0x63/0x2e0
[  183.835108]  [<ffffffff812768da>] __inode_permission+0x6a/0xb0
[  183.835521]  [<ffffffff81276934>] inode_permission+0x14/0x50
[  183.835928]  [<ffffffff8127b2cf>] link_path_walk+0x2ef/0x6b0
[  183.837327]  [<ffffffff8127b6a9>] ? path_parentat+0x19/0x80
[  183.837988]  [<ffffffff8127b6be>] path_parentat+0x2e/0x80
[  183.838653]  [<ffffffff8127cb13>] filename_parentat+0xb3/0x190
[  183.839328]  [<ffffffff8123bc4c>] ? cache_alloc_debugcheck_after.isra.52+0x19c/0x1f0
[  183.840533]  [<ffffffff8123e0a0>] ? kmem_cache_alloc+0x300/0x3d0
[  183.841217]  [<ffffffff8127c89f>] ? getname_flags+0x4f/0x1f0
[  183.841888]  [<ffffffff810e6d0d>] ? trace_hardirqs_on+0xd/0x10
[  183.842570]  [<ffffffff8127d455>] do_unlinkat+0x65/0x2f0
[  183.843243]  [<ffffffff8100201a>] ? trace_hardirqs_on_thunk+0x1a/0x1c
[  183.843910]  [<ffffffff8127e17b>] SyS_unlinkat+0x1b/0x30
[  183.844582]  [<ffffffff8188aebc>] entry_SYSCALL_64_fastpath+0x1f/0xbd

(gdb) l *(nfs_update_inode+0x53)
0xffffffff81381b63 is in nfs_update_inode (/home/green/bk/linux-test/fs/nfs/inode.c:1233).
1228		 *       being placed at the head of the list.
1229		 *       See nfs_inode_attach_open_context()
1230		 */
1231		return (list_first_entry(&nfsi->open_files,
1232				struct nfs_open_context,
1233				list)->mode & FMODE_WRITE) == FMODE_WRITE;
1234	}
1235	
1236	static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1237	{

Reverting it makes the crash go away.

But there's also this other problem that I have not tracked to a particular patch yet:

[   25.830288] =============================================
[   25.830468] [ INFO: possible recursive locking detected ]
[   25.830658] 4.7.0-rc3-vm-nfst+ #8 Not tainted
[   25.830822] ---------------------------------------------
[   25.831009] cat/6588 is trying to acquire lock:
[   25.831179]  (&sb->s_type->i_mutex_key#13){++++++}, at: [<ffffffff813841f4>] __nfs_revalidate_mapping+0x124/0x3e0
[   25.832488] 
               but task is already holding lock:
[   25.833214]  (&sb->s_type->i_mutex_key#13){++++++}, at: [<ffffffff81388b7e>] nfs_start_io_read+0x1e/0x50
[   25.834209] 
               other info that might help us debug this:
[   25.834970]  Possible unsafe locking scenario:

[   25.835699]        CPU0
[   25.836077]        ----
[   25.836459]   lock(&sb->s_type->i_mutex_key#13);
[   25.836988]   lock(&sb->s_type->i_mutex_key#13);
[   25.837636] 
                *** DEADLOCK ***

[   25.838749]  May be due to missing lock nesting notation

[   25.839489] 1 lock held by cat/6588:
[   25.839878]  #0:  (&sb->s_type->i_mutex_key#13){++++++}, at: [<ffffffff81388b7e>] nfs_start_io_read+0x1e/0x50
[   25.840931] 
               stack backtrace:
[   25.841645] CPU: 0 PID: 6588 Comm: cat Not tainted 4.7.0-rc3-vm-nfst+ #8
[   25.842074] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[   25.842497]  0000000000000086 00000000d8302202 ffff8800908c7c50 ffffffff814a0835
[   25.843418]  ffffffff82c82ac0 ffff880091890c80 ffff8800908c7d20 ffffffff810e7786
[   25.844354]  ffff8800908c7d28 ffffffff00000001 0000000000000000 ffffffff8257be00
[   25.845271] Call Trace:
[   25.845653]  [<ffffffff814a0835>] dump_stack+0x86/0xc1
[   25.846068]  [<ffffffff810e7786>] __lock_acquire+0x726/0x1210
[   25.846519]  [<ffffffff8104a425>] ? kvm_sched_clock_read+0x25/0x40
[   25.846941]  [<ffffffff8101f6f9>] ? sched_clock+0x9/0x10
[   25.847358]  [<ffffffff813841bc>] ? __nfs_revalidate_mapping+0xec/0x3e0
[   25.847787]  [<ffffffff8104a425>] ? kvm_sched_clock_read+0x25/0x40
[   25.848210]  [<ffffffff8101f6f9>] ? sched_clock+0x9/0x10
[   25.848630]  [<ffffffff810e86de>] lock_acquire+0xfe/0x1f0
[   25.849046]  [<ffffffff813841f4>] ? __nfs_revalidate_mapping+0x124/0x3e0
[   25.849503]  [<ffffffff818884aa>] down_write+0x5a/0xc0
[   25.849919]  [<ffffffff813841f4>] ? __nfs_revalidate_mapping+0x124/0x3e0
[   25.850355]  [<ffffffff813841f4>] __nfs_revalidate_mapping+0x124/0x3e0
[   25.850781]  [<ffffffff81384b53>] nfs_revalidate_mapping_protected+0x13/0x20
[   25.851214]  [<ffffffff8137f587>] nfs_file_read+0x47/0xc0
[   25.851636]  [<ffffffff8126a3f5>] __vfs_read+0xe5/0x150
[   25.852051]  [<ffffffff8126b569>] vfs_read+0x99/0x140
[   25.852485]  [<ffffffff8126cab8>] SyS_read+0x58/0xc0
[   25.852892]  [<ffffffff8188adbc>] entry_SYSCALL_64_fastpath+0x1f/0xbd

On Jun 21, 2016, at 5:34 PM, Trond Myklebust wrote:

> Unless the user is using file locking, we must assume close-to-open
> cache consistency when the file is open for writing. Adjust the
> caching algorithm so that it does not clear the cache on out-of-order
> writes and/or attribute revalidations.
> 
> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
> ---
> fs/nfs/file.c  | 13 ++-----------
> fs/nfs/inode.c | 58 ++++++++++++++++++++++++++++++++++++++++------------------
> 2 files changed, 42 insertions(+), 29 deletions(-)
> 
> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
> index 717a8d6af52d..2d39d9f9da7d 100644
> --- a/fs/nfs/file.c
> +++ b/fs/nfs/file.c
> @@ -780,11 +780,6 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
> }
> 
> static int
> -is_time_granular(struct timespec *ts) {
> -	return ((ts->tv_sec == 0) && (ts->tv_nsec <= 1000));
> -}
> -
> -static int
> do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
> {
> 	struct inode *inode = filp->f_mapping->host;
> @@ -817,12 +812,8 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
> 	 * This makes locking act as a cache coherency point.
> 	 */
> 	nfs_sync_mapping(filp->f_mapping);
> -	if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) {
> -		if (is_time_granular(&NFS_SERVER(inode)->time_delta))
> -			__nfs_revalidate_inode(NFS_SERVER(inode), inode);
> -		else
> -			nfs_zap_caches(inode);
> -	}
> +	if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
> +		nfs_zap_mapping(inode, filp->f_mapping);
> out:
> 	return status;
> }
> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
> index 60051e62d3f1..bd0390da3344 100644
> --- a/fs/nfs/inode.c
> +++ b/fs/nfs/inode.c
> @@ -878,7 +878,10 @@ void nfs_inode_attach_open_context(struct nfs_open_context *ctx)
> 	struct nfs_inode *nfsi = NFS_I(inode);
> 
> 	spin_lock(&inode->i_lock);
> -	list_add(&ctx->list, &nfsi->open_files);
> +	if (ctx->mode & FMODE_WRITE)
> +		list_add(&ctx->list, &nfsi->open_files);
> +	else
> +		list_add_tail(&ctx->list, &nfsi->open_files);
> 	spin_unlock(&inode->i_lock);
> }
> EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context);
> @@ -1215,6 +1218,21 @@ int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *
> 	return __nfs_revalidate_mapping(inode, mapping, true);
> }
> 
> +static bool nfs_file_has_writers(struct nfs_inode *nfsi)
> +{
> +	assert_spin_locked(&nfsi->vfs_inode.i_lock);
> +
> +	if (list_empty(&nfsi->open_files))
> +		return false;
> +	/* Note: This relies on nfsi->open_files being ordered with writers
> +	 *       being placed at the head of the list.
> +	 *       See nfs_inode_attach_open_context()
> +	 */
> +	return (list_first_entry(&nfsi->open_files,
> +			struct nfs_open_context,
> +			list)->mode & FMODE_WRITE) == FMODE_WRITE;
> +}
> +
> static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
> {
> 	struct nfs_inode *nfsi = NFS_I(inode);
> @@ -1279,22 +1297,24 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
> 	if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
> 		return -EIO;
> 
> -	if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
> -			inode->i_version != fattr->change_attr)
> -		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
> +	if (!nfs_file_has_writers(nfsi)) {
> +		/* Verify a few of the more important attributes */
> +		if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && inode->i_version != fattr->change_attr)
> +			invalid |= NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE;
> 
> -	/* Verify a few of the more important attributes */
> -	if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
> -		invalid |= NFS_INO_INVALID_ATTR;
> +		if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
> +			invalid |= NFS_INO_INVALID_ATTR;
> 
> -	if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
> -		cur_size = i_size_read(inode);
> -		new_isize = nfs_size_to_loff_t(fattr->size);
> -		if (cur_size != new_isize)
> -			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
> +		if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec_equal(&inode->i_ctime, &fattr->ctime))
> +			invalid |= NFS_INO_INVALID_ATTR;
> +
> +		if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
> +			cur_size = i_size_read(inode);
> +			new_isize = nfs_size_to_loff_t(fattr->size);
> +			if (cur_size != new_isize)
> +				invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
> +		}
> 	}
> -	if (nfsi->nrequests != 0)
> -		invalid &= ~NFS_INO_REVAL_PAGECACHE;
> 
> 	/* Have any file permissions changed? */
> 	if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
> @@ -1526,7 +1546,7 @@ EXPORT_SYMBOL_GPL(nfs_refresh_inode);
> 
> static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
> {
> -	unsigned long invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
> +	unsigned long invalid = NFS_INO_INVALID_ATTR;
> 
> 	/*
> 	 * Don't revalidate the pagecache if we hold a delegation, but do
> @@ -1675,6 +1695,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
> 	unsigned long invalid = 0;
> 	unsigned long now = jiffies;
> 	unsigned long save_cache_validity;
> +	bool have_writers = nfs_file_has_writers(nfsi);
> 	bool cache_revalidated = true;
> 
> 	dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n",
> @@ -1730,7 +1751,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
> 			dprintk("NFS: change_attr change on server for file %s/%ld\n",
> 					inode->i_sb->s_id, inode->i_ino);
> 			/* Could it be a race with writeback? */
> -			if (nfsi->nrequests == 0) {
> +			if (!have_writers) {
> 				invalid |= NFS_INO_INVALID_ATTR
> 					| NFS_INO_INVALID_DATA
> 					| NFS_INO_INVALID_ACCESS
> @@ -1770,9 +1791,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
> 		if (new_isize != cur_isize) {
> 			/* Do we perhaps have any outstanding writes, or has
> 			 * the file grown beyond our last write? */
> -			if ((nfsi->nrequests == 0) || new_isize > cur_isize) {
> +			if (nfsi->nrequests == 0 || new_isize > cur_isize) {
> 				i_size_write(inode, new_isize);
> -				invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
> +				if (!have_writers)
> +					invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
> 			}
> 			dprintk("NFS: isize change on server for file %s/%ld "
> 					"(%Ld to %Ld)\n",
> -- 
> 2.5.5
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 03/12] NFS: Cache aggressively when file is open for writing
  2016-06-21 22:25     ` [PATCH v2 03/12] NFS: Cache aggressively when file is open for writing Oleg Drokin
@ 2016-06-22 13:06       ` Trond Myklebust
  2016-06-22 16:19         ` Oleg Drokin
  0 siblings, 1 reply; 29+ messages in thread
From: Trond Myklebust @ 2016-06-22 13:06 UTC (permalink / raw)
  To: green; +Cc: linux-nfs

[-- Attachment #1: Type: text/plain, Size: 6053 bytes --]

Hi Oleg,

Thanks for testing!

On Tue, 2016-06-21 at 18:25 -0400, Oleg Drokin wrote:
> This patch still crashes for me.
> 
> [  183.814855] BUG: unable to handle kernel paging request at
> ffff88001b18cfe8
> [  183.815403] IP: [<ffffffff81381b63>] nfs_update_inode+0x53/0xa30
> [  183.815868] PGD 3580067 PUD 3581067 PMD 77f69067 PTE
> 800000001b18c060
> [  183.816640] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC
> [  183.817252] Modules linked in: loop rpcsec_gss_krb5 joydev
> acpi_cpufreq tpm_tis virtio_console tpm i2c_piix4 pcspkr nfsd ttm
> drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops
> virtio_blk serio_raw drm floppy
> [  183.820203] CPU: 6 PID: 40935 Comm: rm Not tainted 4.7.0-rc3-vm-
> nfst+ #6
> [  183.820818] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
> [  183.821329] task: ffff88001b430c80 ti: ffff88001cf2c000 task.ti:
> ffff88001cf2c000
> [  183.822223] RIP: 0010:[<ffffffff81381b63>]  [<ffffffff81381b63>]
> nfs_update_inode+0x53/0xa30
> [  183.823007] RSP: 0018:ffff88001cf2fb08  EFLAGS: 00010283
> [  183.823417] RAX: ffff88001b18d000 RBX: ffff880021b582a8 RCX:
> 0000000000000000
> [  183.823845] RDX: ffff88001b18d000 RSI: ffff88001c3cdf00 RDI:
> ffff880021b582a8
> [  183.824276] RBP: ffff88001cf2fb58 R08: 0000000000000000 R09:
> 0000000000000001
> [  183.824701] R10: ffff88001b430c80 R11: 00000000fffe3818 R12:
> ffff88001c3cdf00
> [  183.825190] R13: ffff88001c3cdf00 R14: ffff88001c3cdf00 R15:
> 0000000000000001
> [  183.825651] FS:  00007fa0e9c5b700(0000) GS:ffff880075200000(0000)
> knlGS:0000000000000000
> [  183.826396] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [  183.826884] CR2: ffff88001b18cfe8 CR3: 000000001bc1f000 CR4:
> 00000000000006e0
> [  183.827460] Stack:
> [  183.827825]  ffff880000000000 ffff88001cf2fb58 0000000000000246
> ffffffff81382f33
> [  183.828734]  0000000000000246 ffff880021b582a8 ffff880021b582a8
> ffff88001c3cdf00
> [  183.829634]  ffff88001c3cdf00 0000000000000001 ffff88001cf2fb80
> ffffffff81382aa1
> [  183.830562] Call Trace:
> [  183.830938]  [<ffffffff81382f33>] ?
> nfs_refresh_inode.part.24+0x23/0x50
> [  183.831368]  [<ffffffff81382aa1>]
> nfs_refresh_inode_locked+0x71/0x4e0
> [  183.831785]  [<ffffffff81382f3e>]
> nfs_refresh_inode.part.24+0x2e/0x50
> [  183.832209]  [<ffffffff81384024>]
> __nfs_revalidate_inode+0x314/0x4b0
> [  183.832632]  [<ffffffff8137c6fe>] nfs_do_access+0x47e/0x620
> [  183.833036]  [<ffffffff8137c2d1>] ? nfs_do_access+0x51/0x620
> [  183.833456]  [<ffffffff818507ea>] ? generic_lookup_cred+0x1a/0x20
> [  183.833872]  [<ffffffff8184ef7e>] ? rpcauth_lookupcred+0x8e/0xd0
> [  183.834288]  [<ffffffff8137cb69>] nfs_permission+0x289/0x2e0
> [  183.834699]  [<ffffffff8137c943>] ? nfs_permission+0x63/0x2e0
> [  183.835108]  [<ffffffff812768da>] __inode_permission+0x6a/0xb0
> [  183.835521]  [<ffffffff81276934>] inode_permission+0x14/0x50
> [  183.835928]  [<ffffffff8127b2cf>] link_path_walk+0x2ef/0x6b0
> [  183.837327]  [<ffffffff8127b6a9>] ? path_parentat+0x19/0x80
> [  183.837988]  [<ffffffff8127b6be>] path_parentat+0x2e/0x80
> [  183.838653]  [<ffffffff8127cb13>] filename_parentat+0xb3/0x190
> [  183.839328]  [<ffffffff8123bc4c>] ?
> cache_alloc_debugcheck_after.isra.52+0x19c/0x1f0
> [  183.840533]  [<ffffffff8123e0a0>] ? kmem_cache_alloc+0x300/0x3d0
> [  183.841217]  [<ffffffff8127c89f>] ? getname_flags+0x4f/0x1f0
> [  183.841888]  [<ffffffff810e6d0d>] ? trace_hardirqs_on+0xd/0x10
> [  183.842570]  [<ffffffff8127d455>] do_unlinkat+0x65/0x2f0
> [  183.843243]  [<ffffffff8100201a>] ?
> trace_hardirqs_on_thunk+0x1a/0x1c
> [  183.843910]  [<ffffffff8127e17b>] SyS_unlinkat+0x1b/0x30
> [  183.844582]  [<ffffffff8188aebc>]
> entry_SYSCALL_64_fastpath+0x1f/0xbd
> 
> (gdb) l *(nfs_update_inode+0x53)
> 0xffffffff81381b63 is in nfs_update_inode (/home/green/bk/linux-
> test/fs/nfs/inode.c:1233).
> 1228		 *       being placed at the head of the list.
> 1229		 *       See nfs_inode_attach_open_context()
> 1230		 */
> 1231		return (list_first_entry(&nfsi->open_files,
> 1232				struct nfs_open_context,
> 1233				list)->mode & FMODE_WRITE) ==
> FMODE_WRITE;
> 1234	}
> 1235	
> 1236	static unsigned long nfs_wcc_update_inode(struct inode
> *inode, struct nfs_fattr *fattr)
> 1237	{
> 
> Reverting it makes the crash go away.
> 
> But there's also this other problem that I have not tracked to a
> particular patch yet:
> 
> [   25.830288] =============================================
> [   25.830468] [ INFO: possible recursive locking detected ]
> [   25.830658] 4.7.0-rc3-vm-nfst+ #8 Not tainted
> [   25.830822] ---------------------------------------------
> [   25.831009] cat/6588 is trying to acquire lock:
> [   25.831179]  (&sb->s_type->i_mutex_key#13){++++++}, at:
> [<ffffffff813841f4>] __nfs_revalidate_mapping+0x124/0x3e0
> [   25.832488] 
>                but task is already holding lock:
> [   25.833214]  (&sb->s_type->i_mutex_key#13){++++++}, at:
> [<ffffffff81388b7e>] nfs_start_io_read+0x1e/0x50
> [   25.834209] 
>                other info that might help us debug this:
> [   25.834970]  Possible unsafe locking scenario:
> 
> [   25.835699]        CPU0
> [   25.836077]        ----
> [   25.836459]   lock(&sb->s_type->i_mutex_key#13);
> [   25.836988]   lock(&sb->s_type->i_mutex_key#13);
> [   25.837636] 
>                 *** DEADLOCK ***
> 

The 2 attached patches should hopefully fix these 2 issues and will be
rolled into v3 together with a cleanup patch to remove the now-obsolete 
function nfs_revalidate_mapping_protected().

Cheers
  Trond
  
-- 

Trond Myklebust
Linux NFS client maintainer, PrimaryData
trond.myklebust@primarydata.com

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-fixup-NFS-Cache-aggressively-when-file-is-open-for-w.patch --]
[-- Type: text/x-patch; name=0001-fixup-NFS-Cache-aggressively-when-file-is-open-for-w.patch, Size: 968 bytes --]

From df4c863d69f5c333648d8c8e077f00b2cc93aadc Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Wed, 22 Jun 2016 08:04:15 -0400
Subject: [PATCH 1/2] fixup! NFS: Cache aggressively when file is open for
 writing

---
 fs/nfs/inode.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6bc65ffc3c6c..9521fa6154c8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1219,8 +1219,12 @@ int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *
 
 static bool nfs_file_has_writers(struct nfs_inode *nfsi)
 {
-	assert_spin_locked(&nfsi->vfs_inode.i_lock);
+	struct inode *inode = &nfsi->vfs_inode;
 
+	assert_spin_locked(&inode->i_lock);
+
+	if (!S_ISREG(inode->i_mode))
+		return false;
 	if (list_empty(&nfsi->open_files))
 		return false;
 	/* Note: This relies on nfsi->open_files being ordered with writers
-- 
2.7.4


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3: 0002-fixup-NFS-Do-not-serialise-O_DIRECT-reads-and-writes.patch --]
[-- Type: text/x-patch; name=0002-fixup-NFS-Do-not-serialise-O_DIRECT-reads-and-writes.patch, Size: 1259 bytes --]

From 65d27ea6cc51cb653c2674666353cf3b8d0f765f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Wed, 22 Jun 2016 08:16:54 -0400
Subject: [PATCH 2/2] fixup! NFS: Do not serialise O_DIRECT reads and writes

---
 fs/nfs/file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index fb7a1b0b6a20..9081afacb457 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -171,7 +171,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
 		iov_iter_count(to), (unsigned long) iocb->ki_pos);
 
 	nfs_start_io_read(inode);
-	result = nfs_revalidate_mapping_protected(inode, iocb->ki_filp->f_mapping);
+	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
 	if (!result) {
 		result = generic_file_read_iter(iocb, to);
 		if (result > 0)
@@ -194,7 +194,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos,
 		filp, (unsigned long) count, (unsigned long long) *ppos);
 
 	nfs_start_io_read(inode);
-	res = nfs_revalidate_mapping_protected(inode, filp->f_mapping);
+	res = nfs_revalidate_mapping(inode, filp->f_mapping);
 	if (!res) {
 		res = generic_file_splice_read(filp, ppos, pipe, count, flags);
 		if (res > 0)
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 01/12] NFS: Don't flush caches for a getattr that races with writeback
  2016-06-21 21:34 [PATCH v2 01/12] NFS: Don't flush caches for a getattr that races with writeback Trond Myklebust
  2016-06-21 21:34 ` [PATCH v2 02/12] NFS: Cache access checks more aggressively Trond Myklebust
@ 2016-06-22 15:48 ` Christoph Hellwig
  1 sibling, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2016-06-22 15:48 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs

This series (or rather your testing branch, which should have the fixups
later in the thread already folded in) causes various xfstests
regressions:

generic/010 generic/020 generic/037 generic/062
generic/100 generic/247 generic/249 generic/337

They seems to be a mix of xattrs, direct I/O consistency and maybe
a few other issues.

This was a auto group run against a Linux server running xfs using
NFSv4.0.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 03/12] NFS: Cache aggressively when file is open for writing
  2016-06-22 13:06       ` Trond Myklebust
@ 2016-06-22 16:19         ` Oleg Drokin
  0 siblings, 0 replies; 29+ messages in thread
From: Oleg Drokin @ 2016-06-22 16:19 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs


On Jun 22, 2016, at 9:06 AM, Trond Myklebust wrote:

> Hi Oleg,
> 
> Thanks for testing!
> 
> On Tue, 2016-06-21 at 18:25 -0400, Oleg Drokin wrote:
>> This patch still crashes for me.
>> 
> 
> The 2 attached patches should hopefully fix these 2 issues and will be
> rolled into v3 together with a cleanup patch to remove the now-obsolete 
> function nfs_revalidate_mapping_protected().

Yes, these two seems to be solving the issues.

Thanks.


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 11/12] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code
  2016-06-21 21:34                   ` [PATCH v2 11/12] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code Trond Myklebust
  2016-06-21 21:34                     ` [PATCH v2 12/12] NFS: Clean up nfs_direct_complete() Trond Myklebust
@ 2016-06-22 16:42                     ` Christoph Hellwig
  2016-06-22 16:58                       ` Trond Myklebust
  2016-06-22 17:58                     ` Anna Schumaker
  2 siblings, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2016-06-22 16:42 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs

On Tue, Jun 21, 2016 at 05:34:52PM -0400, Trond Myklebust wrote:
> Now that we can serialise O_DIRECT and write/truncate using the
> inode->i_rwsem, we no longer need inode->i_dio_count.

For the AIO case that's not true, we can't hold i_rwsem until
aio completes.  So for something that does block allocations we'll
need something like i_dio_count still.  This probably includes the
block / scsi layout code.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 12/12] NFS: Clean up nfs_direct_complete()
  2016-06-21 21:34                     ` [PATCH v2 12/12] NFS: Clean up nfs_direct_complete() Trond Myklebust
@ 2016-06-22 16:43                       ` Christoph Hellwig
  0 siblings, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2016-06-22 16:43 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs

Looks fine,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 10/12] NFS: Do not serialise O_DIRECT reads and writes
  2016-06-21 21:34                 ` [PATCH v2 10/12] NFS: Do not serialise O_DIRECT reads and writes Trond Myklebust
  2016-06-21 21:34                   ` [PATCH v2 11/12] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code Trond Myklebust
@ 2016-06-22 16:47                   ` Christoph Hellwig
  2016-06-22 17:24                     ` Trond Myklebust
  1 sibling, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2016-06-22 16:47 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs

On Tue, Jun 21, 2016 at 05:34:51PM -0400, Trond Myklebust wrote:
> Allow dio requests to be scheduled in parallel, but ensuring that they
> do not conflict with buffered I/O.

Can you explain why we care about the full direct / bufferd exclusion
that no other file system seems do be doing?  I would have expected
something more like the patch below, which follows the XFS locking
model 1:1.  This passed xfstests with plain NFSv4, haven't done any pNFS
testing yet:

---

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 11/12] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code
  2016-06-22 16:42                     ` [PATCH v2 11/12] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code Christoph Hellwig
@ 2016-06-22 16:58                       ` Trond Myklebust
  2016-06-23 10:19                         ` Christoph Hellwig
  0 siblings, 1 reply; 29+ messages in thread
From: Trond Myklebust @ 2016-06-22 16:58 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-nfs


> On Jun 22, 2016, at 12:42, Christoph Hellwig <hch@infradead.org> wrote:
> 
> On Tue, Jun 21, 2016 at 05:34:52PM -0400, Trond Myklebust wrote:
>> Now that we can serialise O_DIRECT and write/truncate using the
>> inode->i_rwsem, we no longer need inode->i_dio_count.
> 
> For the AIO case that's not true, we can't hold i_rwsem until
> aio completes.  So for something that does block allocations we'll
> need something like i_dio_count still.  This probably includes the
> block / scsi layout code.

Why can’t we hold the i_rwsem until aio completes? It is only a read lock, so we’re not blocking the aio engine from adding more requests. As long as notify_change() holds the i_rwsem for write, then we have exclusion.

Cheers
  Trond


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 10/12] NFS: Do not serialise O_DIRECT reads and writes
  2016-06-22 16:47                   ` [PATCH v2 10/12] NFS: Do not serialise O_DIRECT reads and writes Christoph Hellwig
@ 2016-06-22 17:24                     ` Trond Myklebust
  2016-06-23 11:00                         ` Christoph Hellwig
  0 siblings, 1 reply; 29+ messages in thread
From: Trond Myklebust @ 2016-06-22 17:24 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-nfs


> On Jun 22, 2016, at 12:47, Christoph Hellwig <hch@infradead.org> wrote:
> 
> On Tue, Jun 21, 2016 at 05:34:51PM -0400, Trond Myklebust wrote:
>> Allow dio requests to be scheduled in parallel, but ensuring that they
>> do not conflict with buffered I/O.
> 
> Can you explain why we care about the full direct / bufferd exclusion
> that no other file system seems do be doing?  I would have expected
> something more like the patch below, which follows the XFS locking
> model 1:1.  This passed xfstests with plain NFSv4, haven't done any pNFS
> testing yet:


If we’re going to worry about write atomicity in the buffered I/O case, then we really should also make sure that O_DIRECT writes are atomic w.r.t. page cache updates too. With this locking model, a buffered read() can race with the O_DIRECT write() and get a mixture of old data and new.

> 
> ---
> From 913189e25fc0e7318f077610c0aa8d5c1071c0c8 Mon Sep 17 00:00:00 2001
> From: Christoph Hellwig <hch@lst.de>
> Date: Wed, 22 Jun 2016 18:00:24 +0200
> Subject: nfs: do not serialise O_DIRECT reads and writes
> 
> Currently NFS takes the inode lock exclusive for all direct I/O reads and
> writes, which forces users of direct I/O to be unessecarily synchronized.
> We only need the exclusive lock to invalidate the page cache, and could
> otherwise do with a shared lock to protect against buffered writers and
> truncate.  This also adds the shared lock to buffered reads to provide
> Posix synchronized I/O guarantees for reads after synchronous writes,
> although that change shouldn't be nessecary to allow parallel direct I/O.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> fs/nfs/direct.c | 47 +++++++++++++++++++++++++++++++----------------
> fs/nfs/file.c   |  4 +++-
> 2 files changed, 34 insertions(+), 17 deletions(-)
> 
> diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
> index 979b3c4..8b9c7e95 100644
> --- a/fs/nfs/direct.c
> +++ b/fs/nfs/direct.c
> @@ -581,10 +581,17 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
> 	if (!count)
> 		goto out;
> 
> -	inode_lock(inode);
> -	result = nfs_sync_mapping(mapping);
> -	if (result)
> -		goto out_unlock;
> +	if (mapping->nrpages) {
> +		inode_lock(inode);

This is unnecessary now that we have a rw_semaphore. You don’t need to take an exclusive lock in order to serialise w.r.t. new writes, and by doing so you end up serialising all reads if there happens to be pages in the page cache. This is true whether or not those pages are dirty.

> +		result = nfs_sync_mapping(mapping);
> +		if (result) {
> +			inode_unlock(inode);
> +			goto out;
> +		}
> +		downgrade_write(&inode->i_rwsem);
> +	} else {
> +		inode_lock_shared(inode);
> +	}
> 
> 	task_io_account_read(count);
> 
> @@ -609,7 +616,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
> 	NFS_I(inode)->read_io += count;
> 	result = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);
> 
> -	inode_unlock(inode);
> +	inode_unlock_shared(inode);
> 
> 	if (!result) {
> 		result = nfs_direct_wait(dreq);
> @@ -623,7 +630,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
> out_release:
> 	nfs_direct_req_release(dreq);
> out_unlock:
> -	inode_unlock(inode);
> +	inode_unlock_shared(inode);
> out:
> 	return result;
> }
> @@ -1005,17 +1012,22 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
> 	pos = iocb->ki_pos;
> 	end = (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT;
> 
> -	inode_lock(inode);
> -
> -	result = nfs_sync_mapping(mapping);
> -	if (result)
> -		goto out_unlock;
> -
> 	if (mapping->nrpages) {
> -		result = invalidate_inode_pages2_range(mapping,
> -					pos >> PAGE_SHIFT, end);
> +		inode_lock(inode);
> +		result = nfs_sync_mapping(mapping);
> 		if (result)
> -			goto out_unlock;
> +			goto out_unlock_exclusive;
> +
> +		if (mapping->nrpages) {
> +			result = invalidate_inode_pages2_range(mapping,
> +					pos >> PAGE_SHIFT, end);
> +			if (result)	
> +				goto out_unlock_exclusive;
> +		}
> +
> +		downgrade_write(&inode->i_rwsem);
> +	} else {
> +		inode_lock_shared(inode);
> 	}
> 
> 	task_io_account_write(iov_iter_count(iter));
> @@ -1045,7 +1057,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
> 					      pos >> PAGE_SHIFT, end);
> 	}
> 
> -	inode_unlock(inode);
> +	inode_unlock_shared(inode);
> 
> 	if (!result) {
> 		result = nfs_direct_wait(dreq);
> @@ -1068,6 +1080,9 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
> out_release:
> 	nfs_direct_req_release(dreq);
> out_unlock:
> +	inode_unlock_shared(inode);
> +	return result;
> +out_unlock_exclusive:
> 	inode_unlock(inode);
> 	return result;
> }
> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
> index 717a8d6..719296f 100644
> --- a/fs/nfs/file.c
> +++ b/fs/nfs/file.c
> @@ -170,12 +170,14 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
> 		iocb->ki_filp,
> 		iov_iter_count(to), (unsigned long) iocb->ki_pos);
> 
> -	result = nfs_revalidate_mapping_protected(inode, iocb->ki_filp->f_mapping);
> +	inode_lock_shared(inode);
> +	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
> 	if (!result) {
> 		result = generic_file_read_iter(iocb, to);
> 		if (result > 0)
> 			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
> 	}
> +	inode_unlock_shared(inode);
> 	return result;
> }
> EXPORT_SYMBOL_GPL(nfs_file_read);
> -- 
> 2.1.4
> 


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 11/12] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code
  2016-06-21 21:34                   ` [PATCH v2 11/12] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code Trond Myklebust
  2016-06-21 21:34                     ` [PATCH v2 12/12] NFS: Clean up nfs_direct_complete() Trond Myklebust
  2016-06-22 16:42                     ` [PATCH v2 11/12] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code Christoph Hellwig
@ 2016-06-22 17:58                     ` Anna Schumaker
  2016-06-22 18:06                       ` Trond Myklebust
  2 siblings, 1 reply; 29+ messages in thread
From: Anna Schumaker @ 2016-06-22 17:58 UTC (permalink / raw)
  To: Trond Myklebust, linux-nfs

Hi Trond,

On 06/21/2016 05:34 PM, Trond Myklebust wrote:
> Now that we can serialise O_DIRECT and write/truncate using the
> inode->i_rwsem, we no longer need inode->i_dio_count.

I'm seeing cthon basic tests fail on all NFS versions after applying this patch:

./test5: read and write
        ./test5: (/nfs/basic) 'bigfile' has size 8192, should be 1048576
basic tests failed

Just thought you should know :)
Anna

> 
> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
> ---
>  fs/nfs/direct.c | 12 +++---------
>  fs/nfs/file.c   |  2 --
>  fs/nfs/inode.c  |  1 -
>  3 files changed, 3 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
> index 2333e9973802..92267316290a 100644
> --- a/fs/nfs/direct.c
> +++ b/fs/nfs/direct.c
> @@ -385,11 +385,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
>  		spin_unlock(&inode->i_lock);
>  	}
>  
> -	if (write)
> -		nfs_zap_mapping(inode, inode->i_mapping);
> -
> -	inode_dio_end(inode);
> -
>  	if (dreq->iocb) {
>  		long res = (long) dreq->error;
>  		if (!res)
> @@ -488,7 +483,6 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
>  			     &nfs_direct_read_completion_ops);
>  	get_dreq(dreq);
>  	desc.pg_dreq = dreq;
> -	inode_dio_begin(inode);
>  
>  	while (iov_iter_count(iter)) {
>  		struct page **pagevec;
> @@ -540,7 +534,6 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
>  	 * generic layer handle the completion.
>  	 */
>  	if (requested_bytes == 0) {
> -		inode_dio_end(inode);
>  		nfs_direct_req_release(dreq);
>  		return result < 0 ? result : -EIO;
>  	}
> @@ -770,6 +763,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
>  static void nfs_direct_write_schedule_work(struct work_struct *work)
>  {
>  	struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work);
> +	struct inode *inode = dreq->inode;
>  	int flags = dreq->flags;
>  
>  	dreq->flags = 0;
> @@ -781,6 +775,8 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
>  			nfs_direct_write_reschedule(dreq);
>  			break;
>  		default:
> +			nfs_zap_mapping(inode, inode->i_mapping);
> +
>  			nfs_direct_complete(dreq, true);
>  	}
>  }
> @@ -903,7 +899,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
>  			      &nfs_direct_write_completion_ops);
>  	desc.pg_dreq = dreq;
>  	get_dreq(dreq);
> -	inode_dio_begin(inode);
>  
>  	NFS_I(inode)->write_io += iov_iter_count(iter);
>  	while (iov_iter_count(iter)) {
> @@ -964,7 +959,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
>  	 * generic layer handle the completion.
>  	 */
>  	if (requested_bytes == 0) {
> -		inode_dio_end(inode);
>  		nfs_direct_req_release(dreq);
>  		return result < 0 ? result : -EIO;
>  	}
> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
> index b1dd7f2c64f4..fb7a1b0b6a20 100644
> --- a/fs/nfs/file.c
> +++ b/fs/nfs/file.c
> @@ -276,7 +276,6 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
>  
>  	trace_nfs_fsync_enter(inode);
>  
> -	inode_dio_wait(inode);
>  	do {
>  		ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
>  		if (ret != 0)
> @@ -364,7 +363,6 @@ start:
>  	/*
>  	 * Wait for O_DIRECT to complete
>  	 */
> -	inode_dio_wait(mapping->host);
>  
>  	page = grab_cache_page_write_begin(mapping, index, flags);
>  	if (!page)
> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
> index bd0390da3344..6bc65ffc3c6c 100644
> --- a/fs/nfs/inode.c
> +++ b/fs/nfs/inode.c
> @@ -141,7 +141,6 @@ void nfs_evict_inode(struct inode *inode)
>  
>  int nfs_sync_inode(struct inode *inode)
>  {
> -	inode_dio_wait(inode);
>  	return nfs_wb_all(inode);
>  }
>  EXPORT_SYMBOL_GPL(nfs_sync_inode);
> 


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 11/12] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code
  2016-06-22 17:58                     ` Anna Schumaker
@ 2016-06-22 18:06                       ` Trond Myklebust
  2016-06-22 18:08                         ` Anna Schumaker
  0 siblings, 1 reply; 29+ messages in thread
From: Trond Myklebust @ 2016-06-22 18:06 UTC (permalink / raw)
  To: Schumaker Anna; +Cc: linux-nfs

DQo+IE9uIEp1biAyMiwgMjAxNiwgYXQgMTM6NTgsIEFubmEgU2NodW1ha2VyIDxBbm5hLlNjaHVt
YWtlckBuZXRhcHAuY29tPiB3cm90ZToNCj4gDQo+IEhpIFRyb25kLA0KPiANCj4gT24gMDYvMjEv
MjAxNiAwNTozNCBQTSwgVHJvbmQgTXlrbGVidXN0IHdyb3RlOg0KPj4gTm93IHRoYXQgd2UgY2Fu
IHNlcmlhbGlzZSBPX0RJUkVDVCBhbmQgd3JpdGUvdHJ1bmNhdGUgdXNpbmcgdGhlDQo+PiBpbm9k
ZS0+aV9yd3NlbSwgd2Ugbm8gbG9uZ2VyIG5lZWQgaW5vZGUtPmlfZGlvX2NvdW50Lg0KPiANCj4g
SSdtIHNlZWluZyBjdGhvbiBiYXNpYyB0ZXN0cyBmYWlsIG9uIGFsbCBORlMgdmVyc2lvbnMgYWZ0
ZXIgYXBwbHlpbmcgdGhpcyBwYXRjaDoNCj4gDQo+IC4vdGVzdDU6IHJlYWQgYW5kIHdyaXRlDQo+
ICAgICAgICAuL3Rlc3Q1OiAoL25mcy9iYXNpYykgJ2JpZ2ZpbGUnIGhhcyBzaXplIDgxOTIsIHNo
b3VsZCBiZSAxMDQ4NTc2DQo+IGJhc2ljIHRlc3RzIGZhaWxlZA0KPiANCg0KPz8/PyBDb25uZWN0
YXRob24gZG9lc27igJl0IHVzZSBPX0RJUkVDVC4gQXJlIHlvdSBzdXJlIGl0IGlzIHRoaXMgcGF0
Y2g/DQoNCg==


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 11/12] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code
  2016-06-22 18:06                       ` Trond Myklebust
@ 2016-06-22 18:08                         ` Anna Schumaker
  2016-06-22 18:51                           ` Anna Schumaker
  0 siblings, 1 reply; 29+ messages in thread
From: Anna Schumaker @ 2016-06-22 18:08 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs

On 06/22/2016 02:06 PM, Trond Myklebust wrote:
> 
>> On Jun 22, 2016, at 13:58, Anna Schumaker <Anna.Schumaker@netapp.com> wrote:
>>
>> Hi Trond,
>>
>> On 06/21/2016 05:34 PM, Trond Myklebust wrote:
>>> Now that we can serialise O_DIRECT and write/truncate using the
>>> inode->i_rwsem, we no longer need inode->i_dio_count.
>>
>> I'm seeing cthon basic tests fail on all NFS versions after applying this patch:
>>
>> ./test5: read and write
>>        ./test5: (/nfs/basic) 'bigfile' has size 8192, should be 1048576
>> basic tests failed
>>
> 
> ???? Connectathon doesn’t use O_DIRECT. Are you sure it is this patch?
> 

Pretty sure.  Cthon worked for me at patch #10, but it failed after adding this one.

Anna

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 11/12] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code
  2016-06-22 18:08                         ` Anna Schumaker
@ 2016-06-22 18:51                           ` Anna Schumaker
  2016-06-22 19:42                             ` Trond Myklebust
  0 siblings, 1 reply; 29+ messages in thread
From: Anna Schumaker @ 2016-06-22 18:51 UTC (permalink / raw)
  To: Anna Schumaker, Trond Myklebust; +Cc: linux-nfs

On 06/22/2016 02:08 PM, Anna Schumaker wrote:
> On 06/22/2016 02:06 PM, Trond Myklebust wrote:
>>
>>> On Jun 22, 2016, at 13:58, Anna Schumaker <Anna.Schumaker@netapp.com> wrote:
>>>
>>> Hi Trond,
>>>
>>> On 06/21/2016 05:34 PM, Trond Myklebust wrote:
>>>> Now that we can serialise O_DIRECT and write/truncate using the
>>>> inode->i_rwsem, we no longer need inode->i_dio_count.
>>>
>>> I'm seeing cthon basic tests fail on all NFS versions after applying this patch:
>>>
>>> ./test5: read and write
>>>        ./test5: (/nfs/basic) 'bigfile' has size 8192, should be 1048576
>>> basic tests failed
>>>
>>
>> ???? Connectathon doesn’t use O_DIRECT. Are you sure it is this patch?
>>
> 
> Pretty sure.  Cthon worked for me at patch #10, but it failed after adding this one.

I retried the tests, and it's actually patch #10 where I start having problems.  Sorry for the confusion!

Anna

> 
> Anna
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 11/12] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code
  2016-06-22 18:51                           ` Anna Schumaker
@ 2016-06-22 19:42                             ` Trond Myklebust
  0 siblings, 0 replies; 29+ messages in thread
From: Trond Myklebust @ 2016-06-22 19:42 UTC (permalink / raw)
  To: Schumaker Anna; +Cc: linux-nfs

DQo+IE9uIEp1biAyMiwgMjAxNiwgYXQgMTQ6NTEsIEFubmEgU2NodW1ha2VyIDxBbm5hLlNjaHVt
YWtlckBuZXRhcHAuY29tPiB3cm90ZToNCj4gDQo+IE9uIDA2LzIyLzIwMTYgMDI6MDggUE0sIEFu
bmEgU2NodW1ha2VyIHdyb3RlOg0KPj4gT24gMDYvMjIvMjAxNiAwMjowNiBQTSwgVHJvbmQgTXlr
bGVidXN0IHdyb3RlOg0KPj4+IA0KPj4+PiBPbiBKdW4gMjIsIDIwMTYsIGF0IDEzOjU4LCBBbm5h
IFNjaHVtYWtlciA8QW5uYS5TY2h1bWFrZXJAbmV0YXBwLmNvbT4gd3JvdGU6DQo+Pj4+IA0KPj4+
PiBIaSBUcm9uZCwNCj4+Pj4gDQo+Pj4+IE9uIDA2LzIxLzIwMTYgMDU6MzQgUE0sIFRyb25kIE15
a2xlYnVzdCB3cm90ZToNCj4+Pj4+IE5vdyB0aGF0IHdlIGNhbiBzZXJpYWxpc2UgT19ESVJFQ1Qg
YW5kIHdyaXRlL3RydW5jYXRlIHVzaW5nIHRoZQ0KPj4+Pj4gaW5vZGUtPmlfcndzZW0sIHdlIG5v
IGxvbmdlciBuZWVkIGlub2RlLT5pX2Rpb19jb3VudC4NCj4+Pj4gDQo+Pj4+IEknbSBzZWVpbmcg
Y3Rob24gYmFzaWMgdGVzdHMgZmFpbCBvbiBhbGwgTkZTIHZlcnNpb25zIGFmdGVyIGFwcGx5aW5n
IHRoaXMgcGF0Y2g6DQo+Pj4+IA0KPj4+PiAuL3Rlc3Q1OiByZWFkIGFuZCB3cml0ZQ0KPj4+PiAg
ICAgICAuL3Rlc3Q1OiAoL25mcy9iYXNpYykgJ2JpZ2ZpbGUnIGhhcyBzaXplIDgxOTIsIHNob3Vs
ZCBiZSAxMDQ4NTc2DQo+Pj4+IGJhc2ljIHRlc3RzIGZhaWxlZA0KPj4+PiANCj4+PiANCj4+PiA/
Pz8/IENvbm5lY3RhdGhvbiBkb2VzbuKAmXQgdXNlIE9fRElSRUNULiBBcmUgeW91IHN1cmUgaXQg
aXMgdGhpcyBwYXRjaD8NCj4+PiANCj4+IA0KPj4gUHJldHR5IHN1cmUuICBDdGhvbiB3b3JrZWQg
Zm9yIG1lIGF0IHBhdGNoICMxMCwgYnV0IGl0IGZhaWxlZCBhZnRlciBhZGRpbmcgdGhpcyBvbmUu
DQo+IA0KPiBJIHJldHJpZWQgdGhlIHRlc3RzLCBhbmQgaXQncyBhY3R1YWxseSBwYXRjaCAjMTAg
d2hlcmUgSSBzdGFydCBoYXZpbmcgcHJvYmxlbXMuICBTb3JyeSBmb3IgdGhlIGNvbmZ1c2lvbiEN
Cj4gDQoNClRoYXQgbWFrZXMgbW9yZSBzZW5zZSwgYW5kIEkgdGhpbmsgSeKAmXZlIGZvdW5kIHRo
ZSBpc3N1ZXMuIFRoZXJlIHdlcmUgMjoNCg0KMSkgVGhlIHVwZGF0ZSBvZiBpb2NiLT5raV9wb3Mg
d2FzIGJyb2tlbi4NCjIpIGdlbmVyaWNfd3JpdGVfY2hlY2tzKCkgbmVlZHMgdG8gcnVuIGFmdGVy
IHRoZSBmaWxlIHNpemUgcmV2YWxpZGF0aW9uDQoNCkNoZWVycw0KICBUcm9uZA==


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 11/12] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code
  2016-06-22 16:58                       ` Trond Myklebust
@ 2016-06-23 10:19                         ` Christoph Hellwig
  0 siblings, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2016-06-23 10:19 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Christoph Hellwig, linux-nfs

On Wed, Jun 22, 2016 at 04:58:05PM +0000, Trond Myklebust wrote:
> > For the AIO case that's not true, we can't hold i_rwsem until
> > aio completes.  So for something that does block allocations we'll
> > need something like i_dio_count still.  This probably includes the
> > block / scsi layout code.
> 
> Why can?t we hold the i_rwsem until aio completes?

Because Linux rw_semaphores require have owner semantics, and require
the owner that acquired them to release them again.  If we have
an AIO request we're not going to complete the request in the calling
context, though.

The same is true for regular mutexes, that's why we drop the inode lock
at the end of nfs_file_direct_read/write even if I/O might still be
pending.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 10/12] NFS: Do not serialise O_DIRECT reads and writes
  2016-06-22 17:24                     ` Trond Myklebust
@ 2016-06-23 11:00                         ` Christoph Hellwig
  0 siblings, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2016-06-23 11:00 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, linux-fsdevel, linux-mm

On Wed, Jun 22, 2016 at 05:24:50PM +0000, Trond Myklebust wrote:
> If we?re going to worry about write atomicity in the buffered I/O case,
> then we really should also make sure that O_DIRECT writes are atomic
> w.r.t. page cache updates too.  With this locking model, a buffered
> read() can race with the O_DIRECT write() and get a mixture of old
> data and new.

The difference between buffered I/O and direct I/O is that the former
is covered by standards, and the latter is a Linux extension with very
lose semantics.  But I'm perfectly fine with removing the buffered
reader shared lock for now - for the purposes of direct I/O
synchronization it's not nessecary.


Yes.

> > +	if (mapping->nrpages) {
> > +		inode_lock(inode);
> 
> This is unnecessary now that we have a rw_semaphore. You don?t need to
> take an exclusive lock in order to serialise w.r.t. new writes, and by
> doing so you end up serialising all reads if there happens to be pages
> in the page cache. This is true whether or not those pages are dirty.

Traditionally we needed the exclusive lock around
invalidate_inode_pages2 and unmap_mapping_range, and from a quick look
that's what the existing callers all have.  I don't actually see that
requirement documented anywhere, though.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 10/12] NFS: Do not serialise O_DIRECT reads and writes
@ 2016-06-23 11:00                         ` Christoph Hellwig
  0 siblings, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2016-06-23 11:00 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, linux-fsdevel, linux-mm

On Wed, Jun 22, 2016 at 05:24:50PM +0000, Trond Myklebust wrote:
> If we?re going to worry about write atomicity in the buffered I/O case,
> then we really should also make sure that O_DIRECT writes are atomic
> w.r.t. page cache updates too.  With this locking model, a buffered
> read() can race with the O_DIRECT write() and get a mixture of old
> data and new.

The difference between buffered I/O and direct I/O is that the former
is covered by standards, and the latter is a Linux extension with very
lose semantics.  But I'm perfectly fine with removing the buffered
reader shared lock for now - for the purposes of direct I/O
synchronization it's not nessecary.


Yes.

> > +	if (mapping->nrpages) {
> > +		inode_lock(inode);
> 
> This is unnecessary now that we have a rw_semaphore. You don?t need to
> take an exclusive lock in order to serialise w.r.t. new writes, and by
> doing so you end up serialising all reads if there happens to be pages
> in the page cache. This is true whether or not those pages are dirty.

Traditionally we needed the exclusive lock around
invalidate_inode_pages2 and unmap_mapping_range, and from a quick look
that's what the existing callers all have.  I don't actually see that
requirement documented anywhere, though.

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2016-06-23 11:00 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-21 21:34 [PATCH v2 01/12] NFS: Don't flush caches for a getattr that races with writeback Trond Myklebust
2016-06-21 21:34 ` [PATCH v2 02/12] NFS: Cache access checks more aggressively Trond Myklebust
2016-06-21 21:34   ` [PATCH v2 03/12] NFS: Cache aggressively when file is open for writing Trond Myklebust
2016-06-21 21:34     ` [PATCH v2 04/12] NFS: Kill NFS_INO_NFS_INO_FLUSHING: it is a performance killer Trond Myklebust
2016-06-21 21:34       ` [PATCH v2 05/12] NFS: writepage of a single page should not be synchronous Trond Myklebust
2016-06-21 21:34         ` [PATCH v2 06/12] NFS: Don't hold the inode lock across fsync() Trond Myklebust
2016-06-21 21:34           ` [PATCH v2 07/12] NFS: Don't call COMMIT in ->releasepage() Trond Myklebust
2016-06-21 21:34             ` [PATCH v2 08/12] NFS: Fix O_DIRECT verifier problems Trond Myklebust
2016-06-21 21:34               ` [PATCH v2 09/12] NFS: Ensure we reset the write verifier 'committed' value on resend Trond Myklebust
2016-06-21 21:34                 ` [PATCH v2 10/12] NFS: Do not serialise O_DIRECT reads and writes Trond Myklebust
2016-06-21 21:34                   ` [PATCH v2 11/12] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code Trond Myklebust
2016-06-21 21:34                     ` [PATCH v2 12/12] NFS: Clean up nfs_direct_complete() Trond Myklebust
2016-06-22 16:43                       ` Christoph Hellwig
2016-06-22 16:42                     ` [PATCH v2 11/12] NFS: Remove inode->i_dio_count from the NFS O_DIRECT code Christoph Hellwig
2016-06-22 16:58                       ` Trond Myklebust
2016-06-23 10:19                         ` Christoph Hellwig
2016-06-22 17:58                     ` Anna Schumaker
2016-06-22 18:06                       ` Trond Myklebust
2016-06-22 18:08                         ` Anna Schumaker
2016-06-22 18:51                           ` Anna Schumaker
2016-06-22 19:42                             ` Trond Myklebust
2016-06-22 16:47                   ` [PATCH v2 10/12] NFS: Do not serialise O_DIRECT reads and writes Christoph Hellwig
2016-06-22 17:24                     ` Trond Myklebust
2016-06-23 11:00                       ` Christoph Hellwig
2016-06-23 11:00                         ` Christoph Hellwig
2016-06-21 22:25     ` [PATCH v2 03/12] NFS: Cache aggressively when file is open for writing Oleg Drokin
2016-06-22 13:06       ` Trond Myklebust
2016-06-22 16:19         ` Oleg Drokin
2016-06-22 15:48 ` [PATCH v2 01/12] NFS: Don't flush caches for a getattr that races with writeback Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.