linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4 0/5] Readdir improvements
@ 2022-02-17 22:33 trondmy
  2022-02-17 22:33 ` [PATCH v4 1/5] NFS: Adjust the amount of readahead performed by NFS readdir trondmy
  0 siblings, 1 reply; 8+ messages in thread
From: trondmy @ 2022-02-17 22:33 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

The current NFS readdir code will always try to maximise the amount of
readahead it performs on the assumption that we can cache anything that
isn't immediately read by the process.
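There are several cases where this assumption breaks down, including
when the 'ls -l' heuristic kicks in to try to force use of readdirplus
as a batch replacement for lookup/getattr.

This series therefore tones down the amount of readahead we perform,
improves the algorithm for falling back to uncached readdir, and makes
the readdirplus heuristic track cache hit/miss statistics instead of
reacting to a single miss.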

--
v2: Remove reset of dtsize when NFS_INO_FORCE_READDIR is set
v3: Avoid excessive window shrinking in uncached_readdir case
v4: Track 'ls -l' cache hit/miss statistics
    Improved algorithm for falling back to uncached readdir
    Skip readdirplus when files are being written to

Trond Myklebust (5):
  NFS: Adjust the amount of readahead performed by NFS readdir
  NFS: Simplify nfs_readdir_xdr_to_array()
  NFS: Improve algorithm for falling back to uncached readdir
  NFS: Improve heuristic for readdirplus
  NFS: Don't ask for readdirplus if files are being written to

 fs/nfs/dir.c           | 210 ++++++++++++++++++++++++++---------------
 fs/nfs/inode.c         |  17 ++--
 fs/nfs/internal.h      |   4 +-
 fs/nfs/nfstrace.h      |   1 -
 include/linux/nfs_fs.h |   7 +-
 5 files changed, 153 insertions(+), 86 deletions(-)

-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v4 1/5] NFS: Adjust the amount of readahead performed by NFS readdir
  2022-02-17 22:33 [PATCH v4 0/5] Readdir improvements trondmy
@ 2022-02-17 22:33 ` trondmy
  2022-02-17 22:33   ` [PATCH v4 2/5] NFS: Simplify nfs_readdir_xdr_to_array() trondmy
  0 siblings, 1 reply; 8+ messages in thread
From: trondmy @ 2022-02-17 22:33 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

The current NFS readdir code will always try to maximise the amount of
readahead it performs on the assumption that we can cache anything that
isn't immediately read by the process.
There are several cases where this assumption breaks down, including
when the 'ls -l' heuristic kicks in to try to force use of readdirplus
as a batch replacement for lookup/getattr.

This patch therefore tries to tone down the amount of readahead we
perform, and adjust it to try to match the amount of data being
requested by user space.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c           | 55 +++++++++++++++++++++++++++++++++++++++++-
 include/linux/nfs_fs.h |  1 +
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8b190c8e4a45..b0ee3a0e0f81 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -69,6 +69,8 @@ const struct address_space_operations nfs_dir_aops = {
 	.freepage = nfs_readdir_clear_array,
 };
 
+#define NFS_INIT_DTSIZE PAGE_SIZE
+
 static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir)
 {
 	struct nfs_inode *nfsi = NFS_I(dir);
@@ -80,6 +82,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir
 		ctx->dir_cookie = 0;
 		ctx->dup_cookie = 0;
 		ctx->page_index = 0;
+		ctx->dtsize = NFS_INIT_DTSIZE;
 		ctx->eof = false;
 		spin_lock(&dir->i_lock);
 		if (list_empty(&nfsi->open_files) &&
@@ -155,6 +158,7 @@ struct nfs_readdir_descriptor {
 	struct page	*page;
 	struct dir_context *ctx;
 	pgoff_t		page_index;
+	pgoff_t		page_index_max;
 	u64		dir_cookie;
 	u64		last_cookie;
 	u64		dup_cookie;
@@ -167,12 +171,36 @@ struct nfs_readdir_descriptor {
 	unsigned long	gencount;
 	unsigned long	attr_gencount;
 	unsigned int	cache_entry_index;
+	unsigned int	buffer_fills;
+	unsigned int	dtsize;
 	signed char duped;
 	bool plus;
 	bool eob;
 	bool eof;
 };
 
+static void nfs_set_dtsize(struct nfs_readdir_descriptor *desc, unsigned int sz)
+{
+	struct nfs_server *server = NFS_SERVER(file_inode(desc->file));
+	unsigned int maxsize = server->dtsize;
+
+	if (sz > maxsize)
+		sz = maxsize;
+	if (sz < NFS_MIN_FILE_IO_SIZE)
+		sz = NFS_MIN_FILE_IO_SIZE;
+	desc->dtsize = sz;
+}
+
+static void nfs_shrink_dtsize(struct nfs_readdir_descriptor *desc)
+{
+	nfs_set_dtsize(desc, desc->dtsize >> 1);
+}
+
+static void nfs_grow_dtsize(struct nfs_readdir_descriptor *desc)
+{
+	nfs_set_dtsize(desc, desc->dtsize << 1);
+}
+
 static void nfs_readdir_array_init(struct nfs_cache_array *array)
 {
 	memset(array, 0, sizeof(struct nfs_cache_array));
@@ -759,6 +787,7 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
 				break;
 			arrays++;
 			*arrays = page = new;
+			desc->page_index_max++;
 		} else {
 			new = nfs_readdir_page_get_next(mapping,
 							page->index + 1,
@@ -768,6 +797,7 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
 			if (page != *arrays)
 				nfs_readdir_page_unlock_and_put(page);
 			page = new;
+			desc->page_index_max = new->index;
 		}
 		status = nfs_readdir_add_to_array(entry, page);
 	} while (!status && !entry->eof);
@@ -833,7 +863,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
 	struct nfs_entry *entry;
 	size_t array_size;
 	struct inode *inode = file_inode(desc->file);
-	size_t dtsize = NFS_SERVER(inode)->dtsize;
+	unsigned int dtsize = desc->dtsize;
 	int status = -ENOMEM;
 
 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
@@ -869,6 +899,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
 
 		status = nfs_readdir_page_filler(desc, entry, pages, pglen,
 						 arrays, narrays);
+		desc->buffer_fills++;
 	} while (!status && nfs_readdir_page_needs_filling(page) &&
 		page_mapping(page));
 
@@ -916,6 +947,7 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
 	if (!desc->page)
 		return -ENOMEM;
 	if (nfs_readdir_page_needs_filling(desc->page)) {
+		desc->page_index_max = desc->page_index;
 		res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf,
 					       &desc->page, 1);
 		if (res < 0) {
@@ -1047,6 +1079,7 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
 	desc->cache_entry_index = 0;
 	desc->last_cookie = desc->dir_cookie;
 	desc->duped = 0;
+	desc->page_index_max = 0;
 
 	status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz);
 
@@ -1056,10 +1089,22 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
 	}
 	desc->page = NULL;
 
+	/*
+	 * Grow the dtsize if we have to go back for more pages,
+	 * or shrink it if we're reading too many.
+	 */
+	if (!desc->eof) {
+		if (!desc->eob)
+			nfs_grow_dtsize(desc);
+		else if (desc->buffer_fills == 1 &&
+			 i < (desc->page_index_max >> 1))
+			nfs_shrink_dtsize(desc);
+	}
 
 	for (i = 0; i < sz && arrays[i]; i++)
 		nfs_readdir_page_array_free(arrays[i]);
 out:
+	desc->page_index_max = -1;
 	kfree(arrays);
 	dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status);
 	return status;
@@ -1102,6 +1147,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	desc->file = file;
 	desc->ctx = ctx;
 	desc->plus = nfs_use_readdirplus(inode, ctx);
+	desc->page_index_max = -1;
 
 	spin_lock(&file->f_lock);
 	desc->dir_cookie = dir_ctx->dir_cookie;
@@ -1110,6 +1156,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	page_index = dir_ctx->page_index;
 	desc->attr_gencount = dir_ctx->attr_gencount;
 	desc->eof = dir_ctx->eof;
+	nfs_set_dtsize(desc, dir_ctx->dtsize);
 	memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
 	spin_unlock(&file->f_lock);
 
@@ -1151,6 +1198,11 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 
 		nfs_do_filldir(desc, nfsi->cookieverf);
 		nfs_readdir_page_unlock_and_put_cached(desc);
+		if (desc->eob || desc->eof)
+			break;
+		/* Grow the dtsize if we have to go back for more pages */
+		if (desc->page_index == desc->page_index_max)
+			nfs_grow_dtsize(desc);
 	} while (!desc->eob && !desc->eof);
 
 	spin_lock(&file->f_lock);
@@ -1160,6 +1212,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	dir_ctx->attr_gencount = desc->attr_gencount;
 	dir_ctx->page_index = desc->page_index;
 	dir_ctx->eof = desc->eof;
+	dir_ctx->dtsize = desc->dtsize;
 	memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
 	spin_unlock(&file->f_lock);
 out_free:
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 72a732a5103c..98120f2d7e0b 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -106,6 +106,7 @@ struct nfs_open_dir_context {
 	__u64 dir_cookie;
 	__u64 dup_cookie;
 	pgoff_t page_index;
+	unsigned int dtsize;
 	signed char duped;
 	bool eof;
 };
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4 2/5] NFS: Simplify nfs_readdir_xdr_to_array()
  2022-02-17 22:33 ` [PATCH v4 1/5] NFS: Adjust the amount of readahead performed by NFS readdir trondmy
@ 2022-02-17 22:33   ` trondmy
  2022-02-17 22:33     ` [PATCH v4 3/5] NFS: Improve algorithm for falling back to uncached readdir trondmy
  0 siblings, 1 reply; 8+ messages in thread
From: trondmy @ 2022-02-17 22:33 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

Recent changes to readdir mean that we can cope with partially filled
page cache entries, so we no longer need to rely on looping in
nfs_readdir_xdr_to_array().

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c | 29 +++++++++++------------------
 1 file changed, 11 insertions(+), 18 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b0ee3a0e0f81..10421b5331ca 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -864,6 +864,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
 	size_t array_size;
 	struct inode *inode = file_inode(desc->file);
 	unsigned int dtsize = desc->dtsize;
+	unsigned int pglen;
 	int status = -ENOMEM;
 
 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
@@ -881,28 +882,20 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
 	if (!pages)
 		goto out;
 
-	do {
-		unsigned int pglen;
-		status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie,
-						pages, dtsize,
-						verf_res);
-		if (status < 0)
-			break;
-
-		pglen = status;
-		if (pglen == 0) {
-			nfs_readdir_page_set_eof(page);
-			break;
-		}
-
-		verf_arg = verf_res;
+	status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie, pages,
+					dtsize, verf_res);
+	if (status < 0)
+		goto free_pages;
 
+	pglen = status;
+	if (pglen != 0)
 		status = nfs_readdir_page_filler(desc, entry, pages, pglen,
 						 arrays, narrays);
-		desc->buffer_fills++;
-	} while (!status && nfs_readdir_page_needs_filling(page) &&
-		page_mapping(page));
+	else
+		nfs_readdir_page_set_eof(page);
+	desc->buffer_fills++;
 
+free_pages:
 	nfs_readdir_free_pages(pages, array_size);
 out:
 	nfs_free_fattr(entry->fattr);
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4 3/5] NFS: Improve algorithm for falling back to uncached readdir
  2022-02-17 22:33   ` [PATCH v4 2/5] NFS: Simplify nfs_readdir_xdr_to_array() trondmy
@ 2022-02-17 22:33     ` trondmy
  2022-02-17 22:33       ` [PATCH v4 4/5] NFS: Improve heuristic for readdirplus trondmy
  0 siblings, 1 reply; 8+ messages in thread
From: trondmy @ 2022-02-17 22:33 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

When reading a very large directory, we want to try to keep the page
cache up to date if doing so is inexpensive. Right now, we will try to
refill the page cache if it is non-empty, irrespective of whether or not
doing so is going to take a long time.

Replace that algorithm with something that looks at how many times we've
refilled the page cache without seeing a cache hit.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c           | 51 +++++++++++++++++++++---------------------
 include/linux/nfs_fs.h |  1 +
 2 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 10421b5331ca..43a559b34f4a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -71,19 +71,16 @@ const struct address_space_operations nfs_dir_aops = {
 
 #define NFS_INIT_DTSIZE PAGE_SIZE
 
-static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir)
+static struct nfs_open_dir_context *
+alloc_nfs_open_dir_context(struct inode *dir)
 {
 	struct nfs_inode *nfsi = NFS_I(dir);
 	struct nfs_open_dir_context *ctx;
-	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
 	if (ctx != NULL) {
-		ctx->duped = 0;
 		ctx->attr_gencount = nfsi->attr_gencount;
-		ctx->dir_cookie = 0;
-		ctx->dup_cookie = 0;
-		ctx->page_index = 0;
 		ctx->dtsize = NFS_INIT_DTSIZE;
-		ctx->eof = false;
 		spin_lock(&dir->i_lock);
 		if (list_empty(&nfsi->open_files) &&
 		    (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
@@ -170,6 +167,7 @@ struct nfs_readdir_descriptor {
 	unsigned long	timestamp;
 	unsigned long	gencount;
 	unsigned long	attr_gencount;
+	unsigned int	page_fill_misses;
 	unsigned int	cache_entry_index;
 	unsigned int	buffer_fills;
 	unsigned int	dtsize;
@@ -925,6 +923,18 @@ nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc)
 					   desc->last_cookie);
 }
 
+#define NFS_READDIR_PAGE_FILL_MISS_MAX 5
+/*
+ * If we've refilled the page cache NFS_READDIR_PAGE_FILL_MISS_MAX times
+ * without finding our cookie, stop and fall back to uncached readdir.
+ * Reads from the start of the directory (cookie 0) may always refill.
+ */
+static bool nfs_readdir_may_fill_pagecache(struct nfs_readdir_descriptor *desc)
+{
+	return desc->dir_cookie == 0 ||
+	       desc->page_fill_misses < NFS_READDIR_PAGE_FILL_MISS_MAX;
+}
+
 /*
  * Returns 0 if desc->dir_cookie was found on page desc->page_index
  * and locks the page to prevent removal from the page cache.
@@ -940,6 +950,8 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
 	if (!desc->page)
 		return -ENOMEM;
 	if (nfs_readdir_page_needs_filling(desc->page)) {
+		if (!nfs_readdir_may_fill_pagecache(desc))
+			return -EBADCOOKIE;
 		desc->page_index_max = desc->page_index;
 		res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf,
 					       &desc->page, 1);
@@ -958,36 +970,22 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
 		if (desc->page_index == 0)
 			memcpy(nfsi->cookieverf, verf,
 			       sizeof(nfsi->cookieverf));
+		desc->page_fill_misses++;
 	}
 	res = nfs_readdir_search_array(desc);
-	if (res == 0)
+	if (res == 0) {
+		desc->page_fill_misses = 0;
 		return 0;
+	}
 	nfs_readdir_page_unlock_and_put_cached(desc);
 	return res;
 }
 
-static bool nfs_readdir_dont_search_cache(struct nfs_readdir_descriptor *desc)
-{
-	struct address_space *mapping = desc->file->f_mapping;
-	struct inode *dir = file_inode(desc->file);
-	unsigned int dtsize = NFS_SERVER(dir)->dtsize;
-	loff_t size = i_size_read(dir);
-
-	/*
-	 * Default to uncached readdir if the page cache is empty, and
-	 * we're looking for a non-zero cookie in a large directory.
-	 */
-	return desc->dir_cookie != 0 && mapping->nrpages == 0 && size > dtsize;
-}
-
 /* Search for desc->dir_cookie from the beginning of the page cache */
 static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
 {
 	int res;
 
-	if (nfs_readdir_dont_search_cache(desc))
-		return -EBADCOOKIE;
-
 	do {
 		if (desc->page_index == 0) {
 			desc->current_index = 0;
@@ -1149,6 +1147,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	page_index = dir_ctx->page_index;
 	desc->attr_gencount = dir_ctx->attr_gencount;
 	desc->eof = dir_ctx->eof;
+	desc->page_fill_misses = dir_ctx->page_fill_misses;
 	nfs_set_dtsize(desc, dir_ctx->dtsize);
 	memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
 	spin_unlock(&file->f_lock);
@@ -1204,6 +1203,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	dir_ctx->duped = desc->duped;
 	dir_ctx->attr_gencount = desc->attr_gencount;
 	dir_ctx->page_index = desc->page_index;
+	dir_ctx->page_fill_misses = desc->page_fill_misses;
 	dir_ctx->eof = desc->eof;
 	dir_ctx->dtsize = desc->dtsize;
 	memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
@@ -1247,6 +1247,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
 			dir_ctx->dir_cookie = offset;
 		else
 			dir_ctx->dir_cookie = 0;
+		dir_ctx->page_fill_misses = 0;
 		if (offset == 0)
 			memset(dir_ctx->verf, 0, sizeof(dir_ctx->verf));
 		dir_ctx->duped = 0;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 98120f2d7e0b..9e5fc29723c2 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -106,6 +106,7 @@ struct nfs_open_dir_context {
 	__u64 dir_cookie;
 	__u64 dup_cookie;
 	pgoff_t page_index;
+	unsigned int page_fill_misses;
 	unsigned int dtsize;
 	signed char duped;
 	bool eof;
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4 4/5] NFS: Improve heuristic for readdirplus
  2022-02-17 22:33     ` [PATCH v4 3/5] NFS: Improve algorithm for falling back to uncached readdir trondmy
@ 2022-02-17 22:33       ` trondmy
  2022-02-17 22:33         ` [PATCH v4 5/5] NFS: Don't ask for readdirplus if files are being written to trondmy
  2022-02-18 11:37         ` [PATCH v4 4/5] NFS: Improve heuristic for readdirplus Trond Myklebust
  0 siblings, 2 replies; 8+ messages in thread
From: trondmy @ 2022-02-17 22:33 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

The heuristic for readdirplus is designed to try to detect 'ls -l' and
similar patterns. It does so by looking for cache hit/miss patterns in
both the attribute cache and in the dcache of the files in a given
directory, and then sets a flag for the readdirplus code to interpret.

The problem with this approach is that a single attribute or dcache miss
can cause the NFS code to force a refresh of the attributes for the
entire set of files contained in the directory.

To be able to make a more nuanced decision, let's sample the number of
hits and misses in the set of open directory descriptors. That allows us
to set thresholds at which we start preferring READDIRPLUS over regular
READDIR, or at which we start to force a re-read of the remaining
readdir cache using READDIRPLUS.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c           | 77 +++++++++++++++++++++++++-----------------
 fs/nfs/inode.c         |  4 +--
 fs/nfs/internal.h      |  4 +--
 fs/nfs/nfstrace.h      |  1 -
 include/linux/nfs_fs.h |  5 +--
 5 files changed, 53 insertions(+), 38 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 43a559b34f4a..cd57df004789 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -87,8 +87,7 @@ alloc_nfs_open_dir_context(struct inode *dir)
 			nfs_set_cache_invalid(dir,
 					      NFS_INO_INVALID_DATA |
 						      NFS_INO_REVAL_FORCED);
-		list_add(&ctx->list, &nfsi->open_files);
-		clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags);
+		list_add_tail_rcu(&ctx->list, &nfsi->open_files);
 		spin_unlock(&dir->i_lock);
 		return ctx;
 	}
@@ -98,9 +97,9 @@ alloc_nfs_open_dir_context(struct inode *dir)
 static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_context *ctx)
 {
 	spin_lock(&dir->i_lock);
-	list_del(&ctx->list);
+	list_del_rcu(&ctx->list);
 	spin_unlock(&dir->i_lock);
-	kfree(ctx);
+	kfree_rcu(ctx, rcu_head);
 }
 
 /*
@@ -567,7 +566,6 @@ static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
 		/* We requested READDIRPLUS, but the server doesn't grok it */
 		if (error == -ENOTSUPP && desc->plus) {
 			NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
-			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
 			desc->plus = arg.plus = false;
 			goto again;
 		}
@@ -617,51 +615,57 @@ int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
 	return 1;
 }
 
-static
-bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx)
+#define NFS_READDIR_CACHE_USAGE_THRESHOLD (8UL)
+
+static bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx,
+				unsigned int cache_hits,
+				unsigned int cache_misses)
 {
 	if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
 		return false;
-	if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags))
-		return true;
-	if (ctx->pos == 0)
+	if (ctx->pos == 0 ||
+	    cache_hits + cache_misses > NFS_READDIR_CACHE_USAGE_THRESHOLD)
 		return true;
 	return false;
 }
 
 /*
- * This function is called by the lookup and getattr code to request the
+ * This function is called by the getattr code to request the
  * use of readdirplus to accelerate any future lookups in the same
  * directory.
  */
-void nfs_advise_use_readdirplus(struct inode *dir)
+void nfs_readdir_record_entry_cache_hit(struct inode *dir)
 {
 	struct nfs_inode *nfsi = NFS_I(dir);
+	struct nfs_open_dir_context *ctx;
 
-	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
-	    !list_empty(&nfsi->open_files))
-		set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
+	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) {
+		list_for_each_entry_rcu (ctx, &nfsi->open_files, list)
+			atomic_inc(&ctx->cache_hits);
+	}
 }
 
 /*
  * This function is mainly for use by nfs_getattr().
  *
  * If this is an 'ls -l', we want to force use of readdirplus.
- * Do this by checking if there is an active file descriptor
- * and calling nfs_advise_use_readdirplus, then forcing a
- * cache flush.
  */
-void nfs_force_use_readdirplus(struct inode *dir)
+void nfs_readdir_record_entry_cache_miss(struct inode *dir)
 {
 	struct nfs_inode *nfsi = NFS_I(dir);
+	struct nfs_open_dir_context *ctx;
 
-	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
-	    !list_empty(&nfsi->open_files)) {
-		set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
-		set_bit(NFS_INO_FORCE_READDIR, &nfsi->flags);
+	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) {
+		list_for_each_entry_rcu (ctx, &nfsi->open_files, list)
+			atomic_inc(&ctx->cache_misses);
 	}
 }
 
+static void nfs_readdir_record_dcache_miss(struct inode *dir)
+{
+	nfs_readdir_record_entry_cache_miss(dir);
+}
+
 static
 void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
 		unsigned long dir_verifier)
@@ -1101,6 +1105,18 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
 	return status;
 }
 
+#define NFS_READDIR_CACHE_MISS_THRESHOLD (16UL)
+
+static void nfs_readdir_handle_cache_misses(struct inode *inode,
+					    struct nfs_readdir_descriptor *desc,
+					    pgoff_t page_index,
+					    unsigned int cache_misses)
+{
+	if (desc->ctx->pos != 0 &&
+	    cache_misses > NFS_READDIR_CACHE_MISS_THRESHOLD)
+		invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1);
+}
+
 /* The file offset position represents the dirent entry number.  A
    last cookie cache takes care of the common case of reading the
    whole directory.
@@ -1112,6 +1128,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_open_dir_context *dir_ctx = file->private_data;
 	struct nfs_readdir_descriptor *desc;
+	unsigned int cache_hits, cache_misses;
 	pgoff_t page_index;
 	int res;
 
@@ -1137,7 +1154,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 		goto out;
 	desc->file = file;
 	desc->ctx = ctx;
-	desc->plus = nfs_use_readdirplus(inode, ctx);
 	desc->page_index_max = -1;
 
 	spin_lock(&file->f_lock);
@@ -1150,6 +1166,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	desc->page_fill_misses = dir_ctx->page_fill_misses;
 	nfs_set_dtsize(desc, dir_ctx->dtsize);
 	memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
+	cache_hits = atomic_xchg(&dir_ctx->cache_hits, 0);
+	cache_misses = atomic_xchg(&dir_ctx->cache_misses, 0);
 	spin_unlock(&file->f_lock);
 
 	if (desc->eof) {
@@ -1157,9 +1175,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 		goto out_free;
 	}
 
-	if (test_and_clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags) &&
-	    list_is_singular(&nfsi->open_files))
-		invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1);
+	desc->plus = nfs_use_readdirplus(inode, ctx, cache_hits, cache_misses);
+	nfs_readdir_handle_cache_misses(inode, desc, page_index, cache_misses);
 
 	do {
 		res = readdir_search_pagecache(desc);
@@ -1178,7 +1195,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 			break;
 		}
 		if (res == -ETOOSMALL && desc->plus) {
-			clear_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
 			nfs_zap_caches(inode);
 			desc->page_index = 0;
 			desc->plus = false;
@@ -1602,7 +1618,7 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
 	nfs_set_verifier(dentry, dir_verifier);
 
 	/* set a readdirplus hint that we had a cache miss */
-	nfs_force_use_readdirplus(dir);
+	nfs_readdir_record_dcache_miss(dir);
 	ret = 1;
 out:
 	nfs_free_fattr(fattr);
@@ -1659,7 +1675,6 @@ nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
 				nfs_mark_dir_for_revalidate(dir);
 			goto out_bad;
 		}
-		nfs_advise_use_readdirplus(dir);
 		goto out_valid;
 	}
 
@@ -1866,7 +1881,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
 		goto out;
 
 	/* Notify readdir to use READDIRPLUS */
-	nfs_force_use_readdirplus(dir);
+	nfs_readdir_record_dcache_miss(dir);
 
 no_entry:
 	res = d_splice_alias(inode, dentry);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f9fc506ebb29..1bef81f5373a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -789,7 +789,7 @@ static void nfs_readdirplus_parent_cache_miss(struct dentry *dentry)
 	if (!nfs_server_capable(d_inode(dentry), NFS_CAP_READDIRPLUS))
 		return;
 	parent = dget_parent(dentry);
-	nfs_force_use_readdirplus(d_inode(parent));
+	nfs_readdir_record_entry_cache_miss(d_inode(parent));
 	dput(parent);
 }
 
@@ -800,7 +800,7 @@ static void nfs_readdirplus_parent_cache_hit(struct dentry *dentry)
 	if (!nfs_server_capable(d_inode(dentry), NFS_CAP_READDIRPLUS))
 		return;
 	parent = dget_parent(dentry);
-	nfs_advise_use_readdirplus(d_inode(parent));
+	nfs_readdir_record_entry_cache_hit(d_inode(parent));
 	dput(parent);
 }
 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 2de7c56a1fbe..46dc97b65661 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -366,8 +366,8 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
 			   const struct nfs_client_initdata *);
 
 /* dir.c */
-extern void nfs_advise_use_readdirplus(struct inode *dir);
-extern void nfs_force_use_readdirplus(struct inode *dir);
+extern void nfs_readdir_record_entry_cache_hit(struct inode *dir);
+extern void nfs_readdir_record_entry_cache_miss(struct inode *dir);
 extern unsigned long nfs_access_cache_count(struct shrinker *shrink,
 					    struct shrink_control *sc);
 extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 45a310b586ce..3672f6703ee7 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -36,7 +36,6 @@
 
 #define nfs_show_nfsi_flags(v) \
 	__print_flags(v, "|", \
-			{ BIT(NFS_INO_ADVISE_RDPLUS), "ADVISE_RDPLUS" }, \
 			{ BIT(NFS_INO_STALE), "STALE" }, \
 			{ BIT(NFS_INO_ACL_LRU_SET), "ACL_LRU_SET" }, \
 			{ BIT(NFS_INO_INVALIDATING), "INVALIDATING" }, \
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 9e5fc29723c2..e21bd9452d27 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -101,6 +101,8 @@ struct nfs_open_context {
 
 struct nfs_open_dir_context {
 	struct list_head list;
+	atomic_t cache_hits;
+	atomic_t cache_misses;
 	unsigned long attr_gencount;
 	__be32	verf[NFS_DIR_VERIFIER_SIZE];
 	__u64 dir_cookie;
@@ -110,6 +112,7 @@ struct nfs_open_dir_context {
 	unsigned int dtsize;
 	signed char duped;
 	bool eof;
+	struct rcu_head rcu_head;
 };
 
 /*
@@ -274,13 +277,11 @@ struct nfs4_copy_state {
 /*
  * Bit offsets in flags field
  */
-#define NFS_INO_ADVISE_RDPLUS	(0)		/* advise readdirplus */
 #define NFS_INO_STALE		(1)		/* possible stale inode */
 #define NFS_INO_ACL_LRU_SET	(2)		/* Inode is on the LRU list */
 #define NFS_INO_INVALIDATING	(3)		/* inode is being invalidated */
 #define NFS_INO_PRESERVE_UNLINKED (4)		/* preserve file if removed while open */
 #define NFS_INO_FSCACHE		(5)		/* inode can be cached by FS-Cache */
-#define NFS_INO_FORCE_READDIR	(7)		/* force readdirplus */
 #define NFS_INO_LAYOUTCOMMIT	(9)		/* layoutcommit required */
 #define NFS_INO_LAYOUTCOMMITTING (10)		/* layoutcommit inflight */
 #define NFS_INO_LAYOUTSTATS	(11)		/* layoutstats inflight */
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4 5/5] NFS: Don't ask for readdirplus if files are being written to
  2022-02-17 22:33       ` [PATCH v4 4/5] NFS: Improve heuristic for readdirplus trondmy
@ 2022-02-17 22:33         ` trondmy
  2022-02-18 11:40           ` Trond Myklebust
  2022-02-18 11:37         ` [PATCH v4 4/5] NFS: Improve heuristic for readdirplus Trond Myklebust
  1 sibling, 1 reply; 8+ messages in thread
From: trondmy @ 2022-02-17 22:33 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

If a file is being written to, then readdirplus isn't going to help with
retrieving attributes, since we will have to flush out writes anyway in
order to sync the mtime/ctime.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/inode.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1bef81f5373a..00500c369c5f 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -837,6 +837,7 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
 	int err = 0;
 	bool force_sync = query_flags & AT_STATX_FORCE_SYNC;
 	bool do_update = false;
+	bool record_cache = !nfs_have_writebacks(inode);
 
 	trace_nfs_getattr_enter(inode);
 
@@ -845,7 +846,8 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
 			STATX_INO | STATX_SIZE | STATX_BLOCKS;
 
 	if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) {
-		nfs_readdirplus_parent_cache_hit(path->dentry);
+		if (record_cache)
+			nfs_readdirplus_parent_cache_hit(path->dentry);
 		goto out_no_revalidate;
 	}
 
@@ -894,17 +896,18 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
 	if (request_mask & STATX_BLOCKS)
 		do_update |= cache_validity & NFS_INO_INVALID_BLOCKS;
 
-	if (do_update) {
+	if (record_cache) {
 		/* Update the attribute cache */
-		if (!(server->flags & NFS_MOUNT_NOAC))
+		if (do_update && !(server->flags & NFS_MOUNT_NOAC))
 			nfs_readdirplus_parent_cache_miss(path->dentry);
 		else
 			nfs_readdirplus_parent_cache_hit(path->dentry);
+	}
+	if (do_update) {
 		err = __nfs_revalidate_inode(server, inode);
 		if (err)
 			goto out;
-	} else
-		nfs_readdirplus_parent_cache_hit(path->dentry);
+	}
 out_no_revalidate:
 	/* Only return attributes that were revalidated. */
 	stat->result_mask = nfs_get_valid_attrmask(inode) | request_mask;
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v4 4/5] NFS: Improve heuristic for readdirplus
  2022-02-17 22:33       ` [PATCH v4 4/5] NFS: Improve heuristic for readdirplus trondmy
  2022-02-17 22:33         ` [PATCH v4 5/5] NFS: Don't ask for readdirplus if files are being written to trondmy
@ 2022-02-18 11:37         ` Trond Myklebust
  1 sibling, 0 replies; 8+ messages in thread
From: Trond Myklebust @ 2022-02-18 11:37 UTC (permalink / raw)
  To: linux-nfs

On Thu, 2022-02-17 at 17:33 -0500, trondmy@kernel.org wrote:
> From: Trond Myklebust <trond.myklebust@hammerspace.com>
> 
> The heuristic for readdirplus is designed to try to detect 'ls -l'
> and
> similar patterns. It does so by looking for cache hit/miss patterns
> in
> both the attribute cache and in the dcache of the files in a given
> directory, and then sets a flag for the readdirplus code to
> interpret.
> 
> The problem with this approach is that a single attribute or dcache
> miss
> can cause the NFS code to force a refresh of the attributes for the
> entire set of files contained in the directory.
> 
> To be able to make a more nuanced decision, let's sample the number
> of
> hits and misses in the set of open directory descriptors. That allows
> us
> to set thresholds at which we start preferring READDIRPLUS over
> regular
> READDIR, or at which we start to force a re-read of the remaining
> readdir cache using READDIRPLUS.
> 
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
>  fs/nfs/dir.c           | 77 +++++++++++++++++++++++++---------------
> --
>  fs/nfs/inode.c         |  4 +--
>  fs/nfs/internal.h      |  4 +--
>  fs/nfs/nfstrace.h      |  1 -
>  include/linux/nfs_fs.h |  5 +--
>  5 files changed, 53 insertions(+), 38 deletions(-)
> 
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index 43a559b34f4a..cd57df004789 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -87,8 +87,7 @@ alloc_nfs_open_dir_context(struct inode *dir)
>                         nfs_set_cache_invalid(dir,
>                                               NFS_INO_INVALID_DATA |
>                                                      
> NFS_INO_REVAL_FORCED);
> -               list_add(&ctx->list, &nfsi->open_files);
> -               clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags);
> +               list_add_tail_rcu(&ctx->list, &nfsi->open_files);
>                 spin_unlock(&dir->i_lock);
>                 return ctx;
>         }
> @@ -98,9 +97,9 @@ alloc_nfs_open_dir_context(struct inode *dir)
>  static void put_nfs_open_dir_context(struct inode *dir, struct
> nfs_open_dir_context *ctx)
>  {
>         spin_lock(&dir->i_lock);
> -       list_del(&ctx->list);
> +       list_del_rcu(&ctx->list);
>         spin_unlock(&dir->i_lock);
> -       kfree(ctx);
> +       kfree_rcu(ctx, rcu_head);
>  }
>  
>  /*
> @@ -567,7 +566,6 @@ static int nfs_readdir_xdr_filler(struct
> nfs_readdir_descriptor *desc,
>                 /* We requested READDIRPLUS, but the server doesn't
> grok it */
>                 if (error == -ENOTSUPP && desc->plus) {
>                         NFS_SERVER(inode)->caps &=
> ~NFS_CAP_READDIRPLUS;
> -                       clear_bit(NFS_INO_ADVISE_RDPLUS,
> &NFS_I(inode)->flags);
>                         desc->plus = arg.plus = false;
>                         goto again;
>                 }
> @@ -617,51 +615,57 @@ int nfs_same_file(struct dentry *dentry, struct
> nfs_entry *entry)
>         return 1;
>  }
>  
> -static
> -bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx)
> +#define NFS_READDIR_CACHE_USAGE_THRESHOLD (8UL)
> +
> +static bool nfs_use_readdirplus(struct inode *dir, struct
> dir_context *ctx,
> +                               unsigned int cache_hits,
> +                               unsigned int cache_misses)
>  {
>         if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
>                 return false;
> -       if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)-
> >flags))
> -               return true;
> -       if (ctx->pos == 0)
> +       if (ctx->pos == 0 ||
> +           cache_hits + cache_misses >
> NFS_READDIR_CACHE_USAGE_THRESHOLD)
>                 return true;
>         return false;
>  }
>  
>  /*
> - * This function is called by the lookup and getattr code to request
> the
> + * This function is called by the getattr code to request the
>   * use of readdirplus to accelerate any future lookups in the same
>   * directory.
>   */
> -void nfs_advise_use_readdirplus(struct inode *dir)
> +void nfs_readdir_record_entry_cache_hit(struct inode *dir)
>  {
>         struct nfs_inode *nfsi = NFS_I(dir);
> +       struct nfs_open_dir_context *ctx;
>  
> -       if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
> -           !list_empty(&nfsi->open_files))
> -               set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
> +       if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) {
> +               list_for_each_entry_rcu (ctx, &nfsi->open_files,
> list)
> +                       atomic_inc(&ctx->cache_hits);

This list_for_each_entry_rcu() walk of nfsi->open_files is missing
rcu_read_lock()/rcu_read_unlock() protection. Fixed in the version
committed to the testing branch.

> +       }
>  }
>  
>  /*
>   * This function is mainly for use by nfs_getattr().
>   *
>   * If this is an 'ls -l', we want to force use of readdirplus.
> - * Do this by checking if there is an active file descriptor
> - * and calling nfs_advise_use_readdirplus, then forcing a
> - * cache flush.
>   */
> -void nfs_force_use_readdirplus(struct inode *dir)
> +void nfs_readdir_record_entry_cache_miss(struct inode *dir)
>  {
>         struct nfs_inode *nfsi = NFS_I(dir);
> +       struct nfs_open_dir_context *ctx;
>  
> -       if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
> -           !list_empty(&nfsi->open_files)) {
> -               set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
> -               set_bit(NFS_INO_FORCE_READDIR, &nfsi->flags);
> +       if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) {
> +               list_for_each_entry_rcu (ctx, &nfsi->open_files,
> list)
> +                       atomic_inc(&ctx->cache_misses);

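Same problem here: this list_for_each_entry_rcu() walk also needs
rcu_read_lock()/rcu_read_unlock() protection.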

>         }
>  }
>  
> +static void nfs_readdir_record_dcache_miss(struct inode *dir)
> +{
> +       nfs_readdir_record_entry_cache_miss(dir);
> +}
> +
>  static
>  void nfs_prime_dcache(struct dentry *parent, struct nfs_entry
> *entry,
>                 unsigned long dir_verifier)
> @@ -1101,6 +1105,18 @@ static int uncached_readdir(struct
> nfs_readdir_descriptor *desc)
>         return status;
>  }
>  
> +#define NFS_READDIR_CACHE_MISS_THRESHOLD (16UL)
> +
> +static void nfs_readdir_handle_cache_misses(struct inode *inode,
> +                                           struct
> nfs_readdir_descriptor *desc,
> +                                           pgoff_t page_index,
> +                                           unsigned int
> cache_misses)
> +{
> +       if (desc->ctx->pos != 0 &&
> +           cache_misses > NFS_READDIR_CACHE_MISS_THRESHOLD)
> +               invalidate_mapping_pages(inode->i_mapping, page_index
> + 1, -1);
> +}
> +
>  /* The file offset position represents the dirent entry number.  A
>     last cookie cache takes care of the common case of reading the
>     whole directory.
> @@ -1112,6 +1128,7 @@ static int nfs_readdir(struct file *file,
> struct dir_context *ctx)
>         struct nfs_inode *nfsi = NFS_I(inode);
>         struct nfs_open_dir_context *dir_ctx = file->private_data;
>         struct nfs_readdir_descriptor *desc;
> +       unsigned int cache_hits, cache_misses;
>         pgoff_t page_index;
>         int res;
>  
> @@ -1137,7 +1154,6 @@ static int nfs_readdir(struct file *file,
> struct dir_context *ctx)
>                 goto out;
>         desc->file = file;
>         desc->ctx = ctx;
> -       desc->plus = nfs_use_readdirplus(inode, ctx);
>         desc->page_index_max = -1;
>  
>         spin_lock(&file->f_lock);
> @@ -1150,6 +1166,8 @@ static int nfs_readdir(struct file *file,
> struct dir_context *ctx)
>         desc->page_fill_misses = dir_ctx->page_fill_misses;
>         nfs_set_dtsize(desc, dir_ctx->dtsize);
>         memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
> +       cache_hits = atomic_xchg(&dir_ctx->cache_hits, 0);
> +       cache_misses = atomic_xchg(&dir_ctx->cache_misses, 0);
>         spin_unlock(&file->f_lock);
>  
>         if (desc->eof) {
> @@ -1157,9 +1175,8 @@ static int nfs_readdir(struct file *file,
> struct dir_context *ctx)
>                 goto out_free;
>         }
>  
> -       if (test_and_clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags)
> &&
> -           list_is_singular(&nfsi->open_files))
> -               invalidate_mapping_pages(inode->i_mapping, page_index
> + 1, -1);
> +       desc->plus = nfs_use_readdirplus(inode, ctx, cache_hits,
> cache_misses);
> +       nfs_readdir_handle_cache_misses(inode, desc, page_index,
> cache_misses);
>  
>         do {
>                 res = readdir_search_pagecache(desc);
> @@ -1178,7 +1195,6 @@ static int nfs_readdir(struct file *file,
> struct dir_context *ctx)
>                         break;
>                 }
>                 if (res == -ETOOSMALL && desc->plus) {
> -                       clear_bit(NFS_INO_ADVISE_RDPLUS, &nfsi-
> >flags);
>                         nfs_zap_caches(inode);
>                         desc->page_index = 0;
>                         desc->plus = false;
> @@ -1602,7 +1618,7 @@ nfs_lookup_revalidate_dentry(struct inode *dir,
> struct dentry *dentry,
>         nfs_set_verifier(dentry, dir_verifier);
>  
>         /* set a readdirplus hint that we had a cache miss */
> -       nfs_force_use_readdirplus(dir);
> +       nfs_readdir_record_dcache_miss(dir);
>         ret = 1;
>  out:
>         nfs_free_fattr(fattr);
> @@ -1659,7 +1675,6 @@ nfs_do_lookup_revalidate(struct inode *dir,
> struct dentry *dentry,
>                                 nfs_mark_dir_for_revalidate(dir);
>                         goto out_bad;
>                 }
> -               nfs_advise_use_readdirplus(dir);
>                 goto out_valid;
>         }
>  
> @@ -1866,7 +1881,7 @@ struct dentry *nfs_lookup(struct inode *dir,
> struct dentry * dentry, unsigned in
>                 goto out;
>  
>         /* Notify readdir to use READDIRPLUS */
> -       nfs_force_use_readdirplus(dir);
> +       nfs_readdir_record_dcache_miss(dir);
>  
>  no_entry:
>         res = d_splice_alias(inode, dentry);
> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
> index f9fc506ebb29..1bef81f5373a 100644
> --- a/fs/nfs/inode.c
> +++ b/fs/nfs/inode.c
> @@ -789,7 +789,7 @@ static void
> nfs_readdirplus_parent_cache_miss(struct dentry *dentry)
>         if (!nfs_server_capable(d_inode(dentry),
> NFS_CAP_READDIRPLUS))
>                 return;
>         parent = dget_parent(dentry);
> -       nfs_force_use_readdirplus(d_inode(parent));
> +       nfs_readdir_record_entry_cache_miss(d_inode(parent));
>         dput(parent);
>  }
>  
> @@ -800,7 +800,7 @@ static void
> nfs_readdirplus_parent_cache_hit(struct dentry *dentry)
>         if (!nfs_server_capable(d_inode(dentry),
> NFS_CAP_READDIRPLUS))
>                 return;
>         parent = dget_parent(dentry);
> -       nfs_advise_use_readdirplus(d_inode(parent));
> +       nfs_readdir_record_entry_cache_hit(d_inode(parent));
>         dput(parent);
>  }
>  
> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
> index 2de7c56a1fbe..46dc97b65661 100644
> --- a/fs/nfs/internal.h
> +++ b/fs/nfs/internal.h
> @@ -366,8 +366,8 @@ extern struct nfs_client *nfs_init_client(struct
> nfs_client *clp,
>                            const struct nfs_client_initdata *);
>  
>  /* dir.c */
> -extern void nfs_advise_use_readdirplus(struct inode *dir);
> -extern void nfs_force_use_readdirplus(struct inode *dir);
> +extern void nfs_readdir_record_entry_cache_hit(struct inode *dir);
> +extern void nfs_readdir_record_entry_cache_miss(struct inode *dir);
>  extern unsigned long nfs_access_cache_count(struct shrinker *shrink,
>                                             struct shrink_control
> *sc);
>  extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
> diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
> index 45a310b586ce..3672f6703ee7 100644
> --- a/fs/nfs/nfstrace.h
> +++ b/fs/nfs/nfstrace.h
> @@ -36,7 +36,6 @@
>  
>  #define nfs_show_nfsi_flags(v) \
>         __print_flags(v, "|", \
> -                       { BIT(NFS_INO_ADVISE_RDPLUS), "ADVISE_RDPLUS"
> }, \
>                         { BIT(NFS_INO_STALE), "STALE" }, \
>                         { BIT(NFS_INO_ACL_LRU_SET), "ACL_LRU_SET" },
> \
>                         { BIT(NFS_INO_INVALIDATING), "INVALIDATING"
> }, \
> diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
> index 9e5fc29723c2..e21bd9452d27 100644
> --- a/include/linux/nfs_fs.h
> +++ b/include/linux/nfs_fs.h
> @@ -101,6 +101,8 @@ struct nfs_open_context {
>  
>  struct nfs_open_dir_context {
>         struct list_head list;
> +       atomic_t cache_hits;
> +       atomic_t cache_misses;
>         unsigned long attr_gencount;
>         __be32  verf[NFS_DIR_VERIFIER_SIZE];
>         __u64 dir_cookie;
> @@ -110,6 +112,7 @@ struct nfs_open_dir_context {
>         unsigned int dtsize;
>         signed char duped;
>         bool eof;
> +       struct rcu_head rcu_head;
>  };
>  
>  /*
> @@ -274,13 +277,11 @@ struct nfs4_copy_state {
>  /*
>   * Bit offsets in flags field
>   */
> -#define NFS_INO_ADVISE_RDPLUS  (0)             /* advise readdirplus
> */
>  #define NFS_INO_STALE          (1)             /* possible stale
> inode */
>  #define NFS_INO_ACL_LRU_SET    (2)             /* Inode is on the
> LRU list */
>  #define NFS_INO_INVALIDATING   (3)             /* inode is being
> invalidated */
>  #define NFS_INO_PRESERVE_UNLINKED (4)          /* preserve file if
> removed while open */
>  #define NFS_INO_FSCACHE                (5)             /* inode can
> be cached by FS-Cache */
> -#define NFS_INO_FORCE_READDIR  (7)             /* force readdirplus
> */
>  #define NFS_INO_LAYOUTCOMMIT   (9)             /* layoutcommit
> required */
>  #define NFS_INO_LAYOUTCOMMITTING (10)          /* layoutcommit
> inflight */
>  #define NFS_INO_LAYOUTSTATS    (11)            /* layoutstats
> inflight */

-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4 5/5] NFS: Don't ask for readdirplus if files are being written to
  2022-02-17 22:33         ` [PATCH v4 5/5] NFS: Don't ask for readdirplus if files are being written to trondmy
@ 2022-02-18 11:40           ` Trond Myklebust
  0 siblings, 0 replies; 8+ messages in thread
From: Trond Myklebust @ 2022-02-18 11:40 UTC (permalink / raw)
  To: linux-nfs

On Thu, 2022-02-17 at 17:33 -0500, trondmy@kernel.org wrote:
> From: Trond Myklebust <trond.myklebust@hammerspace.com>
> 
> If a file is being written to, then readdirplus isn't going to help
> with
> retrieving attributes, since we will have to flush out writes anyway
> in
> order to sync the mtime/ctime.
> 
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
>  fs/nfs/inode.c | 13 ++++++++-----
>  1 file changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
> index 1bef81f5373a..00500c369c5f 100644
> --- a/fs/nfs/inode.c
> +++ b/fs/nfs/inode.c
> @@ -837,6 +837,7 @@ int nfs_getattr(struct user_namespace
> *mnt_userns, const struct path *path,
>         int err = 0;
>         bool force_sync = query_flags & AT_STATX_FORCE_SYNC;
>         bool do_update = false;
> +       bool record_cache = !nfs_have_writebacks(inode);

There is a second case where readdirplus won't help stat() performance:
if the user has specified 'noac', or has otherwise set acdirmax/acregmax
to values that are too low, then attribute caching breaks down.

Also fixed in the version committed to 'testing'.

>  
>         trace_nfs_getattr_enter(inode);
>  
> @@ -845,7 +846,8 @@ int nfs_getattr(struct user_namespace
> *mnt_userns, const struct path *path,
>                         STATX_INO | STATX_SIZE | STATX_BLOCKS;
>  
>         if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) {
> -               nfs_readdirplus_parent_cache_hit(path->dentry);
> +               if (record_cache)
> +                       nfs_readdirplus_parent_cache_hit(path-
> >dentry);
>                 goto out_no_revalidate;
>         }
>  
> @@ -894,17 +896,18 @@ int nfs_getattr(struct user_namespace
> *mnt_userns, const struct path *path,
>         if (request_mask & STATX_BLOCKS)
>                 do_update |= cache_validity & NFS_INO_INVALID_BLOCKS;
>  
> -       if (do_update) {
> +       if (record_cache) {
>                 /* Update the attribute cache */
> -               if (!(server->flags & NFS_MOUNT_NOAC))
> +               if (do_update && !(server->flags & NFS_MOUNT_NOAC))
>                         nfs_readdirplus_parent_cache_miss(path-
> >dentry);
>                 else
>                         nfs_readdirplus_parent_cache_hit(path-
> >dentry);
> +       }
> +       if (do_update) {
>                 err = __nfs_revalidate_inode(server, inode);
>                 if (err)
>                         goto out;
> -       } else
> -               nfs_readdirplus_parent_cache_hit(path->dentry);
> +       }
>  out_no_revalidate:
>         /* Only return attributes that were revalidated. */
>         stat->result_mask = nfs_get_valid_attrmask(inode) |
> request_mask;

-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com



^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-02-18 11:40 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-17 22:33 [PATCH v4 0/5] Readdir improvements trondmy
2022-02-17 22:33 ` [PATCH v4 1/5] NFS: Adjust the amount of readahead performed by NFS readdir trondmy
2022-02-17 22:33   ` [PATCH v4 2/5] NFS: Simplify nfs_readdir_xdr_to_array() trondmy
2022-02-17 22:33     ` [PATCH v4 3/5] NFS: Improve algorithm for falling back to uncached readdir trondmy
2022-02-17 22:33       ` [PATCH v4 4/5] NFS: Improve heuristic for readdirplus trondmy
2022-02-17 22:33         ` [PATCH v4 5/5] NFS: Don't ask for readdirplus if files are being written to trondmy
2022-02-18 11:40           ` Trond Myklebust
2022-02-18 11:37         ` [PATCH v4 4/5] NFS: Improve heuristic for readdirplus Trond Myklebust

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).