linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 00/17] Readdir enhancements
@ 2020-11-04 16:16 trondmy
  2020-11-04 16:16 ` [PATCH v3 01/17] NFS: Ensure contents of struct nfs_open_dir_context are consistent trondmy
  2020-11-07 12:49 ` [PATCH v3 00/17] Readdir enhancements Benjamin Coddington
  0 siblings, 2 replies; 28+ messages in thread
From: trondmy @ 2020-11-04 16:16 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

The following patch series performs a number of cleanups on the readdir
code.
It also adds support for 1MB readdir RPC calls on-the-wire, and modifies
the caching code to ensure that we cache the entire contents of that
1MB call (instead of discarding the data that doesn't fit into a single
page).

v2: Fix the handling of the NFSv3/v4 directory verifier
v3: Optimise searching when the readdir cookies are seen to be ordered

Trond Myklebust (17):
  NFS: Ensure contents of struct nfs_open_dir_context are consistent
  NFS: Clean up readdir struct nfs_cache_array
  NFS: Clean up nfs_readdir_page_filler()
  NFS: Clean up directory array handling
  NFS: Don't discard readdir results
  NFS: Remove unnecessary kmap in nfs_readdir_xdr_to_array()
  NFS: Replace kmap() with kmap_atomic() in nfs_readdir_search_array()
  NFS: Simplify struct nfs_cache_array_entry
  NFS: Support larger readdir buffers
  NFS: More readdir cleanups
  NFS: nfs_do_filldir() does not return a value
  NFS: Reduce readdir stack usage
  NFS: Cleanup to remove nfs_readdir_descriptor_t typedef
  NFS: Allow the NFS generic code to pass in a verifier to readdir
  NFS: Handle NFS4ERR_NOT_SAME and NFSERR_BADCOOKIE from readdir calls
  NFS: Improve handling of directory verifiers
  NFS: Optimisations for monotonically increasing readdir cookies

 fs/nfs/client.c         |   4 +-
 fs/nfs/dir.c            | 629 +++++++++++++++++++++++++---------------
 fs/nfs/inode.c          |   7 -
 fs/nfs/internal.h       |   6 -
 fs/nfs/nfs3proc.c       |  35 ++-
 fs/nfs/nfs4proc.c       |  40 +--
 fs/nfs/proc.c           |  18 +-
 include/linux/nfs_fs.h  |   9 +-
 include/linux/nfs_xdr.h |  17 +-
 9 files changed, 459 insertions(+), 306 deletions(-)

-- 
2.28.0


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH v3 01/17] NFS: Ensure contents of struct nfs_open_dir_context are consistent
  2020-11-04 16:16 [PATCH v3 00/17] Readdir enhancements trondmy
@ 2020-11-04 16:16 ` trondmy
  2020-11-04 16:16   ` [PATCH v3 02/17] NFS: Clean up readdir struct nfs_cache_array trondmy
  2020-11-07 12:49 ` [PATCH v3 00/17] Readdir enhancements Benjamin Coddington
  1 sibling, 1 reply; 28+ messages in thread
From: trondmy @ 2020-11-04 16:16 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

Ensure that the contents of struct nfs_open_dir_context are consistent
by setting them under the file->f_lock from a private copy (that is
known to be consistent).

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c | 72 +++++++++++++++++++++++++++++++---------------------
 1 file changed, 43 insertions(+), 29 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 4e011adaf967..67d8595cd6e5 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -144,20 +144,23 @@ struct nfs_cache_array {
 	struct nfs_cache_array_entry array[];
 };
 
-typedef struct {
+typedef struct nfs_readdir_descriptor {
 	struct file	*file;
 	struct page	*page;
 	struct dir_context *ctx;
 	unsigned long	page_index;
-	u64		*dir_cookie;
+	u64		dir_cookie;
 	u64		last_cookie;
+	u64		dup_cookie;
 	loff_t		current_index;
 	loff_t		prev_index;
 
 	unsigned long	dir_verifier;
 	unsigned long	timestamp;
 	unsigned long	gencount;
+	unsigned long	attr_gencount;
 	unsigned int	cache_entry_index;
+	signed char duped;
 	bool plus;
 	bool eof;
 } nfs_readdir_descriptor_t;
@@ -273,7 +276,7 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
 	}
 
 	index = (unsigned int)diff;
-	*desc->dir_cookie = array->array[index].cookie;
+	desc->dir_cookie = array->array[index].cookie;
 	desc->cache_entry_index = index;
 	return 0;
 out_eof:
@@ -298,33 +301,32 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
 	int status = -EAGAIN;
 
 	for (i = 0; i < array->size; i++) {
-		if (array->array[i].cookie == *desc->dir_cookie) {
+		if (array->array[i].cookie == desc->dir_cookie) {
 			struct nfs_inode *nfsi = NFS_I(file_inode(desc->file));
-			struct nfs_open_dir_context *ctx = desc->file->private_data;
 
 			new_pos = desc->current_index + i;
-			if (ctx->attr_gencount != nfsi->attr_gencount ||
+			if (desc->attr_gencount != nfsi->attr_gencount ||
 			    !nfs_readdir_inode_mapping_valid(nfsi)) {
-				ctx->duped = 0;
-				ctx->attr_gencount = nfsi->attr_gencount;
+				desc->duped = 0;
+				desc->attr_gencount = nfsi->attr_gencount;
 			} else if (new_pos < desc->prev_index) {
-				if (ctx->duped > 0
-				    && ctx->dup_cookie == *desc->dir_cookie) {
+				if (desc->duped > 0
+				    && desc->dup_cookie == desc->dir_cookie) {
 					if (printk_ratelimit()) {
 						pr_notice("NFS: directory %pD2 contains a readdir loop."
 								"Please contact your server vendor.  "
 								"The file: %.*s has duplicate cookie %llu\n",
 								desc->file, array->array[i].string.len,
-								array->array[i].string.name, *desc->dir_cookie);
+								array->array[i].string.name, desc->dir_cookie);
 					}
 					status = -ELOOP;
 					goto out;
 				}
-				ctx->dup_cookie = *desc->dir_cookie;
-				ctx->duped = -1;
+				desc->dup_cookie = desc->dir_cookie;
+				desc->duped = -1;
 			}
 			if (nfs_readdir_use_cookie(desc->file))
-				desc->ctx->pos = *desc->dir_cookie;
+				desc->ctx->pos = desc->dir_cookie;
 			else
 				desc->ctx->pos = new_pos;
 			desc->prev_index = new_pos;
@@ -334,7 +336,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
 	}
 	if (array->eof_index >= 0) {
 		status = -EBADCOOKIE;
-		if (*desc->dir_cookie == array->last_cookie)
+		if (desc->dir_cookie == array->last_cookie)
 			desc->eof = true;
 	}
 out:
@@ -349,7 +351,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
 
 	array = kmap(desc->page);
 
-	if (*desc->dir_cookie == 0)
+	if (desc->dir_cookie == 0)
 		status = nfs_readdir_search_for_pos(array, desc);
 	else
 		status = nfs_readdir_search_for_cookie(array, desc);
@@ -801,7 +803,6 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
 	int i = 0;
 	int res = 0;
 	struct nfs_cache_array *array = NULL;
-	struct nfs_open_dir_context *ctx = file->private_data;
 
 	array = kmap(desc->page);
 	for (i = desc->cache_entry_index; i < array->size; i++) {
@@ -814,22 +815,22 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
 			break;
 		}
 		if (i < (array->size-1))
-			*desc->dir_cookie = array->array[i+1].cookie;
+			desc->dir_cookie = array->array[i+1].cookie;
 		else
-			*desc->dir_cookie = array->last_cookie;
+			desc->dir_cookie = array->last_cookie;
 		if (nfs_readdir_use_cookie(file))
-			desc->ctx->pos = *desc->dir_cookie;
+			desc->ctx->pos = desc->dir_cookie;
 		else
 			desc->ctx->pos++;
-		if (ctx->duped != 0)
-			ctx->duped = 1;
+		if (desc->duped != 0)
+			desc->duped = 1;
 	}
 	if (array->eof_index >= 0)
 		desc->eof = true;
 
 	kunmap(desc->page);
 	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
-			(unsigned long long)*desc->dir_cookie, res);
+			(unsigned long long)desc->dir_cookie, res);
 	return res;
 }
 
@@ -851,10 +852,9 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc)
 	struct page	*page = NULL;
 	int		status;
 	struct inode *inode = file_inode(desc->file);
-	struct nfs_open_dir_context *ctx = desc->file->private_data;
 
 	dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
-			(unsigned long long)*desc->dir_cookie);
+			(unsigned long long)desc->dir_cookie);
 
 	page = alloc_page(GFP_HIGHUSER);
 	if (!page) {
@@ -863,9 +863,9 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc)
 	}
 
 	desc->page_index = 0;
-	desc->last_cookie = *desc->dir_cookie;
+	desc->last_cookie = desc->dir_cookie;
 	desc->page = page;
-	ctx->duped = 0;
+	desc->duped = 0;
 
 	status = nfs_readdir_xdr_to_array(desc, page, inode);
 	if (status < 0)
@@ -894,7 +894,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	nfs_readdir_descriptor_t my_desc = {
 		.file = file,
 		.ctx = ctx,
-		.dir_cookie = &dir_ctx->dir_cookie,
 		.plus = nfs_use_readdirplus(inode, ctx),
 	},
 			*desc = &my_desc;
@@ -915,13 +914,20 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	if (res < 0)
 		goto out;
 
+	spin_lock(&file->f_lock);
+	desc->dir_cookie = dir_ctx->dir_cookie;
+	desc->dup_cookie = dir_ctx->dup_cookie;
+	desc->duped = dir_ctx->duped;
+	desc->attr_gencount = dir_ctx->attr_gencount;
+	spin_unlock(&file->f_lock);
+
 	do {
 		res = readdir_search_pagecache(desc);
 
 		if (res == -EBADCOOKIE) {
 			res = 0;
 			/* This means either end of directory */
-			if (*desc->dir_cookie && !desc->eof) {
+			if (desc->dir_cookie && !desc->eof) {
 				/* Or that the server has 'lost' a cookie */
 				res = uncached_readdir(desc);
 				if (res == 0)
@@ -946,6 +952,14 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 		if (res < 0)
 			break;
 	} while (!desc->eof);
+
+	spin_lock(&file->f_lock);
+	dir_ctx->dir_cookie = desc->dir_cookie;
+	dir_ctx->dup_cookie = desc->dup_cookie;
+	dir_ctx->duped = desc->duped;
+	dir_ctx->attr_gencount = desc->attr_gencount;
+	spin_unlock(&file->f_lock);
+
 out:
 	if (res > 0)
 		res = 0;
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v3 02/17] NFS: Clean up readdir struct nfs_cache_array
  2020-11-04 16:16 ` [PATCH v3 01/17] NFS: Ensure contents of struct nfs_open_dir_context are consistent trondmy
@ 2020-11-04 16:16   ` trondmy
  2020-11-04 16:16     ` [PATCH v3 03/17] NFS: Clean up nfs_readdir_page_filler() trondmy
  0 siblings, 1 reply; 28+ messages in thread
From: trondmy @ 2020-11-04 16:16 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

Since the 'eof_index' is only ever used as a flag, make it so.
Also add a flag to detect if the page has been completely filled.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c | 66 ++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 49 insertions(+), 17 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 67d8595cd6e5..604ebe015387 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -138,9 +138,10 @@ struct nfs_cache_array_entry {
 };
 
 struct nfs_cache_array {
-	int size;
-	int eof_index;
 	u64 last_cookie;
+	unsigned int size;
+	unsigned char page_full : 1,
+		      page_is_eof : 1;
 	struct nfs_cache_array_entry array[];
 };
 
@@ -172,7 +173,6 @@ void nfs_readdir_init_array(struct page *page)
 
 	array = kmap_atomic(page);
 	memset(array, 0, sizeof(struct nfs_cache_array));
-	array->eof_index = -1;
 	kunmap_atomic(array);
 }
 
@@ -192,6 +192,17 @@ void nfs_readdir_clear_array(struct page *page)
 	kunmap_atomic(array);
 }
 
+static void nfs_readdir_array_set_eof(struct nfs_cache_array *array)
+{
+	array->page_is_eof = 1;
+	array->page_full = 1;
+}
+
+static bool nfs_readdir_array_is_full(struct nfs_cache_array *array)
+{
+	return array->page_full;
+}
+
 /*
  * the caller is responsible for freeing qstr.name
  * when called by nfs_readdir_add_to_array, the strings will be freed in
@@ -213,6 +224,23 @@ int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int le
 	return 0;
 }
 
+/*
+ * Check that the next array entry lies entirely within the page bounds
+ */
+static int nfs_readdir_array_can_expand(struct nfs_cache_array *array)
+{
+	struct nfs_cache_array_entry *cache_entry;
+
+	if (array->page_full)
+		return -ENOSPC;
+	cache_entry = &array->array[array->size + 1];
+	if ((char *)cache_entry - (char *)array > PAGE_SIZE) {
+		array->page_full = 1;
+		return -ENOSPC;
+	}
+	return 0;
+}
+
 static
 int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
 {
@@ -220,13 +248,11 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
 	struct nfs_cache_array_entry *cache_entry;
 	int ret;
 
-	cache_entry = &array->array[array->size];
-
-	/* Check that this entry lies within the page bounds */
-	ret = -ENOSPC;
-	if ((char *)&cache_entry[1] - (char *)page_address(page) > PAGE_SIZE)
+	ret = nfs_readdir_array_can_expand(array);
+	if (ret)
 		goto out;
 
+	cache_entry = &array->array[array->size];
 	cache_entry->cookie = entry->prev_cookie;
 	cache_entry->ino = entry->ino;
 	cache_entry->d_type = entry->d_type;
@@ -236,12 +262,21 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
 	array->last_cookie = entry->cookie;
 	array->size++;
 	if (entry->eof != 0)
-		array->eof_index = array->size;
+		nfs_readdir_array_set_eof(array);
 out:
 	kunmap(page);
 	return ret;
 }
 
+static void nfs_readdir_page_set_eof(struct page *page)
+{
+	struct nfs_cache_array *array;
+
+	array = kmap_atomic(page);
+	nfs_readdir_array_set_eof(array);
+	kunmap_atomic(array);
+}
+
 static inline
 int is_32bit_api(void)
 {
@@ -270,7 +305,7 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
 	if (diff < 0)
 		goto out_eof;
 	if (diff >= array->size) {
-		if (array->eof_index >= 0)
+		if (array->page_is_eof)
 			goto out_eof;
 		return -EAGAIN;
 	}
@@ -334,7 +369,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
 			return 0;
 		}
 	}
-	if (array->eof_index >= 0) {
+	if (array->page_is_eof) {
 		status = -EBADCOOKIE;
 		if (desc->dir_cookie == array->last_cookie)
 			desc->eof = true;
@@ -566,7 +601,6 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 	struct xdr_stream stream;
 	struct xdr_buf buf;
 	struct page *scratch;
-	struct nfs_cache_array *array;
 	unsigned int count = 0;
 	int status;
 
@@ -604,10 +638,8 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 
 out_nopages:
 	if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
-		array = kmap(page);
-		array->eof_index = array->size;
+		nfs_readdir_page_set_eof(page);
 		status = 0;
-		kunmap(page);
 	}
 
 	put_page(scratch);
@@ -689,7 +721,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 				status = 0;
 			break;
 		}
-	} while (array->eof_index < 0);
+	} while (!nfs_readdir_array_is_full(array));
 
 	nfs_readdir_free_pages(pages, array_size);
 out_release_array:
@@ -825,7 +857,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
 		if (desc->duped != 0)
 			desc->duped = 1;
 	}
-	if (array->eof_index >= 0)
+	if (array->page_is_eof)
 		desc->eof = true;
 
 	kunmap(desc->page);
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v3 03/17] NFS: Clean up nfs_readdir_page_filler()
  2020-11-04 16:16   ` [PATCH v3 02/17] NFS: Clean up readdir struct nfs_cache_array trondmy
@ 2020-11-04 16:16     ` trondmy
  2020-11-04 16:16       ` [PATCH v3 04/17] NFS: Clean up directory array handling trondmy
  0 siblings, 1 reply; 28+ messages in thread
From: trondmy @ 2020-11-04 16:16 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

Clean up handling of the case where there are no entries in the readdir
reply.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c | 39 ++++++++++++++++++---------------------
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 604ebe015387..68acbde3f914 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -601,16 +601,12 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 	struct xdr_stream stream;
 	struct xdr_buf buf;
 	struct page *scratch;
-	unsigned int count = 0;
 	int status;
 
 	scratch = alloc_page(GFP_KERNEL);
 	if (scratch == NULL)
 		return -ENOMEM;
 
-	if (buflen == 0)
-		goto out_nopages;
-
 	xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
 	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
 
@@ -619,27 +615,27 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 			entry->label->len = NFS4_MAXLABELLEN;
 
 		status = xdr_decode(desc, entry, &stream);
-		if (status != 0) {
-			if (status == -EAGAIN)
-				status = 0;
+		if (status != 0)
 			break;
-		}
-
-		count++;
 
 		if (desc->plus)
 			nfs_prime_dcache(file_dentry(desc->file), entry,
 					desc->dir_verifier);
 
 		status = nfs_readdir_add_to_array(entry, page);
-		if (status != 0)
-			break;
-	} while (!entry->eof);
+	} while (!status && !entry->eof);
 
-out_nopages:
-	if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
-		nfs_readdir_page_set_eof(page);
+	switch (status) {
+	case -EBADCOOKIE:
+		if (entry->eof) {
+			nfs_readdir_page_set_eof(page);
+			status = 0;
+		}
+		break;
+	case -ENOSPC:
+	case -EAGAIN:
 		status = 0;
+		break;
 	}
 
 	put_page(scratch);
@@ -714,14 +710,15 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 
 		if (status < 0)
 			break;
+
 		pglen = status;
-		status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen);
-		if (status < 0) {
-			if (status == -ENOSPC)
-				status = 0;
+		if (pglen == 0) {
+			nfs_readdir_page_set_eof(page);
 			break;
 		}
-	} while (!nfs_readdir_array_is_full(array));
+
+		status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen);
+	} while (!status && !nfs_readdir_array_is_full(array));
 
 	nfs_readdir_free_pages(pages, array_size);
 out_release_array:
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v3 04/17] NFS: Clean up directory array handling
  2020-11-04 16:16     ` [PATCH v3 03/17] NFS: Clean up nfs_readdir_page_filler() trondmy
@ 2020-11-04 16:16       ` trondmy
  2020-11-04 16:16         ` [PATCH v3 05/17] NFS: Don't discard readdir results trondmy
  0 siblings, 1 reply; 28+ messages in thread
From: trondmy @ 2020-11-04 16:16 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

Refactor to use pagecache_get_page() so that we can fill the page
in multiple stages.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c | 138 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 77 insertions(+), 61 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 68acbde3f914..842f69120a01 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -149,7 +149,7 @@ typedef struct nfs_readdir_descriptor {
 	struct file	*file;
 	struct page	*page;
 	struct dir_context *ctx;
-	unsigned long	page_index;
+	pgoff_t		page_index;
 	u64		dir_cookie;
 	u64		last_cookie;
 	u64		dup_cookie;
@@ -166,13 +166,18 @@ typedef struct nfs_readdir_descriptor {
 	bool eof;
 } nfs_readdir_descriptor_t;
 
-static
-void nfs_readdir_init_array(struct page *page)
+static void nfs_readdir_array_init(struct nfs_cache_array *array)
+{
+	memset(array, 0, sizeof(struct nfs_cache_array));
+}
+
+static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie)
 {
 	struct nfs_cache_array *array;
 
 	array = kmap_atomic(page);
-	memset(array, 0, sizeof(struct nfs_cache_array));
+	nfs_readdir_array_init(array);
+	array->last_cookie = last_cookie;
 	kunmap_atomic(array);
 }
 
@@ -188,7 +193,7 @@ void nfs_readdir_clear_array(struct page *page)
 	array = kmap_atomic(page);
 	for (i = 0; i < array->size; i++)
 		kfree(array->array[i].string.name);
-	array->size = 0;
+	nfs_readdir_array_init(array);
 	kunmap_atomic(array);
 }
 
@@ -268,6 +273,44 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
 	return ret;
 }
 
+static struct page *nfs_readdir_page_get_locked(struct address_space *mapping,
+						pgoff_t index, u64 last_cookie)
+{
+	struct page *page;
+
+	page = grab_cache_page(mapping, index);
+	if (page && !PageUptodate(page)) {
+		nfs_readdir_page_init_array(page, last_cookie);
+		if (invalidate_inode_pages2_range(mapping, index + 1, -1) < 0)
+			nfs_zap_mapping(mapping->host, mapping);
+		SetPageUptodate(page);
+	}
+
+	return page;
+}
+
+static u64 nfs_readdir_page_last_cookie(struct page *page)
+{
+	struct nfs_cache_array *array;
+	u64 ret;
+
+	array = kmap_atomic(page);
+	ret = array->last_cookie;
+	kunmap_atomic(array);
+	return ret;
+}
+
+static bool nfs_readdir_page_needs_filling(struct page *page)
+{
+	struct nfs_cache_array *array;
+	bool ret;
+
+	array = kmap_atomic(page);
+	ret = !nfs_readdir_array_is_full(array);
+	kunmap_atomic(array);
+	return ret;
+}
+
 static void nfs_readdir_page_set_eof(struct page *page)
 {
 	struct nfs_cache_array *array;
@@ -682,10 +725,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 	int status = -ENOMEM;
 	unsigned int array_size = ARRAY_SIZE(pages);
 
-	nfs_readdir_init_array(page);
-
 	entry.prev_cookie = 0;
-	entry.cookie = desc->last_cookie;
+	entry.cookie = nfs_readdir_page_last_cookie(page);
 	entry.eof = 0;
 	entry.fh = nfs_alloc_fhandle();
 	entry.fattr = nfs_alloc_fattr();
@@ -730,48 +771,25 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 	return status;
 }
 
-/*
- * Now we cache directories properly, by converting xdr information
- * to an array that can be used for lookups later.  This results in
- * fewer cache pages, since we can store more information on each page.
- * We only need to convert from xdr once so future lookups are much simpler
- */
-static
-int nfs_readdir_filler(void *data, struct page* page)
+static void nfs_readdir_page_put(struct nfs_readdir_descriptor *desc)
 {
-	nfs_readdir_descriptor_t *desc = data;
-	struct inode	*inode = file_inode(desc->file);
-	int ret;
-
-	ret = nfs_readdir_xdr_to_array(desc, page, inode);
-	if (ret < 0)
-		goto error;
-	SetPageUptodate(page);
-
-	if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
-		/* Should never happen */
-		nfs_zap_mapping(inode, inode->i_mapping);
-	}
-	unlock_page(page);
-	return 0;
- error:
-	nfs_readdir_clear_array(page);
-	unlock_page(page);
-	return ret;
+	put_page(desc->page);
+	desc->page = NULL;
 }
 
-static
-void cache_page_release(nfs_readdir_descriptor_t *desc)
+static void
+nfs_readdir_page_unlock_and_put_cached(struct nfs_readdir_descriptor *desc)
 {
-	put_page(desc->page);
-	desc->page = NULL;
+	unlock_page(desc->page);
+	nfs_readdir_page_put(desc);
 }
 
-static
-struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
+static struct page *
+nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc)
 {
-	return read_cache_page(desc->file->f_mapping, desc->page_index,
-			nfs_readdir_filler, desc);
+	return nfs_readdir_page_get_locked(desc->file->f_mapping,
+					   desc->page_index,
+					   desc->last_cookie);
 }
 
 /*
@@ -785,23 +803,21 @@ int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc)
 	struct nfs_inode *nfsi = NFS_I(inode);
 	int res;
 
-	desc->page = get_cache_page(desc);
-	if (IS_ERR(desc->page))
-		return PTR_ERR(desc->page);
-	res = lock_page_killable(desc->page);
-	if (res != 0)
-		goto error;
-	res = -EAGAIN;
-	if (desc->page->mapping != NULL) {
-		res = nfs_readdir_search_array(desc);
-		if (res == 0) {
-			nfsi->page_index = desc->page_index;
-			return 0;
-		}
+	desc->page = nfs_readdir_page_get_cached(desc);
+	if (!desc->page)
+		return -ENOMEM;
+	if (nfs_readdir_page_needs_filling(desc->page)) {
+		res = nfs_readdir_xdr_to_array(desc, desc->page, inode);
+		if (res < 0)
+			goto error;
+	}
+	res = nfs_readdir_search_array(desc);
+	if (res == 0) {
+		nfsi->page_index = desc->page_index;
+		return 0;
 	}
-	unlock_page(desc->page);
 error:
-	cache_page_release(desc);
+	nfs_readdir_page_unlock_and_put_cached(desc);
 	return res;
 }
 
@@ -896,6 +912,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc)
 	desc->page = page;
 	desc->duped = 0;
 
+	nfs_readdir_page_init_array(page, desc->dir_cookie);
 	status = nfs_readdir_xdr_to_array(desc, page, inode);
 	if (status < 0)
 		goto out_release;
@@ -904,7 +921,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc)
 
  out_release:
 	nfs_readdir_clear_array(desc->page);
-	cache_page_release(desc);
+	nfs_readdir_page_put(desc);
  out:
 	dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
 			__func__, status);
@@ -976,8 +993,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 			break;
 
 		res = nfs_do_filldir(desc);
-		unlock_page(desc->page);
-		cache_page_release(desc);
+		nfs_readdir_page_unlock_and_put_cached(desc);
 		if (res < 0)
 			break;
 	} while (!desc->eof);
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v3 05/17] NFS: Don't discard readdir results
  2020-11-04 16:16       ` [PATCH v3 04/17] NFS: Clean up directory array handling trondmy
@ 2020-11-04 16:16         ` trondmy
  2020-11-04 16:16           ` [PATCH v3 06/17] NFS: Remove unnecessary kmap in nfs_readdir_xdr_to_array() trondmy
  2020-11-06 13:30           ` [PATCH v3 05/17] NFS: Don't discard readdir results David Wysochanski
  0 siblings, 2 replies; 28+ messages in thread
From: trondmy @ 2020-11-04 16:16 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

If a readdir call returns more data than we can fit into one page
cache page, then allocate a new one for that data rather than
discarding the data.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c | 46 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 842f69120a01..f7248145c333 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -320,6 +320,26 @@ static void nfs_readdir_page_set_eof(struct page *page)
 	kunmap_atomic(array);
 }
 
+static void nfs_readdir_page_unlock_and_put(struct page *page)
+{
+	unlock_page(page);
+	put_page(page);
+}
+
+static struct page *nfs_readdir_page_get_next(struct address_space *mapping,
+					      pgoff_t index, u64 cookie)
+{
+	struct page *page;
+
+	page = nfs_readdir_page_get_locked(mapping, index, cookie);
+	if (page) {
+		if (nfs_readdir_page_last_cookie(page) == cookie)
+			return page;
+		nfs_readdir_page_unlock_and_put(page);
+	}
+	return NULL;
+}
+
 static inline
 int is_32bit_api(void)
 {
@@ -637,13 +657,15 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
 }
 
 /* Perform conversion from xdr to cache array */
-static
-int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
-				struct page **xdr_pages, struct page *page, unsigned int buflen)
+static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
+				   struct nfs_entry *entry,
+				   struct page **xdr_pages,
+				   struct page *fillme, unsigned int buflen)
 {
+	struct address_space *mapping = desc->file->f_mapping;
 	struct xdr_stream stream;
 	struct xdr_buf buf;
-	struct page *scratch;
+	struct page *scratch, *new, *page = fillme;
 	int status;
 
 	scratch = alloc_page(GFP_KERNEL);
@@ -666,6 +688,19 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 					desc->dir_verifier);
 
 		status = nfs_readdir_add_to_array(entry, page);
+		if (status != -ENOSPC)
+			continue;
+
+		if (page->mapping != mapping)
+			break;
+		new = nfs_readdir_page_get_next(mapping, page->index + 1,
+						entry->prev_cookie);
+		if (!new)
+			break;
+		if (page != fillme)
+			nfs_readdir_page_unlock_and_put(page);
+		page = new;
+		status = nfs_readdir_add_to_array(entry, page);
 	} while (!status && !entry->eof);
 
 	switch (status) {
@@ -681,6 +716,9 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 		break;
 	}
 
+	if (page != fillme)
+		nfs_readdir_page_unlock_and_put(page);
+
 	put_page(scratch);
 	return status;
 }
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v3 06/17] NFS: Remove unnecessary kmap in nfs_readdir_xdr_to_array()
  2020-11-04 16:16         ` [PATCH v3 05/17] NFS: Don't discard readdir results trondmy
@ 2020-11-04 16:16           ` trondmy
  2020-11-04 16:16             ` [PATCH v3 07/17] NFS: Replace kmap() with kmap_atomic() in nfs_readdir_search_array() trondmy
  2020-11-06 13:30           ` [PATCH v3 05/17] NFS: Don't discard readdir results David Wysochanski
  1 sibling, 1 reply; 28+ messages in thread
From: trondmy @ 2020-11-04 16:16 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

The kmapped pointer is only used once per loop to check if we need to
exit.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index f7248145c333..e8b0fcc1bc9e 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -759,7 +759,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 	struct page *pages[NFS_MAX_READDIR_PAGES];
 	struct nfs_entry entry;
 	struct file	*file = desc->file;
-	struct nfs_cache_array *array;
 	int status = -ENOMEM;
 	unsigned int array_size = ARRAY_SIZE(pages);
 
@@ -778,11 +777,9 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 		goto out;
 	}
 
-	array = kmap(page);
-
 	status = nfs_readdir_alloc_pages(pages, array_size);
 	if (status < 0)
-		goto out_release_array;
+		goto out_release_label;
 	do {
 		unsigned int pglen;
 		status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode);
@@ -797,11 +794,10 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 		}
 
 		status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen);
-	} while (!status && !nfs_readdir_array_is_full(array));
+	} while (!status && nfs_readdir_page_needs_filling(page));
 
 	nfs_readdir_free_pages(pages, array_size);
-out_release_array:
-	kunmap(page);
+out_release_label:
 	nfs4_label_free(entry.label);
 out:
 	nfs_free_fattr(entry.fattr);
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v3 07/17] NFS: Replace kmap() with kmap_atomic() in nfs_readdir_search_array()
  2020-11-04 16:16           ` [PATCH v3 06/17] NFS: Remove unnecessary kmap in nfs_readdir_xdr_to_array() trondmy
@ 2020-11-04 16:16             ` trondmy
  2020-11-04 16:16               ` [PATCH v3 08/17] NFS: Simplify struct nfs_cache_array_entry trondmy
  0 siblings, 1 reply; 28+ messages in thread
From: trondmy @ 2020-11-04 16:16 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e8b0fcc1bc9e..b9001123ec84 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -447,7 +447,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
 	struct nfs_cache_array *array;
 	int status;
 
-	array = kmap(desc->page);
+	array = kmap_atomic(desc->page);
 
 	if (desc->dir_cookie == 0)
 		status = nfs_readdir_search_for_pos(array, desc);
@@ -459,7 +459,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
 		desc->current_index += array->size;
 		desc->page_index++;
 	}
-	kunmap(desc->page);
+	kunmap_atomic(array);
 	return status;
 }
 
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v3 08/17] NFS: Simplify struct nfs_cache_array_entry
  2020-11-04 16:16             ` [PATCH v3 07/17] NFS: Replace kmap() with kmap_atomic() in nfs_readdir_search_array() trondmy
@ 2020-11-04 16:16               ` trondmy
  2020-11-04 16:16                 ` [PATCH v3 09/17] NFS: Support larger readdir buffers trondmy
  0 siblings, 1 reply; 28+ messages in thread
From: trondmy @ 2020-11-04 16:16 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

We don't need to store a hash, so replace struct qstr with a simple
const char pointer and length.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c | 46 +++++++++++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b9001123ec84..be0e2891fecc 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -133,7 +133,8 @@ nfs_closedir(struct inode *inode, struct file *filp)
 struct nfs_cache_array_entry {
 	u64 cookie;
 	u64 ino;
-	struct qstr string;
+	const char *name;
+	unsigned int name_len;
 	unsigned char d_type;
 };
 
@@ -192,7 +193,7 @@ void nfs_readdir_clear_array(struct page *page)
 
 	array = kmap_atomic(page);
 	for (i = 0; i < array->size; i++)
-		kfree(array->array[i].string.name);
+		kfree(array->array[i].name);
 	nfs_readdir_array_init(array);
 	kunmap_atomic(array);
 }
@@ -213,20 +214,17 @@ static bool nfs_readdir_array_is_full(struct nfs_cache_array *array)
  * when called by nfs_readdir_add_to_array, the strings will be freed in
  * nfs_clear_readdir_array()
  */
-static
-int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len)
+static const char *nfs_readdir_copy_name(const char *name, unsigned int len)
 {
-	string->len = len;
-	string->name = kmemdup_nul(name, len, GFP_KERNEL);
-	if (string->name == NULL)
-		return -ENOMEM;
+	const char *ret = kmemdup_nul(name, len, GFP_KERNEL);
+
 	/*
 	 * Avoid a kmemleak false positive. The pointer to the name is stored
 	 * in a page cache page which kmemleak does not scan.
 	 */
-	kmemleak_not_leak(string->name);
-	string->hash = full_name_hash(NULL, name, len);
-	return 0;
+	if (ret != NULL)
+		kmemleak_not_leak(ret);
+	return ret;
 }
 
 /*
@@ -249,27 +247,34 @@ static int nfs_readdir_array_can_expand(struct nfs_cache_array *array)
 static
 int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
 {
-	struct nfs_cache_array *array = kmap(page);
+	struct nfs_cache_array *array;
 	struct nfs_cache_array_entry *cache_entry;
+	const char *name;
 	int ret;
 
+	name = nfs_readdir_copy_name(entry->name, entry->len);
+	if (!name)
+		return -ENOMEM;
+
+	array = kmap_atomic(page);
 	ret = nfs_readdir_array_can_expand(array);
-	if (ret)
+	if (ret) {
+		kfree(name);
 		goto out;
+	}
 
 	cache_entry = &array->array[array->size];
 	cache_entry->cookie = entry->prev_cookie;
 	cache_entry->ino = entry->ino;
 	cache_entry->d_type = entry->d_type;
-	ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len);
-	if (ret)
-		goto out;
+	cache_entry->name_len = entry->len;
+	cache_entry->name = name;
 	array->last_cookie = entry->cookie;
 	array->size++;
 	if (entry->eof != 0)
 		nfs_readdir_array_set_eof(array);
 out:
-	kunmap(page);
+	kunmap_atomic(array);
 	return ret;
 }
 
@@ -413,9 +418,8 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
 					if (printk_ratelimit()) {
 						pr_notice("NFS: directory %pD2 contains a readdir loop."
 								"Please contact your server vendor.  "
-								"The file: %.*s has duplicate cookie %llu\n",
-								desc->file, array->array[i].string.len,
-								array->array[i].string.name, desc->dir_cookie);
+								"The file: %s has duplicate cookie %llu\n",
+								desc->file, array->array[i].name, desc->dir_cookie);
 					}
 					status = -ELOOP;
 					goto out;
@@ -888,7 +892,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
 		struct nfs_cache_array_entry *ent;
 
 		ent = &array->array[i];
-		if (!dir_emit(desc->ctx, ent->string.name, ent->string.len,
+		if (!dir_emit(desc->ctx, ent->name, ent->name_len,
 		    nfs_compat_user_ino64(ent->ino), ent->d_type)) {
 			desc->eof = true;
 			break;
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v3 09/17] NFS: Support larger readdir buffers
  2020-11-04 16:16               ` [PATCH v3 08/17] NFS: Simplify struct nfs_cache_array_entry trondmy
@ 2020-11-04 16:16                 ` trondmy
  2020-11-04 16:16                   ` [PATCH v3 10/17] NFS: More readdir cleanups trondmy
  0 siblings, 1 reply; 28+ messages in thread
From: trondmy @ 2020-11-04 16:16 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

Support readdir buffers of up to 1MB in size so that we can read
large directories using few RPC calls.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/client.c   |  4 ++--
 fs/nfs/dir.c      | 33 +++++++++++++++++++--------------
 fs/nfs/internal.h |  6 ------
 3 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 4b8cc93913f7..f6454ba53d05 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -781,8 +781,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
 	server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
 
 	server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
-	if (server->dtsize > PAGE_SIZE * NFS_MAX_READDIR_PAGES)
-		server->dtsize = PAGE_SIZE * NFS_MAX_READDIR_PAGES;
+	if (server->dtsize > NFS_MAX_FILE_IO_SIZE)
+		server->dtsize = NFS_MAX_FILE_IO_SIZE;
 	if (server->dtsize > server->rsize)
 		server->dtsize = server->rsize;
 
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index be0e2891fecc..438906dae083 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -727,44 +727,47 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
 	return status;
 }
 
-static
-void nfs_readdir_free_pages(struct page **pages, unsigned int npages)
+static void nfs_readdir_free_pages(struct page **pages, size_t npages)
 {
-	unsigned int i;
-	for (i = 0; i < npages; i++)
-		put_page(pages[i]);
+	while (npages--)
+		put_page(pages[npages]);
+	kfree(pages);
 }
 
 /*
  * nfs_readdir_alloc_pages() will allocate pages that must be freed with a call
  * to nfs_readdir_free_pages()
  */
-static
-int nfs_readdir_alloc_pages(struct page **pages, unsigned int npages)
+static struct page **nfs_readdir_alloc_pages(size_t npages)
 {
-	unsigned int i;
+	struct page **pages;
+	size_t i;
 
+	pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
+	if (!pages)
+		return NULL;
 	for (i = 0; i < npages; i++) {
 		struct page *page = alloc_page(GFP_KERNEL);
 		if (page == NULL)
 			goto out_freepages;
 		pages[i] = page;
 	}
-	return 0;
+	return pages;
 
 out_freepages:
 	nfs_readdir_free_pages(pages, i);
-	return -ENOMEM;
+	return NULL;
 }
 
 static
 int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode)
 {
-	struct page *pages[NFS_MAX_READDIR_PAGES];
+	struct page **pages;
 	struct nfs_entry entry;
 	struct file	*file = desc->file;
+	size_t array_size;
+	size_t dtsize = NFS_SERVER(inode)->dtsize;
 	int status = -ENOMEM;
-	unsigned int array_size = ARRAY_SIZE(pages);
 
 	entry.prev_cookie = 0;
 	entry.cookie = nfs_readdir_page_last_cookie(page);
@@ -781,9 +784,11 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 		goto out;
 	}
 
-	status = nfs_readdir_alloc_pages(pages, array_size);
-	if (status < 0)
+	array_size = (dtsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	pages = nfs_readdir_alloc_pages(array_size);
+	if (!pages)
 		goto out_release_label;
+
 	do {
 		unsigned int pglen;
 		status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 6673a77884d9..b840d0a91c9d 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -56,12 +56,6 @@ static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry)
 #define NFS_UNSPEC_RETRANS	(UINT_MAX)
 #define NFS_UNSPEC_TIMEO	(UINT_MAX)
 
-/*
- * Maximum number of pages that readdir can use for creating
- * a vmapped array of pages.
- */
-#define NFS_MAX_READDIR_PAGES 8
-
 struct nfs_client_initdata {
 	unsigned long init_flags;
 	const char *hostname;			/* Hostname of the server */
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v3 10/17] NFS: More readdir cleanups
  2020-11-04 16:16                 ` [PATCH v3 09/17] NFS: Support larger readdir buffers trondmy
@ 2020-11-04 16:16                   ` trondmy
  2020-11-04 16:16                     ` [PATCH v3 11/17] NFS: nfs_do_filldir() does not return a value trondmy
  0 siblings, 1 reply; 28+ messages in thread
From: trondmy @ 2020-11-04 16:16 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

Remove the redundant caching of the credential in struct
nfs_open_dir_context.
Pass the buffer size as an argument to nfs_readdir_xdr_filler().

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c           | 25 +++++++++++--------------
 include/linux/nfs_fs.h |  1 -
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 438906dae083..bc366bd8e8f3 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -68,7 +68,7 @@ const struct address_space_operations nfs_dir_aops = {
 	.freepage = nfs_readdir_clear_array,
 };
 
-static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, const struct cred *cred)
+static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir)
 {
 	struct nfs_inode *nfsi = NFS_I(dir);
 	struct nfs_open_dir_context *ctx;
@@ -78,7 +78,6 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir
 		ctx->attr_gencount = nfsi->attr_gencount;
 		ctx->dir_cookie = 0;
 		ctx->dup_cookie = 0;
-		ctx->cred = get_cred(cred);
 		spin_lock(&dir->i_lock);
 		if (list_empty(&nfsi->open_files) &&
 		    (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
@@ -96,7 +95,6 @@ static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_cont
 	spin_lock(&dir->i_lock);
 	list_del(&ctx->list);
 	spin_unlock(&dir->i_lock);
-	put_cred(ctx->cred);
 	kfree(ctx);
 }
 
@@ -113,7 +111,7 @@ nfs_opendir(struct inode *inode, struct file *filp)
 
 	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
 
-	ctx = alloc_nfs_open_dir_context(inode, current_cred());
+	ctx = alloc_nfs_open_dir_context(inode);
 	if (IS_ERR(ctx)) {
 		res = PTR_ERR(ctx);
 		goto out;
@@ -468,12 +466,12 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
 }
 
 /* Fill a page with xdr information before transferring to the cache page */
-static
-int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
-			struct nfs_entry *entry, struct file *file, struct inode *inode)
+static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
+				  u64 cookie, struct page **pages,
+				  size_t bufsize)
 {
-	struct nfs_open_dir_context *ctx = file->private_data;
-	const struct cred *cred = ctx->cred;
+	struct file *file = desc->file;
+	struct inode *inode = file_inode(file);
 	unsigned long	timestamp, gencount;
 	int		error;
 
@@ -481,8 +479,8 @@ int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
 	timestamp = jiffies;
 	gencount = nfs_inc_attr_generation_counter();
 	desc->dir_verifier = nfs_save_change_attribute(inode);
-	error = NFS_PROTO(inode)->readdir(file_dentry(file), cred, entry->cookie, pages,
-					  NFS_SERVER(inode)->dtsize, desc->plus);
+	error = NFS_PROTO(inode)->readdir(file_dentry(file), file->f_cred,
+					  cookie, pages, bufsize, desc->plus);
 	if (error < 0) {
 		/* We requested READDIRPLUS, but the server doesn't grok it */
 		if (error == -ENOTSUPP && desc->plus) {
@@ -764,7 +762,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 {
 	struct page **pages;
 	struct nfs_entry entry;
-	struct file	*file = desc->file;
 	size_t array_size;
 	size_t dtsize = NFS_SERVER(inode)->dtsize;
 	int status = -ENOMEM;
@@ -791,8 +788,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 
 	do {
 		unsigned int pglen;
-		status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode);
-
+		status = nfs_readdir_xdr_filler(desc, entry.cookie,
+						pages, dtsize);
 		if (status < 0)
 			break;
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index a2c6455ea3fa..dd6b463dda80 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -88,7 +88,6 @@ struct nfs_open_context {
 
 struct nfs_open_dir_context {
 	struct list_head list;
-	const struct cred *cred;
 	unsigned long attr_gencount;
 	__u64 dir_cookie;
 	__u64 dup_cookie;
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v3 11/17] NFS: nfs_do_filldir() does not return a value
  2020-11-04 16:16                   ` [PATCH v3 10/17] NFS: More readdir cleanups trondmy
@ 2020-11-04 16:16                     ` trondmy
  2020-11-04 16:16                       ` [PATCH v3 12/17] NFS: Reduce readdir stack usage trondmy
  0 siblings, 1 reply; 28+ messages in thread
From: trondmy @ 2020-11-04 16:16 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

Clean up nfs_do_filldir().

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index bc366bd8e8f3..48856cee10de 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -881,13 +881,11 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
 /*
  * Once we've found the start of the dirent within a page: fill 'er up...
  */
-static 
-int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
+static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
 {
 	struct file	*file = desc->file;
-	int i = 0;
-	int res = 0;
-	struct nfs_cache_array *array = NULL;
+	struct nfs_cache_array *array;
+	unsigned int i = 0;
 
 	array = kmap(desc->page);
 	for (i = desc->cache_entry_index; i < array->size; i++) {
@@ -914,9 +912,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
 		desc->eof = true;
 
 	kunmap(desc->page);
-	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
-			(unsigned long long)desc->dir_cookie, res);
-	return res;
+	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %llu\n",
+			(unsigned long long)desc->dir_cookie);
 }
 
 /*
@@ -957,7 +954,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc)
 	if (status < 0)
 		goto out_release;
 
-	status = nfs_do_filldir(desc);
+	nfs_do_filldir(desc);
 
  out_release:
 	nfs_readdir_clear_array(desc->page);
@@ -1032,10 +1029,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 		if (res < 0)
 			break;
 
-		res = nfs_do_filldir(desc);
+		nfs_do_filldir(desc);
 		nfs_readdir_page_unlock_and_put_cached(desc);
-		if (res < 0)
-			break;
 	} while (!desc->eof);
 
 	spin_lock(&file->f_lock);
@@ -1046,8 +1041,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	spin_unlock(&file->f_lock);
 
 out:
-	if (res > 0)
-		res = 0;
 	dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
 	return res;
 }
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v3 12/17] NFS: Reduce readdir stack usage
  2020-11-04 16:16                     ` [PATCH v3 11/17] NFS: nfs_do_filldir() does not return a value trondmy
@ 2020-11-04 16:16                       ` trondmy
  2020-11-04 16:16                         ` [PATCH v3 13/17] NFS: Cleanup to remove nfs_readdir_descriptor_t typedef trondmy
  0 siblings, 1 reply; 28+ messages in thread
From: trondmy @ 2020-11-04 16:16 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

The descriptor and the struct nfs_entry are both large structures,
so don't allocate them from the stack.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c | 58 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 33 insertions(+), 25 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 48856cee10de..c3af54640f6c 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -761,23 +761,24 @@ static
 int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode)
 {
 	struct page **pages;
-	struct nfs_entry entry;
+	struct nfs_entry *entry;
 	size_t array_size;
 	size_t dtsize = NFS_SERVER(inode)->dtsize;
 	int status = -ENOMEM;
 
-	entry.prev_cookie = 0;
-	entry.cookie = nfs_readdir_page_last_cookie(page);
-	entry.eof = 0;
-	entry.fh = nfs_alloc_fhandle();
-	entry.fattr = nfs_alloc_fattr();
-	entry.server = NFS_SERVER(inode);
-	if (entry.fh == NULL || entry.fattr == NULL)
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+	entry->cookie = nfs_readdir_page_last_cookie(page);
+	entry->fh = nfs_alloc_fhandle();
+	entry->fattr = nfs_alloc_fattr();
+	entry->server = NFS_SERVER(inode);
+	if (entry->fh == NULL || entry->fattr == NULL)
 		goto out;
 
-	entry.label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT);
-	if (IS_ERR(entry.label)) {
-		status = PTR_ERR(entry.label);
+	entry->label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT);
+	if (IS_ERR(entry->label)) {
+		status = PTR_ERR(entry->label);
 		goto out;
 	}
 
@@ -788,7 +789,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 
 	do {
 		unsigned int pglen;
-		status = nfs_readdir_xdr_filler(desc, entry.cookie,
+		status = nfs_readdir_xdr_filler(desc, entry->cookie,
 						pages, dtsize);
 		if (status < 0)
 			break;
@@ -799,15 +800,16 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 			break;
 		}
 
-		status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen);
+		status = nfs_readdir_page_filler(desc, entry, pages, page, pglen);
 	} while (!status && nfs_readdir_page_needs_filling(page));
 
 	nfs_readdir_free_pages(pages, array_size);
 out_release_label:
-	nfs4_label_free(entry.label);
+	nfs4_label_free(entry->label);
 out:
-	nfs_free_fattr(entry.fattr);
-	nfs_free_fhandle(entry.fh);
+	nfs_free_fattr(entry->fattr);
+	nfs_free_fhandle(entry->fh);
+	kfree(entry);
 	return status;
 }
 
@@ -974,13 +976,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	struct dentry	*dentry = file_dentry(file);
 	struct inode	*inode = d_inode(dentry);
 	struct nfs_open_dir_context *dir_ctx = file->private_data;
-	nfs_readdir_descriptor_t my_desc = {
-		.file = file,
-		.ctx = ctx,
-		.plus = nfs_use_readdirplus(inode, ctx),
-	},
-			*desc = &my_desc;
-	int res = 0;
+	struct nfs_readdir_descriptor *desc;
+	int res;
 
 	dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
 			file, (long long)ctx->pos);
@@ -992,10 +989,19 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	 * to either find the entry with the appropriate number or
 	 * revalidate the cookie.
 	 */
-	if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
+	if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) {
 		res = nfs_revalidate_mapping(inode, file->f_mapping);
-	if (res < 0)
+		if (res < 0)
+			goto out;
+	}
+
+	res = -ENOMEM;
+	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+	if (!desc)
 		goto out;
+	desc->file = file;
+	desc->ctx = ctx;
+	desc->plus = nfs_use_readdirplus(inode, ctx);
 
 	spin_lock(&file->f_lock);
 	desc->dir_cookie = dir_ctx->dir_cookie;
@@ -1040,6 +1046,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	dir_ctx->attr_gencount = desc->attr_gencount;
 	spin_unlock(&file->f_lock);
 
+	kfree(desc);
+
 out:
 	dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
 	return res;
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v3 13/17] NFS: Cleanup to remove nfs_readdir_descriptor_t typedef
  2020-11-04 16:16                       ` [PATCH v3 12/17] NFS: Reduce readdir stack usage trondmy
@ 2020-11-04 16:16                         ` trondmy
  2020-11-04 16:16                           ` [PATCH v3 14/17] NFS: Allow the NFS generic code to pass in a verifier to readdir trondmy
  0 siblings, 1 reply; 28+ messages in thread
From: trondmy @ 2020-11-04 16:16 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c3af54640f6c..b226f6f3ae96 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -144,7 +144,7 @@ struct nfs_cache_array {
 	struct nfs_cache_array_entry array[];
 };
 
-typedef struct nfs_readdir_descriptor {
+struct nfs_readdir_descriptor {
 	struct file	*file;
 	struct page	*page;
 	struct dir_context *ctx;
@@ -163,7 +163,7 @@ typedef struct nfs_readdir_descriptor {
 	signed char duped;
 	bool plus;
 	bool eof;
-} nfs_readdir_descriptor_t;
+};
 
 static void nfs_readdir_array_init(struct nfs_cache_array *array)
 {
@@ -362,8 +362,8 @@ bool nfs_readdir_use_cookie(const struct file *filp)
 	return true;
 }
 
-static
-int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
+static int nfs_readdir_search_for_pos(struct nfs_cache_array *array,
+				      struct nfs_readdir_descriptor *desc)
 {
 	loff_t diff = desc->ctx->pos - desc->current_index;
 	unsigned int index;
@@ -394,8 +394,8 @@ nfs_readdir_inode_mapping_valid(struct nfs_inode *nfsi)
 	return !test_bit(NFS_INO_INVALIDATING, &nfsi->flags);
 }
 
-static
-int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
+static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,
+					 struct nfs_readdir_descriptor *desc)
 {
 	int i;
 	loff_t new_pos;
@@ -443,8 +443,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
 	return status;
 }
 
-static
-int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
+static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc)
 {
 	struct nfs_cache_array *array;
 	int status;
@@ -497,7 +496,7 @@ static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
 	return error;
 }
 
-static int xdr_decode(nfs_readdir_descriptor_t *desc,
+static int xdr_decode(struct nfs_readdir_descriptor *desc,
 		      struct nfs_entry *entry, struct xdr_stream *xdr)
 {
 	struct inode *inode = file_inode(desc->file);
@@ -757,8 +756,8 @@ static struct page **nfs_readdir_alloc_pages(size_t npages)
 	return NULL;
 }
 
-static
-int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode)
+static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
+				    struct page *page, struct inode *inode)
 {
 	struct page **pages;
 	struct nfs_entry *entry;
@@ -838,8 +837,7 @@ nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc)
  * Returns 0 if desc->dir_cookie was found on page desc->page_index
  * and locks the page to prevent removal from the page cache.
  */
-static
-int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc)
+static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
 {
 	struct inode *inode = file_inode(desc->file);
 	struct nfs_inode *nfsi = NFS_I(inode);
@@ -864,8 +862,7 @@ int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc)
 }
 
 /* Search for desc->dir_cookie from the beginning of the page cache */
-static inline
-int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
+static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
 {
 	int res;
 
@@ -930,8 +927,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
  *	 we should already have a complete representation of the
  *	 directory in the page cache by the time we get here.
  */
-static inline
-int uncached_readdir(nfs_readdir_descriptor_t *desc)
+static int uncached_readdir(struct nfs_readdir_descriptor *desc)
 {
 	struct page	*page = NULL;
 	int		status;
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v3 14/17] NFS: Allow the NFS generic code to pass in a verifier to readdir
  2020-11-04 16:16                         ` [PATCH v3 13/17] NFS: Cleanup to remove nfs_readdir_descriptor_t typedef trondmy
@ 2020-11-04 16:16                           ` trondmy
  2020-11-04 16:16                             ` [PATCH v3 15/17] NFS: Handle NFS4ERR_NOT_SAME and NFSERR_BADCOOKIE from readdir calls trondmy
  0 siblings, 1 reply; 28+ messages in thread
From: trondmy @ 2020-11-04 16:16 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

If we're ever going to allow support for servers that use the readdir
verifier, then that use needs to be managed by the middle layers as
those need to be able to reject cookies from other verifiers.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c            | 23 ++++++++++++++++++-----
 fs/nfs/nfs3proc.c       | 35 +++++++++++++++++------------------
 fs/nfs/nfs4proc.c       | 38 ++++++++++++++++++--------------------
 fs/nfs/proc.c           | 18 +++++++++---------
 include/linux/nfs_xdr.h | 17 +++++++++++++++--
 5 files changed, 77 insertions(+), 54 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b226f6f3ae96..3ee0668a9719 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -469,8 +469,20 @@ static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
 				  u64 cookie, struct page **pages,
 				  size_t bufsize)
 {
-	struct file *file = desc->file;
-	struct inode *inode = file_inode(file);
+	struct inode *inode = file_inode(desc->file);
+	__be32 verf_res[2];
+	struct nfs_readdir_arg arg = {
+		.dentry = file_dentry(desc->file),
+		.cred = desc->file->f_cred,
+		.verf = NFS_I(inode)->cookieverf,
+		.cookie = cookie,
+		.pages = pages,
+		.page_len = bufsize,
+		.plus = desc->plus,
+	};
+	struct nfs_readdir_res res = {
+		.verf = verf_res,
+	};
 	unsigned long	timestamp, gencount;
 	int		error;
 
@@ -478,20 +490,21 @@ static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
 	timestamp = jiffies;
 	gencount = nfs_inc_attr_generation_counter();
 	desc->dir_verifier = nfs_save_change_attribute(inode);
-	error = NFS_PROTO(inode)->readdir(file_dentry(file), file->f_cred,
-					  cookie, pages, bufsize, desc->plus);
+	error = NFS_PROTO(inode)->readdir(&arg, &res);
 	if (error < 0) {
 		/* We requested READDIRPLUS, but the server doesn't grok it */
 		if (error == -ENOTSUPP && desc->plus) {
 			NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
 			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
-			desc->plus = false;
+			desc->plus = arg.plus = false;
 			goto again;
 		}
 		goto error;
 	}
 	desc->timestamp = timestamp;
 	desc->gencount = gencount;
+	memcpy(NFS_I(inode)->cookieverf, res.verf,
+	       sizeof(NFS_I(inode)->cookieverf));
 error:
 	return error;
 }
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 6b66b73a50eb..5c4e23abc345 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -662,37 +662,36 @@ nfs3_proc_rmdir(struct inode *dir, const struct qstr *name)
  * Also note that this implementation handles both plain readdir and
  * readdirplus.
  */
-static int
-nfs3_proc_readdir(struct dentry *dentry, const struct cred *cred,
-		  u64 cookie, struct page **pages, unsigned int count, bool plus)
+static int nfs3_proc_readdir(struct nfs_readdir_arg *nr_arg,
+			     struct nfs_readdir_res *nr_res)
 {
-	struct inode		*dir = d_inode(dentry);
-	__be32			*verf = NFS_I(dir)->cookieverf;
+	struct inode		*dir = d_inode(nr_arg->dentry);
 	struct nfs3_readdirargs	arg = {
 		.fh		= NFS_FH(dir),
-		.cookie		= cookie,
-		.verf		= {verf[0], verf[1]},
-		.plus		= plus,
-		.count		= count,
-		.pages		= pages
+		.cookie		= nr_arg->cookie,
+		.plus		= nr_arg->plus,
+		.count		= nr_arg->page_len,
+		.pages		= nr_arg->pages
 	};
 	struct nfs3_readdirres	res = {
-		.verf		= verf,
-		.plus		= plus
+		.verf		= nr_res->verf,
+		.plus		= nr_arg->plus,
 	};
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs3_procedures[NFS3PROC_READDIR],
 		.rpc_argp	= &arg,
 		.rpc_resp	= &res,
-		.rpc_cred	= cred,
+		.rpc_cred	= nr_arg->cred,
 	};
 	int status = -ENOMEM;
 
-	if (plus)
+	if (nr_arg->plus)
 		msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS];
+	if (arg.cookie)
+		memcpy(arg.verf, nr_arg->verf, sizeof(arg.verf));
 
-	dprintk("NFS call  readdir%s %d\n",
-			plus? "plus" : "", (unsigned int) cookie);
+	dprintk("NFS call  readdir%s %llu\n", nr_arg->plus ? "plus" : "",
+		(unsigned long long)nr_arg->cookie);
 
 	res.dir_attr = nfs_alloc_fattr();
 	if (res.dir_attr == NULL)
@@ -705,8 +704,8 @@ nfs3_proc_readdir(struct dentry *dentry, const struct cred *cred,
 
 	nfs_free_fattr(res.dir_attr);
 out:
-	dprintk("NFS reply readdir%s: %d\n",
-			plus? "plus" : "", status);
+	dprintk("NFS reply readdir%s: %d\n", nr_arg->plus ? "plus" : "",
+		status);
 	return status;
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a5b9356bee6a..8e82f988a11f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4961,35 +4961,34 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
 	return err;
 }
 
-static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
-		u64 cookie, struct page **pages, unsigned int count, bool plus)
+static int _nfs4_proc_readdir(struct nfs_readdir_arg *nr_arg,
+			      struct nfs_readdir_res *nr_res)
 {
-	struct inode		*dir = d_inode(dentry);
+	struct inode		*dir = d_inode(nr_arg->dentry);
 	struct nfs4_readdir_arg args = {
 		.fh = NFS_FH(dir),
-		.pages = pages,
+		.pages = nr_arg->pages,
 		.pgbase = 0,
-		.count = count,
-		.bitmask = NFS_SERVER(d_inode(dentry))->attr_bitmask,
-		.plus = plus,
+		.count = nr_arg->page_len,
+		.bitmask = NFS_SERVER(d_inode(nr_arg->dentry))->attr_bitmask,
+		.plus = nr_arg->plus,
 	};
 	struct nfs4_readdir_res res;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READDIR],
 		.rpc_argp = &args,
 		.rpc_resp = &res,
-		.rpc_cred = cred,
+		.rpc_cred = nr_arg->cred,
 	};
 	int			status;
 
-	dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__,
-			dentry,
-			(unsigned long long)cookie);
-	nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args);
+	dprintk("%s: dentry = %pd2, cookie = %llu\n", __func__,
+		nr_arg->dentry, (unsigned long long)nr_arg->cookie);
+	nfs4_setup_readdir(nr_arg->cookie, nr_arg->verf, nr_arg->dentry, &args);
 	res.pgbase = args.pgbase;
 	status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
 	if (status >= 0) {
-		memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE);
+		memcpy(nr_res->verf, res.verifier.data, NFS4_VERIFIER_SIZE);
 		status += args.pgbase;
 	}
 
@@ -4999,19 +4998,18 @@ static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
 	return status;
 }
 
-static int nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
-		u64 cookie, struct page **pages, unsigned int count, bool plus)
+static int nfs4_proc_readdir(struct nfs_readdir_arg *arg,
+			     struct nfs_readdir_res *res)
 {
 	struct nfs4_exception exception = {
 		.interruptible = true,
 	};
 	int err;
 	do {
-		err = _nfs4_proc_readdir(dentry, cred, cookie,
-				pages, count, plus);
-		trace_nfs4_readdir(d_inode(dentry), err);
-		err = nfs4_handle_exception(NFS_SERVER(d_inode(dentry)), err,
-				&exception);
+		err = _nfs4_proc_readdir(arg, res);
+		trace_nfs4_readdir(d_inode(arg->dentry), err);
+		err = nfs4_handle_exception(NFS_SERVER(d_inode(arg->dentry)),
+					    err, &exception);
 	} while (exception.retry);
 	return err;
 }
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 15c865cc837f..73ab7c59d3a7 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -499,26 +499,26 @@ nfs_proc_rmdir(struct inode *dir, const struct qstr *name)
  * sure it is syntactically correct; the entries itself are decoded
  * from nfs_readdir by calling the decode_entry function directly.
  */
-static int
-nfs_proc_readdir(struct dentry *dentry, const struct cred *cred,
-		 u64 cookie, struct page **pages, unsigned int count, bool plus)
+static int nfs_proc_readdir(struct nfs_readdir_arg *nr_arg,
+			    struct nfs_readdir_res *nr_res)
 {
-	struct inode		*dir = d_inode(dentry);
+	struct inode		*dir = d_inode(nr_arg->dentry);
 	struct nfs_readdirargs	arg = {
 		.fh		= NFS_FH(dir),
-		.cookie		= cookie,
-		.count		= count,
-		.pages		= pages,
+		.cookie		= nr_arg->cookie,
+		.count		= nr_arg->page_len,
+		.pages		= nr_arg->pages,
 	};
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs_procedures[NFSPROC_READDIR],
 		.rpc_argp	= &arg,
-		.rpc_cred	= cred,
+		.rpc_cred	= nr_arg->cred,
 	};
 	int			status;
 
-	dprintk("NFS call  readdir %d\n", (unsigned int)cookie);
+	dprintk("NFS call  readdir %llu\n", (unsigned long long)nr_arg->cookie);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	nr_res->verf[0] = nr_res->verf[1] = 0;
 
 	nfs_invalidate_atime(dir);
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index d63cb862d58e..3327239fa2f9 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -750,6 +750,20 @@ struct nfs_entry {
 	struct nfs_server *	server;
 };
 
+struct nfs_readdir_arg {
+	struct dentry		*dentry;
+	const struct cred	*cred;
+	__be32			*verf;
+	u64			cookie;
+	struct page		**pages;
+	unsigned int		page_len;
+	bool			plus;
+};
+
+struct nfs_readdir_res {
+	__be32			*verf;
+};
+
 /*
  * The following types are for NFSv2 only.
  */
@@ -1744,8 +1758,7 @@ struct nfs_rpc_ops {
 			    unsigned int, struct iattr *);
 	int	(*mkdir)   (struct inode *, struct dentry *, struct iattr *);
 	int	(*rmdir)   (struct inode *, const struct qstr *);
-	int	(*readdir) (struct dentry *, const struct cred *,
-			    u64, struct page **, unsigned int, bool);
+	int	(*readdir) (struct nfs_readdir_arg *, struct nfs_readdir_res *);
 	int	(*mknod)   (struct inode *, struct dentry *, struct iattr *,
 			    dev_t);
 	int	(*statfs)  (struct nfs_server *, struct nfs_fh *,
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v3 15/17] NFS: Handle NFS4ERR_NOT_SAME and NFSERR_BADCOOKIE from readdir calls
  2020-11-04 16:16                           ` [PATCH v3 14/17] NFS: Allow the NFS generic code to pass in a verifier to readdir trondmy
@ 2020-11-04 16:16                             ` trondmy
  2020-11-04 16:16                               ` [PATCH v3 16/17] NFS: Improve handling of directory verifiers trondmy
  0 siblings, 1 reply; 28+ messages in thread
From: trondmy @ 2020-11-04 16:16 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

If the server returns NFS4ERR_NOT_SAME or tells us that the cookie is
bad in response to a READDIR call, then we should empty the page cache
so that we can fill it from scratch again.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c      | 24 ++++++++++++++++--------
 fs/nfs/nfs4proc.c |  2 ++
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 3ee0668a9719..3b44bef3a1b4 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -861,15 +861,21 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
 		return -ENOMEM;
 	if (nfs_readdir_page_needs_filling(desc->page)) {
 		res = nfs_readdir_xdr_to_array(desc, desc->page, inode);
-		if (res < 0)
-			goto error;
+		if (res < 0) {
+			nfs_readdir_page_unlock_and_put_cached(desc);
+			if (res == -EBADCOOKIE || res == -ENOTSYNC) {
+				invalidate_inode_pages2(desc->file->f_mapping);
+				desc->page_index = 0;
+				return -EAGAIN;
+			}
+			return res;
+		}
 	}
 	res = nfs_readdir_search_array(desc);
 	if (res == 0) {
 		nfsi->page_index = desc->page_index;
 		return 0;
 	}
-error:
 	nfs_readdir_page_unlock_and_put_cached(desc);
 	return res;
 }
@@ -879,12 +885,12 @@ static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
 {
 	int res;
 
-	if (desc->page_index == 0) {
-		desc->current_index = 0;
-		desc->prev_index = 0;
-		desc->last_cookie = 0;
-	}
 	do {
+		if (desc->page_index == 0) {
+			desc->current_index = 0;
+			desc->prev_index = 0;
+			desc->last_cookie = 0;
+		}
 		res = find_and_lock_cache_page(desc);
 	} while (res == -EAGAIN);
 	return res;
@@ -1030,6 +1036,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 				res = uncached_readdir(desc);
 				if (res == 0)
 					continue;
+				if (res == -EBADCOOKIE || res == -ENOTSYNC)
+					res = 0;
 			}
 			break;
 		}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8e82f988a11f..3f1fdb06ba56 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -184,6 +184,8 @@ static int nfs4_map_errors(int err)
 		return -EPROTONOSUPPORT;
 	case -NFS4ERR_FILE_OPEN:
 		return -EBUSY;
+	case -NFS4ERR_NOT_SAME:
+		return -ENOTSYNC;
 	default:
 		dprintk("%s could not handle NFSv4 error %d\n",
 				__func__, -err);
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v3 16/17] NFS: Improve handling of directory verifiers
  2020-11-04 16:16                             ` [PATCH v3 15/17] NFS: Handle NFS4ERR_NOT_SAME and NFSERR_BADCOOKIE from readdir calls trondmy
@ 2020-11-04 16:16                               ` trondmy
  2020-11-04 16:16                                 ` [PATCH v3 17/17] NFS: Optimisations for monotonically increasing readdir cookies trondmy
  2020-11-04 21:01                                 ` [PATCH v3 16/17] NFS: Improve handling of directory verifiers David Wysochanski
  0 siblings, 2 replies; 28+ messages in thread
From: trondmy @ 2020-11-04 16:16 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

If the server insists on using the readdir verifiers in order to allow
cookies to expire, then we should ensure that we cache the verifier
with the cookie, so that we can return an error if the application
tries to use the expired cookie.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c           | 35 +++++++++++++++++++++++------------
 fs/nfs/inode.c         |  7 -------
 include/linux/nfs_fs.h |  8 +++++++-
 3 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 3b44bef3a1b4..454377228167 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -155,6 +155,7 @@ struct nfs_readdir_descriptor {
 	loff_t		current_index;
 	loff_t		prev_index;
 
+	__be32		verf[NFS_DIR_VERIFIER_SIZE];
 	unsigned long	dir_verifier;
 	unsigned long	timestamp;
 	unsigned long	gencount;
@@ -466,15 +467,15 @@ static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc)
 
 /* Fill a page with xdr information before transferring to the cache page */
 static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
-				  u64 cookie, struct page **pages,
-				  size_t bufsize)
+				  __be32 *verf, u64 cookie,
+				  struct page **pages, size_t bufsize,
+				  __be32 *verf_res)
 {
 	struct inode *inode = file_inode(desc->file);
-	__be32 verf_res[2];
 	struct nfs_readdir_arg arg = {
 		.dentry = file_dentry(desc->file),
 		.cred = desc->file->f_cred,
-		.verf = NFS_I(inode)->cookieverf,
+		.verf = verf,
 		.cookie = cookie,
 		.pages = pages,
 		.page_len = bufsize,
@@ -503,8 +504,6 @@ static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
 	}
 	desc->timestamp = timestamp;
 	desc->gencount = gencount;
-	memcpy(NFS_I(inode)->cookieverf, res.verf,
-	       sizeof(NFS_I(inode)->cookieverf));
 error:
 	return error;
 }
@@ -770,11 +769,13 @@ static struct page **nfs_readdir_alloc_pages(size_t npages)
 }
 
 static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
-				    struct page *page, struct inode *inode)
+				    struct page *page, __be32 *verf_arg,
+				    __be32 *verf_res)
 {
 	struct page **pages;
 	struct nfs_entry *entry;
 	size_t array_size;
+	struct inode *inode = file_inode(desc->file);
 	size_t dtsize = NFS_SERVER(inode)->dtsize;
 	int status = -ENOMEM;
 
@@ -801,8 +802,9 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
 
 	do {
 		unsigned int pglen;
-		status = nfs_readdir_xdr_filler(desc, entry->cookie,
-						pages, dtsize);
+		status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie,
+						pages, dtsize,
+						verf_res);
 		if (status < 0)
 			break;
 
@@ -854,13 +856,15 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
 {
 	struct inode *inode = file_inode(desc->file);
 	struct nfs_inode *nfsi = NFS_I(inode);
+	__be32 verf[NFS_DIR_VERIFIER_SIZE];
 	int res;
 
 	desc->page = nfs_readdir_page_get_cached(desc);
 	if (!desc->page)
 		return -ENOMEM;
 	if (nfs_readdir_page_needs_filling(desc->page)) {
-		res = nfs_readdir_xdr_to_array(desc, desc->page, inode);
+		res = nfs_readdir_xdr_to_array(desc, desc->page,
+					       nfsi->cookieverf, verf);
 		if (res < 0) {
 			nfs_readdir_page_unlock_and_put_cached(desc);
 			if (res == -EBADCOOKIE || res == -ENOTSYNC) {
@@ -870,6 +874,7 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
 			}
 			return res;
 		}
+		memcpy(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf));
 	}
 	res = nfs_readdir_search_array(desc);
 	if (res == 0) {
@@ -902,6 +907,7 @@ static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
 static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
 {
 	struct file	*file = desc->file;
+	struct nfs_inode *nfsi = NFS_I(file_inode(file));
 	struct nfs_cache_array *array;
 	unsigned int i = 0;
 
@@ -915,6 +921,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
 			desc->eof = true;
 			break;
 		}
+		memcpy(desc->verf, nfsi->cookieverf, sizeof(desc->verf));
 		if (i < (array->size-1))
 			desc->dir_cookie = array->array[i+1].cookie;
 		else
@@ -949,8 +956,8 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
 static int uncached_readdir(struct nfs_readdir_descriptor *desc)
 {
 	struct page	*page = NULL;
+	__be32		verf[NFS_DIR_VERIFIER_SIZE];
 	int		status;
-	struct inode *inode = file_inode(desc->file);
 
 	dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
 			(unsigned long long)desc->dir_cookie);
@@ -967,7 +974,7 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
 	desc->duped = 0;
 
 	nfs_readdir_page_init_array(page, desc->dir_cookie);
-	status = nfs_readdir_xdr_to_array(desc, page, inode);
+	status = nfs_readdir_xdr_to_array(desc, page, desc->verf, verf);
 	if (status < 0)
 		goto out_release;
 
@@ -1023,6 +1030,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	desc->dup_cookie = dir_ctx->dup_cookie;
 	desc->duped = dir_ctx->duped;
 	desc->attr_gencount = dir_ctx->attr_gencount;
+	memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
 	spin_unlock(&file->f_lock);
 
 	do {
@@ -1061,6 +1069,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	dir_ctx->dup_cookie = desc->dup_cookie;
 	dir_ctx->duped = desc->duped;
 	dir_ctx->attr_gencount = desc->attr_gencount;
+	memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
 	spin_unlock(&file->f_lock);
 
 	kfree(desc);
@@ -1101,6 +1110,8 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
 			dir_ctx->dir_cookie = offset;
 		else
 			dir_ctx->dir_cookie = 0;
+		if (offset == 0)
+			memset(dir_ctx->verf, 0, sizeof(dir_ctx->verf));
 		dir_ctx->duped = 0;
 	}
 	spin_unlock(&filp->f_lock);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index aa6493905bbe..9b765a900b28 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -229,7 +229,6 @@ static void nfs_zap_caches_locked(struct inode *inode)
 	nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 	nfsi->attrtimeo_timestamp = jiffies;
 
-	memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf));
 	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
 		nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
 					| NFS_INO_INVALID_DATA
@@ -1237,7 +1236,6 @@ EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
 
 static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
 	int ret;
 
 	if (mapping->nrpages != 0) {
@@ -1250,11 +1248,6 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
 		if (ret < 0)
 			return ret;
 	}
-	if (S_ISDIR(inode->i_mode)) {
-		spin_lock(&inode->i_lock);
-		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
-		spin_unlock(&inode->i_lock);
-	}
 	nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
 	nfs_fscache_wait_on_invalidate(inode);
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index dd6b463dda80..681ed98e4ba8 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -45,6 +45,11 @@
  */
 #define NFS_RPC_SWAPFLAGS		(RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS)
 
+/*
+ * Size of the NFS directory verifier
+ */
+#define NFS_DIR_VERIFIER_SIZE		2
+
 /*
  * NFSv3/v4 Access mode cache entry
  */
@@ -89,6 +94,7 @@ struct nfs_open_context {
 struct nfs_open_dir_context {
 	struct list_head list;
 	unsigned long attr_gencount;
+	__be32	verf[NFS_DIR_VERIFIER_SIZE];
 	__u64 dir_cookie;
 	__u64 dup_cookie;
 	signed char duped;
@@ -156,7 +162,7 @@ struct nfs_inode {
 	 * This is the cookie verifier used for NFSv3 readdir
 	 * operations
 	 */
-	__be32			cookieverf[2];
+	__be32			cookieverf[NFS_DIR_VERIFIER_SIZE];
 
 	atomic_long_t		nrequests;
 	struct nfs_mds_commit_info commit_info;
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v3 17/17] NFS: Optimisations for monotonically increasing readdir cookies
  2020-11-04 16:16                               ` [PATCH v3 16/17] NFS: Improve handling of directory verifiers trondmy
@ 2020-11-04 16:16                                 ` trondmy
  2020-11-04 21:01                                 ` [PATCH v3 16/17] NFS: Improve handling of directory verifiers David Wysochanski
  1 sibling, 0 replies; 28+ messages in thread
From: trondmy @ 2020-11-04 16:16 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

If the server is handing out monotonically increasing readdir cookie values,
then we can optimise away searches through pages that contain cookies that
lie outside our search range.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 454377228167..b6c3501e8f61 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -140,7 +140,8 @@ struct nfs_cache_array {
 	u64 last_cookie;
 	unsigned int size;
 	unsigned char page_full : 1,
-		      page_is_eof : 1;
+		      page_is_eof : 1,
+		      cookies_are_ordered : 1;
 	struct nfs_cache_array_entry array[];
 };
 
@@ -178,6 +179,7 @@ static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie)
 	array = kmap_atomic(page);
 	nfs_readdir_array_init(array);
 	array->last_cookie = last_cookie;
+	array->cookies_are_ordered = 1;
 	kunmap_atomic(array);
 }
 
@@ -269,6 +271,8 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
 	cache_entry->name_len = entry->len;
 	cache_entry->name = name;
 	array->last_cookie = entry->cookie;
+	if (array->last_cookie <= cache_entry->cookie)
+		array->cookies_are_ordered = 0;
 	array->size++;
 	if (entry->eof != 0)
 		nfs_readdir_array_set_eof(array);
@@ -395,6 +399,19 @@ nfs_readdir_inode_mapping_valid(struct nfs_inode *nfsi)
 	return !test_bit(NFS_INO_INVALIDATING, &nfsi->flags);
 }
 
+static bool nfs_readdir_array_cookie_in_range(struct nfs_cache_array *array,
+					      u64 cookie)
+{
+	if (!array->cookies_are_ordered)
+		return true;
+	/* Optimisation for monotonically increasing cookies */
+	if (cookie >= array->last_cookie)
+		return false;
+	if (array->size && cookie < array->array[0].cookie)
+		return false;
+	return true;
+}
+
 static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,
 					 struct nfs_readdir_descriptor *desc)
 {
@@ -402,6 +419,9 @@ static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,
 	loff_t new_pos;
 	int status = -EAGAIN;
 
+	if (!nfs_readdir_array_cookie_in_range(array, desc->dir_cookie))
+		goto check_eof;
+
 	for (i = 0; i < array->size; i++) {
 		if (array->array[i].cookie == desc->dir_cookie) {
 			struct nfs_inode *nfsi = NFS_I(file_inode(desc->file));
@@ -435,6 +455,7 @@ static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,
 			return 0;
 		}
 	}
+check_eof:
 	if (array->page_is_eof) {
 		status = -EBADCOOKIE;
 		if (desc->dir_cookie == array->last_cookie)
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v3 16/17] NFS: Improve handling of directory verifiers
  2020-11-04 16:16                               ` [PATCH v3 16/17] NFS: Improve handling of directory verifiers trondmy
  2020-11-04 16:16                                 ` [PATCH v3 17/17] NFS: Optimisations for monotonically increasing readdir cookies trondmy
@ 2020-11-04 21:01                                 ` David Wysochanski
  2020-11-04 21:31                                   ` Trond Myklebust
  1 sibling, 1 reply; 28+ messages in thread
From: David Wysochanski @ 2020-11-04 21:01 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs

On Wed, Nov 4, 2020 at 11:28 AM <trondmy@gmail.com> wrote:
>
> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>
> If the server insists on using the readdir verifiers in order to allow
> cookies to expire, then we should ensure that we cache the verifier
> with the cookie, so that we can return an error if the application
> tries to use the expired cookie.
>
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
>  fs/nfs/dir.c           | 35 +++++++++++++++++++++++------------
>  fs/nfs/inode.c         |  7 -------
>  include/linux/nfs_fs.h |  8 +++++++-
>  3 files changed, 30 insertions(+), 20 deletions(-)
>
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index 3b44bef3a1b4..454377228167 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -155,6 +155,7 @@ struct nfs_readdir_descriptor {
>         loff_t          current_index;
>         loff_t          prev_index;
>
> +       __be32          verf[NFS_DIR_VERIFIER_SIZE];
>         unsigned long   dir_verifier;
>         unsigned long   timestamp;
>         unsigned long   gencount;
> @@ -466,15 +467,15 @@ static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc)
>
>  /* Fill a page with xdr information before transferring to the cache page */
>  static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
> -                                 u64 cookie, struct page **pages,
> -                                 size_t bufsize)
> +                                 __be32 *verf, u64 cookie,
> +                                 struct page **pages, size_t bufsize,
> +                                 __be32 *verf_res)
>  {
>         struct inode *inode = file_inode(desc->file);
> -       __be32 verf_res[2];
>         struct nfs_readdir_arg arg = {
>                 .dentry = file_dentry(desc->file),
>                 .cred = desc->file->f_cred,
> -               .verf = NFS_I(inode)->cookieverf,
> +               .verf = verf,
>                 .cookie = cookie,
>                 .pages = pages,
>                 .page_len = bufsize,
> @@ -503,8 +504,6 @@ static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
>         }
>         desc->timestamp = timestamp;
>         desc->gencount = gencount;
> -       memcpy(NFS_I(inode)->cookieverf, res.verf,
> -              sizeof(NFS_I(inode)->cookieverf));
>  error:
>         return error;
>  }
> @@ -770,11 +769,13 @@ static struct page **nfs_readdir_alloc_pages(size_t npages)
>  }
>
>  static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
> -                                   struct page *page, struct inode *inode)
> +                                   struct page *page, __be32 *verf_arg,
> +                                   __be32 *verf_res)
>  {
>         struct page **pages;
>         struct nfs_entry *entry;
>         size_t array_size;
> +       struct inode *inode = file_inode(desc->file);
>         size_t dtsize = NFS_SERVER(inode)->dtsize;
>         int status = -ENOMEM;
>
> @@ -801,8 +802,9 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
>
>         do {
>                 unsigned int pglen;
> -               status = nfs_readdir_xdr_filler(desc, entry->cookie,
> -                                               pages, dtsize);
> +               status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie,
> +                                               pages, dtsize,
> +                                               verf_res);
>                 if (status < 0)
>                         break;
>
> @@ -854,13 +856,15 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
>  {
>         struct inode *inode = file_inode(desc->file);
>         struct nfs_inode *nfsi = NFS_I(inode);
> +       __be32 verf[NFS_DIR_VERIFIER_SIZE];
>         int res;
>
>         desc->page = nfs_readdir_page_get_cached(desc);
>         if (!desc->page)
>                 return -ENOMEM;
>         if (nfs_readdir_page_needs_filling(desc->page)) {
> -               res = nfs_readdir_xdr_to_array(desc, desc->page, inode);
> +               res = nfs_readdir_xdr_to_array(desc, desc->page,
> +                                              nfsi->cookieverf, verf);
>                 if (res < 0) {
>                         nfs_readdir_page_unlock_and_put_cached(desc);
>                         if (res == -EBADCOOKIE || res == -ENOTSYNC) {
> @@ -870,6 +874,7 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
>                         }
>                         return res;
>                 }
> +               memcpy(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf));
>         }
>         res = nfs_readdir_search_array(desc);
>         if (res == 0) {
> @@ -902,6 +907,7 @@ static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
>  static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
>  {
>         struct file     *file = desc->file;
> +       struct nfs_inode *nfsi = NFS_I(file_inode(file));
>         struct nfs_cache_array *array;
>         unsigned int i = 0;
>
> @@ -915,6 +921,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
>                         desc->eof = true;
>                         break;
>                 }
> +               memcpy(desc->verf, nfsi->cookieverf, sizeof(desc->verf));
>                 if (i < (array->size-1))
>                         desc->dir_cookie = array->array[i+1].cookie;
>                 else
> @@ -949,8 +956,8 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
>  static int uncached_readdir(struct nfs_readdir_descriptor *desc)
>  {
>         struct page     *page = NULL;
> +       __be32          verf[NFS_DIR_VERIFIER_SIZE];
>         int             status;
> -       struct inode *inode = file_inode(desc->file);
>
>         dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
>                         (unsigned long long)desc->dir_cookie);
> @@ -967,7 +974,7 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
>         desc->duped = 0;
>
>         nfs_readdir_page_init_array(page, desc->dir_cookie);
> -       status = nfs_readdir_xdr_to_array(desc, page, inode);
> +       status = nfs_readdir_xdr_to_array(desc, page, desc->verf, verf);
>         if (status < 0)
>                 goto out_release;
>
> @@ -1023,6 +1030,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
>         desc->dup_cookie = dir_ctx->dup_cookie;
>         desc->duped = dir_ctx->duped;
>         desc->attr_gencount = dir_ctx->attr_gencount;
> +       memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
>         spin_unlock(&file->f_lock);
>
>         do {
> @@ -1061,6 +1069,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
>         dir_ctx->dup_cookie = desc->dup_cookie;
>         dir_ctx->duped = desc->duped;
>         dir_ctx->attr_gencount = desc->attr_gencount;
> +       memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
>         spin_unlock(&file->f_lock);
>
>         kfree(desc);
> @@ -1101,6 +1110,8 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
>                         dir_ctx->dir_cookie = offset;
>                 else
>                         dir_ctx->dir_cookie = 0;
> +               if (offset == 0)
> +                       memset(dir_ctx->verf, 0, sizeof(dir_ctx->verf));
>                 dir_ctx->duped = 0;
>         }
>         spin_unlock(&filp->f_lock);

Thanks for doing these patches!

For some reason this patch does not apply but I get a problem at this hunk.
Is there a fixup or hunk or two missing from 01/17 ?
I'm starting at 3cea11cd5e3b (Linux 5.10-rc2).

Problem looks like it's at the spin_unlock - here's what the hunk
looks like for me:
fs/nfs/dir.c
1092         inode_lock(inode);
1093         offset += filp->f_pos;
1094         if (offset < 0) {
1095             inode_unlock(inode);
1096             return -EINVAL;
1097         }
1098     }
1099     if (offset != filp->f_pos) {
1100         filp->f_pos = offset;
1101         if (nfs_readdir_use_cookie(filp))
1102             dir_ctx->dir_cookie = offset;
1103         else
1104             dir_ctx->dir_cookie = 0;
1105         dir_ctx->duped = 0;
1106     }
1107     inode_unlock(inode);
1108     return offset;
1109 }



$ git reset --hard 3cea11cd5e3b
HEAD is now at 3cea11cd5e3b Linux 5.10-rc2
$ for f in
~/Downloads/trond-nfs-readdir/v3/*; do echo applying $(basename "$f");
git am "$f"; done
applying [PATCH v3 01_17] NFS_ Ensure contents of struct
nfs_open_dir_context are consistent.eml
Applying: NFS: Ensure contents of struct nfs_open_dir_context are consistent
applying [PATCH v3 02_17] NFS_ Clean up readdir struct nfs_cache_array.eml
Applying: NFS: Clean up readdir struct nfs_cache_array
applying [PATCH v3 03_17] NFS_ Clean up nfs_readdir_page_filler().eml
Applying: NFS: Clean up nfs_readdir_page_filler()
applying [PATCH v3 04_17] NFS_ Clean up directory array handling.eml
Applying: NFS: Clean up directory array handling
applying [PATCH v3 05_17] NFS_ Don't discard readdir results.eml
Applying: NFS: Don't discard readdir results
applying [PATCH v3 06_17] NFS_ Remove unnecessary kmap in
nfs_readdir_xdr_to_array().eml
Applying: NFS: Remove unnecessary kmap in nfs_readdir_xdr_to_array()
applying [PATCH v3 07_17] NFS_ Replace kmap() with kmap_atomic() in
nfs_readdir_search_array().eml
Applying: NFS: Replace kmap() with kmap_atomic() in nfs_readdir_search_array()
applying [PATCH v3 08_17] NFS_ Simplify struct nfs_cache_array_entry.eml
Applying: NFS: Simplify struct nfs_cache_array_entry
applying [PATCH v3 09_17] NFS_ Support larger readdir buffers.eml
Applying: NFS: Support larger readdir buffers
applying [PATCH v3 10_17] NFS_ More readdir cleanups.eml
Applying: NFS: More readdir cleanups
applying [PATCH v3 11_17] NFS_ nfs_do_filldir() does not return a value.eml
Applying: NFS: nfs_do_filldir() does not return a value
applying [PATCH v3 12_17] NFS_ Reduce readdir stack usage.eml
Applying: NFS: Reduce readdir stack usage
applying [PATCH v3 13_17] NFS_ Cleanup to remove
nfs_readdir_descriptor_t typedef.eml
Applying: NFS: Cleanup to remove nfs_readdir_descriptor_t typedef
applying [PATCH v3 14_17] NFS_ Allow the NFS generic code to pass in a
verifier to readdir.eml
Applying: NFS: Allow the NFS generic code to pass in a verifier to readdir
applying [PATCH v3 15_17] NFS_ Handle NFS4ERR_NOT_SAME and
NFSERR_BADCOOKIE from readdir calls.eml
Applying: NFS: Handle NFS4ERR_NOT_SAME and NFSERR_BADCOOKIE from readdir calls
applying [PATCH v3 16_17] NFS_ Improve handling of directory verifiers.eml
Applying: NFS: Improve handling of directory verifiers
error: patch failed: fs/nfs/dir.c:1101
error: fs/nfs/dir.c: patch does not apply
Patch failed at 0001 NFS: Improve handling of directory verifiers
The copy of the patch that failed is found in:
   /home/dwysocha/git/kernel/.git/rebase-apply/patch
When you have resolved this problem, run "git am --resolved".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".
applying [PATCH v3 17_17] NFS_ Optimisations for monotonically
increasing readdir cookies.eml
previous rebase directory /home/dwysocha/git/kernel/.git/rebase-apply
still exists but mbox given.


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3 16/17] NFS: Improve handling of directory verifiers
  2020-11-04 21:01                                 ` [PATCH v3 16/17] NFS: Improve handling of directory verifiers David Wysochanski
@ 2020-11-04 21:31                                   ` Trond Myklebust
  2020-11-04 21:40                                     ` David Wysochanski
  0 siblings, 1 reply; 28+ messages in thread
From: Trond Myklebust @ 2020-11-04 21:31 UTC (permalink / raw)
  To: David Wysochanski; +Cc: linux-nfs



> On Nov 4, 2020, at 16:01, David Wysochanski <dwysocha@redhat.com> wrote:
> 
> On Wed, Nov 4, 2020 at 11:28 AM <trondmy@gmail.com> wrote:
>> 
>> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>> 
>> If the server insists on using the readdir verifiers in order to allow
>> cookies to expire, then we should ensure that we cache the verifier
>> with the cookie, so that we can return an error if the application
>> tries to use the expired cookie.
>> 
>> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
>> ---
>> fs/nfs/dir.c           | 35 +++++++++++++++++++++++------------
>> fs/nfs/inode.c         |  7 -------
>> include/linux/nfs_fs.h |  8 +++++++-
>> 3 files changed, 30 insertions(+), 20 deletions(-)
>> 
>> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
>> index 3b44bef3a1b4..454377228167 100644
>> --- a/fs/nfs/dir.c
>> +++ b/fs/nfs/dir.c
>> @@ -155,6 +155,7 @@ struct nfs_readdir_descriptor {
>>        loff_t          current_index;
>>        loff_t          prev_index;
>> 
>> +       __be32          verf[NFS_DIR_VERIFIER_SIZE];
>>        unsigned long   dir_verifier;
>>        unsigned long   timestamp;
>>        unsigned long   gencount;
>> @@ -466,15 +467,15 @@ static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc)
>> 
>> /* Fill a page with xdr information before transferring to the cache page */
>> static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
>> -                                 u64 cookie, struct page **pages,
>> -                                 size_t bufsize)
>> +                                 __be32 *verf, u64 cookie,
>> +                                 struct page **pages, size_t bufsize,
>> +                                 __be32 *verf_res)
>> {
>>        struct inode *inode = file_inode(desc->file);
>> -       __be32 verf_res[2];
>>        struct nfs_readdir_arg arg = {
>>                .dentry = file_dentry(desc->file),
>>                .cred = desc->file->f_cred,
>> -               .verf = NFS_I(inode)->cookieverf,
>> +               .verf = verf,
>>                .cookie = cookie,
>>                .pages = pages,
>>                .page_len = bufsize,
>> @@ -503,8 +504,6 @@ static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
>>        }
>>        desc->timestamp = timestamp;
>>        desc->gencount = gencount;
>> -       memcpy(NFS_I(inode)->cookieverf, res.verf,
>> -              sizeof(NFS_I(inode)->cookieverf));
>> error:
>>        return error;
>> }
>> @@ -770,11 +769,13 @@ static struct page **nfs_readdir_alloc_pages(size_t npages)
>> }
>> 
>> static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
>> -                                   struct page *page, struct inode *inode)
>> +                                   struct page *page, __be32 *verf_arg,
>> +                                   __be32 *verf_res)
>> {
>>        struct page **pages;
>>        struct nfs_entry *entry;
>>        size_t array_size;
>> +       struct inode *inode = file_inode(desc->file);
>>        size_t dtsize = NFS_SERVER(inode)->dtsize;
>>        int status = -ENOMEM;
>> 
>> @@ -801,8 +802,9 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
>> 
>>        do {
>>                unsigned int pglen;
>> -               status = nfs_readdir_xdr_filler(desc, entry->cookie,
>> -                                               pages, dtsize);
>> +               status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie,
>> +                                               pages, dtsize,
>> +                                               verf_res);
>>                if (status < 0)
>>                        break;
>> 
>> @@ -854,13 +856,15 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
>> {
>>        struct inode *inode = file_inode(desc->file);
>>        struct nfs_inode *nfsi = NFS_I(inode);
>> +       __be32 verf[NFS_DIR_VERIFIER_SIZE];
>>        int res;
>> 
>>        desc->page = nfs_readdir_page_get_cached(desc);
>>        if (!desc->page)
>>                return -ENOMEM;
>>        if (nfs_readdir_page_needs_filling(desc->page)) {
>> -               res = nfs_readdir_xdr_to_array(desc, desc->page, inode);
>> +               res = nfs_readdir_xdr_to_array(desc, desc->page,
>> +                                              nfsi->cookieverf, verf);
>>                if (res < 0) {
>>                        nfs_readdir_page_unlock_and_put_cached(desc);
>>                        if (res == -EBADCOOKIE || res == -ENOTSYNC) {
>> @@ -870,6 +874,7 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
>>                        }
>>                        return res;
>>                }
>> +               memcpy(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf));
>>        }
>>        res = nfs_readdir_search_array(desc);
>>        if (res == 0) {
>> @@ -902,6 +907,7 @@ static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
>> static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
>> {
>>        struct file     *file = desc->file;
>> +       struct nfs_inode *nfsi = NFS_I(file_inode(file));
>>        struct nfs_cache_array *array;
>>        unsigned int i = 0;
>> 
>> @@ -915,6 +921,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
>>                        desc->eof = true;
>>                        break;
>>                }
>> +               memcpy(desc->verf, nfsi->cookieverf, sizeof(desc->verf));
>>                if (i < (array->size-1))
>>                        desc->dir_cookie = array->array[i+1].cookie;
>>                else
>> @@ -949,8 +956,8 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
>> static int uncached_readdir(struct nfs_readdir_descriptor *desc)
>> {
>>        struct page     *page = NULL;
>> +       __be32          verf[NFS_DIR_VERIFIER_SIZE];
>>        int             status;
>> -       struct inode *inode = file_inode(desc->file);
>> 
>>        dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
>>                        (unsigned long long)desc->dir_cookie);
>> @@ -967,7 +974,7 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
>>        desc->duped = 0;
>> 
>>        nfs_readdir_page_init_array(page, desc->dir_cookie);
>> -       status = nfs_readdir_xdr_to_array(desc, page, inode);
>> +       status = nfs_readdir_xdr_to_array(desc, page, desc->verf, verf);
>>        if (status < 0)
>>                goto out_release;
>> 
>> @@ -1023,6 +1030,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
>>        desc->dup_cookie = dir_ctx->dup_cookie;
>>        desc->duped = dir_ctx->duped;
>>        desc->attr_gencount = dir_ctx->attr_gencount;
>> +       memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
>>        spin_unlock(&file->f_lock);
>> 
>>        do {
>> @@ -1061,6 +1069,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
>>        dir_ctx->dup_cookie = desc->dup_cookie;
>>        dir_ctx->duped = desc->duped;
>>        dir_ctx->attr_gencount = desc->attr_gencount;
>> +       memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
>>        spin_unlock(&file->f_lock);
>> 
>>        kfree(desc);
>> @@ -1101,6 +1110,8 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
>>                        dir_ctx->dir_cookie = offset;
>>                else
>>                        dir_ctx->dir_cookie = 0;
>> +               if (offset == 0)
>> +                       memset(dir_ctx->verf, 0, sizeof(dir_ctx->verf));
>>                dir_ctx->duped = 0;
>>        }
>>        spin_unlock(&filp->f_lock);
> 
> Thanks for doing these patches!
> 
> For some reason this patch does not apply but I get a problem at this hunk.
> Is there a fixup or hunk or two missing from 01/17 ?
> I'm starting at 3cea11cd5e3b (Linux 5.10-rc2).
> 
> Problem looks like it's at the spin_unlock - here's what the hunk
> looks like for me:
> fs/nfs/dir.c
> 1092         inode_lock(inode);
> 1093         offset += filp->f_pos;
> 1094         if (offset < 0) {
> 1095             inode_unlock(inode);
> 1096             return -EINVAL;
> 1097         }
> 1098     }
> 1099     if (offset != filp->f_pos) {
> 1100         filp->f_pos = offset;
> 1101         if (nfs_readdir_use_cookie(filp))
> 1102             dir_ctx->dir_cookie = offset;
> 1103         else
> 1104             dir_ctx->dir_cookie = 0;
> 1105         dir_ctx->duped = 0;
> 1106     }
> 1107     inode_unlock(inode);
> 1108     return offset;
> 1109 }
> 

Yes, it depends on the patch "[PATCH 1/2] NFS: Remove unnecessary inode locking in nfs_llseek_dir()” that I sent out to the list on Oct 30.
I can include that in future updates of this patchset.

_________________________________
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3 16/17] NFS: Improve handling of directory verifiers
  2020-11-04 21:31                                   ` Trond Myklebust
@ 2020-11-04 21:40                                     ` David Wysochanski
  0 siblings, 0 replies; 28+ messages in thread
From: David Wysochanski @ 2020-11-04 21:40 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs

On Wed, Nov 4, 2020 at 4:32 PM Trond Myklebust <trondmy@hammerspace.com> wrote:
>
>
>
> > On Nov 4, 2020, at 16:01, David Wysochanski <dwysocha@redhat.com> wrote:
> >
> > On Wed, Nov 4, 2020 at 11:28 AM <trondmy@gmail.com> wrote:
> >>
> >> From: Trond Myklebust <trond.myklebust@hammerspace.com>
> >>
> >> If the server insists on using the readdir verifiers in order to allow
> >> cookies to expire, then we should ensure that we cache the verifier
> >> with the cookie, so that we can return an error if the application
> >> tries to use the expired cookie.
> >>
> >> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> >> ---
> >> fs/nfs/dir.c           | 35 +++++++++++++++++++++++------------
> >> fs/nfs/inode.c         |  7 -------
> >> include/linux/nfs_fs.h |  8 +++++++-
> >> 3 files changed, 30 insertions(+), 20 deletions(-)
> >>
> >> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> >> index 3b44bef3a1b4..454377228167 100644
> >> --- a/fs/nfs/dir.c
> >> +++ b/fs/nfs/dir.c
> >> @@ -155,6 +155,7 @@ struct nfs_readdir_descriptor {
> >>        loff_t          current_index;
> >>        loff_t          prev_index;
> >>
> >> +       __be32          verf[NFS_DIR_VERIFIER_SIZE];
> >>        unsigned long   dir_verifier;
> >>        unsigned long   timestamp;
> >>        unsigned long   gencount;
> >> @@ -466,15 +467,15 @@ static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc)
> >>
> >> /* Fill a page with xdr information before transferring to the cache page */
> >> static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
> >> -                                 u64 cookie, struct page **pages,
> >> -                                 size_t bufsize)
> >> +                                 __be32 *verf, u64 cookie,
> >> +                                 struct page **pages, size_t bufsize,
> >> +                                 __be32 *verf_res)
> >> {
> >>        struct inode *inode = file_inode(desc->file);
> >> -       __be32 verf_res[2];
> >>        struct nfs_readdir_arg arg = {
> >>                .dentry = file_dentry(desc->file),
> >>                .cred = desc->file->f_cred,
> >> -               .verf = NFS_I(inode)->cookieverf,
> >> +               .verf = verf,
> >>                .cookie = cookie,
> >>                .pages = pages,
> >>                .page_len = bufsize,
> >> @@ -503,8 +504,6 @@ static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
> >>        }
> >>        desc->timestamp = timestamp;
> >>        desc->gencount = gencount;
> >> -       memcpy(NFS_I(inode)->cookieverf, res.verf,
> >> -              sizeof(NFS_I(inode)->cookieverf));
> >> error:
> >>        return error;
> >> }
> >> @@ -770,11 +769,13 @@ static struct page **nfs_readdir_alloc_pages(size_t npages)
> >> }
> >>
> >> static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
> >> -                                   struct page *page, struct inode *inode)
> >> +                                   struct page *page, __be32 *verf_arg,
> >> +                                   __be32 *verf_res)
> >> {
> >>        struct page **pages;
> >>        struct nfs_entry *entry;
> >>        size_t array_size;
> >> +       struct inode *inode = file_inode(desc->file);
> >>        size_t dtsize = NFS_SERVER(inode)->dtsize;
> >>        int status = -ENOMEM;
> >>
> >> @@ -801,8 +802,9 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
> >>
> >>        do {
> >>                unsigned int pglen;
> >> -               status = nfs_readdir_xdr_filler(desc, entry->cookie,
> >> -                                               pages, dtsize);
> >> +               status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie,
> >> +                                               pages, dtsize,
> >> +                                               verf_res);
> >>                if (status < 0)
> >>                        break;
> >>
> >> @@ -854,13 +856,15 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
> >> {
> >>        struct inode *inode = file_inode(desc->file);
> >>        struct nfs_inode *nfsi = NFS_I(inode);
> >> +       __be32 verf[NFS_DIR_VERIFIER_SIZE];
> >>        int res;
> >>
> >>        desc->page = nfs_readdir_page_get_cached(desc);
> >>        if (!desc->page)
> >>                return -ENOMEM;
> >>        if (nfs_readdir_page_needs_filling(desc->page)) {
> >> -               res = nfs_readdir_xdr_to_array(desc, desc->page, inode);
> >> +               res = nfs_readdir_xdr_to_array(desc, desc->page,
> >> +                                              nfsi->cookieverf, verf);
> >>                if (res < 0) {
> >>                        nfs_readdir_page_unlock_and_put_cached(desc);
> >>                        if (res == -EBADCOOKIE || res == -ENOTSYNC) {
> >> @@ -870,6 +874,7 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
> >>                        }
> >>                        return res;
> >>                }
> >> +               memcpy(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf));
> >>        }
> >>        res = nfs_readdir_search_array(desc);
> >>        if (res == 0) {
> >> @@ -902,6 +907,7 @@ static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
> >> static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
> >> {
> >>        struct file     *file = desc->file;
> >> +       struct nfs_inode *nfsi = NFS_I(file_inode(file));
> >>        struct nfs_cache_array *array;
> >>        unsigned int i = 0;
> >>
> >> @@ -915,6 +921,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
> >>                        desc->eof = true;
> >>                        break;
> >>                }
> >> +               memcpy(desc->verf, nfsi->cookieverf, sizeof(desc->verf));
> >>                if (i < (array->size-1))
> >>                        desc->dir_cookie = array->array[i+1].cookie;
> >>                else
> >> @@ -949,8 +956,8 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
> >> static int uncached_readdir(struct nfs_readdir_descriptor *desc)
> >> {
> >>        struct page     *page = NULL;
> >> +       __be32          verf[NFS_DIR_VERIFIER_SIZE];
> >>        int             status;
> >> -       struct inode *inode = file_inode(desc->file);
> >>
> >>        dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
> >>                        (unsigned long long)desc->dir_cookie);
> >> @@ -967,7 +974,7 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
> >>        desc->duped = 0;
> >>
> >>        nfs_readdir_page_init_array(page, desc->dir_cookie);
> >> -       status = nfs_readdir_xdr_to_array(desc, page, inode);
> >> +       status = nfs_readdir_xdr_to_array(desc, page, desc->verf, verf);
> >>        if (status < 0)
> >>                goto out_release;
> >>
> >> @@ -1023,6 +1030,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
> >>        desc->dup_cookie = dir_ctx->dup_cookie;
> >>        desc->duped = dir_ctx->duped;
> >>        desc->attr_gencount = dir_ctx->attr_gencount;
> >> +       memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
> >>        spin_unlock(&file->f_lock);
> >>
> >>        do {
> >> @@ -1061,6 +1069,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
> >>        dir_ctx->dup_cookie = desc->dup_cookie;
> >>        dir_ctx->duped = desc->duped;
> >>        dir_ctx->attr_gencount = desc->attr_gencount;
> >> +       memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
> >>        spin_unlock(&file->f_lock);
> >>
> >>        kfree(desc);
> >> @@ -1101,6 +1110,8 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
> >>                        dir_ctx->dir_cookie = offset;
> >>                else
> >>                        dir_ctx->dir_cookie = 0;
> >> +               if (offset == 0)
> >> +                       memset(dir_ctx->verf, 0, sizeof(dir_ctx->verf));
> >>                dir_ctx->duped = 0;
> >>        }
> >>        spin_unlock(&filp->f_lock);
> >
> > Thanks for doing these patches!
> >
> > For some reason this patch does not apply but I get a problem at this hunk.
> > Is there a fixup or hunk or two missing from 01/17 ?
> > I'm starting at 3cea11cd5e3b (Linux 5.10-rc2).
> >
> > Problem looks like it's at the spin_unlock - here's what the hunk
> > looks like for me:
> > fs/nfs/dir.c
> > 1092         inode_lock(inode);
> > 1093         offset += filp->f_pos;
> > 1094         if (offset < 0) {
> > 1095             inode_unlock(inode);
> > 1096             return -EINVAL;
> > 1097         }
> > 1098     }
> > 1099     if (offset != filp->f_pos) {
> > 1100         filp->f_pos = offset;
> > 1101         if (nfs_readdir_use_cookie(filp))
> > 1102             dir_ctx->dir_cookie = offset;
> > 1103         else
> > 1104             dir_ctx->dir_cookie = 0;
> > 1105         dir_ctx->duped = 0;
> > 1106     }
> > 1107     inode_unlock(inode);
> > 1108     return offset;
> > 1109 }
> >
>
> Yes, it depends on the patch "[PATCH 1/2] NFS: Remove unnecessary inode locking in nfs_llseek_dir()” that I sent out to the list on Oct 30.
> I can include that in future updates of this patchset.
>

FWIW, the statement of the dependency is fine with me - I grabbed the
other two patches and now applies cleanly.
Thanks!


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3 05/17] NFS: Don't discard readdir results
  2020-11-04 16:16         ` [PATCH v3 05/17] NFS: Don't discard readdir results trondmy
  2020-11-04 16:16           ` [PATCH v3 06/17] NFS: Remove unnecessary kmap in nfs_readdir_xdr_to_array() trondmy
@ 2020-11-06 13:30           ` David Wysochanski
  2020-11-06 15:05             ` Trond Myklebust
  1 sibling, 1 reply; 28+ messages in thread
From: David Wysochanski @ 2020-11-06 13:30 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs

On Wed, Nov 4, 2020 at 11:27 AM <trondmy@gmail.com> wrote:
>
> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>
> If a readdir call returns more data than we can fit into one page
> cache page, then allocate a new one for that data rather than
> discarding the data.
>
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
>  fs/nfs/dir.c | 46 ++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 42 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index 842f69120a01..f7248145c333 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -320,6 +320,26 @@ static void nfs_readdir_page_set_eof(struct page *page)
>         kunmap_atomic(array);
>  }
>
> +static void nfs_readdir_page_unlock_and_put(struct page *page)
> +{
> +       unlock_page(page);
> +       put_page(page);
> +}
> +
> +static struct page *nfs_readdir_page_get_next(struct address_space *mapping,
> +                                             pgoff_t index, u64 cookie)
> +{
> +       struct page *page;
> +
> +       page = nfs_readdir_page_get_locked(mapping, index, cookie);
> +       if (page) {
> +               if (nfs_readdir_page_last_cookie(page) == cookie)
> +                       return page;
> +               nfs_readdir_page_unlock_and_put(page);
> +       }
> +       return NULL;
> +}
> +
>  static inline
>  int is_32bit_api(void)
>  {
> @@ -637,13 +657,15 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
>  }
>
>  /* Perform conversion from xdr to cache array */
> -static
> -int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
> -                               struct page **xdr_pages, struct page *page, unsigned int buflen)
> +static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
> +                                  struct nfs_entry *entry,
> +                                  struct page **xdr_pages,
> +                                  struct page *fillme, unsigned int buflen)
>  {
> +       struct address_space *mapping = desc->file->f_mapping;
>         struct xdr_stream stream;
>         struct xdr_buf buf;
> -       struct page *scratch;
> +       struct page *scratch, *new, *page = fillme;
>         int status;
>
>         scratch = alloc_page(GFP_KERNEL);
> @@ -666,6 +688,19 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
>                                         desc->dir_verifier);
>
>                 status = nfs_readdir_add_to_array(entry, page);
> +               if (status != -ENOSPC)
> +                       continue;
> +
> +               if (page->mapping != mapping)
> +                       break;
> +               new = nfs_readdir_page_get_next(mapping, page->index + 1,
> +                                               entry->prev_cookie);
> +               if (!new)
> +                       break;
> +               if (page != fillme)
> +                       nfs_readdir_page_unlock_and_put(page);
> +               page = new;
> +               status = nfs_readdir_add_to_array(entry, page);
>         } while (!status && !entry->eof);
>
>         switch (status) {
> @@ -681,6 +716,9 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
>                 break;
>         }
>
> +       if (page != fillme)
> +               nfs_readdir_page_unlock_and_put(page);
> +
>         put_page(scratch);
>         return status;
>  }
> --
> 2.28.0
>

It doesn't look like this handles uncached_readdir.  Were you planning
on addressing that somehow, or should we think about something like
this to move dtsize up as a parameter to nfs_readdir_xdr_to_array(),
and force uncached_readdir() to 1 page:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b6c3501e8f61..ca30e2dbb9c3 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -791,13 +791,12 @@ static struct page
**nfs_readdir_alloc_pages(size_t npages)

 static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
                                    struct page *page, __be32 *verf_arg,
-                                   __be32 *verf_res)
+                                   __be32 *verf_res, size_t dtsize)
 {
        struct page **pages;
        struct nfs_entry *entry;
        size_t array_size;
        struct inode *inode = file_inode(desc->file);
-       size_t dtsize = NFS_SERVER(inode)->dtsize;
        int status = -ENOMEM;

        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
@@ -879,13 +878,15 @@ static int find_and_lock_cache_page(struct
nfs_readdir_descriptor *desc)
        struct nfs_inode *nfsi = NFS_I(inode);
        __be32 verf[NFS_DIR_VERIFIER_SIZE];
        int res;
+       size_t dtsize = NFS_SERVER(inode)->dtsize;

        desc->page = nfs_readdir_page_get_cached(desc);
        if (!desc->page)
                return -ENOMEM;
        if (nfs_readdir_page_needs_filling(desc->page)) {
                res = nfs_readdir_xdr_to_array(desc, desc->page,
-                                              nfsi->cookieverf, verf);
+                                              nfsi->cookieverf, verf,
+                                              dtsize);
                if (res < 0) {
                        nfs_readdir_page_unlock_and_put_cached(desc);
                        if (res == -EBADCOOKIE || res == -ENOTSYNC) {
@@ -995,7 +996,8 @@ static int uncached_readdir(struct
nfs_readdir_descriptor *desc)
        desc->duped = 0;

        nfs_readdir_page_init_array(page, desc->dir_cookie);
-       status = nfs_readdir_xdr_to_array(desc, page, desc->verf, verf);
+       status = nfs_readdir_xdr_to_array(desc, page, desc->verf, verf,
+                                         PAGE_SIZE);
        if (status < 0)
                goto out_release;


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v3 05/17] NFS: Don't discard readdir results
  2020-11-06 13:30           ` [PATCH v3 05/17] NFS: Don't discard readdir results David Wysochanski
@ 2020-11-06 15:05             ` Trond Myklebust
  2020-11-06 18:00               ` David Wysochanski
  0 siblings, 1 reply; 28+ messages in thread
From: Trond Myklebust @ 2020-11-06 15:05 UTC (permalink / raw)
  To: dwysocha; +Cc: linux-nfs

On Fri, 2020-11-06 at 08:30 -0500, David Wysochanski wrote:
> On Wed, Nov 4, 2020 at 11:27 AM <trondmy@gmail.com> wrote:
> > 
> > From: Trond Myklebust <trond.myklebust@hammerspace.com>
> > 
> > If a readdir call returns more data than we can fit into one page
> > cache page, then allocate a new one for that data rather than
> > discarding the data.
> > 
> > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> > ---
> >  fs/nfs/dir.c | 46 ++++++++++++++++++++++++++++++++++++++++++----
> >  1 file changed, 42 insertions(+), 4 deletions(-)
> > 
> > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> > index 842f69120a01..f7248145c333 100644
> > --- a/fs/nfs/dir.c
> > +++ b/fs/nfs/dir.c
> > @@ -320,6 +320,26 @@ static void nfs_readdir_page_set_eof(struct
> > page *page)
> >         kunmap_atomic(array);
> >  }
> > 
> > +static void nfs_readdir_page_unlock_and_put(struct page *page)
> > +{
> > +       unlock_page(page);
> > +       put_page(page);
> > +}
> > +
> > +static struct page *nfs_readdir_page_get_next(struct address_space
> > *mapping,
> > +                                             pgoff_t index, u64
> > cookie)
> > +{
> > +       struct page *page;
> > +
> > +       page = nfs_readdir_page_get_locked(mapping, index, cookie);
> > +       if (page) {
> > +               if (nfs_readdir_page_last_cookie(page) == cookie)
> > +                       return page;
> > +               nfs_readdir_page_unlock_and_put(page);
> > +       }
> > +       return NULL;
> > +}
> > +
> >  static inline
> >  int is_32bit_api(void)
> >  {
> > @@ -637,13 +657,15 @@ void nfs_prime_dcache(struct dentry *parent,
> > struct nfs_entry *entry,
> >  }
> > 
> >  /* Perform conversion from xdr to cache array */
> > -static
> > -int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct
> > nfs_entry *entry,
> > -                               struct page **xdr_pages, struct
> > page *page, unsigned int buflen)
> > +static int nfs_readdir_page_filler(struct nfs_readdir_descriptor
> > *desc,
> > +                                  struct nfs_entry *entry,
> > +                                  struct page **xdr_pages,
> > +                                  struct page *fillme, unsigned
> > int buflen)
> >  {
> > +       struct address_space *mapping = desc->file->f_mapping;
> >         struct xdr_stream stream;
> >         struct xdr_buf buf;
> > -       struct page *scratch;
> > +       struct page *scratch, *new, *page = fillme;
> >         int status;
> > 
> >         scratch = alloc_page(GFP_KERNEL);
> > @@ -666,6 +688,19 @@ int
> > nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct
> > nfs_entry *en
> >                                         desc->dir_verifier);
> > 
> >                 status = nfs_readdir_add_to_array(entry, page);
> > +               if (status != -ENOSPC)
> > +                       continue;
> > +
> > +               if (page->mapping != mapping)
> > +                       break;
> > +               new = nfs_readdir_page_get_next(mapping, page-
> > >index + 1,
> > +                                               entry-
> > >prev_cookie);
> > +               if (!new)
> > +                       break;
> > +               if (page != fillme)
> > +                       nfs_readdir_page_unlock_and_put(page);
> > +               page = new;
> > +               status = nfs_readdir_add_to_array(entry, page);
> >         } while (!status && !entry->eof);
> > 
> >         switch (status) {
> > @@ -681,6 +716,9 @@ int
> > nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct
> > nfs_entry *en
> >                 break;
> >         }
> > 
> > +       if (page != fillme)
> > +               nfs_readdir_page_unlock_and_put(page);
> > +
> >         put_page(scratch);
> >         return status;
> >  }
> > --
> > 2.28.0
> > 
> 
> It doesn't look like this handles uncached_readdir.  Were you
> planning
> on addressing that somehow, or should we think about something like
> this to move dtsize up as a parameter to nfs_readdir_xdr_to_array(),
> and force uncached_readdir() to 1 page:
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index b6c3501e8f61..ca30e2dbb9c3 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -791,13 +791,12 @@ static struct page
> **nfs_readdir_alloc_pages(size_t npages)
> 
>  static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor
> *desc,
>                                     struct page *page, __be32
> *verf_arg,
> -                                   __be32 *verf_res)
> +                                   __be32 *verf_res, size_t dtsize)
>  {
>         struct page **pages;
>         struct nfs_entry *entry;
>         size_t array_size;
>         struct inode *inode = file_inode(desc->file);
> -       size_t dtsize = NFS_SERVER(inode)->dtsize;
>         int status = -ENOMEM;
> 
>         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
> @@ -879,13 +878,15 @@ static int find_and_lock_cache_page(struct
> nfs_readdir_descriptor *desc)
>         struct nfs_inode *nfsi = NFS_I(inode);
>         __be32 verf[NFS_DIR_VERIFIER_SIZE];
>         int res;
> +       size_t dtsize = NFS_SERVER(inode)->dtsize;
> 
>         desc->page = nfs_readdir_page_get_cached(desc);
>         if (!desc->page)
>                 return -ENOMEM;
>         if (nfs_readdir_page_needs_filling(desc->page)) {
>                 res = nfs_readdir_xdr_to_array(desc, desc->page,
> -                                              nfsi->cookieverf,
> verf);
> +                                              nfsi->cookieverf,
> verf,
> +                                              dtsize);
>                 if (res < 0) {
>                         nfs_readdir_page_unlock_and_put_cached(desc);
>                         if (res == -EBADCOOKIE || res == -ENOTSYNC) {
> @@ -995,7 +996,8 @@ static int uncached_readdir(struct
> nfs_readdir_descriptor *desc)
>         desc->duped = 0;
> 
>         nfs_readdir_page_init_array(page, desc->dir_cookie);
> -       status = nfs_readdir_xdr_to_array(desc, page, desc->verf,
> verf);
> +       status = nfs_readdir_xdr_to_array(desc, page, desc->verf,
> verf,
> +                                         PAGE_SIZE);
>         if (status < 0)
>                 goto out_release;
> 

Actually for uncached readdir, I was thinking we might want to convert
nfs_readdir_xdr_to_array() and nfs_readdir_page_filler() to take an
array of pages + buffer size.
IOW: convert uncached_readdir() to allocate an array of pages, and pass
in a 'struct page **' + a buffer length.

I don't like the idea of passing in a dtsize because that restricts the
size of the READDIR RPC request buffer instead of restricting the
number of entries the server returns. For any given buffer size, that
number of entries fluctuates wildly depending on the filenames in that
directory and their differing lengths, whereas your page can take a
fixed number of entries irrespective of the filename lengths (in fact
it can always take 127 entries on an x86_64).

It is true that the number of entries that nfs_do_filldir() can handle
also depends on the filename length, but we don't have any information
in the filesystem about the buffer size that was passed in to the
getdents() system call of how much space remains in that buffer. All
that information is hidden in the opaque 'struct dir_context'. So for
that reason, we can't use that information to set a dtsize either.

-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3 05/17] NFS: Don't discard readdir results
  2020-11-06 15:05             ` Trond Myklebust
@ 2020-11-06 18:00               ` David Wysochanski
  0 siblings, 0 replies; 28+ messages in thread
From: David Wysochanski @ 2020-11-06 18:00 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs

On Fri, Nov 6, 2020 at 10:05 AM Trond Myklebust <trondmy@hammerspace.com> wrote:
>
> On Fri, 2020-11-06 at 08:30 -0500, David Wysochanski wrote:
> > On Wed, Nov 4, 2020 at 11:27 AM <trondmy@gmail.com> wrote:
> > >
> > > From: Trond Myklebust <trond.myklebust@hammerspace.com>
> > >
> > > If a readdir call returns more data than we can fit into one page
> > > cache page, then allocate a new one for that data rather than
> > > discarding the data.
> > >
> > > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> > > ---
> > >  fs/nfs/dir.c | 46 ++++++++++++++++++++++++++++++++++++++++++----
> > >  1 file changed, 42 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> > > index 842f69120a01..f7248145c333 100644
> > > --- a/fs/nfs/dir.c
> > > +++ b/fs/nfs/dir.c
> > > @@ -320,6 +320,26 @@ static void nfs_readdir_page_set_eof(struct
> > > page *page)
> > >         kunmap_atomic(array);
> > >  }
> > >
> > > +static void nfs_readdir_page_unlock_and_put(struct page *page)
> > > +{
> > > +       unlock_page(page);
> > > +       put_page(page);
> > > +}
> > > +
> > > +static struct page *nfs_readdir_page_get_next(struct address_space
> > > *mapping,
> > > +                                             pgoff_t index, u64
> > > cookie)
> > > +{
> > > +       struct page *page;
> > > +
> > > +       page = nfs_readdir_page_get_locked(mapping, index, cookie);
> > > +       if (page) {
> > > +               if (nfs_readdir_page_last_cookie(page) == cookie)
> > > +                       return page;
> > > +               nfs_readdir_page_unlock_and_put(page);
> > > +       }
> > > +       return NULL;
> > > +}
> > > +
> > >  static inline
> > >  int is_32bit_api(void)
> > >  {
> > > @@ -637,13 +657,15 @@ void nfs_prime_dcache(struct dentry *parent,
> > > struct nfs_entry *entry,
> > >  }
> > >
> > >  /* Perform conversion from xdr to cache array */
> > > -static
> > > -int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct
> > > nfs_entry *entry,
> > > -                               struct page **xdr_pages, struct
> > > page *page, unsigned int buflen)
> > > +static int nfs_readdir_page_filler(struct nfs_readdir_descriptor
> > > *desc,
> > > +                                  struct nfs_entry *entry,
> > > +                                  struct page **xdr_pages,
> > > +                                  struct page *fillme, unsigned
> > > int buflen)
> > >  {
> > > +       struct address_space *mapping = desc->file->f_mapping;
> > >         struct xdr_stream stream;
> > >         struct xdr_buf buf;
> > > -       struct page *scratch;
> > > +       struct page *scratch, *new, *page = fillme;
> > >         int status;
> > >
> > >         scratch = alloc_page(GFP_KERNEL);
> > > @@ -666,6 +688,19 @@ int
> > > nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct
> > > nfs_entry *en
> > >                                         desc->dir_verifier);
> > >
> > >                 status = nfs_readdir_add_to_array(entry, page);
> > > +               if (status != -ENOSPC)
> > > +                       continue;
> > > +
> > > +               if (page->mapping != mapping)
> > > +                       break;
> > > +               new = nfs_readdir_page_get_next(mapping, page-
> > > >index + 1,
> > > +                                               entry-
> > > >prev_cookie);
> > > +               if (!new)
> > > +                       break;
> > > +               if (page != fillme)
> > > +                       nfs_readdir_page_unlock_and_put(page);
> > > +               page = new;
> > > +               status = nfs_readdir_add_to_array(entry, page);
> > >         } while (!status && !entry->eof);
> > >
> > >         switch (status) {
> > > @@ -681,6 +716,9 @@ int
> > > nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct
> > > nfs_entry *en
> > >                 break;
> > >         }
> > >
> > > +       if (page != fillme)
> > > +               nfs_readdir_page_unlock_and_put(page);
> > > +
> > >         put_page(scratch);
> > >         return status;
> > >  }
> > > --
> > > 2.28.0
> > >
> >
> > It doesn't look like this handles uncached_readdir.  Were you
> > planning
> > on addressing that somehow, or should we think about something like
> > this to move dtsize up as a parameter to nfs_readdir_xdr_to_array(),
> > and force uncached_readdir() to 1 page:
> > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> > index b6c3501e8f61..ca30e2dbb9c3 100644
> > --- a/fs/nfs/dir.c
> > +++ b/fs/nfs/dir.c
> > @@ -791,13 +791,12 @@ static struct page
> > **nfs_readdir_alloc_pages(size_t npages)
> >
> >  static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor
> > *desc,
> >                                     struct page *page, __be32
> > *verf_arg,
> > -                                   __be32 *verf_res)
> > +                                   __be32 *verf_res, size_t dtsize)
> >  {
> >         struct page **pages;
> >         struct nfs_entry *entry;
> >         size_t array_size;
> >         struct inode *inode = file_inode(desc->file);
> > -       size_t dtsize = NFS_SERVER(inode)->dtsize;
> >         int status = -ENOMEM;
> >
> >         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
> > @@ -879,13 +878,15 @@ static int find_and_lock_cache_page(struct
> > nfs_readdir_descriptor *desc)
> >         struct nfs_inode *nfsi = NFS_I(inode);
> >         __be32 verf[NFS_DIR_VERIFIER_SIZE];
> >         int res;
> > +       size_t dtsize = NFS_SERVER(inode)->dtsize;
> >
> >         desc->page = nfs_readdir_page_get_cached(desc);
> >         if (!desc->page)
> >                 return -ENOMEM;
> >         if (nfs_readdir_page_needs_filling(desc->page)) {
> >                 res = nfs_readdir_xdr_to_array(desc, desc->page,
> > -                                              nfsi->cookieverf,
> > verf);
> > +                                              nfsi->cookieverf,
> > verf,
> > +                                              dtsize);
> >                 if (res < 0) {
> >                         nfs_readdir_page_unlock_and_put_cached(desc);
> >                         if (res == -EBADCOOKIE || res == -ENOTSYNC) {
> > @@ -995,7 +996,8 @@ static int uncached_readdir(struct
> > nfs_readdir_descriptor *desc)
> >         desc->duped = 0;
> >
> >         nfs_readdir_page_init_array(page, desc->dir_cookie);
> > -       status = nfs_readdir_xdr_to_array(desc, page, desc->verf,
> > verf);
> > +       status = nfs_readdir_xdr_to_array(desc, page, desc->verf,
> > verf,
> > +                                         PAGE_SIZE);
> >         if (status < 0)
> >                 goto out_release;
> >
>
> Actually for uncached readdir, I was thinking we might want to convert
> nfs_readdir_xdr_to_array() and nfs_readdir_page_filler() to take an
> array of pages + buffer size.
> IOW: convert uncached_readdir() to allocate an array of pages, and pass
> in a 'struct page **' + a buffer length.
>
Yes I agree and this looks more like the right way to fix it rather than
this single page approach.


> I don't like the idea of passing in a dtsize because that restricts the
> size of the READDIR RPC request buffer instead of restricting the
> number of entries the server returns. For any given buffer size, that
> number of entries fluctuates wildly depending on the filenames in that
> directory and their differing lengths, whereas your page can take a
> fixed number of entries irrespective of the filename lengths (in fact
> it can always take 127 entries on an x86_64).
>
> It is true that the number of entries that nfs_do_filldir() can handle
> also depends on the filename length, but we don't have any information
> in the filesystem about the buffer size that was passed in to the
> getdents() system call of how much space remains in that buffer. All
> that information is hidden in the opaque 'struct dir_context'. So for
> that reason, we can't use that information to set a dtsize either.
>
> --
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> trond.myklebust@hammerspace.com
>
>


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3 00/17] Readdir enhancements
  2020-11-04 16:16 [PATCH v3 00/17] Readdir enhancements trondmy
  2020-11-04 16:16 ` [PATCH v3 01/17] NFS: Ensure contents of struct nfs_open_dir_context are consistent trondmy
@ 2020-11-07 12:49 ` Benjamin Coddington
  2020-11-07 14:23   ` Trond Myklebust
  2020-11-08 18:15   ` Mkrtchyan, Tigran
  1 sibling, 2 replies; 28+ messages in thread
From: Benjamin Coddington @ 2020-11-07 12:49 UTC (permalink / raw)
  To: trondmy; +Cc: linux-nfs

On 4 Nov 2020, at 11:16, trondmy@gmail.com wrote:

> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>
> The following patch series performs a number of cleanups on the readdir
> code.
> It also adds support for 1MB readdir RPC calls on-the-wire, and modifies
> the caching code to ensure that we cache the entire contents of that
> 1MB call (instead of discarding the data that doesn't fit into a single
> page).
>
> v2: Fix the handling of the NFSv3/v4 directory verifier
> v3: Optimise searching when the readdir cookies are seen to be ordered

Hi Trond, thanks for these.

I did a bit of testing with these on 4-core/4G client listing 1.5M files
with READDIR.  I compared v5.10-rc2 without/with this set.

+------+     v5.10.rc-2      +--+ this v3 patch set  +
| run  |  time   | rpc calls |  |  time  | rpc calls |

nfsv3 with dtsize 262144:
+------+---------+-----------+--+--------+-----------+
| 1    | 81.583  | 14710     |  | 53.568 | 215       |
| 2    | 81.147  | 14710     |  | 50.781 | 215       |
| 3    | 81.61   | 14710     |  | 50.514 | 215       |
| 4    | 82.405  | 14710     |  | 50.746 | 215       |
| 5    | 82.066  | 14710     |  | 50.397 | 215       |
| 6    | 82.395  | 14710     |  | 50.892 | 215       |
| 7    | 81.657  | 14710     |  | 50.882 | 215       |
| 8    | 81.555  | 14710     |  | 50.981 | 215       |
| 9    | 81.421  | 14710     |  | 50.558 | 215       |
| 10   | 81.472  | 14710     |  | 50.588 | 215       |

nfsv3 with dtsize 1048576:
+------+---------+-----------+--+--------+-----------+
| 1    | 81.563  | 14710     |  | 52.692 | 61        |
| 2    | 82.123  | 14710     |  | 49.934 | 61        |
| 3    | 81.714  | 14710     |  | 50.158 | 61        |
| 4    | 81.707  | 14710     |  | 50.083 | 61        |
| 5    | 81.44   | 14710     |  | 50.045 | 61        |
| 6    | 81.685  | 14710     |  | 50.021 | 61        |
| 7    | 81.17   | 14710     |  | 50.131 | 61        |
| 8    | 81.366  | 14710     |  | 49.928 | 61        |
| 9    | 81.067  | 14710     |  | 50.081 | 61        |
| 10   | 81.524  | 14710     |  | 50.442 | 61        |

nfsv4 with dtsize 32768:
+------+---------+-----------+--+--------+-----------+
| 1    | 99.534  | 14712     |  | 79.461 | 331       |
| 2    | 98.998  | 14712     |  | 79.338 | 331       |
| 3    | 99.462  | 14712     |  | 81.101 | 331       |
| 4    | 99.891  | 14712     |  | 78.888 | 331       |
| 5    | 99.516  | 14712     |  | 81.147 | 331       |
| 6    | 98.649  | 14712     |  | 83.084 | 331       |
| 7    | 101.159 | 14712     |  | 80.461 | 331       |
| 8    | 100.402 | 14712     |  | 79.003 | 331       |
| 9    | 98.548  | 14712     |  | 80.619 | 331       |
| 10   | 97.456  | 14712     |  | 81.317 | 331       |

nfsv4 with dtsize 1048576:
+------+---------+-----------+--+--------+-----------+
| 1    | 100.357 | 14712     |  | 78.976 | 91        |
| 2    | 99.61   | 14712     |  | 79.328 | 91        |
| 3    | 101.095 | 14712     |  | 80.649 | 91        |
| 4    | 107.904 | 14712     |  | 78.285 | 91        |
| 5    | 103.665 | 14712     |  | 79.258 | 91        |
| 6    | 98.877  | 14712     |  | 78.817 | 91        |
| 7    | 99.567  | 14712     |  | 81.11  | 91        |
| 8    | 99.096  | 14712     |  | 80.296 | 91        |
| 9    | 100.124 | 14712     |  | 78.865 | 91        |
| 10   | 100.603 | 14712     |  | 79.143 | 91        |

These look great.  Feel free to add either/both of my:
Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
Tested-by: Benjamin Coddington <bcodding@redhat.com>

Ben


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3 00/17] Readdir enhancements
  2020-11-07 12:49 ` [PATCH v3 00/17] Readdir enhancements Benjamin Coddington
@ 2020-11-07 14:23   ` Trond Myklebust
  2020-11-08 11:05     ` Benjamin Coddington
  2020-11-08 18:15   ` Mkrtchyan, Tigran
  1 sibling, 1 reply; 28+ messages in thread
From: Trond Myklebust @ 2020-11-07 14:23 UTC (permalink / raw)
  To: bcodding; +Cc: linux-nfs

On Sat, 2020-11-07 at 07:49 -0500, Benjamin Coddington wrote:
> On 4 Nov 2020, at 11:16, trondmy@gmail.com wrote:
> 
> > From: Trond Myklebust <trond.myklebust@hammerspace.com>
> > 
> > The following patch series performs a number of cleanups on the
> > readdir
> > code.
> > It also adds support for 1MB readdir RPC calls on-the-wire, and
> > modifies
> > the caching code to ensure that we cache the entire contents of
> > that
> > 1MB call (instead of discarding the data that doesn't fit into a
> > single
> > page).
> > 
> > v2: Fix the handling of the NFSv3/v4 directory verifier
> > v3: Optimise searching when the readdir cookies are seen to be
> > ordered
> 
> Hi Trond, thanks for these.
> 
> I did a bit of testing with these on 4-core/4G client listing 1.5M
> files
> with READDIR.  I compared v5.10-rc2 without/with this set.
> 
> +------+     v5.10.rc-2      +--+ this v3 patch set  +
> > run  |  time   | rpc calls |  |  time  | rpc calls |
> 
> nfsv3 with dtsize 262144:
> +------+---------+-----------+--+--------+-----------+
> > 1    | 81.583  | 14710     |  | 53.568 | 215       |
> > 2    | 81.147  | 14710     |  | 50.781 | 215       |
> > 3    | 81.61   | 14710     |  | 50.514 | 215       |
> > 4    | 82.405  | 14710     |  | 50.746 | 215       |
> > 5    | 82.066  | 14710     |  | 50.397 | 215       |
> > 6    | 82.395  | 14710     |  | 50.892 | 215       |
> > 7    | 81.657  | 14710     |  | 50.882 | 215       |
> > 8    | 81.555  | 14710     |  | 50.981 | 215       |
> > 9    | 81.421  | 14710     |  | 50.558 | 215       |
> > 10   | 81.472  | 14710     |  | 50.588 | 215       |
> 
> nfsv3 with dtsize 1048576:
> +------+---------+-----------+--+--------+-----------+
> > 1    | 81.563  | 14710     |  | 52.692 | 61        |
> > 2    | 82.123  | 14710     |  | 49.934 | 61        |
> > 3    | 81.714  | 14710     |  | 50.158 | 61        |
> > 4    | 81.707  | 14710     |  | 50.083 | 61        |
> > 5    | 81.44   | 14710     |  | 50.045 | 61        |
> > 6    | 81.685  | 14710     |  | 50.021 | 61        |
> > 7    | 81.17   | 14710     |  | 50.131 | 61        |
> > 8    | 81.366  | 14710     |  | 49.928 | 61        |
> > 9    | 81.067  | 14710     |  | 50.081 | 61        |
> > 10   | 81.524  | 14710     |  | 50.442 | 61        |
> 
> nfsv4 with dtsize 32768:
> +------+---------+-----------+--+--------+-----------+
> > 1    | 99.534  | 14712     |  | 79.461 | 331       |
> > 2    | 98.998  | 14712     |  | 79.338 | 331       |
> > 3    | 99.462  | 14712     |  | 81.101 | 331       |
> > 4    | 99.891  | 14712     |  | 78.888 | 331       |
> > 5    | 99.516  | 14712     |  | 81.147 | 331       |
> > 6    | 98.649  | 14712     |  | 83.084 | 331       |
> > 7    | 101.159 | 14712     |  | 80.461 | 331       |
> > 8    | 100.402 | 14712     |  | 79.003 | 331       |
> > 9    | 98.548  | 14712     |  | 80.619 | 331       |
> > 10   | 97.456  | 14712     |  | 81.317 | 331       |
> 
> nfsv4 with dtsize 1048576:
> +------+---------+-----------+--+--------+-----------+
> > 1    | 100.357 | 14712     |  | 78.976 | 91        |
> > 2    | 99.61   | 14712     |  | 79.328 | 91        |
> > 3    | 101.095 | 14712     |  | 80.649 | 91        |
> > 4    | 107.904 | 14712     |  | 78.285 | 91        |
> > 5    | 103.665 | 14712     |  | 79.258 | 91        |
> > 6    | 98.877  | 14712     |  | 78.817 | 91        |
> > 7    | 99.567  | 14712     |  | 81.11  | 91        |
> > 8    | 99.096  | 14712     |  | 80.296 | 91        |
> > 9    | 100.124 | 14712     |  | 78.865 | 91        |
> > 10   | 100.603 | 14712     |  | 79.143 | 91        |
> 
> These look great.  Feel free to add either/both of my:
> Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
> Tested-by: Benjamin Coddington <bcodding@redhat.com>

Thanks again for testing! I missed this email before sending out v4,
but since that only adds 2 new patches to the series to deal with
Dave's v. large changing directory case, I assume I can apply the above
tags to the rest anyway as they have not changed?

-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3 00/17] Readdir enhancements
  2020-11-07 14:23   ` Trond Myklebust
@ 2020-11-08 11:05     ` Benjamin Coddington
  0 siblings, 0 replies; 28+ messages in thread
From: Benjamin Coddington @ 2020-11-08 11:05 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs

On 7 Nov 2020, at 9:23, Trond Myklebust wrote:

> On Sat, 2020-11-07 at 07:49 -0500, Benjamin Coddington wrote:
>> On 4 Nov 2020, at 11:16, trondmy@gmail.com wrote:
>>
>>> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>>>
>>> The following patch series performs a number of cleanups on the
>>> readdir
>>> code.
>>> It also adds support for 1MB readdir RPC calls on-the-wire, and
>>> modifies
>>> the caching code to ensure that we cache the entire contents of
>>> that
>>> 1MB call (instead of discarding the data that doesn't fit into a
>>> single
>>> page).
>>>
>>> v2: Fix the handling of the NFSv3/v4 directory verifier
>>> v3: Optimise searching when the readdir cookies are seen to be
>>> ordered
>>
>> Hi Trond, thanks for these.
>>
>> I did a bit of testing with these on 4-core/4G client listing 1.5M
>> files
>> with READDIR.  I compared v5.10-rc2 without/with this set.
>>
>> +------+     v5.10.rc-2      +--+ this v3 patch set  +
>>> run  |  time   | rpc calls |  |  time  | rpc calls |
>>
>> nfsv3 with dtsize 262144:
>> +------+---------+-----------+--+--------+-----------+
>>> 1    | 81.583  | 14710     |  | 53.568 | 215       |
>>> 2    | 81.147  | 14710     |  | 50.781 | 215       |
>>> 3    | 81.61   | 14710     |  | 50.514 | 215       |
>>> 4    | 82.405  | 14710     |  | 50.746 | 215       |
>>> 5    | 82.066  | 14710     |  | 50.397 | 215       |
>>> 6    | 82.395  | 14710     |  | 50.892 | 215       |
>>> 7    | 81.657  | 14710     |  | 50.882 | 215       |
>>> 8    | 81.555  | 14710     |  | 50.981 | 215       |
>>> 9    | 81.421  | 14710     |  | 50.558 | 215       |
>>> 10   | 81.472  | 14710     |  | 50.588 | 215       |
>>
>> nfsv3 with dtsize 1048576:
>> +------+---------+-----------+--+--------+-----------+
>>> 1    | 81.563  | 14710     |  | 52.692 | 61        |
>>> 2    | 82.123  | 14710     |  | 49.934 | 61        |
>>> 3    | 81.714  | 14710     |  | 50.158 | 61        |
>>> 4    | 81.707  | 14710     |  | 50.083 | 61        |
>>> 5    | 81.44   | 14710     |  | 50.045 | 61        |
>>> 6    | 81.685  | 14710     |  | 50.021 | 61        |
>>> 7    | 81.17   | 14710     |  | 50.131 | 61        |
>>> 8    | 81.366  | 14710     |  | 49.928 | 61        |
>>> 9    | 81.067  | 14710     |  | 50.081 | 61        |
>>> 10   | 81.524  | 14710     |  | 50.442 | 61        |
>>
>> nfsv4 with dtsize 32768:
>> +------+---------+-----------+--+--------+-----------+
>>> 1    | 99.534  | 14712     |  | 79.461 | 331       |
>>> 2    | 98.998  | 14712     |  | 79.338 | 331       |
>>> 3    | 99.462  | 14712     |  | 81.101 | 331       |
>>> 4    | 99.891  | 14712     |  | 78.888 | 331       |
>>> 5    | 99.516  | 14712     |  | 81.147 | 331       |
>>> 6    | 98.649  | 14712     |  | 83.084 | 331       |
>>> 7    | 101.159 | 14712     |  | 80.461 | 331       |
>>> 8    | 100.402 | 14712     |  | 79.003 | 331       |
>>> 9    | 98.548  | 14712     |  | 80.619 | 331       |
>>> 10   | 97.456  | 14712     |  | 81.317 | 331       |
>>
>> nfsv4 with dtsize 1048576:
>> +------+---------+-----------+--+--------+-----------+
>>> 1    | 100.357 | 14712     |  | 78.976 | 91        |
>>> 2    | 99.61   | 14712     |  | 79.328 | 91        |
>>> 3    | 101.095 | 14712     |  | 80.649 | 91        |
>>> 4    | 107.904 | 14712     |  | 78.285 | 91        |
>>> 5    | 103.665 | 14712     |  | 79.258 | 91        |
>>> 6    | 98.877  | 14712     |  | 78.817 | 91        |
>>> 7    | 99.567  | 14712     |  | 81.11  | 91        |
>>> 8    | 99.096  | 14712     |  | 80.296 | 91        |
>>> 9    | 100.124 | 14712     |  | 78.865 | 91        |
>>> 10   | 100.603 | 14712     |  | 79.143 | 91        |
>>
>> These look great.  Feel free to add either/both of my:
>> Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
>> Tested-by: Benjamin Coddington <bcodding@redhat.com>
>
> Thanks again for testing! I missed this email before sending out v4,
> but since that only adds 2 new patches to the series to deal with
> Dave's v. large changing directory case, I assume I can apply the above
> tags to the rest anyway as they have not changed?

Yes, I'll check those out too.

Ben


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3 00/17] Readdir enhancements
  2020-11-07 12:49 ` [PATCH v3 00/17] Readdir enhancements Benjamin Coddington
  2020-11-07 14:23   ` Trond Myklebust
@ 2020-11-08 18:15   ` Mkrtchyan, Tigran
  1 sibling, 0 replies; 28+ messages in thread
From: Mkrtchyan, Tigran @ 2020-11-08 18:15 UTC (permalink / raw)
  To: Benjamin Coddington; +Cc: Trond Myklebust, linux-nfs



----- Original Message -----
> From: "Benjamin Coddington" <bcodding@redhat.com>
> To: "Trond Myklebust" <trondmy@gmail.com>
> Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
> Sent: Saturday, 7 November, 2020 13:49:31
> Subject: Re: [PATCH v3 00/17] Readdir enhancements

> On 4 Nov 2020, at 11:16, trondmy@gmail.com wrote:
> 
>> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>>
>> The following patch series performs a number of cleanups on the readdir
>> code.
>> It also adds support for 1MB readdir RPC calls on-the-wire, and modifies
>> the caching code to ensure that we cache the entire contents of that
>> 1MB call (instead of discarding the data that doesn't fit into a single
>> page).
>>
>> v2: Fix the handling of the NFSv3/v4 directory verifier
>> v3: Optimise searching when the readdir cookies are seen to be ordered
> 
> Hi Trond, thanks for these.
> 
> I did a bit of testing with these on 4-core/4G client listing 1.5M files
> with READDIR.  I compared v5.10-rc2 without/with this set.
> 
> +------+     v5.10.rc-2      +--+ this v3 patch set  +
>| run  |  time   | rpc calls |  |  time  | rpc calls |
> 
> nfsv3 with dtsize 262144:
> +------+---------+-----------+--+--------+-----------+
>| 1    | 81.583  | 14710     |  | 53.568 | 215       |
>| 2    | 81.147  | 14710     |  | 50.781 | 215       |
>| 3    | 81.61   | 14710     |  | 50.514 | 215       |
>| 4    | 82.405  | 14710     |  | 50.746 | 215       |
>| 5    | 82.066  | 14710     |  | 50.397 | 215       |
>| 6    | 82.395  | 14710     |  | 50.892 | 215       |
>| 7    | 81.657  | 14710     |  | 50.882 | 215       |
>| 8    | 81.555  | 14710     |  | 50.981 | 215       |
>| 9    | 81.421  | 14710     |  | 50.558 | 215       |
>| 10   | 81.472  | 14710     |  | 50.588 | 215       |
> 
> nfsv3 with dtsize 1048576:
> +------+---------+-----------+--+--------+-----------+
>| 1    | 81.563  | 14710     |  | 52.692 | 61        |
>| 2    | 82.123  | 14710     |  | 49.934 | 61        |
>| 3    | 81.714  | 14710     |  | 50.158 | 61        |
>| 4    | 81.707  | 14710     |  | 50.083 | 61        |
>| 5    | 81.44   | 14710     |  | 50.045 | 61        |
>| 6    | 81.685  | 14710     |  | 50.021 | 61        |
>| 7    | 81.17   | 14710     |  | 50.131 | 61        |
>| 8    | 81.366  | 14710     |  | 49.928 | 61        |
>| 9    | 81.067  | 14710     |  | 50.081 | 61        |
>| 10   | 81.524  | 14710     |  | 50.442 | 61        |
> 
> nfsv4 with dtsize 32768:
> +------+---------+-----------+--+--------+-----------+
>| 1    | 99.534  | 14712     |  | 79.461 | 331       |
>| 2    | 98.998  | 14712     |  | 79.338 | 331       |
>| 3    | 99.462  | 14712     |  | 81.101 | 331       |
>| 4    | 99.891  | 14712     |  | 78.888 | 331       |
>| 5    | 99.516  | 14712     |  | 81.147 | 331       |
>| 6    | 98.649  | 14712     |  | 83.084 | 331       |
>| 7    | 101.159 | 14712     |  | 80.461 | 331       |
>| 8    | 100.402 | 14712     |  | 79.003 | 331       |
>| 9    | 98.548  | 14712     |  | 80.619 | 331       |
>| 10   | 97.456  | 14712     |  | 81.317 | 331       |
> 
> nfsv4 with 1048576:
> +------+---------+-----------+--+--------+-----------+
>| 1    | 100.357 | 14712     |  | 78.976 | 91        |
>| 2    | 99.61   | 14712     |  | 79.328 | 91        |
>| 3    | 101.095 | 14712     |  | 80.649 | 91        |
>| 4    | 107.904 | 14712     |  | 78.285 | 91        |
>| 5    | 103.665 | 14712     |  | 79.258 | 91        |
>| 6    | 98.877  | 14712     |  | 78.817 | 91        |
>| 7    | 99.567  | 14712     |  | 81.11  | 91        |
>| 8    | 99.096  | 14712     |  | 80.296 | 91        |
>| 9    | 100.124 | 14712     |  | 78.865 | 91        |
>| 10   | 100.603 | 14712     |  | 79.143 | 91        |


Hi Ben, hi Trond,

though number of RPC call between dtsize 1048576 and 32768
is x3 less, the time it takes almost the same. According to
your results, at some point (<= 32K) a bigger dtsize makes
no difference. As the original dtsize is 32K 
(#define NFS_MAX_READDIR_PAGES 8), it looks like that the
performance enhancements mostly contributed by a change
not related to the buffer size.

On another, the number of RPC calls with v3-patch-set drops
by x40. What ever Trond have changed there has a big impact!

Thanks a lot for your efforts,
   Tigran.

> 
> These look great.  Feel free to add either/both of my:
> Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
> Tested-by: Benjamin Coddington <bcodding@redhat.com>
> 
> Ben

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2020-11-08 18:15 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-04 16:16 [PATCH v3 00/17] Readdir enhancements trondmy
2020-11-04 16:16 ` [PATCH v3 01/17] NFS: Ensure contents of struct nfs_open_dir_context are consistent trondmy
2020-11-04 16:16   ` [PATCH v3 02/17] NFS: Clean up readdir struct nfs_cache_array trondmy
2020-11-04 16:16     ` [PATCH v3 03/17] NFS: Clean up nfs_readdir_page_filler() trondmy
2020-11-04 16:16       ` [PATCH v3 04/17] NFS: Clean up directory array handling trondmy
2020-11-04 16:16         ` [PATCH v3 05/17] NFS: Don't discard readdir results trondmy
2020-11-04 16:16           ` [PATCH v3 06/17] NFS: Remove unnecessary kmap in nfs_readdir_xdr_to_array() trondmy
2020-11-04 16:16             ` [PATCH v3 07/17] NFS: Replace kmap() with kmap_atomic() in nfs_readdir_search_array() trondmy
2020-11-04 16:16               ` [PATCH v3 08/17] NFS: Simplify struct nfs_cache_array_entry trondmy
2020-11-04 16:16                 ` [PATCH v3 09/17] NFS: Support larger readdir buffers trondmy
2020-11-04 16:16                   ` [PATCH v3 10/17] NFS: More readdir cleanups trondmy
2020-11-04 16:16                     ` [PATCH v3 11/17] NFS: nfs_do_filldir() does not return a value trondmy
2020-11-04 16:16                       ` [PATCH v3 12/17] NFS: Reduce readdir stack usage trondmy
2020-11-04 16:16                         ` [PATCH v3 13/17] NFS: Cleanup to remove nfs_readdir_descriptor_t typedef trondmy
2020-11-04 16:16                           ` [PATCH v3 14/17] NFS: Allow the NFS generic code to pass in a verifier to readdir trondmy
2020-11-04 16:16                             ` [PATCH v3 15/17] NFS: Handle NFS4ERR_NOT_SAME and NFSERR_BADCOOKIE from readdir calls trondmy
2020-11-04 16:16                               ` [PATCH v3 16/17] NFS: Improve handling of directory verifiers trondmy
2020-11-04 16:16                                 ` [PATCH v3 17/17] NFS: Optimisations for monotonically increasing readdir cookies trondmy
2020-11-04 21:01                                 ` [PATCH v3 16/17] NFS: Improve handling of directory verifiers David Wysochanski
2020-11-04 21:31                                   ` Trond Myklebust
2020-11-04 21:40                                     ` David Wysochanski
2020-11-06 13:30           ` [PATCH v3 05/17] NFS: Don't discard readdir results David Wysochanski
2020-11-06 15:05             ` Trond Myklebust
2020-11-06 18:00               ` David Wysochanski
2020-11-07 12:49 ` [PATCH v3 00/17] Readdir enhancements Benjamin Coddington
2020-11-07 14:23   ` Trond Myklebust
2020-11-08 11:05     ` Benjamin Coddington
2020-11-08 18:15   ` Mkrtchyan, Tigran

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).