linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] nfsd: don't fsync nfsd_files on last close
@ 2023-02-07 14:50 Jeff Layton
  2023-02-07 15:01 ` Chuck Lever III
  0 siblings, 1 reply; 4+ messages in thread
From: Jeff Layton @ 2023-02-07 14:50 UTC (permalink / raw)
  To: chuck.lever; +Cc: linux-nfs, Pierguido Lambri

Most of the time, NFSv4 clients issue a COMMIT before the final CLOSE of
an open stateid, so with NFSv4, the fsync in the nfsd_file_free path is
usually a no-op and doesn't block.

We have a customer running knfsd over very slow storage (XFS over Ceph
RBD). They were using the "async" export option because performance was
more important than data integrity for this application. That export
option turns NFSv4 COMMIT calls into no-ops. Due to the fsync in this
codepath however, their final CLOSE calls would still stall (since a
CLOSE effectively became a COMMIT).

I think this fsync is not strictly necessary. We only use that result to
reset the write verifier. Instead of fsync'ing all of the data when we
free an nfsd_file, we can just check for writeback errors when one is
acquired and when it is freed.

If the client never comes back, then it'll never see the error anyway
and there is no point in resetting it. If an error occurs after the
nfsd_file is removed from the cache but before the inode is evicted,
then it will reset the write verifier on the next nfsd_file_acquire,
(since there will be an unseen error).

The only exception here is if something else opens and fsyncs the file
during that window. Given that local applications work with this
limitation today, I don't see that as an issue.

Reported-and-Tested-by: Pierguido Lambri <plambri@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/filecache.c | 48 ++++++++++++++-------------------------------
 fs/nfsd/trace.h     | 31 -----------------------------
 2 files changed, 15 insertions(+), 64 deletions(-)

diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 136e543ae44b..fcd16ffbf9ad 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -233,37 +233,27 @@ nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need,
 	return nf;
 }
 
+/**
+ * nfsd_file_check_write_error - check for writeback errors on a file
+ * @nf: nfsd_file to check for writeback errors
+ *
+ * Check whether a nfsd_file has an unseen error. Reset the write
+ * verifier if so.
+ */
 static void
-nfsd_file_fsync(struct nfsd_file *nf)
-{
-	struct file *file = nf->nf_file;
-	int ret;
-
-	if (!file || !(file->f_mode & FMODE_WRITE))
-		return;
-	ret = vfs_fsync(file, 1);
-	trace_nfsd_file_fsync(nf, ret);
-	if (ret)
-		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
-}
-
-static int
 nfsd_file_check_write_error(struct nfsd_file *nf)
 {
 	struct file *file = nf->nf_file;
 
-	if (!file || !(file->f_mode & FMODE_WRITE))
-		return 0;
-	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
+	if ((file->f_mode & FMODE_WRITE) &&
+	    filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)))
+		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
 }
 
 static void
 nfsd_file_hash_remove(struct nfsd_file *nf)
 {
 	trace_nfsd_file_unhash(nf);
-
-	if (nfsd_file_check_write_error(nf))
-		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
 	rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist,
 			nfsd_file_rhash_params);
 }
@@ -289,22 +279,13 @@ nfsd_file_free(struct nfsd_file *nf)
 	this_cpu_add(nfsd_file_total_age, age);
 
 	nfsd_file_unhash(nf);
-
-	/*
-	 * We call fsync here in order to catch writeback errors. It's not
-	 * strictly required by the protocol, but an nfsd_file could get
-	 * evicted from the cache before a COMMIT comes in. If another
-	 * task were to open that file in the interim and scrape the error,
-	 * then the client may never see it. By calling fsync here, we ensure
-	 * that writeback happens before the entry is freed, and that any
-	 * errors reported result in the write verifier changing.
-	 */
-	nfsd_file_fsync(nf);
-
 	if (nf->nf_mark)
 		nfsd_file_mark_put(nf->nf_mark);
-	if (nf->nf_file)
+
+	if (nf->nf_file) {
+		nfsd_file_check_write_error(nf);
 		filp_close(nf->nf_file, NULL);
+	}
 
 	/*
 	 * If this item is still linked via nf_lru, that's a bug.
@@ -1080,6 +1061,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
 out:
 	if (status == nfs_ok) {
 		this_cpu_inc(nfsd_file_acquisitions);
+		nfsd_file_check_write_error(nf);
 		*pnf = nf;
 	}
 	put_cred(cred);
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 8f9c82d9e075..4183819ea082 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -1202,37 +1202,6 @@ TRACE_EVENT(nfsd_file_close,
 	)
 );
 
-TRACE_EVENT(nfsd_file_fsync,
-	TP_PROTO(
-		const struct nfsd_file *nf,
-		int ret
-	),
-	TP_ARGS(nf, ret),
-	TP_STRUCT__entry(
-		__field(void *, nf_inode)
-		__field(int, nf_ref)
-		__field(int, ret)
-		__field(unsigned long, nf_flags)
-		__field(unsigned char, nf_may)
-		__field(struct file *, nf_file)
-	),
-	TP_fast_assign(
-		__entry->nf_inode = nf->nf_inode;
-		__entry->nf_ref = refcount_read(&nf->nf_ref);
-		__entry->ret = ret;
-		__entry->nf_flags = nf->nf_flags;
-		__entry->nf_may = nf->nf_may;
-		__entry->nf_file = nf->nf_file;
-	),
-	TP_printk("inode=%p ref=%d flags=%s may=%s nf_file=%p ret=%d",
-		__entry->nf_inode,
-		__entry->nf_ref,
-		show_nf_flags(__entry->nf_flags),
-		show_nfsd_may_flags(__entry->nf_may),
-		__entry->nf_file, __entry->ret
-	)
-);
-
 #include "cache.h"
 
 TRACE_DEFINE_ENUM(RC_DROPIT);
-- 
2.39.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] nfsd: don't fsync nfsd_files on last close
  2023-02-07 14:50 [PATCH] nfsd: don't fsync nfsd_files on last close Jeff Layton
@ 2023-02-07 15:01 ` Chuck Lever III
  2023-02-07 15:15   ` Jeff Layton
  0 siblings, 1 reply; 4+ messages in thread
From: Chuck Lever III @ 2023-02-07 15:01 UTC (permalink / raw)
  To: Jeff Layton; +Cc: Linux NFS Mailing List, Pierguido Lambri



> On Feb 7, 2023, at 9:50 AM, Jeff Layton <jlayton@kernel.org> wrote:
> 
> Most of the time, NFSv4 clients issue a COMMIT before the final CLOSE of
> an open stateid, so with NFSv4, the fsync in the nfsd_file_free path is
> usually a no-op and doesn't block.
> 
> We have a customer running knfsd over very slow storage (XFS over Ceph
> RBD). They were using the "async" export option because performance was
> more important than data integrity for this application. That export
> option turns NFSv4 COMMIT calls into no-ops. Due to the fsync in this
> codepath however, their final CLOSE calls would still stall (since a
> CLOSE effectively became a COMMIT).
> 
> I think this fsync is not strictly necessary. We only use that result to
> reset the write verifier. Instead of fsync'ing all of the data when we
> free an nfsd_file, we can just check for writeback errors when one is
> acquired and when it is freed.
> 
> If the client never comes back, then it'll never see the error anyway
> and there is no point in resetting it. If an error occurs after the
> nfsd_file is removed from the cache but before the inode is evicted,
> then it will reset the write verifier on the next nfsd_file_acquire,
> (since there will be an unseen error).
> 
> The only exception here is if something else opens and fsyncs the file
> during that window. Given that local applications work with this
> limitation today, I don't see that as an issue.
> 
> Reported-and-Tested-by: Pierguido Lambri <plambri@redhat.com>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>

Seems sensible and clean.

I would like to queue this in the filecache topic branch, but
that means it won't get merged until v6.4 at the earliest. Is
that OK?


> ---
> fs/nfsd/filecache.c | 48 ++++++++++++++-------------------------------
> fs/nfsd/trace.h     | 31 -----------------------------
> 2 files changed, 15 insertions(+), 64 deletions(-)
> 
> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
> index 136e543ae44b..fcd16ffbf9ad 100644
> --- a/fs/nfsd/filecache.c
> +++ b/fs/nfsd/filecache.c
> @@ -233,37 +233,27 @@ nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need,
> 	return nf;
> }
> 
> +/**
> + * nfsd_file_check_write_error - check for writeback errors on a file
> + * @nf: nfsd_file to check for writeback errors
> + *
> + * Check whether a nfsd_file has an unseen error. Reset the write
> + * verifier if so.
> + */
> static void
> -nfsd_file_fsync(struct nfsd_file *nf)
> -{
> -	struct file *file = nf->nf_file;
> -	int ret;
> -
> -	if (!file || !(file->f_mode & FMODE_WRITE))
> -		return;
> -	ret = vfs_fsync(file, 1);
> -	trace_nfsd_file_fsync(nf, ret);
> -	if (ret)
> -		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
> -}
> -
> -static int
> nfsd_file_check_write_error(struct nfsd_file *nf)
> {
> 	struct file *file = nf->nf_file;
> 
> -	if (!file || !(file->f_mode & FMODE_WRITE))
> -		return 0;
> -	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
> +	if ((file->f_mode & FMODE_WRITE) &&
> +	    filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)))
> +		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
> }
> 
> static void
> nfsd_file_hash_remove(struct nfsd_file *nf)
> {
> 	trace_nfsd_file_unhash(nf);
> -
> -	if (nfsd_file_check_write_error(nf))
> -		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
> 	rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist,
> 			nfsd_file_rhash_params);
> }
> @@ -289,22 +279,13 @@ nfsd_file_free(struct nfsd_file *nf)
> 	this_cpu_add(nfsd_file_total_age, age);
> 
> 	nfsd_file_unhash(nf);
> -
> -	/*
> -	 * We call fsync here in order to catch writeback errors. It's not
> -	 * strictly required by the protocol, but an nfsd_file could get
> -	 * evicted from the cache before a COMMIT comes in. If another
> -	 * task were to open that file in the interim and scrape the error,
> -	 * then the client may never see it. By calling fsync here, we ensure
> -	 * that writeback happens before the entry is freed, and that any
> -	 * errors reported result in the write verifier changing.
> -	 */
> -	nfsd_file_fsync(nf);
> -
> 	if (nf->nf_mark)
> 		nfsd_file_mark_put(nf->nf_mark);
> -	if (nf->nf_file)
> +
> +	if (nf->nf_file) {
> +		nfsd_file_check_write_error(nf);
> 		filp_close(nf->nf_file, NULL);
> +	}
> 
> 	/*
> 	 * If this item is still linked via nf_lru, that's a bug.
> @@ -1080,6 +1061,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
> out:
> 	if (status == nfs_ok) {
> 		this_cpu_inc(nfsd_file_acquisitions);
> +		nfsd_file_check_write_error(nf);
> 		*pnf = nf;
> 	}
> 	put_cred(cred);
> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> index 8f9c82d9e075..4183819ea082 100644
> --- a/fs/nfsd/trace.h
> +++ b/fs/nfsd/trace.h
> @@ -1202,37 +1202,6 @@ TRACE_EVENT(nfsd_file_close,
> 	)
> );
> 
> -TRACE_EVENT(nfsd_file_fsync,
> -	TP_PROTO(
> -		const struct nfsd_file *nf,
> -		int ret
> -	),
> -	TP_ARGS(nf, ret),
> -	TP_STRUCT__entry(
> -		__field(void *, nf_inode)
> -		__field(int, nf_ref)
> -		__field(int, ret)
> -		__field(unsigned long, nf_flags)
> -		__field(unsigned char, nf_may)
> -		__field(struct file *, nf_file)
> -	),
> -	TP_fast_assign(
> -		__entry->nf_inode = nf->nf_inode;
> -		__entry->nf_ref = refcount_read(&nf->nf_ref);
> -		__entry->ret = ret;
> -		__entry->nf_flags = nf->nf_flags;
> -		__entry->nf_may = nf->nf_may;
> -		__entry->nf_file = nf->nf_file;
> -	),
> -	TP_printk("inode=%p ref=%d flags=%s may=%s nf_file=%p ret=%d",
> -		__entry->nf_inode,
> -		__entry->nf_ref,
> -		show_nf_flags(__entry->nf_flags),
> -		show_nfsd_may_flags(__entry->nf_may),
> -		__entry->nf_file, __entry->ret
> -	)
> -);
> -
> #include "cache.h"
> 
> TRACE_DEFINE_ENUM(RC_DROPIT);
> -- 
> 2.39.1
> 

--
Chuck Lever




^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] nfsd: don't fsync nfsd_files on last close
  2023-02-07 15:01 ` Chuck Lever III
@ 2023-02-07 15:15   ` Jeff Layton
  2023-02-07 15:30     ` Chuck Lever III
  0 siblings, 1 reply; 4+ messages in thread
From: Jeff Layton @ 2023-02-07 15:15 UTC (permalink / raw)
  To: Chuck Lever III; +Cc: Linux NFS Mailing List, Pierguido Lambri

On Tue, 2023-02-07 at 15:01 +0000, Chuck Lever III wrote:
> 
> > On Feb 7, 2023, at 9:50 AM, Jeff Layton <jlayton@kernel.org> wrote:
> > 
> > Most of the time, NFSv4 clients issue a COMMIT before the final CLOSE of
> > an open stateid, so with NFSv4, the fsync in the nfsd_file_free path is
> > usually a no-op and doesn't block.
> > 
> > We have a customer running knfsd over very slow storage (XFS over Ceph
> > RBD). They were using the "async" export option because performance was
> > more important than data integrity for this application. That export
> > option turns NFSv4 COMMIT calls into no-ops. Due to the fsync in this
> > codepath however, their final CLOSE calls would still stall (since a
> > CLOSE effectively became a COMMIT).
> > 
> > I think this fsync is not strictly necessary. We only use that result to
> > reset the write verifier. Instead of fsync'ing all of the data when we
> > free an nfsd_file, we can just check for writeback errors when one is
> > acquired and when it is freed.
> > 
> > If the client never comes back, then it'll never see the error anyway
> > and there is no point in resetting it. If an error occurs after the
> > nfsd_file is removed from the cache but before the inode is evicted,
> > then it will reset the write verifier on the next nfsd_file_acquire,
> > (since there will be an unseen error).
> > 
> > The only exception here is if something else opens and fsyncs the file
> > during that window. Given that local applications work with this
> > limitation today, I don't see that as an issue.
> > 
> > Reported-and-Tested-by: Pierguido Lambri <plambri@redhat.com>
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> 
> Seems sensible and clean.
> 
> I would like to queue this in the filecache topic branch, but
> that means it won't get merged until v6.4 at the earliest. Is
> that OK?
> 
> 

Thanks! v6.4 would be a little late. Can we get it in for v6.3?

Exporting with -o async is (unfortunately) quite common. I suspect we're
going to see other bug reports due to this. Waiting out a whole cycle
means wading through a bunch of those (and telling those folks to use
older kernels until we can get it in).


> > ---
> > fs/nfsd/filecache.c | 48 ++++++++++++++-------------------------------
> > fs/nfsd/trace.h     | 31 -----------------------------
> > 2 files changed, 15 insertions(+), 64 deletions(-)
> > 
> > diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
> > index 136e543ae44b..fcd16ffbf9ad 100644
> > --- a/fs/nfsd/filecache.c
> > +++ b/fs/nfsd/filecache.c
> > @@ -233,37 +233,27 @@ nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need,
> > 	return nf;
> > }
> > 
> > +/**
> > + * nfsd_file_check_write_error - check for writeback errors on a file
> > + * @nf: nfsd_file to check for writeback errors
> > + *
> > + * Check whether a nfsd_file has an unseen error. Reset the write
> > + * verifier if so.
> > + */
> > static void
> > -nfsd_file_fsync(struct nfsd_file *nf)
> > -{
> > -	struct file *file = nf->nf_file;
> > -	int ret;
> > -
> > -	if (!file || !(file->f_mode & FMODE_WRITE))
> > -		return;
> > -	ret = vfs_fsync(file, 1);
> > -	trace_nfsd_file_fsync(nf, ret);
> > -	if (ret)
> > -		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
> > -}
> > -
> > -static int
> > nfsd_file_check_write_error(struct nfsd_file *nf)
> > {
> > 	struct file *file = nf->nf_file;
> > 
> > -	if (!file || !(file->f_mode & FMODE_WRITE))
> > -		return 0;
> > -	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
> > +	if ((file->f_mode & FMODE_WRITE) &&
> > +	    filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)))
> > +		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
> > }
> > 
> > static void
> > nfsd_file_hash_remove(struct nfsd_file *nf)
> > {
> > 	trace_nfsd_file_unhash(nf);
> > -
> > -	if (nfsd_file_check_write_error(nf))
> > -		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
> > 	rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist,
> > 			nfsd_file_rhash_params);
> > }
> > @@ -289,22 +279,13 @@ nfsd_file_free(struct nfsd_file *nf)
> > 	this_cpu_add(nfsd_file_total_age, age);
> > 
> > 	nfsd_file_unhash(nf);
> > -
> > -	/*
> > -	 * We call fsync here in order to catch writeback errors. It's not
> > -	 * strictly required by the protocol, but an nfsd_file could get
> > -	 * evicted from the cache before a COMMIT comes in. If another
> > -	 * task were to open that file in the interim and scrape the error,
> > -	 * then the client may never see it. By calling fsync here, we ensure
> > -	 * that writeback happens before the entry is freed, and that any
> > -	 * errors reported result in the write verifier changing.
> > -	 */
> > -	nfsd_file_fsync(nf);
> > -
> > 	if (nf->nf_mark)
> > 		nfsd_file_mark_put(nf->nf_mark);
> > -	if (nf->nf_file)
> > +
> > +	if (nf->nf_file) {
> > +		nfsd_file_check_write_error(nf);
> > 		filp_close(nf->nf_file, NULL);
> > +	}
> > 
> > 	/*
> > 	 * If this item is still linked via nf_lru, that's a bug.
> > @@ -1080,6 +1061,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
> > out:
> > 	if (status == nfs_ok) {
> > 		this_cpu_inc(nfsd_file_acquisitions);
> > +		nfsd_file_check_write_error(nf);
> > 		*pnf = nf;
> > 	}
> > 	put_cred(cred);
> > diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> > index 8f9c82d9e075..4183819ea082 100644
> > --- a/fs/nfsd/trace.h
> > +++ b/fs/nfsd/trace.h
> > @@ -1202,37 +1202,6 @@ TRACE_EVENT(nfsd_file_close,
> > 	)
> > );
> > 
> > -TRACE_EVENT(nfsd_file_fsync,
> > -	TP_PROTO(
> > -		const struct nfsd_file *nf,
> > -		int ret
> > -	),
> > -	TP_ARGS(nf, ret),
> > -	TP_STRUCT__entry(
> > -		__field(void *, nf_inode)
> > -		__field(int, nf_ref)
> > -		__field(int, ret)
> > -		__field(unsigned long, nf_flags)
> > -		__field(unsigned char, nf_may)
> > -		__field(struct file *, nf_file)
> > -	),
> > -	TP_fast_assign(
> > -		__entry->nf_inode = nf->nf_inode;
> > -		__entry->nf_ref = refcount_read(&nf->nf_ref);
> > -		__entry->ret = ret;
> > -		__entry->nf_flags = nf->nf_flags;
> > -		__entry->nf_may = nf->nf_may;
> > -		__entry->nf_file = nf->nf_file;
> > -	),
> > -	TP_printk("inode=%p ref=%d flags=%s may=%s nf_file=%p ret=%d",
> > -		__entry->nf_inode,
> > -		__entry->nf_ref,
> > -		show_nf_flags(__entry->nf_flags),
> > -		show_nfsd_may_flags(__entry->nf_may),
> > -		__entry->nf_file, __entry->ret
> > -	)
> > -);
> > -
> > #include "cache.h"
> > 
> > TRACE_DEFINE_ENUM(RC_DROPIT);
> > -- 
> > 2.39.1
> > 
> 
> --
> Chuck Lever
> 
> 
> 

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] nfsd: don't fsync nfsd_files on last close
  2023-02-07 15:15   ` Jeff Layton
@ 2023-02-07 15:30     ` Chuck Lever III
  0 siblings, 0 replies; 4+ messages in thread
From: Chuck Lever III @ 2023-02-07 15:30 UTC (permalink / raw)
  To: Jeff Layton; +Cc: Linux NFS Mailing List, Pierguido Lambri



> On Feb 7, 2023, at 10:15 AM, Jeff Layton <jlayton@kernel.org> wrote:
> 
> On Tue, 2023-02-07 at 15:01 +0000, Chuck Lever III wrote:
>> 
>>> On Feb 7, 2023, at 9:50 AM, Jeff Layton <jlayton@kernel.org> wrote:
>>> 
>>> Most of the time, NFSv4 clients issue a COMMIT before the final CLOSE of
>>> an open stateid, so with NFSv4, the fsync in the nfsd_file_free path is
>>> usually a no-op and doesn't block.
>>> 
>>> We have a customer running knfsd over very slow storage (XFS over Ceph
>>> RBD). They were using the "async" export option because performance was
>>> more important than data integrity for this application. That export
>>> option turns NFSv4 COMMIT calls into no-ops. Due to the fsync in this
>>> codepath however, their final CLOSE calls would still stall (since a
>>> CLOSE effectively became a COMMIT).
>>> 
>>> I think this fsync is not strictly necessary. We only use that result to
>>> reset the write verifier. Instead of fsync'ing all of the data when we
>>> free an nfsd_file, we can just check for writeback errors when one is
>>> acquired and when it is freed.
>>> 
>>> If the client never comes back, then it'll never see the error anyway
>>> and there is no point in resetting it. If an error occurs after the
>>> nfsd_file is removed from the cache but before the inode is evicted,
>>> then it will reset the write verifier on the next nfsd_file_acquire,
>>> (since there will be an unseen error).
>>> 
>>> The only exception here is if something else opens and fsyncs the file
>>> during that window. Given that local applications work with this
>>> limitation today, I don't see that as an issue.
>>> 
>>> Reported-and-Tested-by: Pierguido Lambri <plambri@redhat.com>
>>> Signed-off-by: Jeff Layton <jlayton@kernel.org>
>> 
>> Seems sensible and clean.
>> 
>> I would like to queue this in the filecache topic branch, but
>> that means it won't get merged until v6.4 at the earliest. Is
>> that OK?
>> 
>> 
> 
> Thanks! v6.4 would be a little late. Can we get it in for v6.3?
> 
> Exporting with -o async is (unfortunately) quite common. I suspect we're
> going to see other bug reports due to this. Waiting out a whole cycle
> means wading through a bunch of those (and telling those folks to use
> older kernels until we can get it in).

I will apply this to v6.3 if I can get a Fixes: or Cc: stable tag.
It looks like this is similar to 6b8a94332ee4 ("nfsd: Fix a write
performance regression").

Also one or two more R-b's would be awesome sauce.


>>> ---
>>> fs/nfsd/filecache.c | 48 ++++++++++++++-------------------------------
>>> fs/nfsd/trace.h     | 31 -----------------------------
>>> 2 files changed, 15 insertions(+), 64 deletions(-)
>>> 
>>> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
>>> index 136e543ae44b..fcd16ffbf9ad 100644
>>> --- a/fs/nfsd/filecache.c
>>> +++ b/fs/nfsd/filecache.c
>>> @@ -233,37 +233,27 @@ nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need,
>>> 	return nf;
>>> }
>>> 
>>> +/**
>>> + * nfsd_file_check_write_error - check for writeback errors on a file
>>> + * @nf: nfsd_file to check for writeback errors
>>> + *
>>> + * Check whether a nfsd_file has an unseen error. Reset the write
>>> + * verifier if so.
>>> + */
>>> static void
>>> -nfsd_file_fsync(struct nfsd_file *nf)
>>> -{
>>> -	struct file *file = nf->nf_file;
>>> -	int ret;
>>> -
>>> -	if (!file || !(file->f_mode & FMODE_WRITE))
>>> -		return;
>>> -	ret = vfs_fsync(file, 1);
>>> -	trace_nfsd_file_fsync(nf, ret);
>>> -	if (ret)
>>> -		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
>>> -}
>>> -
>>> -static int
>>> nfsd_file_check_write_error(struct nfsd_file *nf)
>>> {
>>> 	struct file *file = nf->nf_file;
>>> 
>>> -	if (!file || !(file->f_mode & FMODE_WRITE))
>>> -		return 0;
>>> -	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
>>> +	if ((file->f_mode & FMODE_WRITE) &&
>>> +	    filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)))
>>> +		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
>>> }
>>> 
>>> static void
>>> nfsd_file_hash_remove(struct nfsd_file *nf)
>>> {
>>> 	trace_nfsd_file_unhash(nf);
>>> -
>>> -	if (nfsd_file_check_write_error(nf))
>>> -		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
>>> 	rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist,
>>> 			nfsd_file_rhash_params);
>>> }
>>> @@ -289,22 +279,13 @@ nfsd_file_free(struct nfsd_file *nf)
>>> 	this_cpu_add(nfsd_file_total_age, age);
>>> 
>>> 	nfsd_file_unhash(nf);
>>> -
>>> -	/*
>>> -	 * We call fsync here in order to catch writeback errors. It's not
>>> -	 * strictly required by the protocol, but an nfsd_file could get
>>> -	 * evicted from the cache before a COMMIT comes in. If another
>>> -	 * task were to open that file in the interim and scrape the error,
>>> -	 * then the client may never see it. By calling fsync here, we ensure
>>> -	 * that writeback happens before the entry is freed, and that any
>>> -	 * errors reported result in the write verifier changing.
>>> -	 */
>>> -	nfsd_file_fsync(nf);
>>> -
>>> 	if (nf->nf_mark)
>>> 		nfsd_file_mark_put(nf->nf_mark);
>>> -	if (nf->nf_file)
>>> +
>>> +	if (nf->nf_file) {
>>> +		nfsd_file_check_write_error(nf);
>>> 		filp_close(nf->nf_file, NULL);
>>> +	}
>>> 
>>> 	/*
>>> 	 * If this item is still linked via nf_lru, that's a bug.
>>> @@ -1080,6 +1061,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>>> out:
>>> 	if (status == nfs_ok) {
>>> 		this_cpu_inc(nfsd_file_acquisitions);
>>> +		nfsd_file_check_write_error(nf);
>>> 		*pnf = nf;
>>> 	}
>>> 	put_cred(cred);
>>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>>> index 8f9c82d9e075..4183819ea082 100644
>>> --- a/fs/nfsd/trace.h
>>> +++ b/fs/nfsd/trace.h
>>> @@ -1202,37 +1202,6 @@ TRACE_EVENT(nfsd_file_close,
>>> 	)
>>> );
>>> 
>>> -TRACE_EVENT(nfsd_file_fsync,
>>> -	TP_PROTO(
>>> -		const struct nfsd_file *nf,
>>> -		int ret
>>> -	),
>>> -	TP_ARGS(nf, ret),
>>> -	TP_STRUCT__entry(
>>> -		__field(void *, nf_inode)
>>> -		__field(int, nf_ref)
>>> -		__field(int, ret)
>>> -		__field(unsigned long, nf_flags)
>>> -		__field(unsigned char, nf_may)
>>> -		__field(struct file *, nf_file)
>>> -	),
>>> -	TP_fast_assign(
>>> -		__entry->nf_inode = nf->nf_inode;
>>> -		__entry->nf_ref = refcount_read(&nf->nf_ref);
>>> -		__entry->ret = ret;
>>> -		__entry->nf_flags = nf->nf_flags;
>>> -		__entry->nf_may = nf->nf_may;
>>> -		__entry->nf_file = nf->nf_file;
>>> -	),
>>> -	TP_printk("inode=%p ref=%d flags=%s may=%s nf_file=%p ret=%d",
>>> -		__entry->nf_inode,
>>> -		__entry->nf_ref,
>>> -		show_nf_flags(__entry->nf_flags),
>>> -		show_nfsd_may_flags(__entry->nf_may),
>>> -		__entry->nf_file, __entry->ret
>>> -	)
>>> -);
>>> -
>>> #include "cache.h"
>>> 
>>> TRACE_DEFINE_ENUM(RC_DROPIT);
>>> -- 
>>> 2.39.1
>>> 
>> 
>> --
>> Chuck Lever
>> 
>> 
>> 
> 
> -- 
> Jeff Layton <jlayton@kernel.org>

--
Chuck Lever




^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-02-07 15:30 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-07 14:50 [PATCH] nfsd: don't fsync nfsd_files on last close Jeff Layton
2023-02-07 15:01 ` Chuck Lever III
2023-02-07 15:15   ` Jeff Layton
2023-02-07 15:30     ` Chuck Lever III

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).