* [PATCH 01/10] NFSD: Fix verifier returned in stable WRITEs
2022-01-02 17:35 [PATCH 00/10] Write verifier fixes and clean-ups Chuck Lever
@ 2022-01-02 17:35 ` Chuck Lever
2022-01-02 17:35 ` [PATCH 02/10] nfsd: Replace use of rwsem with errseq_t Chuck Lever
` (8 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever @ 2022-01-02 17:35 UTC (permalink / raw)
To: linux-nfs; +Cc: trondmy
RFC 8881 explains the purpose of the write verifier this way:
> The final portion of the result is the field writeverf. This field
> is the write verifier and is a cookie that the client can use to
> determine whether a server has changed instance state (e.g., server
> restart) between a call to WRITE and a subsequent call to either
> WRITE or COMMIT.
But then it says:
> This cookie MUST be unchanged during a single instance of the
> NFSv4.1 server and MUST be unique between instances of the NFSv4.1
> server. If the cookie changes, then the client MUST assume that
> any data written with an UNSTABLE4 value for committed and an old
> writeverf in the reply has been lost and will need to be
> recovered.
RFC 1813 has similar language for NFSv3. NFSv2 does not have a write
verifier since it doesn't implement the COMMIT procedure.
Since commit 19e0663ff9bc ("nfsd: Ensure sampling of the write
verifier is atomic with the write"), the Linux NFS server has
returned a boot-time-based verifier for UNSTABLE WRITEs, but a zero
verifier for FILE_SYNC and DATA_SYNC WRITEs. FILE_SYNC and DATA_SYNC
WRITEs are not followed up with a COMMIT, so there's no need for
clients to compare verifiers for stable writes.
However, by returning a different verifier for stable and unstable
writes, the above commit puts the Linux NFS server a step farther
out of compliance with the first MUST above. At least one NFS client
(FreeBSD) noticed the difference, making this a potential
regression.
Reported-by: Rick Macklem <rmacklem@uoguelph.ca>
Link: https://lore.kernel.org/linux-nfs/YQXPR0101MB096857EEACF04A6DF1FC6D9BDD749@YQXPR0101MB0968.CANPRD01.PROD.OUTLOOK.COM/T/
Fixes: 19e0663ff9bc ("nfsd: Ensure sampling of the write verifier is atomic with the write")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfsd/vfs.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 0faa3839ea6c..74c3451c2089 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -995,6 +995,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
if (flags & RWF_SYNC) {
down_write(&nf->nf_rwsem);
+ if (verf)
+ nfsd_copy_boot_verifier(verf,
+ net_generic(SVC_NET(rqstp),
+ nfsd_net_id));
host_err = vfs_iter_write(file, &iter, &pos, flags);
if (host_err < 0)
nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 02/10] nfsd: Replace use of rwsem with errseq_t
2022-01-02 17:35 [PATCH 00/10] Write verifier fixes and clean-ups Chuck Lever
2022-01-02 17:35 ` [PATCH 01/10] NFSD: Fix verifier returned in stable WRITEs Chuck Lever
@ 2022-01-02 17:35 ` Chuck Lever
2022-01-02 17:35 ` [PATCH 03/10] NFSD: Clean up nfsd_vfs_write() Chuck Lever
` (7 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever @ 2022-01-02 17:35 UTC (permalink / raw)
To: linux-nfs; +Cc: trondmy
From: Trond Myklebust <trond.myklebust@hammerspace.com>
The nfsd_file nf_rwsem is currently being used to separate file write
and commit instances to ensure that we catch errors and apply them to
the correct write/commit.
We can improve scalability at the expense of a little accuracy (some
extra false positives) by replacing the nf_rwsem with more careful
use of the errseq_t mechanism to track errors across the different
operations.
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
[ cel: rebased on zero-verifier fix ]
---
fs/nfsd/filecache.c | 1 -
fs/nfsd/filecache.h | 1 -
fs/nfsd/nfs4proc.c | 16 +++++++++-------
fs/nfsd/vfs.c | 40 +++++++++++++++-------------------------
4 files changed, 24 insertions(+), 34 deletions(-)
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index aa5dca498b27..e2904540e463 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -189,7 +189,6 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
}
nf->nf_mark = NULL;
- init_rwsem(&nf->nf_rwsem);
trace_nfsd_file_alloc(nf);
}
return nf;
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index 7872df5a0fe3..435ceab27897 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -46,7 +46,6 @@ struct nfsd_file {
refcount_t nf_ref;
unsigned char nf_may;
struct nfsd_file_mark *nf_mark;
- struct rw_semaphore nf_rwsem;
};
int nfsd_file_cache_init(void);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a6dc5e18c498..56405fc58bfc 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1510,6 +1510,9 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
{
+ struct file *dst = copy->nf_dst->nf_file;
+ struct file *src = copy->nf_src->nf_file;
+ errseq_t since;
ssize_t bytes_copied = 0;
u64 bytes_total = copy->cp_count;
u64 src_pos = copy->cp_src_pos;
@@ -1522,9 +1525,8 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
do {
if (kthread_should_stop())
break;
- bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file,
- src_pos, copy->nf_dst->nf_file, dst_pos,
- bytes_total);
+ bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos,
+ bytes_total);
if (bytes_copied <= 0)
break;
bytes_total -= bytes_copied;
@@ -1534,11 +1536,11 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
} while (bytes_total > 0 && !copy->cp_synchronous);
/* for a non-zero asynchronous copy do a commit of data */
if (!copy->cp_synchronous && copy->cp_res.wr_bytes_written > 0) {
- down_write(©->nf_dst->nf_rwsem);
- status = vfs_fsync_range(copy->nf_dst->nf_file,
- copy->cp_dst_pos,
+ since = READ_ONCE(dst->f_wb_err);
+ status = vfs_fsync_range(dst, copy->cp_dst_pos,
copy->cp_res.wr_bytes_written, 0);
- up_write(©->nf_dst->nf_rwsem);
+ if (!status)
+ status = filemap_check_wb_err(dst->f_mapping, since);
if (!status)
copy->committed = true;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 74c3451c2089..316ed702d518 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -522,10 +522,11 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
{
struct file *src = nf_src->nf_file;
struct file *dst = nf_dst->nf_file;
+ errseq_t since;
loff_t cloned;
__be32 ret = 0;
- down_write(&nf_dst->nf_rwsem);
+ since = READ_ONCE(dst->f_wb_err);
cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
if (cloned < 0) {
ret = nfserrno(cloned);
@@ -539,6 +540,8 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX;
int status = vfs_fsync_range(dst, dst_pos, dst_end, 0);
+ if (!status)
+ status = filemap_check_wb_err(dst->f_mapping, since);
if (!status)
status = commit_inode_metadata(file_inode(src));
if (status < 0) {
@@ -548,7 +551,6 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
}
}
out_err:
- up_write(&nf_dst->nf_rwsem);
return ret;
}
@@ -956,6 +958,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
struct super_block *sb = file_inode(file)->i_sb;
struct svc_export *exp;
struct iov_iter iter;
+ errseq_t since;
__be32 nfserr;
int host_err;
int use_wgather;
@@ -993,8 +996,8 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
flags |= RWF_SYNC;
iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
+ since = READ_ONCE(file->f_wb_err);
if (flags & RWF_SYNC) {
- down_write(&nf->nf_rwsem);
if (verf)
nfsd_copy_boot_verifier(verf,
net_generic(SVC_NET(rqstp),
@@ -1003,15 +1006,12 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
if (host_err < 0)
nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
nfsd_net_id));
- up_write(&nf->nf_rwsem);
} else {
- down_read(&nf->nf_rwsem);
if (verf)
nfsd_copy_boot_verifier(verf,
net_generic(SVC_NET(rqstp),
nfsd_net_id));
host_err = vfs_iter_write(file, &iter, &pos, flags);
- up_read(&nf->nf_rwsem);
}
if (host_err < 0) {
nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
@@ -1021,6 +1021,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
*cnt = host_err;
nfsd_stats_io_write_add(exp, *cnt);
fsnotify_modify(file);
+ host_err = filemap_check_wb_err(file->f_mapping, since);
+ if (host_err < 0)
+ goto out_nfserr;
if (stable && use_wgather) {
host_err = wait_for_concurrent_writes(file);
@@ -1101,19 +1104,6 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
}
#ifdef CONFIG_NFSD_V3
-static int
-nfsd_filemap_write_and_wait_range(struct nfsd_file *nf, loff_t offset,
- loff_t end)
-{
- struct address_space *mapping = nf->nf_file->f_mapping;
- int ret = filemap_fdatawrite_range(mapping, offset, end);
-
- if (ret)
- return ret;
- filemap_fdatawait_range_keep_errors(mapping, offset, end);
- return 0;
-}
-
/*
* Commit all pending writes to stable storage.
*
@@ -1144,25 +1134,25 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (err)
goto out;
if (EX_ISSYNC(fhp->fh_export)) {
- int err2 = nfsd_filemap_write_and_wait_range(nf, offset, end);
+ errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
+ int err2;
- down_write(&nf->nf_rwsem);
- if (!err2)
- err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
+ err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
switch (err2) {
case 0:
nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
nfsd_net_id));
+ err2 = filemap_check_wb_err(nf->nf_file->f_mapping,
+ since);
break;
case -EINVAL:
err = nfserr_notsupp;
break;
default:
- err = nfserrno(err2);
nfsd_reset_boot_verifier(net_generic(nf->nf_net,
nfsd_net_id));
}
- up_write(&nf->nf_rwsem);
+ err = nfserrno(err2);
} else
nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
nfsd_net_id));
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 03/10] NFSD: Clean up nfsd_vfs_write()
2022-01-02 17:35 [PATCH 00/10] Write verifier fixes and clean-ups Chuck Lever
2022-01-02 17:35 ` [PATCH 01/10] NFSD: Fix verifier returned in stable WRITEs Chuck Lever
2022-01-02 17:35 ` [PATCH 02/10] nfsd: Replace use of rwsem with errseq_t Chuck Lever
@ 2022-01-02 17:35 ` Chuck Lever
2022-01-02 17:35 ` [PATCH 04/10] NFSD: De-duplicate net_generic(SVC_NET(rqstp), nfsd_net_id) Chuck Lever
` (6 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever @ 2022-01-02 17:35 UTC (permalink / raw)
To: linux-nfs; +Cc: trondmy
The RWF_SYNC and !RWF_SYNC arms are now exactly alike except that
the RWF_SYNC arm resets the boot verifier twice in a row. Fix that
redundancy and de-duplicate the code.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfsd/vfs.c | 21 +++++----------------
1 file changed, 5 insertions(+), 16 deletions(-)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 316ed702d518..8f0ac710fd1a 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -997,22 +997,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
since = READ_ONCE(file->f_wb_err);
- if (flags & RWF_SYNC) {
- if (verf)
- nfsd_copy_boot_verifier(verf,
- net_generic(SVC_NET(rqstp),
- nfsd_net_id));
- host_err = vfs_iter_write(file, &iter, &pos, flags);
- if (host_err < 0)
- nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
- nfsd_net_id));
- } else {
- if (verf)
- nfsd_copy_boot_verifier(verf,
- net_generic(SVC_NET(rqstp),
- nfsd_net_id));
- host_err = vfs_iter_write(file, &iter, &pos, flags);
- }
+ if (verf)
+ nfsd_copy_boot_verifier(verf,
+ net_generic(SVC_NET(rqstp),
+ nfsd_net_id));
+ host_err = vfs_iter_write(file, &iter, &pos, flags);
if (host_err < 0) {
nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
nfsd_net_id));
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 04/10] NFSD: De-duplicate net_generic(SVC_NET(rqstp), nfsd_net_id)
2022-01-02 17:35 [PATCH 00/10] Write verifier fixes and clean-ups Chuck Lever
` (2 preceding siblings ...)
2022-01-02 17:35 ` [PATCH 03/10] NFSD: Clean up nfsd_vfs_write() Chuck Lever
@ 2022-01-02 17:35 ` Chuck Lever
2022-01-02 17:35 ` [PATCH 05/10] NFSD: De-duplicate net_generic(nf->nf_net, nfsd_net_id) Chuck Lever
` (5 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever @ 2022-01-02 17:35 UTC (permalink / raw)
To: linux-nfs; +Cc: trondmy
Since this pointer is used repeatedly, move it to a stack variable.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfsd/vfs.c | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8f0ac710fd1a..2e473d2f47e5 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -954,6 +954,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
unsigned long *cnt, int stable,
__be32 *verf)
{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct file *file = nf->nf_file;
struct super_block *sb = file_inode(file)->i_sb;
struct svc_export *exp;
@@ -998,13 +999,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
since = READ_ONCE(file->f_wb_err);
if (verf)
- nfsd_copy_boot_verifier(verf,
- net_generic(SVC_NET(rqstp),
- nfsd_net_id));
+ nfsd_copy_boot_verifier(verf, nn);
host_err = vfs_iter_write(file, &iter, &pos, flags);
if (host_err < 0) {
- nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
- nfsd_net_id));
+ nfsd_reset_boot_verifier(nn);
goto out_nfserr;
}
*cnt = host_err;
@@ -1017,8 +1015,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
if (stable && use_wgather) {
host_err = wait_for_concurrent_writes(file);
if (host_err < 0)
- nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
- nfsd_net_id));
+ nfsd_reset_boot_verifier(nn);
}
out_nfserr:
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 05/10] NFSD: De-duplicate net_generic(nf->nf_net, nfsd_net_id)
2022-01-02 17:35 [PATCH 00/10] Write verifier fixes and clean-ups Chuck Lever
` (3 preceding siblings ...)
2022-01-02 17:35 ` [PATCH 04/10] NFSD: De-duplicate net_generic(SVC_NET(rqstp), nfsd_net_id) Chuck Lever
@ 2022-01-02 17:35 ` Chuck Lever
2022-01-02 17:35 ` [PATCH 06/10] nfsd: Add a tracepoint for errors in nfsd4_clone_file_range() Chuck Lever
` (4 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever @ 2022-01-02 17:35 UTC (permalink / raw)
To: linux-nfs; +Cc: trondmy
Since this pointer is used repeatedly, move it to a stack variable.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfsd/vfs.c | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 2e473d2f47e5..c22511decc4c 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1103,6 +1103,7 @@ __be32
nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
loff_t offset, unsigned long count, __be32 *verf)
{
+ struct nfsd_net *nn;
struct nfsd_file *nf;
loff_t end = LLONG_MAX;
__be32 err = nfserr_inval;
@@ -1119,6 +1120,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
if (err)
goto out;
+ nn = net_generic(nf->nf_net, nfsd_net_id);
if (EX_ISSYNC(fhp->fh_export)) {
errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
int err2;
@@ -1126,8 +1128,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
switch (err2) {
case 0:
- nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
- nfsd_net_id));
+ nfsd_copy_boot_verifier(verf, nn);
err2 = filemap_check_wb_err(nf->nf_file->f_mapping,
since);
break;
@@ -1135,13 +1136,11 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = nfserr_notsupp;
break;
default:
- nfsd_reset_boot_verifier(net_generic(nf->nf_net,
- nfsd_net_id));
+ nfsd_reset_boot_verifier(nn);
}
err = nfserrno(err2);
} else
- nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
- nfsd_net_id));
+ nfsd_copy_boot_verifier(verf, nn);
nfsd_file_put(nf);
out:
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 06/10] nfsd: Add a tracepoint for errors in nfsd4_clone_file_range()
2022-01-02 17:35 [PATCH 00/10] Write verifier fixes and clean-ups Chuck Lever
` (4 preceding siblings ...)
2022-01-02 17:35 ` [PATCH 05/10] NFSD: De-duplicate net_generic(nf->nf_net, nfsd_net_id) Chuck Lever
@ 2022-01-02 17:35 ` Chuck Lever
2022-01-02 17:35 ` [PATCH 07/10] NFSD: Write verifier might go backwards Chuck Lever
` (3 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever @ 2022-01-02 17:35 UTC (permalink / raw)
To: linux-nfs; +Cc: trondmy
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Since a clone error commit can cause the boot verifier to change,
we should trace those errors.
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
[ cel: Addressed a checkpatch.pl splat in fs/nfsd/vfs.h ]
---
fs/nfsd/nfs4proc.c | 2 +-
fs/nfsd/trace.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
fs/nfsd/vfs.c | 18 ++++++++++++++++--
fs/nfsd/vfs.h | 3 ++-
4 files changed, 69 insertions(+), 4 deletions(-)
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 56405fc58bfc..43057080d2aa 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1101,7 +1101,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
- status = nfsd4_clone_file_range(src, clone->cl_src_pos,
+ status = nfsd4_clone_file_range(rqstp, src, clone->cl_src_pos,
dst, clone->cl_dst_pos, clone->cl_count,
EX_ISSYNC(cstate->current_fh.fh_export));
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index ebfe2b878574..54a84b5f7e35 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -389,6 +389,56 @@ TRACE_EVENT(nfsd_dirent,
)
)
+DECLARE_EVENT_CLASS(nfsd_copy_err_class,
+ TP_PROTO(struct svc_rqst *rqstp,
+ struct svc_fh *src_fhp,
+ loff_t src_offset,
+ struct svc_fh *dst_fhp,
+ loff_t dst_offset,
+ u64 count,
+ int status),
+ TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, count, status),
+ TP_STRUCT__entry(
+ __field(u32, xid)
+ __field(u32, src_fh_hash)
+ __field(loff_t, src_offset)
+ __field(u32, dst_fh_hash)
+ __field(loff_t, dst_offset)
+ __field(u64, count)
+ __field(int, status)
+ ),
+ TP_fast_assign(
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->src_fh_hash = knfsd_fh_hash(&src_fhp->fh_handle);
+ __entry->src_offset = src_offset;
+ __entry->dst_fh_hash = knfsd_fh_hash(&dst_fhp->fh_handle);
+ __entry->dst_offset = dst_offset;
+ __entry->count = count;
+ __entry->status = status;
+ ),
+ TP_printk("xid=0x%08x src_fh_hash=0x%08x src_offset=%lld "
+ "dst_fh_hash=0x%08x dst_offset=%lld "
+ "count=%llu status=%d",
+ __entry->xid, __entry->src_fh_hash, __entry->src_offset,
+ __entry->dst_fh_hash, __entry->dst_offset,
+ (unsigned long long)__entry->count,
+ __entry->status)
+)
+
+#define DEFINE_NFSD_COPY_ERR_EVENT(name) \
+DEFINE_EVENT(nfsd_copy_err_class, nfsd_##name, \
+ TP_PROTO(struct svc_rqst *rqstp, \
+ struct svc_fh *src_fhp, \
+ loff_t src_offset, \
+ struct svc_fh *dst_fhp, \
+ loff_t dst_offset, \
+ u64 count, \
+ int status), \
+ TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, \
+ count, status))
+
+DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err);
+
#include "state.h"
#include "filecache.h"
#include "vfs.h"
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c22511decc4c..70ea7e0aae07 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -40,6 +40,7 @@
#include "../internal.h"
#include "acl.h"
#include "idmap.h"
+#include "xdr4.h"
#endif /* CONFIG_NFSD_V4 */
#include "nfsd.h"
@@ -517,8 +518,15 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
#endif
-__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
- struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync)
+static struct nfsd4_compound_state *nfsd4_get_cstate(struct svc_rqst *rqstp)
+{
+ return &((struct nfsd4_compoundres *)rqstp->rq_resp)->cstate;
+}
+
+__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
+ struct nfsd_file *nf_src, u64 src_pos,
+ struct nfsd_file *nf_dst, u64 dst_pos,
+ u64 count, bool sync)
{
struct file *src = nf_src->nf_file;
struct file *dst = nf_dst->nf_file;
@@ -545,6 +553,12 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
if (!status)
status = commit_inode_metadata(file_inode(src));
if (status < 0) {
+ trace_nfsd_clone_file_range_err(rqstp,
+ &nfsd4_get_cstate(rqstp)->save_fh,
+ src_pos,
+ &nfsd4_get_cstate(rqstp)->current_fh,
+ dst_pos,
+ count, status);
nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net,
nfsd_net_id));
ret = nfserrno(status);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index b21b76e6b9a8..9f56dcb22ff7 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -57,7 +57,8 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
struct xdr_netobj *);
__be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
struct file *, loff_t, loff_t, int);
-__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
+__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
+ struct nfsd_file *nf_src, u64 src_pos,
struct nfsd_file *nf_dst, u64 dst_pos,
u64 count, bool sync);
#endif /* CONFIG_NFSD_V4 */
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 07/10] NFSD: Write verifier might go backwards
2022-01-02 17:35 [PATCH 00/10] Write verifier fixes and clean-ups Chuck Lever
` (5 preceding siblings ...)
2022-01-02 17:35 ` [PATCH 06/10] nfsd: Add a tracepoint for errors in nfsd4_clone_file_range() Chuck Lever
@ 2022-01-02 17:35 ` Chuck Lever
2022-01-02 17:36 ` [PATCH 08/10] NFSD: Clean up the nfsd_net::nfssvc_boot field Chuck Lever
` (2 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever @ 2022-01-02 17:35 UTC (permalink / raw)
To: linux-nfs; +Cc: trondmy
When vfs_iter_write() starts to fail because a file system is full,
a bunch of writes can fail at once with ENOSPC. These writes
repeatedly invoke nfsd_reset_boot_verifier() in quick succession.
Ensure that the time it grabs doesn't go backwards due to an ntp
adjustment going on at the same time.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfsd/nfssvc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 14c1ef6f8cc7..6eccf6700250 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -363,7 +363,7 @@ void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
{
- ktime_get_real_ts64(&nn->nfssvc_boot);
+ ktime_get_raw_ts64(&nn->nfssvc_boot);
}
void nfsd_reset_boot_verifier(struct nfsd_net *nn)
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 08/10] NFSD: Clean up the nfsd_net::nfssvc_boot field
2022-01-02 17:35 [PATCH 00/10] Write verifier fixes and clean-ups Chuck Lever
` (6 preceding siblings ...)
2022-01-02 17:35 ` [PATCH 07/10] NFSD: Write verifier might go backwards Chuck Lever
@ 2022-01-02 17:36 ` Chuck Lever
2022-01-02 17:36 ` [PATCH 09/10] NFSD: Rename boot verifier functions Chuck Lever
2022-01-02 17:36 ` [PATCH 10/10] NFSD: Trace boot verifier resets Chuck Lever
9 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever @ 2022-01-02 17:36 UTC (permalink / raw)
To: linux-nfs; +Cc: trondmy
There are two boot-time fields in struct nfsd_net: one called
boot_time and one called nfssvc_boot. The latter is used only to
form write verifiers, but its documenting comment declares:
/* Time of server startup */
Since commit 27c438f53e79 ("nfsd: Support the server resetting the
boot verifier"), this field can be reset at any time; it's no
longer tied to server restart. So that comment is stale.
Also, according to pahole, struct timespec64 is 16 bytes long on
x86_64. The nfssvc_boot field is used only to form a write verifier,
which is 8 bytes long.
Let's clarify this situation by manufacturing an 8-byte verifier
in nfs_reset_boot_verifier() and storing only that in struct
nfsd_net.
We're grabbing 128 bits of time, so compress all of those into a
64-bit verifier instead of throwing out the high-order bits.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfsd/netns.h | 7 ++++---
fs/nfsd/nfsctl.c | 3 ++-
fs/nfsd/nfssvc.c | 51 ++++++++++++++++++++++++++++++++++++++-------------
3 files changed, 44 insertions(+), 17 deletions(-)
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 9e8b77d2a3a4..c879d80d5449 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -11,6 +11,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/percpu_counter.h>
+#include <linux/siphash.h>
/* Hash tables for nfs4_clientid state */
#define CLIENT_HASH_BITS 4
@@ -108,9 +109,9 @@ struct nfsd_net {
bool nfsd_net_up;
bool lockd_up;
- /* Time of server startup */
- struct timespec64 nfssvc_boot;
- seqlock_t boot_lock;
+ seqlock_t writeverf_lock;
+ unsigned char writeverf[8];
+ siphash_key_t siphash_key;
/*
* Max number of connections this nfsd container will allow. Defaults
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index a8ad71567fc7..b9f27fbcd768 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1483,7 +1483,8 @@ static __net_init int nfsd_init_net(struct net *net)
nn->clientid_counter = nn->clientid_base + 1;
nn->s2s_cp_cl_id = nn->clientid_counter++;
- seqlock_init(&nn->boot_lock);
+ get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
+ seqlock_init(&nn->writeverf_lock);
return 0;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 6eccf6700250..81d47049588f 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/fs_struct.h>
#include <linux/swap.h>
+#include <linux/siphash.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svcsock.h>
@@ -344,33 +345,57 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn)
return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST);
}
+/**
+ * nfsd_copy_boot_verifier - Atomically copy a write verifier
+ * @verf: buffer in which to receive the verifier cookie
+ * @nn: NFS net namespace
+ *
+ * This function provides a wait-free mechanism for copying the
+ * namespace's boot verifier without tearing it.
+ */
void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
{
int seq = 0;
do {
- read_seqbegin_or_lock(&nn->boot_lock, &seq);
- /*
- * This is opaque to client, so no need to byte-swap. Use
- * __force to keep sparse happy. y2038 time_t overflow is
- * irrelevant in this usage
- */
- verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
- verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
- } while (need_seqretry(&nn->boot_lock, seq));
- done_seqretry(&nn->boot_lock, seq);
+ read_seqbegin_or_lock(&nn->writeverf_lock, &seq);
+ memcpy(verf, nn->writeverf, sizeof(*verf));
+ } while (need_seqretry(&nn->writeverf_lock, seq));
+ done_seqretry(&nn->writeverf_lock, seq);
}
static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
{
- ktime_get_raw_ts64(&nn->nfssvc_boot);
+ struct timespec64 now;
+ u64 verf;
+
+ /*
+ * Because the time value is hashed, y2038 time_t overflow
+ * is irrelevant in this usage.
+ */
+ ktime_get_raw_ts64(&now);
+ verf = siphash_2u64(now.tv_sec, now.tv_nsec, &nn->siphash_key);
+ memcpy(nn->writeverf, &verf, sizeof(nn->writeverf));
}
+/**
+ * nfsd_reset_boot_verifier - Generate a new boot verifier
+ * @nn: NFS net namespace
+ *
+ * This function updates the ->writeverf field of @nn. This field
+ * contains an opaque cookie that, according to Section 18.32.3 of
+ * RFC 8881, "the client can use to determine whether a server has
+ * changed instance state (e.g., server restart) between a call to
+ * WRITE and a subsequent call to either WRITE or COMMIT. This
+ * cookie MUST be unchanged during a single instance of the NFSv4.1
+ * server and MUST be unique between instances of the NFSv4.1
+ * server."
+ */
void nfsd_reset_boot_verifier(struct nfsd_net *nn)
{
- write_seqlock(&nn->boot_lock);
+ write_seqlock(&nn->writeverf_lock);
nfsd_reset_boot_verifier_locked(nn);
- write_sequnlock(&nn->boot_lock);
+ write_sequnlock(&nn->writeverf_lock);
}
static int nfsd_startup_net(struct net *net, const struct cred *cred)
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 09/10] NFSD: Rename boot verifier functions
2022-01-02 17:35 [PATCH 00/10] Write verifier fixes and clean-ups Chuck Lever
` (7 preceding siblings ...)
2022-01-02 17:36 ` [PATCH 08/10] NFSD: Clean up the nfsd_net::nfssvc_boot field Chuck Lever
@ 2022-01-02 17:36 ` Chuck Lever
2022-01-02 17:36 ` [PATCH 10/10] NFSD: Trace boot verifier resets Chuck Lever
9 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever @ 2022-01-02 17:36 UTC (permalink / raw)
To: linux-nfs; +Cc: trondmy
Clean up: These functions handle what the specs call a write
verifier, which in the Linux NFS server implementation is now
divorced from the server's boot instance
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfsd/filecache.c | 2 +-
fs/nfsd/netns.h | 4 ++--
fs/nfsd/nfs4proc.c | 2 +-
fs/nfsd/nfssvc.c | 16 ++++++++--------
fs/nfsd/vfs.c | 16 ++++++++--------
5 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index e2904540e463..8bc807c5fea4 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -243,7 +243,7 @@ nfsd_file_do_unhash(struct nfsd_file *nf)
trace_nfsd_file_unhash(nf);
if (nfsd_file_check_write_error(nf))
- nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
+ nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
hlist_del_rcu(&nf->nf_node);
atomic_long_dec(&nfsd_filecache_count);
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index c879d80d5449..82ef4d77c592 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -197,6 +197,6 @@ extern void nfsd_netns_free_versions(struct nfsd_net *nn);
extern unsigned int nfsd_net_id;
-void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn);
-void nfsd_reset_boot_verifier(struct nfsd_net *nn);
+void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn);
+void nfsd_reset_write_verifier(struct nfsd_net *nn);
#endif /* __NFSD_NETNS_H__ */
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 43057080d2aa..6f53eb90c6b4 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -598,7 +598,7 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data));
- nfsd_copy_boot_verifier(verf, net_generic(net, nfsd_net_id));
+ nfsd_copy_write_verifier(verf, net_generic(net, nfsd_net_id));
}
static __be32
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 81d47049588f..07193595b8e0 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -346,14 +346,14 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn)
}
/**
- * nfsd_copy_boot_verifier - Atomically copy a write verifier
+ * nfsd_copy_write_verifier - Atomically copy a write verifier
* @verf: buffer in which to receive the verifier cookie
* @nn: NFS net namespace
*
* This function provides a wait-free mechanism for copying the
- * namespace's boot verifier without tearing it.
+ * namespace's write verifier without tearing it.
*/
-void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
+void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn)
{
int seq = 0;
@@ -364,7 +364,7 @@ void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
done_seqretry(&nn->writeverf_lock, seq);
}
-static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
+static void nfsd_reset_write_verifier_locked(struct nfsd_net *nn)
{
struct timespec64 now;
u64 verf;
@@ -379,7 +379,7 @@ static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
}
/**
- * nfsd_reset_boot_verifier - Generate a new boot verifier
+ * nfsd_reset_write_verifier - Generate a new write verifier
* @nn: NFS net namespace
*
* This function updates the ->writeverf field of @nn. This field
@@ -391,10 +391,10 @@ static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
* server and MUST be unique between instances of the NFSv4.1
* server."
*/
-void nfsd_reset_boot_verifier(struct nfsd_net *nn)
+void nfsd_reset_write_verifier(struct nfsd_net *nn)
{
write_seqlock(&nn->writeverf_lock);
- nfsd_reset_boot_verifier_locked(nn);
+ nfsd_reset_write_verifier_locked(nn);
write_sequnlock(&nn->writeverf_lock);
}
@@ -683,7 +683,7 @@ int nfsd_create_serv(struct net *net)
register_inet6addr_notifier(&nfsd_inet6addr_notifier);
#endif
}
- nfsd_reset_boot_verifier(nn);
+ nfsd_reset_write_verifier(nn);
return 0;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 70ea7e0aae07..49564457bd3d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -559,8 +559,8 @@ __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
&nfsd4_get_cstate(rqstp)->current_fh,
dst_pos,
count, status);
- nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net,
- nfsd_net_id));
+ nfsd_reset_write_verifier(net_generic(nf_dst->nf_net,
+ nfsd_net_id));
ret = nfserrno(status);
}
}
@@ -1013,10 +1013,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
since = READ_ONCE(file->f_wb_err);
if (verf)
- nfsd_copy_boot_verifier(verf, nn);
+ nfsd_copy_write_verifier(verf, nn);
host_err = vfs_iter_write(file, &iter, &pos, flags);
if (host_err < 0) {
- nfsd_reset_boot_verifier(nn);
+ nfsd_reset_write_verifier(nn);
goto out_nfserr;
}
*cnt = host_err;
@@ -1029,7 +1029,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
if (stable && use_wgather) {
host_err = wait_for_concurrent_writes(file);
if (host_err < 0)
- nfsd_reset_boot_verifier(nn);
+ nfsd_reset_write_verifier(nn);
}
out_nfserr:
@@ -1142,7 +1142,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
switch (err2) {
case 0:
- nfsd_copy_boot_verifier(verf, nn);
+ nfsd_copy_write_verifier(verf, nn);
err2 = filemap_check_wb_err(nf->nf_file->f_mapping,
since);
break;
@@ -1150,11 +1150,11 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = nfserr_notsupp;
break;
default:
- nfsd_reset_boot_verifier(nn);
+ nfsd_reset_write_verifier(nn);
}
err = nfserrno(err2);
} else
- nfsd_copy_boot_verifier(verf, nn);
+ nfsd_copy_write_verifier(verf, nn);
nfsd_file_put(nf);
out:
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 10/10] NFSD: Trace boot verifier resets
2022-01-02 17:35 [PATCH 00/10] Write verifier fixes and clean-ups Chuck Lever
` (8 preceding siblings ...)
2022-01-02 17:36 ` [PATCH 09/10] NFSD: Rename boot verifier functions Chuck Lever
@ 2022-01-02 17:36 ` Chuck Lever
9 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever @ 2022-01-02 17:36 UTC (permalink / raw)
To: linux-nfs; +Cc: trondmy
According to commit bbf2f098838a ("nfsd: Reset the boot verifier on
all write I/O errors"), the Linux NFS server forces all clients to
resend pending unstable writes if any server-side write or commit
operation encounters an error (say, ENOSPC). This is a rare and
quite exceptional event that could require administrative recovery
action, so it should be made trace-able. Example trace event:
nfsd-938 [002] 7174.945558: nfsd_writeverf_reset: boot_time= 61cc920d xid=0xdcd62036 error=-28 new verifier=0x08aecc6142515904
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfsd/trace.h | 28 ++++++++++++++++++++++++++++
fs/nfsd/vfs.c | 13 ++++++++++---
2 files changed, 38 insertions(+), 3 deletions(-)
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 54a84b5f7e35..b6b25f0f67a5 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -564,6 +564,34 @@ DEFINE_EVENT(nfsd_net_class, nfsd_##name, \
DEFINE_NET_EVENT(grace_start);
DEFINE_NET_EVENT(grace_complete);
+TRACE_EVENT(nfsd_writeverf_reset,
+ TP_PROTO(
+ const struct nfsd_net *nn,
+ const struct svc_rqst *rqstp,
+ int error
+ ),
+ TP_ARGS(nn, rqstp, error),
+ TP_STRUCT__entry(
+ __field(unsigned long long, boot_time)
+ __field(u32, xid)
+ __field(int, error)
+ __array(unsigned char, verifier, NFS4_VERIFIER_SIZE)
+ ),
+ TP_fast_assign(
+ __entry->boot_time = nn->boot_time;
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->error = error;
+
+ /* avoid seqlock inside TP_fast_assign */
+ memcpy(__entry->verifier, nn->writeverf,
+ NFS4_VERIFIER_SIZE);
+ ),
+ TP_printk("boot_time=%16llx xid=0x%08x error=%d new verifier=0x%s",
+ __entry->boot_time, __entry->xid, __entry->error,
+ __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE)
+ )
+);
+
TRACE_EVENT(nfsd_clid_cred_mismatch,
TP_PROTO(
const struct nfs4_client *clp,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 49564457bd3d..e4e59e1660e1 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -553,14 +553,17 @@ __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
if (!status)
status = commit_inode_metadata(file_inode(src));
if (status < 0) {
+ struct nfsd_net *nn = net_generic(nf_dst->nf_net,
+ nfsd_net_id);
+
trace_nfsd_clone_file_range_err(rqstp,
&nfsd4_get_cstate(rqstp)->save_fh,
src_pos,
&nfsd4_get_cstate(rqstp)->current_fh,
dst_pos,
count, status);
- nfsd_reset_write_verifier(net_generic(nf_dst->nf_net,
- nfsd_net_id));
+ nfsd_reset_write_verifier(nn);
+ trace_nfsd_writeverf_reset(nn, rqstp, status);
ret = nfserrno(status);
}
}
@@ -1017,6 +1020,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
host_err = vfs_iter_write(file, &iter, &pos, flags);
if (host_err < 0) {
nfsd_reset_write_verifier(nn);
+ trace_nfsd_writeverf_reset(nn, rqstp, host_err);
goto out_nfserr;
}
*cnt = host_err;
@@ -1028,8 +1032,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
if (stable && use_wgather) {
host_err = wait_for_concurrent_writes(file);
- if (host_err < 0)
+ if (host_err < 0) {
nfsd_reset_write_verifier(nn);
+ trace_nfsd_writeverf_reset(nn, rqstp, host_err);
+ }
}
out_nfserr:
@@ -1151,6 +1157,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
break;
default:
nfsd_reset_write_verifier(nn);
+ trace_nfsd_writeverf_reset(nn, rqstp, err2);
}
err = nfserrno(err2);
} else
^ permalink raw reply related [flat|nested] 11+ messages in thread