All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 0/8] Wait for DELEGRETURN before returning NFS4ERR_DELAY
@ 2022-09-08 22:13 Chuck Lever
  2022-09-08 22:13 ` [PATCH v4 1/8] NFSD: Replace dprintk() call site in fh_verify() Chuck Lever
                   ` (8 more replies)
  0 siblings, 9 replies; 11+ messages in thread
From: Chuck Lever @ 2022-09-08 22:13 UTC (permalink / raw)
  To: linux-nfs

This patch series adds logic to the Linux NFS server to make it wait
a few moments for clients to return a delegation before replying
with NFS4ERR_DELAY. There are two main benefits:

- It improves responsiveness when a delegated file is accessed from
  another client, and
- It removes an unnecessary latency if a client has neglected to
  return a delegation before attempting a RENAME or SETATTR

NFS4ERR_DELAY during NFSv4 OPEN is still not handled. However, I'm
comfortable merging the infrastructure in this series and support
for using it in SETATTR, RENAME, and REMOVE now, and then handling
OPEN at a later time.

This series applies against v6.0-rc4.

Changes since v3:
- Also handle JUKEBOX when an NFSv3 operation triggers a CB_RECALL

Changes since v2:
- Wake immediately after server receives DELEGRETURN
- More tracepoint improvements

Changes since RFC:
- Eliminate spurious console messages on the server
- Wait for DELEGRETURN for RENAME operations
- Add CB done tracepoints
- Rework the retry loop

---

Chuck Lever (8):
      NFSD: Replace dprintk() call site in fh_verify()
      NFSD: Trace NFSv4 COMPOUND tags
      NFSD: Add tracepoints to report NFSv4 callback completions
      NFSD: Add a mechanism to wait for a DELEGRETURN
      NFSD: Refactor nfsd_setattr()
      NFSD: Make nfsd4_setattr() wait before returning NFS4ERR_DELAY
      NFSD: Make nfsd4_rename() wait before returning NFS4ERR_DELAY
      NFSD: Make nfsd4_remove() wait before returning NFS4ERR_DELAY


 fs/nfsd/nfs4layouts.c |   2 +-
 fs/nfsd/nfs4proc.c    |   6 +-
 fs/nfsd/nfs4state.c   |  34 +++++++++++
 fs/nfsd/nfsd.h        |   7 +++
 fs/nfsd/nfsfh.c       |   8 +--
 fs/nfsd/trace.h       | 131 ++++++++++++++++++++++++++++++++++++++----
 fs/nfsd/vfs.c         | 100 ++++++++++++++++++++------------
 7 files changed, 233 insertions(+), 55 deletions(-)

--
Chuck Lever


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v4 1/8] NFSD: Replace dprintk() call site in fh_verify()
  2022-09-08 22:13 [PATCH v4 0/8] Wait for DELEGRETURN before returning NFS4ERR_DELAY Chuck Lever
@ 2022-09-08 22:13 ` Chuck Lever
  2022-09-08 22:13 ` [PATCH v4 2/8] NFSD: Trace NFSv4 COMPOUND tags Chuck Lever
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever @ 2022-09-08 22:13 UTC (permalink / raw)
  To: linux-nfs

Record permission errors in the trace log. Note that the new trace
event is conditional, so it will only record non-zero return values
from nfsd_permission().

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfsfh.c |    8 +-------
 fs/nfsd/trace.h |   48 +++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 46 insertions(+), 10 deletions(-)

diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index a5b71526cee0..d73434200df9 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -392,13 +392,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
 skip_pseudoflavor_check:
 	/* Finally, check access permissions. */
 	error = nfsd_permission(rqstp, exp, dentry, access);
-
-	if (error) {
-		dprintk("fh_verify: %pd2 permission failure, "
-			"acc=%x, error=%d\n",
-			dentry,
-			access, ntohl(error));
-	}
+	trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error);
 out:
 	if (error == nfserr_stale)
 		nfsd_stats_fh_stale_inc(exp);
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 9ebd67d461f9..1b9f5753f336 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -195,7 +195,7 @@ TRACE_EVENT(nfsd_fh_verify,
 		__sockaddr(client, rqstp->rq_xprt->xpt_remotelen)
 		__field(u32, xid)
 		__field(u32, fh_hash)
-		__field(void *, inode)
+		__field(const void *, inode)
 		__field(unsigned long, type)
 		__field(unsigned long, access)
 	),
@@ -211,13 +211,55 @@ TRACE_EVENT(nfsd_fh_verify,
 		__entry->type = type;
 		__entry->access = access;
 	),
-	TP_printk("xid=0x%08x fh_hash=0x%08x inode=%p type=%s access=%s",
-		__entry->xid, __entry->fh_hash, __entry->inode,
+	TP_printk("xid=0x%08x fh_hash=0x%08x type=%s access=%s",
+		__entry->xid, __entry->fh_hash,
 		show_fs_file_type(__entry->type),
 		show_nfsd_may_flags(__entry->access)
 	)
 );
 
+TRACE_EVENT_CONDITION(nfsd_fh_verify_err,
+	TP_PROTO(
+		const struct svc_rqst *rqstp,
+		const struct svc_fh *fhp,
+		umode_t type,
+		int access,
+		__be32 error
+	),
+	TP_ARGS(rqstp, fhp, type, access, error),
+	TP_CONDITION(error),
+	TP_STRUCT__entry(
+		__field(unsigned int, netns_ino)
+		__sockaddr(server, rqstp->rq_xprt->xpt_remotelen)
+		__sockaddr(client, rqstp->rq_xprt->xpt_remotelen)
+		__field(u32, xid)
+		__field(u32, fh_hash)
+		__field(const void *, inode)
+		__field(unsigned long, type)
+		__field(unsigned long, access)
+		__field(int, error)
+	),
+	TP_fast_assign(
+		__entry->netns_ino = SVC_NET(rqstp)->ns.inum;
+		__assign_sockaddr(server, &rqstp->rq_xprt->xpt_local,
+		       rqstp->rq_xprt->xpt_locallen);
+		__assign_sockaddr(client, &rqstp->rq_xprt->xpt_remote,
+				  rqstp->rq_xprt->xpt_remotelen);
+		__entry->xid = be32_to_cpu(rqstp->rq_xid);
+		__entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+		__entry->inode = d_inode(fhp->fh_dentry);
+		__entry->type = type;
+		__entry->access = access;
+		__entry->error = be32_to_cpu(error);
+	),
+	TP_printk("xid=0x%08x fh_hash=0x%08x type=%s access=%s error=%d",
+		__entry->xid, __entry->fh_hash,
+		show_fs_file_type(__entry->type),
+		show_nfsd_may_flags(__entry->access),
+		__entry->error
+	)
+);
+
 DECLARE_EVENT_CLASS(nfsd_fh_err_class,
 	TP_PROTO(struct svc_rqst *rqstp,
 		 struct svc_fh	*fhp,



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v4 2/8] NFSD: Trace NFSv4 COMPOUND tags
  2022-09-08 22:13 [PATCH v4 0/8] Wait for DELEGRETURN before returning NFS4ERR_DELAY Chuck Lever
  2022-09-08 22:13 ` [PATCH v4 1/8] NFSD: Replace dprintk() call site in fh_verify() Chuck Lever
@ 2022-09-08 22:13 ` Chuck Lever
  2022-09-08 22:13 ` [PATCH v4 3/8] NFSD: Add tracepoints to report NFSv4 callback completions Chuck Lever
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever @ 2022-09-08 22:13 UTC (permalink / raw)
  To: linux-nfs

The Linux NFSv4 client implementation does not use COMPOUND tags,
but the Solaris and MacOS implementations do, and so does pynfs.
Record these eye-catchers in the server's trace buffer to annotate
client requests while troubleshooting.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4proc.c |    2 +-
 fs/nfsd/trace.h    |   21 ++++++++++++++-------
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a72ab97f77ef..d43beb732987 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2648,7 +2648,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
 
 	rqstp->rq_lease_breaker = (void **)&cstate->clp;
 
-	trace_nfsd_compound(rqstp, args->opcnt);
+	trace_nfsd_compound(rqstp, args->tag, args->taglen, args->opcnt);
 	while (!status && resp->opcnt < args->opcnt) {
 		op = &args->ops[resp->opcnt++];
 
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 1b9f5753f336..0c35a1a844e6 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -84,19 +84,26 @@ DEFINE_NFSD_XDR_ERR_EVENT(cant_encode);
 		{ NFSD_MAY_64BIT_COOKIE,	"64BIT_COOKIE" })
 
 TRACE_EVENT(nfsd_compound,
-	TP_PROTO(const struct svc_rqst *rqst,
-		 u32 args_opcnt),
-	TP_ARGS(rqst, args_opcnt),
+	TP_PROTO(
+		const struct svc_rqst *rqst,
+		const char *tag,
+		u32 taglen,
+		u32 opcnt
+	),
+	TP_ARGS(rqst, tag, taglen, opcnt),
 	TP_STRUCT__entry(
 		__field(u32, xid)
-		__field(u32, args_opcnt)
+		__field(u32, opcnt)
+		__string_len(tag, tag, taglen)
 	),
 	TP_fast_assign(
 		__entry->xid = be32_to_cpu(rqst->rq_xid);
-		__entry->args_opcnt = args_opcnt;
+		__entry->opcnt = opcnt;
+		__assign_str_len(tag, tag, taglen);
 	),
-	TP_printk("xid=0x%08x opcnt=%u",
-		__entry->xid, __entry->args_opcnt)
+	TP_printk("xid=0x%08x opcnt=%u tag=%s",
+		__entry->xid, __entry->opcnt, __get_str(tag)
+	)
 )
 
 TRACE_EVENT(nfsd_compound_status,



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v4 3/8] NFSD: Add tracepoints to report NFSv4 callback completions
  2022-09-08 22:13 [PATCH v4 0/8] Wait for DELEGRETURN before returning NFS4ERR_DELAY Chuck Lever
  2022-09-08 22:13 ` [PATCH v4 1/8] NFSD: Replace dprintk() call site in fh_verify() Chuck Lever
  2022-09-08 22:13 ` [PATCH v4 2/8] NFSD: Trace NFSv4 COMPOUND tags Chuck Lever
@ 2022-09-08 22:13 ` Chuck Lever
  2022-09-08 22:14 ` [PATCH v4 4/8] NFSD: Add a mechanism to wait for a DELEGRETURN Chuck Lever
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever @ 2022-09-08 22:13 UTC (permalink / raw)
  To: linux-nfs

Wireshark has always been lousy about dissecting NFSv4 callbacks,
especially NFSv4.0 backchannel requests. Add tracepoints so we
can surgically capture these events in the trace log.

Tracepoints are time-stamped and ordered so that we can now observe
the timing relationship between a CB_RECALL Reply and the client's
DELEGRETURN Call. Example:

            nfsd-1153  [002]   211.986391: nfsd_cb_recall:       addr=192.168.1.67:45767 client 62ea82e4:fee7492a stateid 00000003:00000001

            nfsd-1153  [002]   212.095634: nfsd_compound:        xid=0x0000002c opcnt=2
            nfsd-1153  [002]   212.095647: nfsd_compound_status: op=1/2 OP_PUTFH status=0
            nfsd-1153  [002]   212.095658: nfsd_file_put:        hash=0xf72 inode=0xffff9291148c7410 ref=3 flags=HASHED|REFERENCED may=READ file=0xffff929103b3ea00
            nfsd-1153  [002]   212.095661: nfsd_compound_status: op=2/2 OP_DELEGRETURN status=0
   kworker/u25:8-148   [002]   212.096713: nfsd_cb_recall_done:  client 62ea82e4:fee7492a stateid 00000003:00000001 status=0

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4layouts.c |    2 +-
 fs/nfsd/nfs4proc.c    |    4 ++++
 fs/nfsd/nfs4state.c   |    4 ++++
 fs/nfsd/trace.h       |   39 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 2c05692a9abf..3564d1c6f610 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -658,7 +658,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
 	ktime_t now, cutoff;
 	const struct nfsd4_layout_ops *ops;
 
-
+	trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task);
 	switch (task->tk_status) {
 	case 0:
 	case -NFS4ERR_DELAY:
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index d43beb732987..2a193e84e422 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1621,6 +1621,10 @@ static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
 static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
 				 struct rpc_task *task)
 {
+	struct nfsd4_cb_offload *cbo =
+		container_of(cb, struct nfsd4_cb_offload, co_cb);
+
+	trace_nfsd_cb_offload_done(&cbo->co_res.cb_stateid, task);
 	return 1;
 }
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c5d199d7e6b4..561f3556b1d2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -357,6 +357,8 @@ nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb)
 static int
 nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
 {
+	trace_nfsd_cb_notify_lock_done(&zero_stateid, task);
+
 	/*
 	 * Since this is just an optimization, we don't try very hard if it
 	 * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and
@@ -4743,6 +4745,8 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
 {
 	struct nfs4_delegation *dp = cb_to_delegation(cb);
 
+	trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task);
+
 	if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID ||
 	    dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID)
 	        return 1;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 0c35a1a844e6..ec8e08315779 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -1448,6 +1448,45 @@ TRACE_EVENT(nfsd_cb_offload,
 		__entry->fh_hash, __entry->count, __entry->status)
 );
 
+DECLARE_EVENT_CLASS(nfsd_cb_done_class,
+	TP_PROTO(
+		const stateid_t *stp,
+		const struct rpc_task *task
+	),
+	TP_ARGS(stp, task),
+	TP_STRUCT__entry(
+		__field(u32, cl_boot)
+		__field(u32, cl_id)
+		__field(u32, si_id)
+		__field(u32, si_generation)
+		__field(int, status)
+	),
+	TP_fast_assign(
+		__entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
+		__entry->cl_id = stp->si_opaque.so_clid.cl_id;
+		__entry->si_id = stp->si_opaque.so_id;
+		__entry->si_generation = stp->si_generation;
+		__entry->status = task->tk_status;
+	),
+	TP_printk("client %08x:%08x stateid %08x:%08x status=%d",
+		__entry->cl_boot, __entry->cl_id, __entry->si_id,
+		__entry->si_generation, __entry->status
+	)
+);
+
+#define DEFINE_NFSD_CB_DONE_EVENT(name)			\
+DEFINE_EVENT(nfsd_cb_done_class, name,			\
+	TP_PROTO(					\
+		const stateid_t *stp,			\
+		const struct rpc_task *task		\
+	),						\
+	TP_ARGS(stp, task))
+
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_done);
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
+
 #endif /* _NFSD_TRACE_H */
 
 #undef TRACE_INCLUDE_PATH



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v4 4/8] NFSD: Add a mechanism to wait for a DELEGRETURN
  2022-09-08 22:13 [PATCH v4 0/8] Wait for DELEGRETURN before returning NFS4ERR_DELAY Chuck Lever
                   ` (2 preceding siblings ...)
  2022-09-08 22:13 ` [PATCH v4 3/8] NFSD: Add tracepoints to report NFSv4 callback completions Chuck Lever
@ 2022-09-08 22:14 ` Chuck Lever
  2022-09-08 22:14 ` [PATCH v4 5/8] NFSD: Refactor nfsd_setattr() Chuck Lever
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever @ 2022-09-08 22:14 UTC (permalink / raw)
  To: linux-nfs

Subsequent patches will use this mechanism to wake up an operation
that is waiting for a client to return a delegation.

The new tracepoint records whether the wait timed out or was
properly awoken by the expected DELEGRETURN:

            nfsd-1155  [002] 83799.493199: nfsd_delegret_wakeup: xid=0x14b7d6ef fh_hash=0xf6826792 (timed out)

Suggested-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4state.c |   30 ++++++++++++++++++++++++++++++
 fs/nfsd/nfsd.h      |    7 +++++++
 fs/nfsd/trace.h     |   23 +++++++++++++++++++++++
 3 files changed, 60 insertions(+)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 561f3556b1d2..54bc70427ce3 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4717,6 +4717,35 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
 	return ret;
 }
 
+static bool nfsd4_deleg_present(const struct inode *inode)
+{
+	struct file_lock_context *ctx = smp_load_acquire(&inode->i_flctx);
+
+	return ctx && !list_empty_careful(&ctx->flc_lease);
+}
+
+/**
+ * nfsd_wait_for_delegreturn - wait for delegations to be returned
+ * @rqstp: the RPC transaction being executed
+ * @inode: in-core inode of the file being waited for
+ *
+ * The timeout prevents deadlock if all nfsd threads happen to be
+ * tied up waiting for returning delegations.
+ *
+ * Return values:
+ *   %true: delegation was returned
+ *   %false: timed out waiting for delegreturn
+ */
+bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode)
+{
+	long __maybe_unused timeo;
+
+	timeo = wait_var_event_timeout(inode, !nfsd4_deleg_present(inode),
+				       NFSD_DELEGRETURN_TIMEOUT);
+	trace_nfsd_delegret_wakeup(rqstp, inode, timeo);
+	return timeo > 0;
+}
+
 static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
 {
 	struct nfs4_delegation *dp = cb_to_delegation(cb);
@@ -6779,6 +6808,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto put_stateid;
 
+	wake_up_var(d_inode(cstate->current_fh.fh_dentry));
 	destroy_delegation(dp);
 put_stateid:
 	nfs4_put_stid(&dp->dl_stid);
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 57a468ed85c3..6ab4ad41ae84 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -164,6 +164,7 @@ char * nfs4_recoverydir(void);
 bool nfsd4_spo_must_allow(struct svc_rqst *rqstp);
 int nfsd4_create_laundry_wq(void);
 void nfsd4_destroy_laundry_wq(void);
+bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode);
 #else
 static inline int nfsd4_init_slabs(void) { return 0; }
 static inline void nfsd4_free_slabs(void) { }
@@ -179,6 +180,11 @@ static inline bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
 }
 static inline int nfsd4_create_laundry_wq(void) { return 0; };
 static inline void nfsd4_destroy_laundry_wq(void) {};
+static inline bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp,
+					      struct inode *inode)
+{
+	return false;
+}
 #endif
 
 /*
@@ -343,6 +349,7 @@ void		nfsd_lockd_shutdown(void);
 #define	NFSD_COURTESY_CLIENT_TIMEOUT	(24 * 60 * 60)	/* seconds */
 #define	NFSD_CLIENT_MAX_TRIM_PER_RUN	128
 #define	NFS4_CLIENTS_PER_GB		1024
+#define NFSD_DELEGRETURN_TIMEOUT	(HZ / 34)	/* 30ms */
 
 /*
  * The following attributes are currently not supported by the NFSv4 server:
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index ec8e08315779..06a96e955bd0 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -538,6 +538,29 @@ DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err);
 #include "filecache.h"
 #include "vfs.h"
 
+TRACE_EVENT(nfsd_delegret_wakeup,
+	TP_PROTO(
+		const struct svc_rqst *rqstp,
+		const struct inode *inode,
+		long timeo
+	),
+	TP_ARGS(rqstp, inode, timeo),
+	TP_STRUCT__entry(
+		__field(u32, xid)
+		__field(const void *, inode)
+		__field(long, timeo)
+	),
+	TP_fast_assign(
+		__entry->xid = be32_to_cpu(rqstp->rq_xid);
+		__entry->inode = inode;
+		__entry->timeo = timeo;
+	),
+	TP_printk("xid=0x%08x inode=%p%s",
+		  __entry->xid, __entry->inode,
+		  __entry->timeo == 0 ? " (timed out)" : ""
+	)
+);
+
 DECLARE_EVENT_CLASS(nfsd_stateid_class,
 	TP_PROTO(stateid_t *stp),
 	TP_ARGS(stp),



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v4 5/8] NFSD: Refactor nfsd_setattr()
  2022-09-08 22:13 [PATCH v4 0/8] Wait for DELEGRETURN before returning NFS4ERR_DELAY Chuck Lever
                   ` (3 preceding siblings ...)
  2022-09-08 22:14 ` [PATCH v4 4/8] NFSD: Add a mechanism to wait for a DELEGRETURN Chuck Lever
@ 2022-09-08 22:14 ` Chuck Lever
  2022-09-08 22:14 ` [PATCH v4 6/8] NFSD: Make nfsd4_setattr() wait before returning NFS4ERR_DELAY Chuck Lever
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever @ 2022-09-08 22:14 UTC (permalink / raw)
  To: linux-nfs

Move code that will be retried (in a subsequent patch) into a helper
function.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/vfs.c |   74 ++++++++++++++++++++++++++++++---------------------------
 1 file changed, 39 insertions(+), 35 deletions(-)

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 9f486b788ed0..02f31d8c727a 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -339,6 +339,44 @@ nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	return nfserrno(get_write_access(inode));
 }
 
+static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
+{
+	int host_err;
+
+	if (iap->ia_valid & ATTR_SIZE) {
+		/*
+		 * RFC5661, Section 18.30.4:
+		 *   Changing the size of a file with SETATTR indirectly
+		 *   changes the time_modify and change attributes.
+		 *
+		 * (and similar for the older RFCs)
+		 */
+		struct iattr size_attr = {
+			.ia_valid	= ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
+			.ia_size	= iap->ia_size,
+		};
+
+		if (iap->ia_size < 0)
+			return -EFBIG;
+
+		host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL);
+		if (host_err)
+			return host_err;
+		iap->ia_valid &= ~ATTR_SIZE;
+
+		/*
+		 * Avoid the additional setattr call below if the only other
+		 * attribute that the client sends is the mtime, as we update
+		 * it as part of the size change above.
+		 */
+		if ((iap->ia_valid & ~ATTR_MTIME) == 0)
+			return 0;
+	}
+
+	iap->ia_valid |= ATTR_CTIME;
+	return notify_change(&init_user_ns, dentry, iap, NULL);
+}
+
 /*
  * Set various file attributes.  After this call fhp needs an fh_put.
  */
@@ -417,41 +455,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	}
 
 	inode_lock(inode);
-	if (size_change) {
-		/*
-		 * RFC5661, Section 18.30.4:
-		 *   Changing the size of a file with SETATTR indirectly
-		 *   changes the time_modify and change attributes.
-		 *
-		 * (and similar for the older RFCs)
-		 */
-		struct iattr size_attr = {
-			.ia_valid	= ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
-			.ia_size	= iap->ia_size,
-		};
-
-		host_err = -EFBIG;
-		if (iap->ia_size < 0)
-			goto out_unlock;
-
-		host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL);
-		if (host_err)
-			goto out_unlock;
-		iap->ia_valid &= ~ATTR_SIZE;
-
-		/*
-		 * Avoid the additional setattr call below if the only other
-		 * attribute that the client sends is the mtime, as we update
-		 * it as part of the size change above.
-		 */
-		if ((iap->ia_valid & ~ATTR_MTIME) == 0)
-			goto out_unlock;
-	}
-
-	iap->ia_valid |= ATTR_CTIME;
-	host_err = notify_change(&init_user_ns, dentry, iap, NULL);
-
-out_unlock:
+	host_err = __nfsd_setattr(dentry, iap);
 	if (attr->na_seclabel && attr->na_seclabel->len)
 		attr->na_labelerr = security_inode_setsecctx(dentry,
 			attr->na_seclabel->data, attr->na_seclabel->len);



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v4 6/8] NFSD: Make nfsd4_setattr() wait before returning NFS4ERR_DELAY
  2022-09-08 22:13 [PATCH v4 0/8] Wait for DELEGRETURN before returning NFS4ERR_DELAY Chuck Lever
                   ` (4 preceding siblings ...)
  2022-09-08 22:14 ` [PATCH v4 5/8] NFSD: Refactor nfsd_setattr() Chuck Lever
@ 2022-09-08 22:14 ` Chuck Lever
  2022-09-12 16:20   ` Jeff Layton
  2022-09-08 22:14 ` [PATCH v4 7/8] NFSD: Make nfsd4_rename() " Chuck Lever
                   ` (2 subsequent siblings)
  8 siblings, 1 reply; 11+ messages in thread
From: Chuck Lever @ 2022-09-08 22:14 UTC (permalink / raw)
  To: linux-nfs

nfsd_setattr() can kick off a CB_RECALL (via
notify_change() -> break_lease()) if a delegation is present. Before
returning NFS4ERR_DELAY, give the client holding that delegation a
chance to return it and then retry the nfsd_setattr() again, once.

Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=354
Tested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/vfs.c |    9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 02f31d8c727a..03a826ccc165 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -394,6 +394,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	int		host_err;
 	bool		get_write_count;
 	bool		size_change = (iap->ia_valid & ATTR_SIZE);
+	int		retries;
 
 	if (iap->ia_valid & ATTR_SIZE) {
 		accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
@@ -455,7 +456,13 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	}
 
 	inode_lock(inode);
-	host_err = __nfsd_setattr(dentry, iap);
+	for (retries = 1;;) {
+		host_err = __nfsd_setattr(dentry, iap);
+		if (host_err != -EAGAIN || !retries--)
+			break;
+		if (!nfsd_wait_for_delegreturn(rqstp, inode))
+			break;
+	}
 	if (attr->na_seclabel && attr->na_seclabel->len)
 		attr->na_labelerr = security_inode_setsecctx(dentry,
 			attr->na_seclabel->data, attr->na_seclabel->len);



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v4 7/8] NFSD: Make nfsd4_rename() wait before returning NFS4ERR_DELAY
  2022-09-08 22:13 [PATCH v4 0/8] Wait for DELEGRETURN before returning NFS4ERR_DELAY Chuck Lever
                   ` (5 preceding siblings ...)
  2022-09-08 22:14 ` [PATCH v4 6/8] NFSD: Make nfsd4_setattr() wait before returning NFS4ERR_DELAY Chuck Lever
@ 2022-09-08 22:14 ` Chuck Lever
  2022-09-08 22:14 ` [PATCH v4 8/8] NFSD: Make nfsd4_remove() " Chuck Lever
  2022-09-12 16:21 ` [PATCH v4 0/8] Wait for DELEGRETURN " Jeff Layton
  8 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever @ 2022-09-08 22:14 UTC (permalink / raw)
  To: linux-nfs

nfsd_rename() can kick off a CB_RECALL (via
vfs_rename() -> leases_conflict()) if a delegation is present.
Before returning NFS4ERR_DELAY, give the client holding that
delegation a chance to return it and then retry the nfsd_rename()
again, once.

This version of the patch handles renaming an existing file,
but does not deal with renaming onto an existing file. That
case will still always trigger an NFS4ERR_DELAY.

Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=354
Tested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/vfs.c |   10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 03a826ccc165..b597cb2af949 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1681,7 +1681,15 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
 			.new_dir	= tdir,
 			.new_dentry	= ndentry,
 		};
-		host_err = vfs_rename(&rd);
+		int retries;
+
+		for (retries = 1;;) {
+			host_err = vfs_rename(&rd);
+			if (host_err != -EAGAIN || !retries--)
+				break;
+			if (!nfsd_wait_for_delegreturn(rqstp, d_inode(odentry)))
+				break;
+		}
 		if (!host_err) {
 			host_err = commit_metadata(tfhp);
 			if (!host_err)



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v4 8/8] NFSD: Make nfsd4_remove() wait before returning NFS4ERR_DELAY
  2022-09-08 22:13 [PATCH v4 0/8] Wait for DELEGRETURN before returning NFS4ERR_DELAY Chuck Lever
                   ` (6 preceding siblings ...)
  2022-09-08 22:14 ` [PATCH v4 7/8] NFSD: Make nfsd4_rename() " Chuck Lever
@ 2022-09-08 22:14 ` Chuck Lever
  2022-09-12 16:21 ` [PATCH v4 0/8] Wait for DELEGRETURN " Jeff Layton
  8 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever @ 2022-09-08 22:14 UTC (permalink / raw)
  To: linux-nfs

nfsd_unlink() can kick off a CB_RECALL (via
vfs_unlink() -> leases_conflict()) if a delegation is present.
Before returning NFS4ERR_DELAY, give the client holding that
delegation a chance to return it and then retry the nfsd_unlink()
again, once.

Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=354
Tested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/vfs.c |   11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b597cb2af949..adb2624eca64 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1773,9 +1773,18 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 
 	fh_fill_pre_attrs(fhp);
 	if (type != S_IFDIR) {
+		int retries;
+
 		if (rdentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK)
 			nfsd_close_cached_files(rdentry);
-		host_err = vfs_unlink(&init_user_ns, dirp, rdentry, NULL);
+
+		for (retries = 1;;) {
+			host_err = vfs_unlink(&init_user_ns, dirp, rdentry, NULL);
+			if (host_err != -EAGAIN || !retries--)
+				break;
+			if (!nfsd_wait_for_delegreturn(rqstp, rinode))
+				break;
+		}
 	} else {
 		host_err = vfs_rmdir(&init_user_ns, dirp, rdentry);
 	}



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v4 6/8] NFSD: Make nfsd4_setattr() wait before returning NFS4ERR_DELAY
  2022-09-08 22:14 ` [PATCH v4 6/8] NFSD: Make nfsd4_setattr() wait before returning NFS4ERR_DELAY Chuck Lever
@ 2022-09-12 16:20   ` Jeff Layton
  0 siblings, 0 replies; 11+ messages in thread
From: Jeff Layton @ 2022-09-12 16:20 UTC (permalink / raw)
  To: Chuck Lever, linux-nfs

On Thu, 2022-09-08 at 18:14 -0400, Chuck Lever wrote:
> nfsd_setattr() can kick off a CB_RECALL (via
> notify_change() -> break_lease()) if a delegation is present. Before
> returning NFS4ERR_DELAY, give the client holding that delegation a
> chance to return it and then retry the nfsd_setattr() again, once.
> 
> Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=354
> Tested-by: Igor Mammedov <imammedo@redhat.com>
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  fs/nfsd/vfs.c |    9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> index 02f31d8c727a..03a826ccc165 100644
> --- a/fs/nfsd/vfs.c
> +++ b/fs/nfsd/vfs.c
> @@ -394,6 +394,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
>  	int		host_err;
>  	bool		get_write_count;
>  	bool		size_change = (iap->ia_valid & ATTR_SIZE);
> +	int		retries;
>  
>  	if (iap->ia_valid & ATTR_SIZE) {
>  		accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
> @@ -455,7 +456,13 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
>  	}
>  
>  	inode_lock(inode);
> -	host_err = __nfsd_setattr(dentry, iap);
> +	for (retries = 1;;) {
> +		host_err = __nfsd_setattr(dentry, iap);
> +		if (host_err != -EAGAIN || !retries--)
> +			break;

Can __nfsd_setattr return -EAGAIN for entirely different reasons? I
try_break_deleg will, but could an underlying filesystem's ->setattr
operation return -EAGAIN for an unrelated reason?

Then again, you're only retrying once, so maybe it's no big deal.

> +		if (!nfsd_wait_for_delegreturn(rqstp, inode))
> +			break;
> +	}
>  	if (attr->na_seclabel && attr->na_seclabel->len)
>  		attr->na_labelerr = security_inode_setsecctx(dentry,
>  			attr->na_seclabel->data, attr->na_seclabel->len);
> 
> 

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v4 0/8] Wait for DELEGRETURN before returning NFS4ERR_DELAY
  2022-09-08 22:13 [PATCH v4 0/8] Wait for DELEGRETURN before returning NFS4ERR_DELAY Chuck Lever
                   ` (7 preceding siblings ...)
  2022-09-08 22:14 ` [PATCH v4 8/8] NFSD: Make nfsd4_remove() " Chuck Lever
@ 2022-09-12 16:21 ` Jeff Layton
  8 siblings, 0 replies; 11+ messages in thread
From: Jeff Layton @ 2022-09-12 16:21 UTC (permalink / raw)
  To: Chuck Lever, linux-nfs

On Thu, 2022-09-08 at 18:13 -0400, Chuck Lever wrote:
> This patch series adds logic to the Linux NFS server to make it wait
> a few moments for clients to return a delegation before replying
> with NFS4ERR_DELAY. There are two main benefits:
> 
> - It improves responsiveness when a delegated file is accessed from
>   another client, and
> - It removes an unnecessary latency if a client has neglected to
>   return a delegation before attempting a RENAME or SETATTR
> 
> NFS4ERR_DELAY during NFSv4 OPEN is still not handled. However, I'm
> comfortable merging the infrastructure in this series and support
> for using it in SETATTR, RENAME, and REMOVE now, and then handling
> OPEN at a later time.
> 
> This series applies against v6.0-rc4.
> 
> Changes since v3:
> - Also handle JUKEBOX when an NFSv3 operation triggers a CB_RECALL
> 
> Changes since v2:
> - Wake immediately after server receives DELEGRETURN
> - More tracepoint improvements
> 
> Changes since RFC:
> - Eliminate spurious console messages on the server
> - Wait for DELEGRETURN for RENAME operations
> - Add CB done tracepoints
> - Rework the retry loop
> 
> ---
> 
> Chuck Lever (8):
>       NFSD: Replace dprintk() call site in fh_verify()
>       NFSD: Trace NFSv4 COMPOUND tags
>       NFSD: Add tracepoints to report NFSv4 callback completions
>       NFSD: Add a mechanism to wait for a DELEGRETURN
>       NFSD: Refactor nfsd_setattr()
>       NFSD: Make nfsd4_setattr() wait before returning NFS4ERR_DELAY
>       NFSD: Make nfsd4_rename() wait before returning NFS4ERR_DELAY
>       NFSD: Make nfsd4_remove() wait before returning NFS4ERR_DELAY
> 
> 
>  fs/nfsd/nfs4layouts.c |   2 +-
>  fs/nfsd/nfs4proc.c    |   6 +-
>  fs/nfsd/nfs4state.c   |  34 +++++++++++
>  fs/nfsd/nfsd.h        |   7 +++
>  fs/nfsd/nfsfh.c       |   8 +--
>  fs/nfsd/trace.h       | 131 ++++++++++++++++++++++++++++++++++++++----
>  fs/nfsd/vfs.c         | 100 ++++++++++++++++++++------------
>  7 files changed, 233 insertions(+), 55 deletions(-)
> 
> --
> Chuck Lever
> 

Nice work, Chuck. These all look reasonable to me.

Reviewed-by: Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2022-09-12 16:22 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-08 22:13 [PATCH v4 0/8] Wait for DELEGRETURN before returning NFS4ERR_DELAY Chuck Lever
2022-09-08 22:13 ` [PATCH v4 1/8] NFSD: Replace dprintk() call site in fh_verify() Chuck Lever
2022-09-08 22:13 ` [PATCH v4 2/8] NFSD: Trace NFSv4 COMPOUND tags Chuck Lever
2022-09-08 22:13 ` [PATCH v4 3/8] NFSD: Add tracepoints to report NFSv4 callback completions Chuck Lever
2022-09-08 22:14 ` [PATCH v4 4/8] NFSD: Add a mechanism to wait for a DELEGRETURN Chuck Lever
2022-09-08 22:14 ` [PATCH v4 5/8] NFSD: Refactor nfsd_setattr() Chuck Lever
2022-09-08 22:14 ` [PATCH v4 6/8] NFSD: Make nfsd4_setattr() wait before returning NFS4ERR_DELAY Chuck Lever
2022-09-12 16:20   ` Jeff Layton
2022-09-08 22:14 ` [PATCH v4 7/8] NFSD: Make nfsd4_rename() " Chuck Lever
2022-09-08 22:14 ` [PATCH v4 8/8] NFSD: Make nfsd4_remove() " Chuck Lever
2022-09-12 16:21 ` [PATCH v4 0/8] Wait for DELEGRETURN " Jeff Layton

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.