linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH RFC 0/2] NFSv4 state recovery observability
@ 2019-11-05 16:04 Chuck Lever
  2019-11-05 16:04 ` [PATCH RFC 1/2] NFS4: Trace state recovery operation Chuck Lever
  2019-11-05 16:04 ` [PATCH RFC 2/2] NFS4: Trace lock reclaims Chuck Lever
  0 siblings, 2 replies; 3+ messages in thread
From: Chuck Lever @ 2019-11-05 16:04 UTC (permalink / raw)
  To: linux-nfs

Add some trace points to the state manager to help track down common
in-the-field failures. We see "Lock reclaim failed!" a lot during
testing, for example, and want to enable our Q/A staff to capture
the failure completely and quickly.

---

Chuck Lever (2):
      NFS4: Trace state recovery operation
      NFS4: Trace lock reclaims


 fs/nfs/nfs4_fs.h   |    2 -
 fs/nfs/nfs4state.c |    4 +
 fs/nfs/nfs4trace.h |  171 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 175 insertions(+), 2 deletions(-)

--
Chuck Lever

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH RFC 1/2] NFS4: Trace state recovery operation
  2019-11-05 16:04 [PATCH RFC 0/2] NFSv4 state recovery observability Chuck Lever
@ 2019-11-05 16:04 ` Chuck Lever
  2019-11-05 16:04 ` [PATCH RFC 2/2] NFS4: Trace lock reclaims Chuck Lever
  1 sibling, 0 replies; 3+ messages in thread
From: Chuck Lever @ 2019-11-05 16:04 UTC (permalink / raw)
  To: linux-nfs

Add a trace point in the main state manager loop to observe state
recovery operation. Help track down state recovery bugs.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/nfs4state.c |    3 ++
 fs/nfs/nfs4trace.h |   93 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 96 insertions(+)

diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 0c6d53dc3672..b69c33c3600c 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -60,6 +60,7 @@
 #include "nfs4session.h"
 #include "pnfs.h"
 #include "netns.h"
+#include "nfs4trace.h"
 
 #define NFSDBG_FACILITY		NFSDBG_STATE
 
@@ -2508,6 +2509,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
 
 	/* Ensure exclusive access to NFSv4 state */
 	do {
+		trace_nfs4_state_mgr(clp);
 		clear_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
 		if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
 			section = "purge state";
@@ -2621,6 +2623,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
 out_error:
 	if (strlen(section))
 		section_sep = ": ";
+	trace_nfs4_state_mgr_failed(clp, section, status);
 	pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s"
 			" with error %d\n", section_sep, section,
 			clp->cl_hostname, -status);
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index b86f3f567bd0..a66af56c7eef 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -562,6 +562,99 @@
 		)
 );
 
+TRACE_DEFINE_ENUM(NFS4CLNT_MANAGER_RUNNING);
+TRACE_DEFINE_ENUM(NFS4CLNT_CHECK_LEASE);
+TRACE_DEFINE_ENUM(NFS4CLNT_LEASE_EXPIRED);
+TRACE_DEFINE_ENUM(NFS4CLNT_RECLAIM_REBOOT);
+TRACE_DEFINE_ENUM(NFS4CLNT_RECLAIM_NOGRACE);
+TRACE_DEFINE_ENUM(NFS4CLNT_DELEGRETURN);
+TRACE_DEFINE_ENUM(NFS4CLNT_SESSION_RESET);
+TRACE_DEFINE_ENUM(NFS4CLNT_LEASE_CONFIRM);
+TRACE_DEFINE_ENUM(NFS4CLNT_SERVER_SCOPE_MISMATCH);
+TRACE_DEFINE_ENUM(NFS4CLNT_PURGE_STATE);
+TRACE_DEFINE_ENUM(NFS4CLNT_BIND_CONN_TO_SESSION);
+TRACE_DEFINE_ENUM(NFS4CLNT_MOVED);
+TRACE_DEFINE_ENUM(NFS4CLNT_LEASE_MOVED);
+TRACE_DEFINE_ENUM(NFS4CLNT_DELEGATION_EXPIRED);
+TRACE_DEFINE_ENUM(NFS4CLNT_RUN_MANAGER);
+TRACE_DEFINE_ENUM(NFS4CLNT_DELEGRETURN_RUNNING);
+
+#define show_nfs4_clp_state(state) \
+	__print_flags(state, "|", \
+		{ NFS4CLNT_MANAGER_RUNNING,	"MANAGER_RUNNING" }, \
+		{ NFS4CLNT_CHECK_LEASE,		"CHECK_LEASE" }, \
+		{ NFS4CLNT_LEASE_EXPIRED,	"LEASE_EXPIRED" }, \
+		{ NFS4CLNT_RECLAIM_REBOOT,	"RECLAIM_REBOOT" }, \
+		{ NFS4CLNT_RECLAIM_NOGRACE,	"RECLAIM_NOGRACE" }, \
+		{ NFS4CLNT_DELEGRETURN,		"DELEGRETURN" }, \
+		{ NFS4CLNT_SESSION_RESET,	"SESSION_RESET" }, \
+		{ NFS4CLNT_LEASE_CONFIRM,	"LEASE_CONFIRM" }, \
+		{ NFS4CLNT_SERVER_SCOPE_MISMATCH, \
+						"SERVER_SCOPE_MISMATCH" }, \
+		{ NFS4CLNT_PURGE_STATE,		"PURGE_STATE" }, \
+		{ NFS4CLNT_BIND_CONN_TO_SESSION, \
+						"BIND_CONN_TO_SESSION" }, \
+		{ NFS4CLNT_MOVED,		"MOVED" }, \
+		{ NFS4CLNT_LEASE_MOVED,		"LEASE_MOVED" }, \
+		{ NFS4CLNT_DELEGATION_EXPIRED,	"DELEGATION_EXPIRED" }, \
+		{ NFS4CLNT_RUN_MANAGER,		"RUN_MANAGER" }, \
+		{ NFS4CLNT_DELEGRETURN_RUNNING,	"DELEGRETURN_RUNNING" })
+
+TRACE_EVENT(nfs4_state_mgr,
+		TP_PROTO(
+			const struct nfs_client *clp
+		),
+
+		TP_ARGS(clp),
+
+		TP_STRUCT__entry(
+			__field(unsigned long, state)
+			__string(hostname, clp->cl_hostname)
+		),
+
+		TP_fast_assign(
+			__entry->state = clp->cl_state;
+			__assign_str(hostname, clp->cl_hostname)
+		),
+
+		TP_printk(
+			"hostname=%s clp state=%s", __get_str(hostname),
+			show_nfs4_clp_state(__entry->state)
+		)
+)
+
+TRACE_EVENT(nfs4_state_mgr_failed,
+		TP_PROTO(
+			const struct nfs_client *clp,
+			const char *section,
+			int status
+		),
+
+		TP_ARGS(clp, section, status),
+
+		TP_STRUCT__entry(
+			__field(unsigned long, error)
+			__field(unsigned long, state)
+			__string(hostname, clp->cl_hostname)
+			__string(section, section)
+		),
+
+		TP_fast_assign(
+			__entry->error = status;
+			__entry->state = clp->cl_state;
+			__assign_str(hostname, clp->cl_hostname);
+			__assign_str(section, section);
+		),
+
+		TP_printk(
+			"hostname=%s clp state=%s error=%ld (%s) section=%s",
+			__get_str(hostname),
+			show_nfs4_clp_state(__entry->state), -__entry->error,
+			show_nfsv4_errors(__entry->error), __get_str(section)
+
+		)
+)
+
 TRACE_EVENT(nfs4_xdr_status,
 		TP_PROTO(
 			const struct xdr_stream *xdr,


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH RFC 2/2] NFS4: Trace lock reclaims
  2019-11-05 16:04 [PATCH RFC 0/2] NFSv4 state recovery observability Chuck Lever
  2019-11-05 16:04 ` [PATCH RFC 1/2] NFS4: Trace state recovery operation Chuck Lever
@ 2019-11-05 16:04 ` Chuck Lever
  1 sibling, 0 replies; 3+ messages in thread
From: Chuck Lever @ 2019-11-05 16:04 UTC (permalink / raw)
  To: linux-nfs

One of the most frustrating messages our sustaining team sees is
the "Lock reclaim failed!" message. Add some observability in the
client's lock reclaim logic so we can capture better data the
first time a problem occurs.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/nfs4_fs.h   |    2 -
 fs/nfs/nfs4state.c |    1 +
 fs/nfs/nfs4trace.h |   78 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 16b2e5cc3e94..935bf335a3c2 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -166,9 +166,7 @@ enum {
 	NFS_STATE_RECOVERY_FAILED,	/* OPEN stateid state recovery failed */
 	NFS_STATE_MAY_NOTIFY_LOCK,	/* server may CB_NOTIFY_LOCK */
 	NFS_STATE_CHANGE_WAIT,		/* A state changing operation is outstanding */
-#ifdef CONFIG_NFS_V4_2
 	NFS_CLNT_DST_SSC_COPY_STATE,    /* dst server open state on client*/
-#endif /* CONFIG_NFS_V4_2 */
 };
 
 struct nfs4_state {
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index b69c33c3600c..e72fbc842025 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1594,6 +1594,7 @@ static int __nfs4_reclaim_open_state(struct nfs4_state_owner *sp, struct nfs4_st
 	if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) {
 		spin_lock(&state->state_lock);
 		list_for_each_entry(lock, &state->lock_states, ls_locks) {
+			trace_nfs4_state_lock_reclaim(state, lock);
 			if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
 				pr_warn_ratelimited("NFS: %s: Lock reclaim failed!\n", __func__);
 		}
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index a66af56c7eef..046ebc24ac9f 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -1057,6 +1057,84 @@
 		)
 );
 
+TRACE_DEFINE_ENUM(LK_STATE_IN_USE);
+TRACE_DEFINE_ENUM(NFS_DELEGATED_STATE);
+TRACE_DEFINE_ENUM(NFS_OPEN_STATE);
+TRACE_DEFINE_ENUM(NFS_O_RDONLY_STATE);
+TRACE_DEFINE_ENUM(NFS_O_WRONLY_STATE);
+TRACE_DEFINE_ENUM(NFS_O_RDWR_STATE);
+TRACE_DEFINE_ENUM(NFS_STATE_RECLAIM_REBOOT);
+TRACE_DEFINE_ENUM(NFS_STATE_RECLAIM_NOGRACE);
+TRACE_DEFINE_ENUM(NFS_STATE_POSIX_LOCKS);
+TRACE_DEFINE_ENUM(NFS_STATE_RECOVERY_FAILED);
+TRACE_DEFINE_ENUM(NFS_STATE_MAY_NOTIFY_LOCK);
+TRACE_DEFINE_ENUM(NFS_STATE_CHANGE_WAIT);
+TRACE_DEFINE_ENUM(NFS_CLNT_DST_SSC_COPY_STATE);
+
+#define show_nfs4_state_flags(flags) \
+	__print_flags(flags, "|", \
+		{ LK_STATE_IN_USE,		"IN_USE" }, \
+		{ NFS_DELEGATED_STATE,		"DELEGATED" }, \
+		{ NFS_OPEN_STATE,		"OPEN" }, \
+		{ NFS_O_RDONLY_STATE,		"O_RDONLY" }, \
+		{ NFS_O_WRONLY_STATE,		"O_WRONLY" }, \
+		{ NFS_O_RDWR_STATE,		"O_RDWR" }, \
+		{ NFS_STATE_RECLAIM_REBOOT,	"RECLAIM_REBOOT" }, \
+		{ NFS_STATE_RECLAIM_NOGRACE,	"RECLAIM_NOGRACE" }, \
+		{ NFS_STATE_POSIX_LOCKS,	"POSIX_LOCKS" }, \
+		{ NFS_STATE_RECOVERY_FAILED,	"RECOVERY_FAILED" }, \
+		{ NFS_STATE_MAY_NOTIFY_LOCK,	"MAY_NOTIFY_LOCK" }, \
+		{ NFS_STATE_CHANGE_WAIT,	"CHANGE_WAIT" }, \
+		{ NFS_CLNT_DST_SSC_COPY_STATE,	"CLNT_DST_SSC_COPY" })
+
+#define show_nfs4_lock_flags(flags) \
+	__print_flags(flags, "|", \
+		{ BIT(NFS_LOCK_INITIALIZED),	"INITIALIZED" }, \
+		{ BIT(NFS_LOCK_LOST),		"LOST" })
+
+TRACE_EVENT(nfs4_state_lock_reclaim,
+		TP_PROTO(
+			const struct nfs4_state *state,
+			const struct nfs4_lock_state *lock
+		),
+
+		TP_ARGS(state, lock),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(u64, fileid)
+			__field(unsigned long, state_flags)
+			__field(unsigned long, lock_flags)
+			__field(int, stateid_seq)
+			__field(u32, stateid_hash)
+		),
+
+		TP_fast_assign(
+			const struct inode *inode = state->inode;
+
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->fileid = NFS_FILEID(inode);
+			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+			__entry->state_flags = state->flags;
+			__entry->lock_flags = lock->ls_flags;
+			__entry->stateid_seq =
+				be32_to_cpu(state->stateid.seqid);
+			__entry->stateid_hash =
+				nfs_stateid_hash(&state->stateid);
+		),
+
+		TP_printk(
+			"fileid=%02x:%02x:%llu fhandle=0x%08x "
+			"stateid=%d:0x%08x state_flags=%s lock_flags=%s",
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid, __entry->fhandle,
+			__entry->stateid_seq, __entry->stateid_hash,
+			show_nfs4_state_flags(__entry->state_flags),
+			show_nfs4_lock_flags(__entry->lock_flags)
+		)
+)
+
 DECLARE_EVENT_CLASS(nfs4_set_delegation_event,
 		TP_PROTO(
 			const struct inode *inode,


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-11-05 16:04 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-05 16:04 [PATCH RFC 0/2] NFSv4 state recovery observability Chuck Lever
2019-11-05 16:04 ` [PATCH RFC 1/2] NFS4: Trace state recovery operation Chuck Lever
2019-11-05 16:04 ` [PATCH RFC 2/2] NFS4: Trace lock reclaims Chuck Lever

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).