Linux-NFS Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH v1 0/4] NFS/RPC patches for v5.6
@ 2019-12-23 15:28 Chuck Lever
  2019-12-23 15:28 ` [PATCH v1 1/4] SUNRPC: Capture signalled RPC tasks Chuck Lever
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Chuck Lever @ 2019-12-23 15:28 UTC (permalink / raw)
  To: anna.schumaker, trondmy; +Cc: linux-nfs

Hi -

This series contains three patches that add diagnostic trace points
to the NFS client and RPC client implementations, and one RPC patch
that is a prerequisite for overhauling the RPC/RDMA connection logic.

Would you consider these for v5.6 please?

---

Chuck Lever (4):
      SUNRPC: Capture signalled RPC tasks
      NFS: Introduce trace events triggered by page writeback errors
      NFS4: Report callback authentication errors
      SUNRPC: call_connect_status should handle -EPROTO


 fs/nfs/callback_xdr.c         |   11 +++++++---
 fs/nfs/nfs4trace.h            |   35 ++++++++++++++++++++++++++++++++
 fs/nfs/nfstrace.h             |   45 +++++++++++++++++++++++++++++++++++++++++
 fs/nfs/write.c                |    3 +++
 include/trace/events/sunrpc.h |    1 +
 net/sunrpc/clnt.c             |    1 +
 net/sunrpc/sched.c            |    4 +++-
 7 files changed, 96 insertions(+), 4 deletions(-)

--
Chuck Lever

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v1 1/4] SUNRPC: Capture signalled RPC tasks
  2019-12-23 15:28 [PATCH v1 0/4] NFS/RPC patches for v5.6 Chuck Lever
@ 2019-12-23 15:28 ` Chuck Lever
  2019-12-23 15:28 ` [PATCH v1 2/4] NFS: Introduce trace events triggered by page writeback errors Chuck Lever
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Chuck Lever @ 2019-12-23 15:28 UTC (permalink / raw)
  To: anna.schumaker, trondmy; +Cc: linux-nfs

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/trace/events/sunrpc.h |    1 +
 net/sunrpc/sched.c            |    4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 8c73ffb5f7fd..ee993575d2fa 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -185,6 +185,7 @@
 DEFINE_RPC_RUNNING_EVENT(begin);
 DEFINE_RPC_RUNNING_EVENT(run_action);
 DEFINE_RPC_RUNNING_EVENT(complete);
+DEFINE_RPC_RUNNING_EVENT(signalled);
 DEFINE_RPC_RUNNING_EVENT(end);
 
 DECLARE_EVENT_CLASS(rpc_task_queued,
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 9c79548c6847..55e900255b0c 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -846,6 +846,8 @@ void rpc_signal_task(struct rpc_task *task)
 
 	if (!RPC_IS_ACTIVATED(task))
 		return;
+
+	trace_rpc_task_signalled(task, task->tk_action);
 	set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
 	smp_mb__after_atomic();
 	queue = READ_ONCE(task->tk_waitqueue);
@@ -949,7 +951,7 @@ static void __rpc_execute(struct rpc_task *task)
 			 * clean up after sleeping on some queue, we don't
 			 * break the loop here, but go around once more.
 			 */
-			dprintk("RPC: %5u got signal\n", task->tk_pid);
+			trace_rpc_task_signalled(task, task->tk_action);
 			set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
 			task->tk_rpc_status = -ERESTARTSYS;
 			rpc_exit(task, -ERESTARTSYS);


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v1 2/4] NFS: Introduce trace events triggered by page writeback errors
  2019-12-23 15:28 [PATCH v1 0/4] NFS/RPC patches for v5.6 Chuck Lever
  2019-12-23 15:28 ` [PATCH v1 1/4] SUNRPC: Capture signalled RPC tasks Chuck Lever
@ 2019-12-23 15:28 ` Chuck Lever
  2019-12-23 15:28 ` [PATCH v1 3/4] NFS4: Report callback authentication errors Chuck Lever
  2019-12-23 15:28 ` [PATCH v1 4/4] SUNRPC: call_connect_status should handle -EPROTO Chuck Lever
  3 siblings, 0 replies; 5+ messages in thread
From: Chuck Lever @ 2019-12-23 15:28 UTC (permalink / raw)
  To: anna.schumaker, trondmy; +Cc: linux-nfs

Try to capture the reason for the writeback path tagging an error on
a page.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/nfstrace.h |   45 +++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/write.c    |    3 +++
 2 files changed, 48 insertions(+)

diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index f64a33d2a1d1..4d6eb1703943 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -989,6 +989,51 @@
 		)
 );
 
+DECLARE_EVENT_CLASS(nfs_page_error_class,
+		TP_PROTO(
+			const struct nfs_page *req,
+			int error
+		),
+
+		TP_ARGS(req, error),
+
+		TP_STRUCT__entry(
+			__field(const void *, req)
+			__field(pgoff_t, index)
+			__field(unsigned int, offset)
+			__field(unsigned int, pgbase)
+			__field(unsigned int, bytes)
+			__field(int, error)
+		),
+
+		TP_fast_assign(
+			__entry->req = req;
+			__entry->index = req->wb_index;
+			__entry->offset = req->wb_offset;
+			__entry->pgbase = req->wb_pgbase;
+			__entry->bytes = req->wb_bytes;
+			__entry->error = error;
+		),
+
+		TP_printk(
+			"req=%p index=%lu offset=%u pgbase=%u bytes=%u error=%d",
+			__entry->req, __entry->index, __entry->offset,
+			__entry->pgbase, __entry->bytes, __entry->error
+		)
+);
+
+#define DEFINE_NFS_PAGEERR_EVENT(name) \
+	DEFINE_EVENT(nfs_page_error_class, name, \
+			TP_PROTO( \
+				const struct nfs_page *req, \
+				int error \
+			), \
+			TP_ARGS(req, error))
+
+DEFINE_NFS_PAGEERR_EVENT(nfs_write_error);
+DEFINE_NFS_PAGEERR_EVENT(nfs_comp_error);
+DEFINE_NFS_PAGEERR_EVENT(nfs_commit_error);
+
 TRACE_EVENT(nfs_initiate_commit,
 		TP_PROTO(
 			const struct nfs_commit_data *data
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 52cab65f91cf..21787711e352 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -593,6 +593,7 @@ static void nfs_end_page_writeback(struct nfs_page *req)
 static void nfs_write_error(struct nfs_page *req, int error)
 {
 	nfs_set_pageerror(page_file_mapping(req->wb_page));
+	trace_nfs_write_error(req, error);
 	nfs_mapping_set_error(req->wb_page, error);
 	nfs_inode_remove_request(req);
 	nfs_end_page_writeback(req);
@@ -999,6 +1000,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
 		    (hdr->good_bytes < bytes)) {
 			nfs_set_pageerror(page_file_mapping(req->wb_page));
+			trace_nfs_comp_error(req, hdr->error);
 			nfs_mapping_set_error(req->wb_page, hdr->error);
 			goto remove_req;
 		}
@@ -1847,6 +1849,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
 			(long long)req_offset(req));
 		if (status < 0) {
 			if (req->wb_page) {
+				trace_nfs_commit_error(req, status);
 				nfs_mapping_set_error(req->wb_page, status);
 				nfs_inode_remove_request(req);
 			}


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v1 3/4] NFS4: Report callback authentication errors
  2019-12-23 15:28 [PATCH v1 0/4] NFS/RPC patches for v5.6 Chuck Lever
  2019-12-23 15:28 ` [PATCH v1 1/4] SUNRPC: Capture signalled RPC tasks Chuck Lever
  2019-12-23 15:28 ` [PATCH v1 2/4] NFS: Introduce trace events triggered by page writeback errors Chuck Lever
@ 2019-12-23 15:28 ` Chuck Lever
  2019-12-23 15:28 ` [PATCH v1 4/4] SUNRPC: call_connect_status should handle -EPROTO Chuck Lever
  3 siblings, 0 replies; 5+ messages in thread
From: Chuck Lever @ 2019-12-23 15:28 UTC (permalink / raw)
  To: anna.schumaker, trondmy; +Cc: linux-nfs

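Callback authentication failures seem to be a somewhat common issue
with Kerberos NFSv4.0 set-ups. Add trace points that report when an
NFSv4.0 callback arrives for an unknown client or fails the GSS
callback principal check.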

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/callback_xdr.c |   11 ++++++++---
 fs/nfs/nfs4trace.h    |   35 +++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 03a20f5716c7..79ff172eb1c8 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -18,6 +18,7 @@
 #include "callback.h"
 #include "internal.h"
 #include "nfs4session.h"
+#include "nfs4trace.h"
 
 #define CB_OP_TAGLEN_MAXSZ		(512)
 #define CB_OP_HDR_RES_MAXSZ		(2 * 4) // opcode, status
@@ -946,9 +947,13 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
 
 	if (hdr_arg.minorversion == 0) {
 		cps.clp = nfs4_find_client_ident(SVC_NET(rqstp), hdr_arg.cb_ident);
-		if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) {
-			if (cps.clp)
-				nfs_put_client(cps.clp);
+		if (!cps.clp) {
+			trace_nfs_cb_no_clp(rqstp->rq_xid, hdr_arg.cb_ident);
+			goto out_invalidcred;
+		}
+		if (!check_gss_callback_principal(cps.clp, rqstp)) {
+			trace_nfs_cb_badprinc(rqstp->rq_xid, hdr_arg.cb_ident);
+			nfs_put_client(cps.clp);
 			goto out_invalidcred;
 		}
 	}
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index e60b6fbd5ada..e3586c16ef59 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -691,6 +691,41 @@
 		)
 );
 
+DECLARE_EVENT_CLASS(nfs4_cb_error_class,
+		TP_PROTO(
+			__be32 xid,
+			u32 cb_ident
+		),
+
+		TP_ARGS(xid, cb_ident),
+
+		TP_STRUCT__entry(
+			__field(u32, xid)
+			__field(u32, cbident)
+		),
+
+		TP_fast_assign(
+			__entry->xid = be32_to_cpu(xid);
+			__entry->cbident = cb_ident;
+		),
+
+		TP_printk(
+			"xid=0x%08x cb_ident=0x%08x",
+			__entry->xid, __entry->cbident
+		)
+);
+
+#define DEFINE_CB_ERROR_EVENT(name) \
+	DEFINE_EVENT(nfs4_cb_error_class, nfs_cb_##name, \
+			TP_PROTO( \
+				__be32 xid, \
+				u32 cb_ident \
+			), \
+			TP_ARGS(xid, cb_ident))
+
+DEFINE_CB_ERROR_EVENT(no_clp);
+DEFINE_CB_ERROR_EVENT(badprinc);
+
 DECLARE_EVENT_CLASS(nfs4_open_event,
 		TP_PROTO(
 			const struct nfs_open_context *ctx,


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v1 4/4] SUNRPC: call_connect_status should handle -EPROTO
  2019-12-23 15:28 [PATCH v1 0/4] NFS/RPC patches for v5.6 Chuck Lever
                   ` (2 preceding siblings ...)
  2019-12-23 15:28 ` [PATCH v1 3/4] NFS4: Report callback authentication errors Chuck Lever
@ 2019-12-23 15:28 ` Chuck Lever
  3 siblings, 0 replies; 5+ messages in thread
From: Chuck Lever @ 2019-12-23 15:28 UTC (permalink / raw)
  To: anna.schumaker, trondmy; +Cc: linux-nfs

The xprtrdma connect logic can return -EPROTO if the underlying
device or network path does not support RDMA. This can happen
after a device removal/insertion.

- When SOFTCONN is set, EPROTO is a permanent error.

- When SOFTCONN is not set, EPROTO is treated as a temporary error.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/clnt.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index a3379765605d..7324b21f923e 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2130,6 +2130,7 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
 	case -ENETUNREACH:
 	case -EHOSTUNREACH:
 	case -EPIPE:
+	case -EPROTO:
 		xprt_conditional_disconnect(task->tk_rqstp->rq_xprt,
 					    task->tk_rqstp->rq_connect_cookie);
 		if (RPC_IS_SOFTCONN(task))


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, back to index

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-23 15:28 [PATCH v1 0/4] NFS/RPC patches for v5.6 Chuck Lever
2019-12-23 15:28 ` [PATCH v1 1/4] SUNRPC: Capture signalled RPC tasks Chuck Lever
2019-12-23 15:28 ` [PATCH v1 2/4] NFS: Introduce trace events triggered by page writeback errors Chuck Lever
2019-12-23 15:28 ` [PATCH v1 3/4] NFS4: Report callback authentication errors Chuck Lever
2019-12-23 15:28 ` [PATCH v1 4/4] SUNRPC: call_connect_status should handle -EPROTO Chuck Lever

Linux-NFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-nfs/0 linux-nfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-nfs linux-nfs/ https://lore.kernel.org/linux-nfs \
		linux-nfs@vger.kernel.org
	public-inbox-index linux-nfs

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-nfs


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git