Linux-NFS Archive on lore.kernel.org
 help / color / Atom feed
From: Chuck Lever <chuck.lever@oracle.com>
To: anna.schumaker@netapp.com
Cc: linux-rdma@vger.kernel.org, linux-nfs@vger.kernel.org
Subject: [PATCH v1 07/23] SUNRPC: Add trace event that reports reply page vector alignment
Date: Mon, 11 Feb 2019 11:24:16 -0500
Message-ID: <20190211162416.2817.25507.stgit@manet.1015granger.net> (raw)
In-Reply-To: <20190211161920.2817.20881.stgit@manet.1015granger.net>

We don't want READ payloads that land partly in the head iovec and
partly in the page buffer, because this requires pull-up, which can
be expensive.

The NFS/RPC client tries hard to predict the size of the head iovec
so that the incoming READ data payload lands only in the page
vector, but it doesn't always get it right. To help diagnose such
problems, add a trace point in the logic that decodes READ-like
operations that reports whether pull-up is being done.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/trace/events/sunrpc.h |   59 +++++++++++++++++++++++++++++++++++++++++
 net/sunrpc/xdr.c              |   33 +++++++++++++++++++----
 2 files changed, 86 insertions(+), 6 deletions(-)

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index fbc41b8..6276508 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -321,6 +321,65 @@
 	)
 );
 
+TRACE_EVENT(rpc_xdr_alignment,
+	TP_PROTO(
+		const struct xdr_stream *xdr,
+		size_t offset,
+		unsigned int copied
+	),
+
+	TP_ARGS(xdr, offset, copied),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
+		__field(int, version)
+		__field(size_t, offset)
+		__field(unsigned int, copied)
+		__field(const void *, head_base)
+		__field(size_t, head_len)
+		__field(const void *, tail_base)
+		__field(size_t, tail_len)
+		__field(unsigned int, page_len)
+		__field(unsigned int, len)
+		__string(progname,
+			 xdr->rqst->rq_task->tk_client->cl_program->name)
+		__string(procedure,
+			 xdr->rqst->rq_task->tk_msg.rpc_proc->p_name)
+	),
+
+	TP_fast_assign(
+		const struct rpc_task *task = xdr->rqst->rq_task;
+
+		__entry->task_id = task->tk_pid;
+		__entry->client_id = task->tk_client->cl_clid;
+		__assign_str(progname,
+			     task->tk_client->cl_program->name);
+		__entry->version = task->tk_client->cl_vers;
+		__assign_str(procedure, task->tk_msg.rpc_proc->p_name);
+
+		__entry->offset = offset;
+		__entry->copied = copied;
+		__entry->head_base = xdr->buf->head[0].iov_base;
+		__entry->head_len = xdr->buf->head[0].iov_len;
+		__entry->page_len = xdr->buf->page_len;
+		__entry->tail_base = xdr->buf->tail[0].iov_base;
+		__entry->tail_len = xdr->buf->tail[0].iov_len;
+		__entry->len = xdr->buf->len;
+	),
+
+	TP_printk(
+		"task:%u@%u %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u",
+		__entry->task_id, __entry->client_id,
+		__get_str(progname), __entry->version, __get_str(procedure),
+		__entry->offset, __entry->copied,
+		__entry->head_base, __entry->head_len,
+		__entry->page_len,
+		__entry->tail_base, __entry->tail_len,
+		__entry->len
+	)
+);
+
 /*
  * First define the enums in the below macros to be exported to userspace
  * via TRACE_DEFINE_ENUM().
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 6d0b615..5f0aa53 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -347,13 +347,15 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
  * 'len' bytes. The extra data is not lost, but is instead
  * moved into the inlined pages and/or the tail.
  */
-static void
+static unsigned int
 xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
 {
 	struct kvec *head, *tail;
 	size_t copy, offs;
 	unsigned int pglen = buf->page_len;
+	unsigned int result;
 
+	result = 0;
 	tail = buf->tail;
 	head = buf->head;
 
@@ -367,6 +369,7 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
 			copy = tail->iov_len - len;
 			memmove((char *)tail->iov_base + len,
 					tail->iov_base, copy);
+			result += copy;
 		}
 		/* Copy from the inlined pages into the tail */
 		copy = len;
@@ -377,11 +380,13 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
 			copy = 0;
 		else if (copy > tail->iov_len - offs)
 			copy = tail->iov_len - offs;
-		if (copy != 0)
+		if (copy != 0) {
 			_copy_from_pages((char *)tail->iov_base + offs,
 					buf->pages,
 					buf->page_base + pglen + offs - len,
 					copy);
+			result += copy;
+		}
 		/* Do we also need to copy data from the head into the tail ? */
 		if (len > pglen) {
 			offs = copy = len - pglen;
@@ -391,6 +396,7 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
 					(char *)head->iov_base +
 					head->iov_len - offs,
 					copy);
+			result += copy;
 		}
 	}
 	/* Now handle pages */
@@ -406,12 +412,15 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
 		_copy_to_pages(buf->pages, buf->page_base,
 				(char *)head->iov_base + head->iov_len - len,
 				copy);
+		result += copy;
 	}
 	head->iov_len -= len;
 	buf->buflen -= len;
 	/* Have we truncated the message? */
 	if (buf->len > buf->buflen)
 		buf->len = buf->buflen;
+
+	return result;
 }
 
 /**
@@ -423,14 +432,16 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
  * 'len' bytes. The extra data is not lost, but is instead
  * moved into the tail.
  */
-static void
+static unsigned int
 xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
 {
 	struct kvec *tail;
 	size_t copy;
 	unsigned int pglen = buf->page_len;
 	unsigned int tailbuf_len;
+	unsigned int result;
 
+	result = 0;
 	tail = buf->tail;
 	BUG_ON (len > pglen);
 
@@ -448,18 +459,22 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
 		if (tail->iov_len > len) {
 			char *p = (char *)tail->iov_base + len;
 			memmove(p, tail->iov_base, tail->iov_len - len);
+			result += tail->iov_len - len;
 		} else
 			copy = tail->iov_len;
 		/* Copy from the inlined pages into the tail */
 		_copy_from_pages((char *)tail->iov_base,
 				buf->pages, buf->page_base + pglen - len,
 				copy);
+		result += copy;
 	}
 	buf->page_len -= len;
 	buf->buflen -= len;
 	/* Have we truncated the message? */
 	if (buf->len > buf->buflen)
 		buf->len = buf->buflen;
+
+	return result;
 }
 
 void
@@ -959,13 +974,17 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len)
 	struct kvec *iov;
 	unsigned int nwords = XDR_QUADLEN(len);
 	unsigned int cur = xdr_stream_pos(xdr);
+	unsigned int copied, offset;
 
 	if (xdr->nwords == 0)
 		return 0;
+
 	/* Realign pages to current pointer position */
-	iov  = buf->head;
+	iov = buf->head;
 	if (iov->iov_len > cur) {
-		xdr_shrink_bufhead(buf, iov->iov_len - cur);
+		offset = iov->iov_len - cur;
+		copied = xdr_shrink_bufhead(buf, offset);
+		trace_rpc_xdr_alignment(xdr, offset, copied);
 		xdr->nwords = XDR_QUADLEN(buf->len - cur);
 	}
 
@@ -977,7 +996,9 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len)
 		len = buf->page_len;
 	else if (nwords < xdr->nwords) {
 		/* Truncate page data and move it into the tail */
-		xdr_shrink_pagelen(buf, buf->page_len - len);
+		offset = buf->page_len - len;
+		copied = xdr_shrink_pagelen(buf, offset);
+		trace_rpc_xdr_alignment(xdr, offset, copied);
 		xdr->nwords = XDR_QUADLEN(buf->len - cur);
 	}
 	return len;


  parent reply index

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-11 16:23 [PATCH v1 00/23] NFS client patches for v5.1 (complete) Chuck Lever
2019-02-11 16:23 ` [PATCH v1 01/23] xprtrdma: Fix sparse warnings Chuck Lever
2019-02-11 16:23 ` [PATCH v1 02/23] xprtrdma: Check inline size before providing a Write chunk Chuck Lever
2019-02-11 16:23 ` [PATCH v1 03/23] xprtrdma: Reduce the doorbell rate (Receive) Chuck Lever
2019-02-11 16:24 ` [PATCH v1 04/23] SUNRPC: Display symbolic flag names in RPC trace events Chuck Lever
2019-02-11 16:24 ` [PATCH v1 05/23] SUNRPC: Add xdr_stream::rqst field Chuck Lever
2019-02-11 16:24 ` [PATCH v1 06/23] SUNRPC: Add XDR overflow trace event Chuck Lever
2019-02-11 16:24 ` Chuck Lever [this message]
2019-02-11 16:24 ` [PATCH v1 08/23] NFS: Remove print_overflow_msg() Chuck Lever
2019-02-11 16:24 ` [PATCH v1 09/23] NFS: Add trace events to report non-zero NFS status codes Chuck Lever
2019-02-11 16:24 ` [PATCH v1 10/23] SUNRPC: Remove some dprintk() call sites from auth functions Chuck Lever
2019-02-11 16:24 ` [PATCH v1 11/23] SUNRPC: Remove rpc_xprt::tsh_size Chuck Lever
2019-02-11 16:24 ` [PATCH v1 12/23] SUNRPC: Add build option to disable support for insecure enctypes Chuck Lever
2019-02-11 16:24 ` [PATCH v1 13/23] SUNRPC: Use struct xdr_stream when constructing RPC Call header Chuck Lever
2019-02-11 16:24 ` [PATCH v1 14/23] SUNRPC: Clean up rpc_verify_header() Chuck Lever
2019-02-11 16:24 ` [PATCH v1 15/23] SUNRPC: Use struct xdr_stream when decoding RPC Reply header Chuck Lever
2019-02-11 16:25 ` [PATCH v1 16/23] SUNRPC: Introduce trace points in rpc_auth_gss.ko Chuck Lever
2019-02-11 16:25 ` [PATCH v1 17/23] SUNRPC: Remove xdr_buf_trim() Chuck Lever
2019-02-11 16:25 ` [PATCH v1 18/23] SUNRPC: Add SPDX IDs to some net/sunrpc/auth_gss/ files Chuck Lever
2019-02-11 16:25 ` [PATCH v1 19/23] SUNRPC: Introduce rpc_prepare_reply_pages() Chuck Lever
2019-02-11 16:25 ` [PATCH v1 20/23] NFS: Account for XDR pad of buf->pages Chuck Lever
2019-04-05 17:36   ` Olga Kornievskaia
2019-04-05 17:51     ` Chuck Lever
2019-04-05 19:17       ` Olga Kornievskaia
2019-04-05 19:23         ` Chuck Lever
2019-04-05 19:27           ` Olga Kornievskaia
2019-04-05 19:42             ` Chuck Lever
2019-04-08 14:36               ` Olga Kornievskaia
2019-04-08 14:43                 ` Chuck Lever
2019-04-08 15:21                   ` Olga Kornievskaia
2019-04-08 15:26                     ` Olga Kornievskaia
2019-04-08 15:50                       ` Olga Kornievskaia
2019-04-08 16:02                         ` Olga Kornievskaia
2019-04-08 16:29                         ` Chuck Lever
2019-02-11 16:25 ` [PATCH v1 21/23] SUNRPC: Make AUTH_SYS and AUTH_NULL set au_verfsize Chuck Lever
2019-02-11 16:25 ` [PATCH v1 22/23] SUNRPC: Add rpc_auth::au_ralign field Chuck Lever
2019-02-11 16:25 ` [PATCH v1 23/23] SUNRPC: Use au_rslack when computing reply buffer size Chuck Lever

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190211162416.2817.25507.stgit@manet.1015granger.net \
    --to=chuck.lever@oracle.com \
    --cc=anna.schumaker@netapp.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-NFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-nfs/0 linux-nfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-nfs linux-nfs/ https://lore.kernel.org/linux-nfs \
		linux-nfs@vger.kernel.org linux-nfs@archiver.kernel.org
	public-inbox-index linux-nfs


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-nfs


AGPL code for this site: git clone https://public-inbox.org/ public-inbox