All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chuck Lever <chuck.lever@oracle.com>
To: Anna Schumaker <schumaker.anna@gmail.com>
Cc: Bruce Fields <bfields@redhat.com>,
	Linux NFS Mailing List <linux-nfs@vger.kernel.org>,
	Anna Schumaker <Anna.Schumaker@Netapp.com>
Subject: Re: [PATCH v3 3/6] NFSD: Add READ_PLUS data support
Date: Mon, 3 Aug 2020 15:17:42 -0400	[thread overview]
Message-ID: <5E30AC25-1249-4D91-A2B6-13A38DB2A253@oracle.com> (raw)
In-Reply-To: <20200803165954.1348263-4-Anna.Schumaker@Netapp.com>

Hi Anna-

> On Aug 3, 2020, at 12:59 PM, schumaker.anna@gmail.com wrote:
> 
> From: Anna Schumaker <Anna.Schumaker@Netapp.com>
> 
> This patch adds READ_PLUS support for returning a single
> NFS4_CONTENT_DATA segment to the client. This is basically the same as
> the READ operation, only with the extra information about data segments.
> 
> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
> ---
> fs/nfsd/nfs4proc.c | 17 ++++++++++
> fs/nfsd/nfs4xdr.c  | 85 ++++++++++++++++++++++++++++++++++++++++++++--
> 2 files changed, 100 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index a09c35f0f6f0..9630d33211f2 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -2523,6 +2523,16 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> 	return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
> }
> 
> +static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> +{
> +	u32 maxcount = svc_max_payload(rqstp);
> +	u32 rlen = min(op->u.read.rd_length, maxcount);
> +	/* enough extra xdr space for encoding either a hole or data segment. */
> +	u32 segments = 1 + 2 + 2;
> +
> +	return (op_encode_hdr_size + 2 + segments + XDR_QUADLEN(rlen)) * sizeof(__be32);
> +}
> +
> static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> {
> 	u32 maxcount = 0, rlen = 0;
> @@ -3059,6 +3069,13 @@ static const struct nfsd4_operation nfsd4_ops[] = {
> 		.op_name = "OP_COPY",
> 		.op_rsize_bop = nfsd4_copy_rsize,
> 	},
> +	[OP_READ_PLUS] = {
> +		.op_func = nfsd4_read,
> +		.op_release = nfsd4_read_release,
> +		.op_name = "OP_READ_PLUS",
> +		.op_rsize_bop = nfsd4_read_plus_rsize,
> +		.op_get_currentstateid = nfsd4_get_readstateid,
> +	},
> 	[OP_SEEK] = {
> 		.op_func = nfsd4_seek,
> 		.op_name = "OP_SEEK",
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 6a1c0a7fae05..1d143bf02b83 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -1957,7 +1957,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = {
> 	[OP_LAYOUTSTATS]	= (nfsd4_dec)nfsd4_decode_notsupp,
> 	[OP_OFFLOAD_CANCEL]	= (nfsd4_dec)nfsd4_decode_offload_status,
> 	[OP_OFFLOAD_STATUS]	= (nfsd4_dec)nfsd4_decode_offload_status,
> -	[OP_READ_PLUS]		= (nfsd4_dec)nfsd4_decode_notsupp,
> +	[OP_READ_PLUS]		= (nfsd4_dec)nfsd4_decode_read,
> 	[OP_SEEK]		= (nfsd4_dec)nfsd4_decode_seek,
> 	[OP_WRITE_SAME]		= (nfsd4_dec)nfsd4_decode_notsupp,
> 	[OP_CLONE]		= (nfsd4_dec)nfsd4_decode_clone,
> @@ -4367,6 +4367,87 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
> 		return nfserr_resource;
> 	p = xdr_encode_hyper(p, os->count);
> 	*p++ = cpu_to_be32(0);
> +	return nfserr;
> +}
> +
> +static __be32
> +nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
> +			    struct nfsd4_read *read,
> +			    unsigned long maxcount,  u32 *eof)
> +{
> +	struct xdr_stream *xdr = &resp->xdr;
> +	struct file *file = read->rd_nf->nf_file;
> +	int starting_len = xdr->buf->len;
> +	__be32 nfserr;
> +	__be32 *p, tmp;
> +	__be64 tmp64;
> +
> +	/* Content type, offset, byte count */
> +	p = xdr_reserve_space(xdr, 4 + 8 + 4);
> +	if (!p)
> +		return nfserr_resource;
> +
> +	read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount);
> +	if (read->rd_vlen < 0)
> +		return nfserr_resource;
> +
> +	nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
> +			    resp->rqstp->rq_vec, read->rd_vlen, &maxcount, eof);

nfsd4_encode_read() has this:

3904         if (file->f_op->splice_read &&
3905             test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
3906                 nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
3907         else
3908                 nfserr = nfsd4_encode_readv(resp, read, file, maxcount);

So READ_PLUS never uses ->splice_read. 

readv is known to be less efficient than splice. Is there a measurable
server performance impact (either latency or CPU utilization) when
reading a file with no holes?


> +	if (nfserr)
> +		return nfserr;
> +	if (svc_encode_read_payload(resp->rqstp, starting_len + 16, maxcount))
> +		return nfserr_io;

Not sure you want a read_payload call here. This is to notify the
transport that there is a section of the message that can be sent
via RDMA, but READ_PLUS has no DDP-eligible data items — especially
not holes!

Although, the call is not likely to be much more than a no-op,
since clients won't be providing any Write chunks for READ_PLUS.


> +
> +	tmp = htonl(NFS4_CONTENT_DATA);
> +	write_bytes_to_xdr_buf(xdr->buf, starting_len,      &tmp,   4);
> +	tmp64 = cpu_to_be64(read->rd_offset);
> +	write_bytes_to_xdr_buf(xdr->buf, starting_len + 4,  &tmp64, 8);
> +	tmp = htonl(maxcount);
> +	write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp,   4);
> +	return nfs_ok;
> +}
> +
> +static __be32
> +nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> +		       struct nfsd4_read *read)
> +{
> +	unsigned long maxcount;
> +	struct xdr_stream *xdr = &resp->xdr;
> +	struct file *file;
> +	int starting_len = xdr->buf->len;
> +	int segments = 0;
> +	__be32 *p, tmp;
> +	u32 eof;
> +
> +	if (nfserr)
> +		return nfserr;
> +	file = read->rd_nf->nf_file;
> +
> +	/* eof flag, segment count */
> +	p = xdr_reserve_space(xdr, 4 + 4);
> +	if (!p)
> +		return nfserr_resource;
> +	xdr_commit_encode(xdr);
> +
> +	maxcount = svc_max_payload(resp->rqstp);
> +	maxcount = min_t(unsigned long, maxcount,
> +			 (xdr->buf->buflen - xdr->buf->len));
> +	maxcount = min_t(unsigned long, maxcount, read->rd_length);
> +
> +	eof = read->rd_offset >= i_size_read(file_inode(file));
> +	if (!eof) {
> +		nfserr = nfsd4_encode_read_plus_data(resp, read, maxcount, &eof);
> +		segments++;
> +	}
> +
> +	if (nfserr)
> +		xdr_truncate_encode(xdr, starting_len);
> +	else {
> +		tmp = htonl(eof);
> +		write_bytes_to_xdr_buf(xdr->buf, starting_len,     &tmp, 4);
> +		tmp = htonl(segments);
> +		write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
> +	}
> 
> 	return nfserr;
> }
> @@ -4509,7 +4590,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = {
> 	[OP_LAYOUTSTATS]	= (nfsd4_enc)nfsd4_encode_noop,
> 	[OP_OFFLOAD_CANCEL]	= (nfsd4_enc)nfsd4_encode_noop,
> 	[OP_OFFLOAD_STATUS]	= (nfsd4_enc)nfsd4_encode_offload_status,
> -	[OP_READ_PLUS]		= (nfsd4_enc)nfsd4_encode_noop,
> +	[OP_READ_PLUS]		= (nfsd4_enc)nfsd4_encode_read_plus,
> 	[OP_SEEK]		= (nfsd4_enc)nfsd4_encode_seek,
> 	[OP_WRITE_SAME]		= (nfsd4_enc)nfsd4_encode_noop,
> 	[OP_CLONE]		= (nfsd4_enc)nfsd4_encode_noop,
> -- 
> 2.27.0
> 

--
Chuck Lever




  reply	other threads:[~2020-08-03 19:19 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-03 16:59 [PATCH v3 0/6] NFSD: Add support for the v4.2 READ_PLUS operation schumaker.anna
2020-08-03 16:59 ` [PATCH v3 1/6] SUNRPC: Implement xdr_reserve_space_vec() schumaker.anna
2020-08-03 19:19   ` Chuck Lever
2020-08-03 19:37     ` Anna Schumaker
2020-08-03 19:44       ` Chuck Lever
2020-08-03 16:59 ` [PATCH v3 2/6] NFSD: nfsd4_encode_readv() can use xdr_reserve_space_vec() schumaker.anna
2020-08-03 16:59 ` [PATCH v3 3/6] NFSD: Add READ_PLUS data support schumaker.anna
2020-08-03 19:17   ` Chuck Lever [this message]
2020-08-03 19:41     ` Anna Schumaker
2020-08-03 16:59 ` [PATCH v3 4/6] NFSD: Add READ_PLUS hole segment encoding schumaker.anna
2020-08-03 16:59 ` [PATCH v3 5/6] NFSD: Return both a hole and a data segment schumaker.anna
2020-08-03 16:59 ` [PATCH v3 6/6] NFSD: Encode a full READ_PLUS reply schumaker.anna
2020-08-03 19:26 ` [PATCH v3 0/6] NFSD: Add support for the v4.2 READ_PLUS operation Chuck Lever
2020-08-03 19:36   ` Anna Schumaker

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5E30AC25-1249-4D91-A2B6-13A38DB2A253@oracle.com \
    --to=chuck.lever@oracle.com \
    --cc=Anna.Schumaker@Netapp.com \
    --cc=bfields@redhat.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=schumaker.anna@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.