Linux-NFS Archive on lore.kernel.org
 help / color / Atom feed
From: Chuck Lever <chuck.lever@oracle.com>
To: Anna Schumaker <schumaker.anna@gmail.com>
Cc: Trond.Myklebust@hammerspace.com,
	Linux NFS Mailing List <linux-nfs@vger.kernel.org>,
	Anna Schumaker <Anna.Schumaker@Netapp.com>
Subject: Re: [PATCH v2 4/6] NFS: Add READ_PLUS data segment support
Date: Fri, 14 Feb 2020 17:28:37 -0500
Message-ID: <AAF85957-285A-42BF-993D-7EB4843E2ED2@oracle.com> (raw)
In-Reply-To: <20200214211227.407836-5-Anna.Schumaker@Netapp.com>



> On Feb 14, 2020, at 4:12 PM, schumaker.anna@gmail.com wrote:
> 
> From: Anna Schumaker <Anna.Schumaker@Netapp.com>
> 
> This patch adds client support for decoding a single NFS4_CONTENT_DATA
> segment returned by the server. This is the simplest implementation
> possible, since it does not account for any hole segments in the reply.
> 
> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
> ---
> fs/nfs/nfs42xdr.c         | 138 ++++++++++++++++++++++++++++++++++++++
> fs/nfs/nfs4proc.c         |  43 +++++++++++-
> fs/nfs/nfs4xdr.c          |   1 +
> include/linux/nfs4.h      |   2 +-
> include/linux/nfs_fs_sb.h |   1 +
> include/linux/nfs_xdr.h   |   2 +-
> 6 files changed, 182 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
> index c03f3246d6c5..bf118ecabe2c 100644
> --- a/fs/nfs/nfs42xdr.c
> +++ b/fs/nfs/nfs42xdr.c
> @@ -45,6 +45,15 @@
> #define encode_deallocate_maxsz		(op_encode_hdr_maxsz + \
> 					 encode_fallocate_maxsz)
> #define decode_deallocate_maxsz		(op_decode_hdr_maxsz)
> +#define encode_read_plus_maxsz		(op_encode_hdr_maxsz + \
> +					 encode_stateid_maxsz + 3)
> +#define NFS42_READ_PLUS_SEGMENT_SIZE	(1 /* data_content4 */ + \
> +					 2 /* data_info4.di_offset */ + \
> +					 2 /* data_info4.di_length */)
> +#define decode_read_plus_maxsz		(op_decode_hdr_maxsz + \
> +					 1 /* rpr_eof */ + \
> +					 1 /* rpr_contents count */ + \
> +					 NFS42_READ_PLUS_SEGMENT_SIZE)
> #define encode_seek_maxsz		(op_encode_hdr_maxsz + \
> 					 encode_stateid_maxsz + \
> 					 2 /* offset */ + \
> @@ -128,6 +137,14 @@
> 					 decode_putfh_maxsz + \
> 					 decode_deallocate_maxsz + \
> 					 decode_getattr_maxsz)
> +#define NFS4_enc_read_plus_sz		(compound_encode_hdr_maxsz + \
> +					 encode_sequence_maxsz + \
> +					 encode_putfh_maxsz + \
> +					 encode_read_plus_maxsz)
> +#define NFS4_dec_read_plus_sz		(compound_decode_hdr_maxsz + \
> +					 decode_sequence_maxsz + \
> +					 decode_putfh_maxsz + \
> +					 decode_read_plus_maxsz)
> #define NFS4_enc_seek_sz		(compound_encode_hdr_maxsz + \
> 					 encode_sequence_maxsz + \
> 					 encode_putfh_maxsz + \
> @@ -252,6 +269,16 @@ static void encode_deallocate(struct xdr_stream *xdr,
> 	encode_fallocate(xdr, args);
> }
> 
> +static void encode_read_plus(struct xdr_stream *xdr,
> +			     const struct nfs_pgio_args *args,
> +			     struct compound_hdr *hdr)
> +{
> +	encode_op_hdr(xdr, OP_READ_PLUS, decode_read_plus_maxsz, hdr);
> +	encode_nfs4_stateid(xdr, &args->stateid);
> +	encode_uint64(xdr, args->offset);
> +	encode_uint32(xdr, args->count);
> +}
> +
> static void encode_seek(struct xdr_stream *xdr,
> 			const struct nfs42_seek_args *args,
> 			struct compound_hdr *hdr)
> @@ -446,6 +473,29 @@ static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
> 	encode_nops(&hdr);
> }
> 
> +/*
> + * Encode READ_PLUS request
> + */
> +static void nfs4_xdr_enc_read_plus(struct rpc_rqst *req,
> +				   struct xdr_stream *xdr,
> +				   const void *data)
> +{
> +	const struct nfs_pgio_args *args = data;
> +	struct compound_hdr hdr = {
> +		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
> +	};
> +
> +	encode_compound_hdr(xdr, req, &hdr);
> +	encode_sequence(xdr, &args->seq_args, &hdr);
> +	encode_putfh(xdr, args->fh, &hdr);
> +	encode_read_plus(xdr, args, &hdr);
> +
> +	rpc_prepare_reply_pages(req, args->pages, args->pgbase,
> +				args->count, hdr.replen);
> +	req->rq_rcv_buf.flags |= XDRBUF_READ;

IMO this line is incorrect.

RFC 8267 Section 6.1 does not list any part of the result of READ_PLUS
as DDP-eligible. There's no way for a client to know how to set up
Write chunks, unless it knows exactly where the file's holes are in
advance. Even then... racy.

Just curious, have you tried READ_PLUS with proto=rdma ?


> +	encode_nops(&hdr);
> +}
> +
> /*
>  * Encode SEEK request
>  */
> @@ -694,6 +744,67 @@ static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *re
> 	return decode_op_hdr(xdr, OP_DEALLOCATE);
> }
> 
> +static uint32_t decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *res,
> +				      uint32_t *eof)
> +{
> +	__be32 *p;
> +	uint32_t count, recvd;
> +	uint64_t offset;
> +
> +	p = xdr_inline_decode(xdr, 8 + 4);
> +	if (unlikely(!p))
> +		return -EIO;
> +
> +	p = xdr_decode_hyper(p, &offset);
> +	count = be32_to_cpup(p);
> +	if (count == 0)
> +		return 0;
> +
> +	recvd = xdr_read_pages(xdr, count);
> +	if (count > recvd) {
> +		dprintk("NFS: server cheating in read reply: "
> +				"count %u > recvd %u\n", count, recvd);
> +		count = recvd;
> +		*eof = 0;
> +	}
> +
> +	return count;
> +}
> +
> +static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
> +{
> +	__be32 *p;
> +	uint32_t count, eof, segments, type;
> +	int status;
> +
> +	status = decode_op_hdr(xdr, OP_READ_PLUS);
> +	if (status)
> +		return status;
> +
> +	p = xdr_inline_decode(xdr, 4 + 4);
> +	if (unlikely(!p))
> +		return -EIO;
> +
> +	eof = be32_to_cpup(p++);
> +	segments = be32_to_cpup(p++);
> +	if (segments == 0)
> +		return 0;
> +
> +	p = xdr_inline_decode(xdr, 4);
> +	if (unlikely(!p))
> +		return -EIO;
> +
> +	type = be32_to_cpup(p++);
> +	if (type == NFS4_CONTENT_DATA)
> +		count = decode_read_plus_data(xdr, res, &eof);
> +	else
> +		return -EINVAL;
> +
> +	res->eof = eof;
> +	res->count = count;
> +	return 0;
> +}
> +
> static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
> {
> 	int status;
> @@ -870,6 +981,33 @@ static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp,
> 	return status;
> }
> 
> +/*
> + * Decode READ_PLUS request
> + */
> +static int nfs4_xdr_dec_read_plus(struct rpc_rqst *rqstp,
> +				  struct xdr_stream *xdr,
> +				  void *data)
> +{
> +	struct nfs_pgio_res *res = data;
> +	struct compound_hdr hdr;
> +	int status;
> +
> +	status = decode_compound_hdr(xdr, &hdr);
> +	if (status)
> +		goto out;
> +	status = decode_sequence(xdr, &res->seq_res, rqstp);
> +	if (status)
> +		goto out;
> +	status = decode_putfh(xdr);
> +	if (status)
> +		goto out;
> +	status = decode_read_plus(xdr, res);
> +	if (!status)
> +		status = res->count;
> +out:
> +	return status;
> +}
> +
> /*
>  * Decode SEEK request
>  */
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 95d07a3dc5d1..ed3ec8c36273 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -69,6 +69,10 @@
> 
> #include "nfs4trace.h"
> 
> +#ifdef CONFIG_NFS_V4_2
> +#include "nfs42.h"
> +#endif /* CONFIG_NFS_V4_2 */
> +
> #define NFSDBG_FACILITY		NFSDBG_PROC
> 
> #define NFS4_BITMASK_SZ		3
> @@ -5199,28 +5203,60 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task,
> 	return true;
> }
> 
> +static bool nfs4_read_plus_not_supported(struct rpc_task *task,
> +					 struct nfs_pgio_header *hdr)
> +{
> +	struct nfs_server *server = NFS_SERVER(hdr->inode);
> +	struct rpc_message *msg = &task->tk_msg;
> +
> +	if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS] &&
> +	    server->caps & NFS_CAP_READ_PLUS && task->tk_status == -ENOTSUPP) {
> +		server->caps &= ~NFS_CAP_READ_PLUS;
> +		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
> +		rpc_restart_call_prepare(task);
> +		return true;
> +	}
> +	return false;
> +}
> +
> static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
> {
> -
> 	dprintk("--> %s\n", __func__);
> 
> 	if (!nfs4_sequence_done(task, &hdr->res.seq_res))
> 		return -EAGAIN;
> 	if (nfs4_read_stateid_changed(task, &hdr->args))
> 		return -EAGAIN;
> +	if (nfs4_read_plus_not_supported(task, hdr))
> +		return -EAGAIN;
> 	if (task->tk_status > 0)
> 		nfs_invalidate_atime(hdr->inode);
> 	return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) :
> 				    nfs4_read_done_cb(task, hdr);
> }
> 
> +#ifdef CONFIG_NFS_V4_2
> +static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
> +{
> +	if (server->caps & NFS_CAP_READ_PLUS)
> +		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS];
> +	else
> +		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
> +}
> +#else
> +static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
> +{
> +	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
> +}
> +#endif /* CONFIG_NFS_V4_2 */
> +
> static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
> 				 struct rpc_message *msg)
> {
> 	hdr->timestamp   = jiffies;
> 	if (!hdr->pgio_done_cb)
> 		hdr->pgio_done_cb = nfs4_read_done_cb;
> -	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
> +	nfs42_read_plus_support(NFS_SERVER(hdr->inode), msg);
> 	nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0);
> }
> 
> @@ -9970,7 +10006,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
> 		| NFS_CAP_SEEK
> 		| NFS_CAP_LAYOUTSTATS
> 		| NFS_CAP_CLONE
> -		| NFS_CAP_LAYOUTERROR,
> +		| NFS_CAP_LAYOUTERROR
> +		| NFS_CAP_READ_PLUS,
> 	.init_client = nfs41_init_client,
> 	.shutdown_client = nfs41_shutdown_client,
> 	.match_stateid = nfs41_match_stateid,
> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> index 47817ef0aadb..68b2917d0537 100644
> --- a/fs/nfs/nfs4xdr.c
> +++ b/fs/nfs/nfs4xdr.c
> @@ -7584,6 +7584,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
> 	PROC42(COPY_NOTIFY,	enc_copy_notify,	dec_copy_notify),
> 	PROC(LOOKUPP,		enc_lookupp,		dec_lookupp),
> 	PROC42(LAYOUTERROR,	enc_layouterror,	dec_layouterror),
> +	PROC42(READ_PLUS,	enc_read_plus,		dec_read_plus),
> };
> 
> static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
> diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
> index 82d8fb422092..c1eeef52545c 100644
> --- a/include/linux/nfs4.h
> +++ b/include/linux/nfs4.h
> @@ -540,8 +540,8 @@ enum {
> 
> 	NFSPROC4_CLNT_LOOKUPP,
> 	NFSPROC4_CLNT_LAYOUTERROR,
> -
> 	NFSPROC4_CLNT_COPY_NOTIFY,
> +	NFSPROC4_CLNT_READ_PLUS,
> };
> 
> /* nfs41 types */
> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> index 465fa98258a3..11248c5a7b24 100644
> --- a/include/linux/nfs_fs_sb.h
> +++ b/include/linux/nfs_fs_sb.h
> @@ -281,5 +281,6 @@ struct nfs_server {
> #define NFS_CAP_OFFLOAD_CANCEL	(1U << 25)
> #define NFS_CAP_LAYOUTERROR	(1U << 26)
> #define NFS_CAP_COPY_NOTIFY	(1U << 27)
> +#define NFS_CAP_READ_PLUS	(1U << 28)
> 
> #endif
> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> index 94c77ed55ce1..8efbf3d8b263 100644
> --- a/include/linux/nfs_xdr.h
> +++ b/include/linux/nfs_xdr.h
> @@ -655,7 +655,7 @@ struct nfs_pgio_args {
> struct nfs_pgio_res {
> 	struct nfs4_sequence_res	seq_res;
> 	struct nfs_fattr *	fattr;
> -	__u32			count;
> +	__u64			count;
> 	__u32			op_status;
> 	union {
> 		struct {
> -- 
> 2.25.0
> 

--
Chuck Lever




  reply index

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-14 21:12 [PATCH v2 0/6] NFS: Add support for the v4.2 READ_PLUS operation schumaker.anna
2020-02-14 21:12 ` [PATCH v2 1/6] SUNRPC: Split out a function for setting current page schumaker.anna
2020-02-14 21:12 ` [PATCH v2 2/6] SUNRPC: Add the ability to expand holes in data pages schumaker.anna
2020-02-14 21:12 ` [PATCH v2 3/6] SUNRPC: Add the ability to shift data to a specific offset schumaker.anna
2020-02-14 21:12 ` [PATCH v2 4/6] NFS: Add READ_PLUS data segment support schumaker.anna
2020-02-14 22:28   ` Chuck Lever [this message]
2020-02-20 14:42     ` Anna Schumaker
2020-02-20 14:55       ` Chuck Lever
2020-02-20 18:28         ` Anna Schumaker
2020-02-20 18:30           ` Chuck Lever
2020-02-20 18:35             ` Anna Schumaker
2020-02-20 18:54               ` Chuck Lever
2020-02-14 21:12 ` [PATCH v2 5/6] NFS: Add READ_PLUS hole segment decoding schumaker.anna
2020-02-14 21:12 ` [PATCH v2 6/6] NFS: Decode multiple READ_PLUS segments schumaker.anna
2020-02-20 17:40 ` [PATCH v2 0/6] NFS: Add support for the v4.2 READ_PLUS operation Chuck Lever
2020-02-20 18:25   ` Anna Schumaker
2020-02-20 18:29     ` Chuck Lever

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=AAF85957-285A-42BF-993D-7EB4843E2ED2@oracle.com \
    --to=chuck.lever@oracle.com \
    --cc=Anna.Schumaker@Netapp.com \
    --cc=Trond.Myklebust@hammerspace.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=schumaker.anna@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-NFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-nfs/0 linux-nfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-nfs linux-nfs/ https://lore.kernel.org/linux-nfs \
		linux-nfs@vger.kernel.org
	public-inbox-index linux-nfs

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-nfs


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git