From: Chuck Lever <chuck.lever@oracle.com>
To: Anna Schumaker <schumaker.anna@gmail.com>
Cc: Trond.Myklebust@hammerspace.com,
Linux NFS Mailing List <linux-nfs@vger.kernel.org>,
Anna Schumaker <Anna.Schumaker@Netapp.com>
Subject: Re: [PATCH v2 4/6] NFS: Add READ_PLUS data segment support
Date: Fri, 14 Feb 2020 17:28:37 -0500 [thread overview]
Message-ID: <AAF85957-285A-42BF-993D-7EB4843E2ED2@oracle.com> (raw)
In-Reply-To: <20200214211227.407836-5-Anna.Schumaker@Netapp.com>
> On Feb 14, 2020, at 4:12 PM, schumaker.anna@gmail.com wrote:
>
> From: Anna Schumaker <Anna.Schumaker@Netapp.com>
>
> This patch adds client support for decoding a single NFS4_CONTENT_DATA
> segment returned by the server. This is the simplest implementation
> possible, since it does not account for any hole segments in the reply.
>
> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
> ---
> fs/nfs/nfs42xdr.c | 138 ++++++++++++++++++++++++++++++++++++++
> fs/nfs/nfs4proc.c | 43 +++++++++++-
> fs/nfs/nfs4xdr.c | 1 +
> include/linux/nfs4.h | 2 +-
> include/linux/nfs_fs_sb.h | 1 +
> include/linux/nfs_xdr.h | 2 +-
> 6 files changed, 182 insertions(+), 5 deletions(-)
>
> diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
> index c03f3246d6c5..bf118ecabe2c 100644
> --- a/fs/nfs/nfs42xdr.c
> +++ b/fs/nfs/nfs42xdr.c
> @@ -45,6 +45,15 @@
> #define encode_deallocate_maxsz (op_encode_hdr_maxsz + \
> encode_fallocate_maxsz)
> #define decode_deallocate_maxsz (op_decode_hdr_maxsz)
> +#define encode_read_plus_maxsz (op_encode_hdr_maxsz + \
> + encode_stateid_maxsz + 3)
> +#define NFS42_READ_PLUS_SEGMENT_SIZE (1 /* data_content4 */ + \
> + 2 /* data_info4.di_offset */ + \
> + 2 /* data_info4.di_length */)
> +#define decode_read_plus_maxsz (op_decode_hdr_maxsz + \
> + 1 /* rpr_eof */ + \
> + 1 /* rpr_contents count */ + \
> + NFS42_READ_PLUS_SEGMENT_SIZE)
> #define encode_seek_maxsz (op_encode_hdr_maxsz + \
> encode_stateid_maxsz + \
> 2 /* offset */ + \
> @@ -128,6 +137,14 @@
> decode_putfh_maxsz + \
> decode_deallocate_maxsz + \
> decode_getattr_maxsz)
> +#define NFS4_enc_read_plus_sz (compound_encode_hdr_maxsz + \
> + encode_sequence_maxsz + \
> + encode_putfh_maxsz + \
> + encode_read_plus_maxsz)
> +#define NFS4_dec_read_plus_sz (compound_decode_hdr_maxsz + \
> + decode_sequence_maxsz + \
> + decode_putfh_maxsz + \
> + decode_read_plus_maxsz)
> #define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \
> encode_sequence_maxsz + \
> encode_putfh_maxsz + \
> @@ -252,6 +269,16 @@ static void encode_deallocate(struct xdr_stream *xdr,
> encode_fallocate(xdr, args);
> }
>
> +static void encode_read_plus(struct xdr_stream *xdr,
> + const struct nfs_pgio_args *args,
> + struct compound_hdr *hdr)
> +{
> + encode_op_hdr(xdr, OP_READ_PLUS, decode_read_plus_maxsz, hdr);
> + encode_nfs4_stateid(xdr, &args->stateid);
> + encode_uint64(xdr, args->offset);
> + encode_uint32(xdr, args->count);
> +}
> +
> static void encode_seek(struct xdr_stream *xdr,
> const struct nfs42_seek_args *args,
> struct compound_hdr *hdr)
> @@ -446,6 +473,29 @@ static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
> encode_nops(&hdr);
> }
>
> +/*
> + * Encode READ_PLUS request
> + */
> +static void nfs4_xdr_enc_read_plus(struct rpc_rqst *req,
> + struct xdr_stream *xdr,
> + const void *data)
> +{
> + const struct nfs_pgio_args *args = data;
> + struct compound_hdr hdr = {
> + .minorversion = nfs4_xdr_minorversion(&args->seq_args),
> + };
> +
> + encode_compound_hdr(xdr, req, &hdr);
> + encode_sequence(xdr, &args->seq_args, &hdr);
> + encode_putfh(xdr, args->fh, &hdr);
> + encode_read_plus(xdr, args, &hdr);
> +
> + rpc_prepare_reply_pages(req, args->pages, args->pgbase,
> + args->count, hdr.replen);
> + req->rq_rcv_buf.flags |= XDRBUF_READ;
IMO this line is incorrect.
RFC 8267 Section 6.1 does not list any part of the result of READ_PLUS
as DDP-eligible. There's no way for a client to know how to set up
Write chunks, unless it knows exactly where the file's holes are in
advance. Even then... racy.
Just curious, have you tried READ_PLUS with proto=rdma ?
> + encode_nops(&hdr);
> +}
> +
> /*
> * Encode SEEK request
> */
> @@ -694,6 +744,67 @@ static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *re
> return decode_op_hdr(xdr, OP_DEALLOCATE);
> }
>
> +static uint32_t decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *res,
> + uint32_t *eof)
> +{
> + __be32 *p;
> + uint32_t count, recvd;
> + uint64_t offset;
> +
> + p = xdr_inline_decode(xdr, 8 + 4);
> + if (unlikely(!p))
> + return -EIO;
> +
> + p = xdr_decode_hyper(p, &offset);
> + count = be32_to_cpup(p);
> + if (count == 0)
> + return 0;
> +
> + recvd = xdr_read_pages(xdr, count);
> + if (count > recvd) {
> + dprintk("NFS: server cheating in read reply: "
> + "count %u > recvd %u\n", count, recvd);
> + count = recvd;
> + *eof = 0;
> + }
> +
> + return count;
> +}
> +
> +static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
> +{
> + __be32 *p;
> + uint32_t count, eof, segments, type;
> + int status;
> +
> + status = decode_op_hdr(xdr, OP_READ_PLUS);
> + if (status)
> + return status;
> +
> + p = xdr_inline_decode(xdr, 4 + 4);
> + if (unlikely(!p))
> + return -EIO;
> +
> + eof = be32_to_cpup(p++);
> + segments = be32_to_cpup(p++);
> + if (segments == 0)
> + return 0;
> +
> + p = xdr_inline_decode(xdr, 4);
> + if (unlikely(!p))
> + return -EIO;
> +
> + type = be32_to_cpup(p++);
> + if (type == NFS4_CONTENT_DATA)
> + count = decode_read_plus_data(xdr, res, &eof);
> + else
> + return -EINVAL;
> +
> + res->eof = eof;
> + res->count = count;
> + return 0;
> +}
> +
> static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
> {
> int status;
> @@ -870,6 +981,33 @@ static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp,
> return status;
> }
>
> +/*
> + * Decode READ_PLUS request
> + */
> +static int nfs4_xdr_dec_read_plus(struct rpc_rqst *rqstp,
> + struct xdr_stream *xdr,
> + void *data)
> +{
> + struct nfs_pgio_res *res = data;
> + struct compound_hdr hdr;
> + int status;
> +
> + status = decode_compound_hdr(xdr, &hdr);
> + if (status)
> + goto out;
> + status = decode_sequence(xdr, &res->seq_res, rqstp);
> + if (status)
> + goto out;
> + status = decode_putfh(xdr);
> + if (status)
> + goto out;
> + status = decode_read_plus(xdr, res);
> + if (!status)
> + status = res->count;
> +out:
> + return status;
> +}
> +
> /*
> * Decode SEEK request
> */
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 95d07a3dc5d1..ed3ec8c36273 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -69,6 +69,10 @@
>
> #include "nfs4trace.h"
>
> +#ifdef CONFIG_NFS_V4_2
> +#include "nfs42.h"
> +#endif /* CONFIG_NFS_V4_2 */
> +
> #define NFSDBG_FACILITY NFSDBG_PROC
>
> #define NFS4_BITMASK_SZ 3
> @@ -5199,28 +5203,60 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task,
> return true;
> }
>
> +static bool nfs4_read_plus_not_supported(struct rpc_task *task,
> + struct nfs_pgio_header *hdr)
> +{
> + struct nfs_server *server = NFS_SERVER(hdr->inode);
> + struct rpc_message *msg = &task->tk_msg;
> +
> + if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS] &&
> + server->caps & NFS_CAP_READ_PLUS && task->tk_status == -ENOTSUPP) {
> + server->caps &= ~NFS_CAP_READ_PLUS;
> + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
> + rpc_restart_call_prepare(task);
> + return true;
> + }
> + return false;
> +}
> +
> static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
> {
> -
> dprintk("--> %s\n", __func__);
>
> if (!nfs4_sequence_done(task, &hdr->res.seq_res))
> return -EAGAIN;
> if (nfs4_read_stateid_changed(task, &hdr->args))
> return -EAGAIN;
> + if (nfs4_read_plus_not_supported(task, hdr))
> + return -EAGAIN;
> if (task->tk_status > 0)
> nfs_invalidate_atime(hdr->inode);
> return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) :
> nfs4_read_done_cb(task, hdr);
> }
>
> +#ifdef CONFIG_NFS_V4_2
> +static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
> +{
> + if (server->caps & NFS_CAP_READ_PLUS)
> + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS];
> + else
> + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
> +}
> +#else
> +static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
> +{
> + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
> +}
> +#endif /* CONFIG_NFS_V4_2 */
> +
> static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
> struct rpc_message *msg)
> {
> hdr->timestamp = jiffies;
> if (!hdr->pgio_done_cb)
> hdr->pgio_done_cb = nfs4_read_done_cb;
> - msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
> + nfs42_read_plus_support(NFS_SERVER(hdr->inode), msg);
> nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0);
> }
>
> @@ -9970,7 +10006,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
> | NFS_CAP_SEEK
> | NFS_CAP_LAYOUTSTATS
> | NFS_CAP_CLONE
> - | NFS_CAP_LAYOUTERROR,
> + | NFS_CAP_LAYOUTERROR
> + | NFS_CAP_READ_PLUS,
> .init_client = nfs41_init_client,
> .shutdown_client = nfs41_shutdown_client,
> .match_stateid = nfs41_match_stateid,
> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> index 47817ef0aadb..68b2917d0537 100644
> --- a/fs/nfs/nfs4xdr.c
> +++ b/fs/nfs/nfs4xdr.c
> @@ -7584,6 +7584,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
> PROC42(COPY_NOTIFY, enc_copy_notify, dec_copy_notify),
> PROC(LOOKUPP, enc_lookupp, dec_lookupp),
> PROC42(LAYOUTERROR, enc_layouterror, dec_layouterror),
> + PROC42(READ_PLUS, enc_read_plus, dec_read_plus),
> };
>
> static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
> diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
> index 82d8fb422092..c1eeef52545c 100644
> --- a/include/linux/nfs4.h
> +++ b/include/linux/nfs4.h
> @@ -540,8 +540,8 @@ enum {
>
> NFSPROC4_CLNT_LOOKUPP,
> NFSPROC4_CLNT_LAYOUTERROR,
> -
> NFSPROC4_CLNT_COPY_NOTIFY,
> + NFSPROC4_CLNT_READ_PLUS,
> };
>
> /* nfs41 types */
> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> index 465fa98258a3..11248c5a7b24 100644
> --- a/include/linux/nfs_fs_sb.h
> +++ b/include/linux/nfs_fs_sb.h
> @@ -281,5 +281,6 @@ struct nfs_server {
> #define NFS_CAP_OFFLOAD_CANCEL (1U << 25)
> #define NFS_CAP_LAYOUTERROR (1U << 26)
> #define NFS_CAP_COPY_NOTIFY (1U << 27)
> +#define NFS_CAP_READ_PLUS (1U << 28)
>
> #endif
> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> index 94c77ed55ce1..8efbf3d8b263 100644
> --- a/include/linux/nfs_xdr.h
> +++ b/include/linux/nfs_xdr.h
> @@ -655,7 +655,7 @@ struct nfs_pgio_args {
> struct nfs_pgio_res {
> struct nfs4_sequence_res seq_res;
> struct nfs_fattr * fattr;
> - __u32 count;
> + __u64 count;
> __u32 op_status;
> union {
> struct {
> --
> 2.25.0
>
--
Chuck Lever
next prev parent reply other threads:[~2020-02-14 22:28 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-02-14 21:12 [PATCH v2 0/6] NFS: Add support for the v4.2 READ_PLUS operation schumaker.anna
2020-02-14 21:12 ` [PATCH v2 1/6] SUNRPC: Split out a function for setting current page schumaker.anna
2020-02-14 21:12 ` [PATCH v2 2/6] SUNRPC: Add the ability to expand holes in data pages schumaker.anna
2020-02-14 21:12 ` [PATCH v2 3/6] SUNRPC: Add the ability to shift data to a specific offset schumaker.anna
2020-02-14 21:12 ` [PATCH v2 4/6] NFS: Add READ_PLUS data segment support schumaker.anna
2020-02-14 22:28 ` Chuck Lever [this message]
2020-02-20 14:42 ` Anna Schumaker
2020-02-20 14:55 ` Chuck Lever
2020-02-20 18:28 ` Anna Schumaker
2020-02-20 18:30 ` Chuck Lever
2020-02-20 18:35 ` Anna Schumaker
2020-02-20 18:54 ` Chuck Lever
2020-02-14 21:12 ` [PATCH v2 5/6] NFS: Add READ_PLUS hole segment decoding schumaker.anna
2020-02-14 21:12 ` [PATCH v2 6/6] NFS: Decode multiple READ_PLUS segments schumaker.anna
2020-02-20 17:40 ` [PATCH v2 0/6] NFS: Add support for the v4.2 READ_PLUS operation Chuck Lever
2020-02-20 18:25 ` Anna Schumaker
2020-02-20 18:29 ` Chuck Lever
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=AAF85957-285A-42BF-993D-7EB4843E2ED2@oracle.com \
--to=chuck.lever@oracle.com \
--cc=Anna.Schumaker@Netapp.com \
--cc=Trond.Myklebust@hammerspace.com \
--cc=linux-nfs@vger.kernel.org \
--cc=schumaker.anna@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).