linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/3] NFS/RDMA server changes for v4.16
@ 2018-01-03 20:42 Chuck Lever
  2018-01-03 20:42 ` [PATCH v2 1/3] svcrdma: Post Receives in the Receive completion handler Chuck Lever
                   ` (3 more replies)
  0 siblings, 4 replies; 13+ messages in thread
From: Chuck Lever @ 2018-01-03 20:42 UTC (permalink / raw)
  To: bfields; +Cc: linux-rdma, linux-nfs

Hi Bruce-

Would you consider these patches for merging into v4.16?

Changes since v1:
- Rebased on v4.15-rc6
- Address a 0day robot complaint in nfsd3_proc_symlink
- Properly handle error return from svc_rdma_handle_bc_reply

---

Chuck Lever (3):
      svcrdma: Post Receives in the Receive completion handler
      NFSD: Clean up write argument XDR decoders
      NFSD: Clean up symlink argument XDR decoders


 fs/nfsd/nfs3proc.c                         |   18 ++++-
 fs/nfsd/nfs3xdr.c                          |   67 ++++-------------
 fs/nfsd/nfs4proc.c                         |   37 +++-------
 fs/nfsd/nfs4xdr.c                          |   11 ++-
 fs/nfsd/nfsproc.c                          |   23 ++++--
 fs/nfsd/nfsxdr.c                           |   63 ++++++++--------
 fs/nfsd/xdr.h                              |    3 +
 fs/nfsd/xdr3.h                             |    3 +
 fs/nfsd/xdr4.h                             |    3 +
 include/linux/sunrpc/svc.h                 |    4 +
 include/linux/sunrpc/svc_rdma.h            |    2 -
 net/sunrpc/svc.c                           |  109 ++++++++++++++++++++++++++++
 net/sunrpc/xprtrdma/svc_rdma_backchannel.c |    5 -
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c    |    9 --
 net/sunrpc/xprtrdma/svc_rdma_sendto.c      |    6 --
 net/sunrpc/xprtrdma/svc_rdma_transport.c   |   25 ++----
 16 files changed, 225 insertions(+), 163 deletions(-)

--
Chuck Lever

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v2 1/3] svcrdma: Post Receives in the Receive completion handler
  2018-01-03 20:42 [PATCH v2 0/3] NFS/RDMA server changes for v4.16 Chuck Lever
@ 2018-01-03 20:42 ` Chuck Lever
  2018-01-03 20:42 ` [PATCH v2 2/3] NFSD: Clean up write argument XDR decoders Chuck Lever
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 13+ messages in thread
From: Chuck Lever @ 2018-01-03 20:42 UTC (permalink / raw)
  To: bfields; +Cc: linux-rdma, linux-nfs

This change improves Receive efficiency by posting Receives only
on the same CPU that handles Receive completion. Improved latency
and throughput has been noted with this change.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc_rdma.h            |    2 --
 net/sunrpc/xprtrdma/svc_rdma_backchannel.c |    5 -----
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c    |    9 +--------
 net/sunrpc/xprtrdma/svc_rdma_sendto.c      |    6 ------
 net/sunrpc/xprtrdma/svc_rdma_transport.c   |   25 +++++++------------------
 5 files changed, 8 insertions(+), 39 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 995c6fe..4b731b0 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -185,8 +185,6 @@ extern int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma,
 extern void svc_rdma_wc_read(struct ib_cq *, struct ib_wc *);
 extern void svc_rdma_wc_inv(struct ib_cq *, struct ib_wc *);
 extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
-extern int svc_rdma_post_recv(struct svcxprt_rdma *, gfp_t);
-extern int svc_rdma_repost_recv(struct svcxprt_rdma *, gfp_t);
 extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
 extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
 extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index af78935..a73632c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -95,7 +95,6 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
 out_notfound:
 	dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n",
 		xprt, be32_to_cpu(xid));
-
 	goto out_unlock;
 }
 
@@ -129,10 +128,6 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
 	if (ret < 0)
 		goto out_err;
 
-	ret = svc_rdma_repost_recv(rdma, GFP_NOIO);
-	if (ret)
-		goto out_err;
-
 	/* Bump page refcnt so Send completion doesn't release
 	 * the rq_buffer before all retransmits are complete.
 	 */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index ad4bd62..19e9c6b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -400,10 +400,6 @@ static void svc_rdma_send_error(struct svcxprt_rdma *xprt,
 	struct page *page;
 	int ret;
 
-	ret = svc_rdma_repost_recv(xprt, GFP_KERNEL);
-	if (ret)
-		return;
-
 	page = alloc_page(GFP_KERNEL);
 	if (!page)
 		return;
@@ -554,8 +550,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 		ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p,
 					       &rqstp->rq_arg);
 		svc_rdma_put_context(ctxt, 0);
-		if (ret)
-			goto repost;
 		return ret;
 	}
 
@@ -590,6 +584,5 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 
 out_drop:
 	svc_rdma_put_context(ctxt, 1);
-repost:
-	return svc_rdma_repost_recv(rdma_xprt, GFP_KERNEL);
+	return 0;
 }
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 7c3a211..649441d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -674,9 +674,6 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 		svc_rdma_xdr_encode_reply_chunk(rdma_resp, rp_ch, ret);
 	}
 
-	ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
-	if (ret)
-		goto err1;
 	ret = svc_rdma_send_reply_msg(rdma, rdma_argp, rdma_resp, rqstp,
 				      wr_lst, rp_ch);
 	if (ret < 0)
@@ -687,9 +684,6 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	if (ret != -E2BIG && ret != -EINVAL)
 		goto err1;
 
-	ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
-	if (ret)
-		goto err1;
 	ret = svc_rdma_send_error_msg(rdma, rdma_resp, rqstp);
 	if (ret < 0)
 		goto err0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 46ec069..9ad12a2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -58,6 +58,7 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
+static int svc_rdma_post_recv(struct svcxprt_rdma *xprt);
 static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *, int);
 static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 					struct net *net,
@@ -320,6 +321,8 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 	list_add_tail(&ctxt->list, &xprt->sc_rq_dto_q);
 	spin_unlock(&xprt->sc_rq_dto_lock);
 
+	svc_rdma_post_recv(xprt);
+
 	set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
 	if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
 		goto out;
@@ -404,7 +407,8 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 	return cma_xprt;
 }
 
-int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
+static int
+svc_rdma_post_recv(struct svcxprt_rdma *xprt)
 {
 	struct ib_recv_wr recv_wr, *bad_recv_wr;
 	struct svc_rdma_op_ctxt *ctxt;
@@ -423,7 +427,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
 			pr_err("svcrdma: Too many sges (%d)\n", sge_no);
 			goto err_put_ctxt;
 		}
-		page = alloc_page(flags);
+		page = alloc_page(GFP_KERNEL);
 		if (!page)
 			goto err_put_ctxt;
 		ctxt->pages[sge_no] = page;
@@ -459,21 +463,6 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
 	return -ENOMEM;
 }
 
-int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags)
-{
-	int ret = 0;
-
-	ret = svc_rdma_post_recv(xprt, flags);
-	if (ret) {
-		pr_err("svcrdma: could not post a receive buffer, err=%d.\n",
-		       ret);
-		pr_err("svcrdma: closing transport %p.\n", xprt);
-		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-		ret = -ENOTCONN;
-	}
-	return ret;
-}
-
 static void
 svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt,
 			       struct rdma_conn_param *param)
@@ -833,7 +822,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 
 	/* Post receive buffers */
 	for (i = 0; i < newxprt->sc_max_requests; i++) {
-		ret = svc_rdma_post_recv(newxprt, GFP_KERNEL);
+		ret = svc_rdma_post_recv(newxprt);
 		if (ret) {
 			dprintk("svcrdma: failure posting receive buffers\n");
 			goto errout;


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v2 2/3] NFSD: Clean up write argument XDR decoders
  2018-01-03 20:42 [PATCH v2 0/3] NFS/RDMA server changes for v4.16 Chuck Lever
  2018-01-03 20:42 ` [PATCH v2 1/3] svcrdma: Post Receives in the Receive completion handler Chuck Lever
@ 2018-01-03 20:42 ` Chuck Lever
  2018-01-03 20:42 ` [PATCH v2 3/3] NFSD: Clean up symlink " Chuck Lever
  2018-01-16 21:31 ` [PATCH v2 0/3] NFS/RDMA server changes for v4.16 Chuck Lever
  3 siblings, 0 replies; 13+ messages in thread
From: Chuck Lever @ 2018-01-03 20:42 UTC (permalink / raw)
  To: bfields; +Cc: linux-rdma, linux-nfs

Move common code in NFSD's write arg decoders into a helper. The
immediate benefit is reduction of code duplication and some nice
micro-optimizations (see below).

However, in the long term, this helper could perform a per-transport
call-out to fill the rq_vec (say, using RDMA Reads).

The legacy WRITE decoders and procs are changed to work like NFSv4,
which constructs the rq_vec just before it is about to call
vfs_writev.

Why? Calling a transport call-out from the proc instead of the XDR
decoder means that the incoming FH can be resolved to a particular
filesystem and file. This would allow pages from the backing file to
be presented to the transport to be filled, rather than presenting
anonymous pages and copying or swapping them into the file's page
cache later.

We also prefer using the pages in rq_arg.pages, instead of pulling
the data pages directly out of the rqstp::rq_pages array. This
removes the only reference to rq_pages found in NFSD, eliminating an
NFSD assumption about how transports use the pages in rq_pages.

Lastly, avoid setting up the first element of rq_vec as a zero-
length buffer. This happens with an RDMA transport when a normal
Read chunk is present because the data payload is in rq_arg's
page list (none of it is in the head buffer).

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs3proc.c         |    8 ++++++--
 fs/nfsd/nfs3xdr.c          |   16 ++++------------
 fs/nfsd/nfs4proc.c         |   30 ++++++------------------------
 fs/nfsd/nfs4xdr.c          |    1 -
 fs/nfsd/nfsproc.c          |    9 +++++++--
 fs/nfsd/nfsxdr.c           |   14 ++------------
 fs/nfsd/xdr.h              |    2 +-
 fs/nfsd/xdr3.h             |    2 +-
 fs/nfsd/xdr4.h             |    1 -
 include/linux/sunrpc/svc.h |    2 ++
 net/sunrpc/svc.c           |   42 ++++++++++++++++++++++++++++++++++++++++++
 11 files changed, 71 insertions(+), 56 deletions(-)

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 1d0ce3c..2dd95eb 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -192,6 +192,7 @@
 	struct nfsd3_writeres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	unsigned long cnt = argp->len;
+	unsigned int nvecs;
 
 	dprintk("nfsd: WRITE(3)    %s %d bytes at %Lu%s\n",
 				SVCFH_fmt(&argp->fh),
@@ -201,9 +202,12 @@
 
 	fh_copy(&resp->fh, &argp->fh);
 	resp->committed = argp->stable;
+	nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt);
+	if (!nvecs)
+		RETURN_STATUS(nfserr_io);
 	nfserr = nfsd_write(rqstp, &resp->fh, argp->offset,
-				rqstp->rq_vec, argp->vlen,
-				&cnt, resp->committed);
+			    rqstp->rq_vec, nvecs, &cnt,
+			    resp->committed);
 	resp->count = cnt;
 	RETURN_STATUS(nfserr);
 }
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 2758480..240cdb0e 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -362,7 +362,7 @@ void fill_post_wcc(struct svc_fh *fhp)
 nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_writeargs *args = rqstp->rq_argp;
-	unsigned int len, v, hdr, dlen;
+	unsigned int len, hdr, dlen;
 	u32 max_blocksize = svc_max_payload(rqstp);
 	struct kvec *head = rqstp->rq_arg.head;
 	struct kvec *tail = rqstp->rq_arg.tail;
@@ -404,17 +404,9 @@ void fill_post_wcc(struct svc_fh *fhp)
 		args->count = max_blocksize;
 		len = args->len = max_blocksize;
 	}
-	rqstp->rq_vec[0].iov_base = (void*)p;
-	rqstp->rq_vec[0].iov_len = head->iov_len - hdr;
-	v = 0;
-	while (len > rqstp->rq_vec[v].iov_len) {
-		len -= rqstp->rq_vec[v].iov_len;
-		v++;
-		rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]);
-		rqstp->rq_vec[v].iov_len = PAGE_SIZE;
-	}
-	rqstp->rq_vec[v].iov_len = len;
-	args->vlen = v + 1;
+
+	args->first.iov_base = (void *)p;
+	args->first.iov_len = head->iov_len - hdr;
 	return 1;
 }
 
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 008ea0b..5029b96 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -969,24 +969,6 @@ static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh)
 	return status;
 }
 
-static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
-{
-        int i = 1;
-        int buflen = write->wr_buflen;
-
-        vec[0].iov_base = write->wr_head.iov_base;
-        vec[0].iov_len = min_t(int, buflen, write->wr_head.iov_len);
-        buflen -= vec[0].iov_len;
-
-        while (buflen) {
-                vec[i].iov_base = page_address(write->wr_pagelist[i - 1]);
-                vec[i].iov_len = min_t(int, PAGE_SIZE, buflen);
-                buflen -= vec[i].iov_len;
-                i++;
-        }
-        return i;
-}
-
 static __be32
 nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	    union nfsd4_op_u *u)
@@ -995,8 +977,8 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
 	stateid_t *stateid = &write->wr_stateid;
 	struct file *filp = NULL;
 	__be32 status = nfs_ok;
+	unsigned int nvecs;
 	unsigned long cnt;
-	int nvecs;
 
 	if (write->wr_offset >= OFFSET_MAX)
 		return nfserr_inval;
@@ -1012,12 +994,12 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
 	write->wr_how_written = write->wr_stable_how;
 	gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp));
 
-	nvecs = fill_in_write_vector(rqstp->rq_vec, write);
-	WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
-
+	nvecs = svc_fill_write_vector(rqstp, &write->wr_head, cnt);
+	if (!nvecs)
+		return nfserr_io;
 	status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
-				write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
-				write->wr_how_written);
+				write->wr_offset, rqstp->rq_vec, nvecs,
+				&cnt, write->wr_how_written);
 	fput(filp);
 
 	write->wr_bytes_written = cnt;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2c61c6b..bd25230 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1285,7 +1285,6 @@ static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_ne
 	}
 	write->wr_head.iov_base = p;
 	write->wr_head.iov_len = avail;
-	write->wr_pagelist = argp->pagelist;
 
 	len = XDR_QUADLEN(write->wr_buflen) << 2;
 	if (len >= avail) {
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 43c0419..1995ea6 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -212,13 +212,18 @@
 	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	unsigned long cnt = argp->len;
+	unsigned int nvecs;
 
 	dprintk("nfsd: WRITE    %s %d bytes at %d\n",
 		SVCFH_fmt(&argp->fh),
 		argp->len, argp->offset);
 
-	nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset,
-				rqstp->rq_vec, argp->vlen, &cnt, NFS_DATA_SYNC);
+	nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt);
+	if (!nvecs)
+		return nfserr_io;
+	nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
+			    argp->offset, rqstp->rq_vec, nvecs,
+			    &cnt, NFS_DATA_SYNC);
 	return nfsd_return_attrs(nfserr, resp);
 }
 
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 644a034..165e25e 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -286,7 +286,6 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *f
 	struct nfsd_writeargs *args = rqstp->rq_argp;
 	unsigned int len, hdr, dlen;
 	struct kvec *head = rqstp->rq_arg.head;
-	int v;
 
 	p = decode_fh(p, &args->fh);
 	if (!p)
@@ -322,17 +321,8 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *f
 	if (dlen < XDR_QUADLEN(len)*4)
 		return 0;
 
-	rqstp->rq_vec[0].iov_base = (void*)p;
-	rqstp->rq_vec[0].iov_len = head->iov_len - hdr;
-	v = 0;
-	while (len > rqstp->rq_vec[v].iov_len) {
-		len -= rqstp->rq_vec[v].iov_len;
-		v++;
-		rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]);
-		rqstp->rq_vec[v].iov_len = PAGE_SIZE;
-	}
-	rqstp->rq_vec[v].iov_len = len;
-	args->vlen = v + 1;
+	args->first.iov_base = (void *)p;
+	args->first.iov_len = head->iov_len - hdr;
 	return 1;
 }
 
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 2f4f22e..a765c41 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -34,7 +34,7 @@ struct nfsd_writeargs {
 	svc_fh			fh;
 	__u32			offset;
 	int			len;
-	int			vlen;
+	struct kvec		first;
 };
 
 struct nfsd_createargs {
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 056bf8a..deccf7f 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -41,7 +41,7 @@ struct nfsd3_writeargs {
 	__u32			count;
 	int			stable;
 	__u32			len;
-	int			vlen;
+	struct kvec		first;
 };
 
 struct nfsd3_createargs {
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index bc29511..d56219d 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -390,7 +390,6 @@ struct nfsd4_write {
 	u32		wr_stable_how;      /* request */
 	u32		wr_buflen;          /* request */
 	struct kvec	wr_head;
-	struct page **	wr_pagelist;        /* request */
 
 	u32		wr_bytes_written;   /* response */
 	u32		wr_how_written;     /* response */
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 786ae22..238b9ae 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -493,6 +493,8 @@ int		   svc_register(const struct svc_serv *, struct net *, const int,
 void		   svc_reserve(struct svc_rqst *rqstp, int space);
 struct svc_pool *  svc_pool_for_cpu(struct svc_serv *serv, int cpu);
 char *		   svc_print_addr(struct svc_rqst *, char *, size_t);
+unsigned int	   svc_fill_write_vector(struct svc_rqst *rqstp,
+					 struct kvec *first, size_t total);
 
 #define	RPC_MAX_ADDRBUFLEN	(63U)
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 387cc4a..759b668 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1536,3 +1536,45 @@ u32 svc_max_payload(const struct svc_rqst *rqstp)
 	return max;
 }
 EXPORT_SYMBOL_GPL(svc_max_payload);
+
+/**
+ * svc_fill_write_vector - Construct data argument for VFS write call
+ * @rqstp: svc_rqst to operate on
+ * @first: buffer containing first section of write payload
+ * @total: total number of bytes of write payload
+ *
+ * Returns the number of elements populated in the data argument array.
+ */
+unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first,
+				   size_t total)
+{
+	struct kvec *vec = rqstp->rq_vec;
+	struct page **pages;
+	unsigned int i;
+
+	/* Some types of transport can present the write payload
+	 * entirely in rq_arg.pages. In this case, @first is empty.
+	 */
+	i = 0;
+	if (first->iov_len) {
+		vec[i].iov_base = first->iov_base;
+		vec[i].iov_len = min_t(size_t, total, first->iov_len);
+		total -= vec[i].iov_len;
+		++i;
+	}
+
+	WARN_ON_ONCE(rqstp->rq_arg.page_base != 0);
+	pages = rqstp->rq_arg.pages;
+	while (total) {
+		vec[i].iov_base = page_address(*pages);
+		vec[i].iov_len = min_t(size_t, total, PAGE_SIZE);
+		total -= vec[i].iov_len;
+		++i;
+
+		++pages;
+	}
+
+	WARN_ON_ONCE(i > ARRAY_SIZE(rqstp->rq_vec));
+	return i;
+}
+EXPORT_SYMBOL_GPL(svc_fill_write_vector);


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v2 3/3] NFSD: Clean up symlink argument XDR decoders
  2018-01-03 20:42 [PATCH v2 0/3] NFS/RDMA server changes for v4.16 Chuck Lever
  2018-01-03 20:42 ` [PATCH v2 1/3] svcrdma: Post Receives in the Receive completion handler Chuck Lever
  2018-01-03 20:42 ` [PATCH v2 2/3] NFSD: Clean up write argument XDR decoders Chuck Lever
@ 2018-01-03 20:42 ` Chuck Lever
  2018-01-22 22:00   ` J. Bruce Fields
  2018-01-16 21:31 ` [PATCH v2 0/3] NFS/RDMA server changes for v4.16 Chuck Lever
  3 siblings, 1 reply; 13+ messages in thread
From: Chuck Lever @ 2018-01-03 20:42 UTC (permalink / raw)
  To: bfields; +Cc: linux-rdma, linux-nfs

Move common code in NFSD's symlink arg decoders into a helper. The
immediate benefits include:

 - one fewer data copies on transports that support DDP
 - no memory allocation in the NFSv4 XDR decoder
 - consistent error checking across all versions
 - reduction of code duplication
 - support for both legal forms of SYMLINK requests on RDMA
   transports for all versions of NFS (in particular, NFSv2, for
   completeness)

In the long term, this helper is an appropriate spot to perform a
per-transport call-out to fill the pathname argument using, say,
RDMA Reads.

Filling the pathname in the proc function also means that eventually
the incoming filehandle can be interpreted so that filesystem-
specific memory can be allocated as a sink for the pathname
argument, rather than using anonymous pages.

Wondering why the current code punts a zero-length SYMLINK. Is it
illegal to create a zero-length SYMLINK on Linux?

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs3proc.c         |   10 +++++++
 fs/nfsd/nfs3xdr.c          |   51 ++++++++-------------------------
 fs/nfsd/nfs4proc.c         |    7 +++++
 fs/nfsd/nfs4xdr.c          |   10 +++++--
 fs/nfsd/nfsproc.c          |   14 +++++----
 fs/nfsd/nfsxdr.c           |   49 +++++++++++++++++++-------------
 fs/nfsd/xdr.h              |    1 +
 fs/nfsd/xdr3.h             |    1 +
 fs/nfsd/xdr4.h             |    2 +
 include/linux/sunrpc/svc.h |    2 +
 net/sunrpc/svc.c           |   67 ++++++++++++++++++++++++++++++++++++++++++++
 11 files changed, 146 insertions(+), 68 deletions(-)

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 2dd95eb..6259a4b 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -283,6 +283,16 @@
 	struct nfsd3_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
+	if (argp->tlen == 0)
+		RETURN_STATUS(nfserr_inval);
+	if (argp->tlen > NFS3_MAXPATHLEN)
+		RETURN_STATUS(nfserr_nametoolong);
+
+	argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
+						argp->tlen);
+	if (IS_ERR(argp->tname))
+		RETURN_STATUS(nfserrno(PTR_ERR(argp->tname)));
+
 	dprintk("nfsd: SYMLINK(3)  %s %.*s -> %.*s\n",
 				SVCFH_fmt(&argp->ffh),
 				argp->flen, argp->fname,
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 240cdb0e..78b555b 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -452,51 +452,24 @@ void fill_post_wcc(struct svc_fh *fhp)
 nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_symlinkargs *args = rqstp->rq_argp;
-	unsigned int len, avail;
-	char *old, *new;
-	struct kvec *vec;
+	char *base = (char *)p;
+	size_t dlen;
 
 	if (!(p = decode_fh(p, &args->ffh)) ||
-	    !(p = decode_filename(p, &args->fname, &args->flen))
-		)
+	    !(p = decode_filename(p, &args->fname, &args->flen)))
 		return 0;
 	p = decode_sattr3(p, &args->attrs);
 
-	/* now decode the pathname, which might be larger than the first page.
-	 * As we have to check for nul's anyway, we copy it into a new page
-	 * This page appears in the rq_res.pages list, but as pages_len is always
-	 * 0, it won't get in the way
-	 */
-	len = ntohl(*p++);
-	if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE)
-		return 0;
-	args->tname = new = page_address(*(rqstp->rq_next_page++));
-	args->tlen = len;
-	/* first copy and check from the first page */
-	old = (char*)p;
-	vec = &rqstp->rq_arg.head[0];
-	if ((void *)old > vec->iov_base + vec->iov_len)
-		return 0;
-	avail = vec->iov_len - (old - (char*)vec->iov_base);
-	while (len && avail && *old) {
-		*new++ = *old++;
-		len--;
-		avail--;
-	}
-	/* now copy next page if there is one */
-	if (len && !avail && rqstp->rq_arg.page_len) {
-		avail = min_t(unsigned int, rqstp->rq_arg.page_len, PAGE_SIZE);
-		old = page_address(rqstp->rq_arg.pages[0]);
-	}
-	while (len && avail && *old) {
-		*new++ = *old++;
-		len--;
-		avail--;
-	}
-	*new = '\0';
-	if (len)
-		return 0;
+	args->tlen = ntohl(*p++);
 
+	args->first.iov_base = p;
+	args->first.iov_len = rqstp->rq_arg.head[0].iov_len;
+	args->first.iov_len -= (char *)p - base;
+
+	dlen = args->first.iov_len + rqstp->rq_arg.page_len +
+	       rqstp->rq_arg.tail[0].iov_len;
+	if (dlen < XDR_QUADLEN(args->tlen) << 2)
+		return 0;
 	return 1;
 }
 
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 5029b96..36bd1f7 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -605,6 +605,13 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
 
 	switch (create->cr_type) {
 	case NF4LNK:
+		if (create->cr_datalen > NFS4_MAXPATHLEN)
+			return nfserr_nametoolong;
+		create->cr_data =
+			svc_fill_symlink_pathname(rqstp, &create->cr_first,
+						  create->cr_datalen);
+		if (IS_ERR(create->cr_data))
+			return nfserrno(PTR_ERR(create->cr_data));
 		status = nfsd_symlink(rqstp, &cstate->current_fh,
 				      create->cr_name, create->cr_namelen,
 				      create->cr_data, &resfh);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index bd25230..d05384e 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -648,6 +648,7 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,
 static __be32
 nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create)
 {
+	struct kvec *head;
 	DECODE_HEAD;
 
 	READ_BUF(4);
@@ -656,10 +657,13 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,
 	case NF4LNK:
 		READ_BUF(4);
 		create->cr_datalen = be32_to_cpup(p++);
+		if (create->cr_datalen == 0)
+			return nfserr_inval;
+		head = argp->rqstp->rq_arg.head;
+		create->cr_first.iov_base = p;
+		create->cr_first.iov_len = head->iov_len;
+		create->cr_first.iov_len -= (char *)p - (char *)head->iov_base;
 		READ_BUF(create->cr_datalen);
-		create->cr_data = svcxdr_dupstr(argp, p, create->cr_datalen);
-		if (!create->cr_data)
-			return nfserr_jukebox;
 		break;
 	case NF4BLK:
 	case NF4CHR:
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 1995ea6..f107f9f 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -449,17 +449,19 @@
 	struct svc_fh	newfh;
 	__be32		nfserr;
 
+	if (argp->tlen > NFS_MAXPATHLEN)
+		return nfserr_nametoolong;
+
+	argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
+						argp->tlen);
+	if (IS_ERR(argp->tname))
+		return nfserrno(PTR_ERR(argp->tname));
+
 	dprintk("nfsd: SYMLINK  %s %.*s -> %.*s\n",
 		SVCFH_fmt(&argp->ffh), argp->flen, argp->fname,
 		argp->tlen, argp->tname);
 
 	fh_init(&newfh, NFS_FHSIZE);
-	/*
-	 * Crazy hack: the request fits in a page, and already-decoded
-	 * attributes follow argp->tname, so it's safe to just write a
-	 * null to ensure it's null-terminated:
-	 */
-	argp->tname[argp->tlen] = '\0';
 	nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
 						 argp->tname, &newfh);
 
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 165e25e..8fcd047 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -71,22 +71,6 @@ __be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp)
 }
 
 static __be32 *
-decode_pathname(__be32 *p, char **namp, unsigned int *lenp)
-{
-	char		*name;
-	unsigned int	i;
-
-	if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN)) != NULL) {
-		for (i = 0, name = *namp; i < *lenp; i++, name++) {
-			if (*name == '\0')
-				return NULL;
-		}
-	}
-
-	return p;
-}
-
-static __be32 *
 decode_sattr(__be32 *p, struct iattr *iap)
 {
 	u32	tmp, tmp1;
@@ -383,14 +367,39 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *f
 nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd_symlinkargs *args = rqstp->rq_argp;
+	char *base = (char *)p;
+	size_t xdrlen;
 
 	if (   !(p = decode_fh(p, &args->ffh))
-	    || !(p = decode_filename(p, &args->fname, &args->flen))
-	    || !(p = decode_pathname(p, &args->tname, &args->tlen)))
+	    || !(p = decode_filename(p, &args->fname, &args->flen)))
 		return 0;
-	p = decode_sattr(p, &args->attrs);
 
-	return xdr_argsize_check(rqstp, p);
+	args->tlen = ntohl(*p++);
+	if (args->tlen == 0)
+		return 0;
+
+	args->first.iov_base = p;
+	args->first.iov_len = rqstp->rq_arg.head[0].iov_len;
+	args->first.iov_len -= (char *)p - base;
+
+	/* This request is never larger than a page. Therefore,
+	 * transport will deliver either:
+	 * 1. pathname in the pagelist -> sattr is in the tail.
+	 * 2. everything in the head buffer -> sattr is in the head.
+	 */
+	if (rqstp->rq_arg.page_len) {
+		if (args->tlen != rqstp->rq_arg.page_len)
+			return 0;
+		p = rqstp->rq_arg.tail[0].iov_base;
+	} else {
+		xdrlen = XDR_QUADLEN(args->tlen);
+		if (xdrlen > args->first.iov_len - (8 * sizeof(__be32)))
+			return 0;
+		p += xdrlen;
+	}
+	decode_sattr(p, &args->attrs);
+
+	return 1;
 }
 
 int
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index a765c41..ea7cca3 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -72,6 +72,7 @@ struct nfsd_symlinkargs {
 	char *			tname;
 	unsigned int		tlen;
 	struct iattr		attrs;
+	struct kvec		first;
 };
 
 struct nfsd_readdirargs {
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index deccf7f..2cb29e9 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -90,6 +90,7 @@ struct nfsd3_symlinkargs {
 	char *			tname;
 	unsigned int		tlen;
 	struct iattr		attrs;
+	struct kvec		first;
 };
 
 struct nfsd3_readdirargs {
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index d56219d..b485cd1 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -110,6 +110,7 @@ struct nfsd4_create {
 		struct {
 			u32 datalen;
 			char *data;
+			struct kvec first;
 		} link;   /* NF4LNK */
 		struct {
 			u32 specdata1;
@@ -124,6 +125,7 @@ struct nfsd4_create {
 };
 #define cr_datalen	u.link.datalen
 #define cr_data		u.link.data
+#define cr_first	u.link.first
 #define cr_specdata1	u.dev.specdata1
 #define cr_specdata2	u.dev.specdata2
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 238b9ae..fd5846e 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -495,6 +495,8 @@ int		   svc_register(const struct svc_serv *, struct net *, const int,
 char *		   svc_print_addr(struct svc_rqst *, char *, size_t);
 unsigned int	   svc_fill_write_vector(struct svc_rqst *rqstp,
 					 struct kvec *first, size_t total);
+char		  *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
+					     struct kvec *first, size_t total);
 
 #define	RPC_MAX_ADDRBUFLEN	(63U)
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 759b668..fc93406 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1578,3 +1578,70 @@ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first,
 	return i;
 }
 EXPORT_SYMBOL_GPL(svc_fill_write_vector);
+
+/**
+ * svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call
+ * @rqstp: svc_rqst to operate on
+ * @first: buffer containing first section of pathname
+ * @total: total length of the pathname argument
+ *
+ * Returns pointer to a NUL-terminated string, or an ERR_PTR. The buffer is
+ * released automatically when @rqstp is recycled.
+ */
+char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first,
+				size_t total)
+{
+	struct xdr_buf *arg = &rqstp->rq_arg;
+	struct page **pages;
+	char *result;
+
+	/* VFS API demands a NUL-terminated pathname. This function
+	 * uses a page from @rqstp as the pathname buffer, to enable
+	 * direct placement. Thus the total buffer size is PAGE_SIZE.
+	 * Space in this buffer for NUL-termination requires that we
+	 * cap the size of the returned symlink pathname just a
+	 * little early.
+	 */
+	if (total > PAGE_SIZE - 1)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	/* Some types of transport can present the pathname entirely
+	 * in rq_arg.pages. If not, then copy the pathname into one
+	 * page.
+	 */
+	pages = arg->pages;
+	WARN_ON_ONCE(arg->page_base != 0);
+	if (first->iov_base == 0) {
+		result = page_address(*pages);
+		result[total] = '\0';
+	} else {
+		size_t len, remaining;
+		char *dst;
+
+		result = page_address(*(rqstp->rq_next_page++));
+		dst = result;
+		remaining = total;
+
+		len = min_t(size_t, total, first->iov_len);
+		memcpy(dst, first->iov_base, len);
+		dst += len;
+		remaining -= len;
+
+		/* No more than one page left */
+		if (remaining) {
+			len = min_t(size_t, remaining, PAGE_SIZE);
+			memcpy(dst, page_address(*pages), len);
+			dst += len;
+		}
+
+		*dst = '\0';
+	}
+
+	/* Sanity check: we don't allow the pathname argument to
+	 * contain a NUL byte.
+	 */
+	if (strlen(result) != total)
+		return ERR_PTR(-EINVAL);
+	return result;
+}
+EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname);


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 0/3] NFS/RDMA server changes for v4.16
  2018-01-03 20:42 [PATCH v2 0/3] NFS/RDMA server changes for v4.16 Chuck Lever
                   ` (2 preceding siblings ...)
  2018-01-03 20:42 ` [PATCH v2 3/3] NFSD: Clean up symlink " Chuck Lever
@ 2018-01-16 21:31 ` Chuck Lever
  2018-01-16 21:33   ` Bruce Fields
  3 siblings, 1 reply; 13+ messages in thread
From: Chuck Lever @ 2018-01-16 21:31 UTC (permalink / raw)
  To: Bruce Fields; +Cc: linux-rdma, Linux NFS Mailing List



> On Jan 3, 2018, at 3:42 PM, Chuck Lever <chuck.lever@oracle.com> =
wrote:
>=20
> Hi Bruce-
>=20
> Would you consider these patches for merging into v4.16?

Hi Bruce, since you haven't responded, I assume these will
not appear in v4.16. Let's drop these for now, and I will
resubmit them for v4.17.


> Changes since v1:
> - Rebased on v4.15-rc6
> - Address a 0day robot complaint in nfsd3_proc_symlink
> - Properly handle error return from svc_rdma_handle_bc_reply
>=20
> ---
>=20
> Chuck Lever (3):
>      svcrdma: Post Receives in the Receive completion handler
>      NFSD: Clean up write argument XDR decoders
>      NFSD: Clean up symlink argument XDR decoders
>=20
>=20
> fs/nfsd/nfs3proc.c                         |   18 ++++-
> fs/nfsd/nfs3xdr.c                          |   67 ++++-------------
> fs/nfsd/nfs4proc.c                         |   37 +++-------
> fs/nfsd/nfs4xdr.c                          |   11 ++-
> fs/nfsd/nfsproc.c                          |   23 ++++--
> fs/nfsd/nfsxdr.c                           |   63 ++++++++--------
> fs/nfsd/xdr.h                              |    3 +
> fs/nfsd/xdr3.h                             |    3 +
> fs/nfsd/xdr4.h                             |    3 +
> include/linux/sunrpc/svc.h                 |    4 +
> include/linux/sunrpc/svc_rdma.h            |    2 -
> net/sunrpc/svc.c                           |  109 =
++++++++++++++++++++++++++++
> net/sunrpc/xprtrdma/svc_rdma_backchannel.c |    5 -
> net/sunrpc/xprtrdma/svc_rdma_recvfrom.c    |    9 --
> net/sunrpc/xprtrdma/svc_rdma_sendto.c      |    6 --
> net/sunrpc/xprtrdma/svc_rdma_transport.c   |   25 ++----
> 16 files changed, 225 insertions(+), 163 deletions(-)
>=20
> --
> Chuck Lever
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" =
in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Chuck Lever




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 0/3] NFS/RDMA server changes for v4.16
  2018-01-16 21:31 ` [PATCH v2 0/3] NFS/RDMA server changes for v4.16 Chuck Lever
@ 2018-01-16 21:33   ` Bruce Fields
  2018-01-17 14:44     ` Chuck Lever
  0 siblings, 1 reply; 13+ messages in thread
From: Bruce Fields @ 2018-01-16 21:33 UTC (permalink / raw)
  To: Chuck Lever; +Cc: linux-rdma, Linux NFS Mailing List

On Tue, Jan 16, 2018 at 04:31:07PM -0500, Chuck Lever wrote:
> 
> 
> > On Jan 3, 2018, at 3:42 PM, Chuck Lever <chuck.lever@oracle.com> wrote:
> > 
> > Hi Bruce-
> > 
> > Would you consider these patches for merging into v4.16?
> 
> Hi Bruce, since you haven't responded, I assume these will
> not appear in v4.16. Let's drop these for now, and I will
> resubmit them for v4.17.

Sorry for the lack of response, I started reviewing but didn't finish,
and still expect to get them in for 4.16.

--b.

> 
> 
> > Changes since v1:
> > - Rebased on v4.15-rc6
> > - Address a 0day robot complaint in nfsd3_proc_symlink
> > - Properly handle error return from svc_rdma_handle_bc_reply
> > 
> > ---
> > 
> > Chuck Lever (3):
> >      svcrdma: Post Receives in the Receive completion handler
> >      NFSD: Clean up write argument XDR decoders
> >      NFSD: Clean up symlink argument XDR decoders
> > 
> > 
> > fs/nfsd/nfs3proc.c                         |   18 ++++-
> > fs/nfsd/nfs3xdr.c                          |   67 ++++-------------
> > fs/nfsd/nfs4proc.c                         |   37 +++-------
> > fs/nfsd/nfs4xdr.c                          |   11 ++-
> > fs/nfsd/nfsproc.c                          |   23 ++++--
> > fs/nfsd/nfsxdr.c                           |   63 ++++++++--------
> > fs/nfsd/xdr.h                              |    3 +
> > fs/nfsd/xdr3.h                             |    3 +
> > fs/nfsd/xdr4.h                             |    3 +
> > include/linux/sunrpc/svc.h                 |    4 +
> > include/linux/sunrpc/svc_rdma.h            |    2 -
> > net/sunrpc/svc.c                           |  109 ++++++++++++++++++++++++++++
> > net/sunrpc/xprtrdma/svc_rdma_backchannel.c |    5 -
> > net/sunrpc/xprtrdma/svc_rdma_recvfrom.c    |    9 --
> > net/sunrpc/xprtrdma/svc_rdma_sendto.c      |    6 --
> > net/sunrpc/xprtrdma/svc_rdma_transport.c   |   25 ++----
> > 16 files changed, 225 insertions(+), 163 deletions(-)
> > 
> > --
> > Chuck Lever
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> --
> Chuck Lever
> 
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 0/3] NFS/RDMA server changes for v4.16
  2018-01-16 21:33   ` Bruce Fields
@ 2018-01-17 14:44     ` Chuck Lever
  0 siblings, 0 replies; 13+ messages in thread
From: Chuck Lever @ 2018-01-17 14:44 UTC (permalink / raw)
  To: Bruce Fields; +Cc: linux-rdma, Linux NFS Mailing List



> On Jan 16, 2018, at 4:33 PM, Bruce Fields <bfields@fieldses.org> =
wrote:
>=20
> On Tue, Jan 16, 2018 at 04:31:07PM -0500, Chuck Lever wrote:
>>=20
>>=20
>>> On Jan 3, 2018, at 3:42 PM, Chuck Lever <chuck.lever@oracle.com> =
wrote:
>>>=20
>>> Hi Bruce-
>>>=20
>>> Would you consider these patches for merging into v4.16?
>>=20
>> Hi Bruce, since you haven't responded, I assume these will
>> not appear in v4.16. Let's drop these for now, and I will
>> resubmit them for v4.17.
>=20
> Sorry for the lack of response, I started reviewing but didn't finish,
> and still expect to get them in for 4.16.

No rush. I have more work in this area, so it would be OK
to postpone them.


> --b.
>=20
>>=20
>>=20
>>> Changes since v1:
>>> - Rebased on v4.15-rc6
>>> - Address a 0day robot complaint in nfsd3_proc_symlink
>>> - Properly handle error return from svc_rdma_handle_bc_reply
>>>=20
>>> ---
>>>=20
>>> Chuck Lever (3):
>>>     svcrdma: Post Receives in the Receive completion handler
>>>     NFSD: Clean up write argument XDR decoders
>>>     NFSD: Clean up symlink argument XDR decoders
>>>=20
>>>=20
>>> fs/nfsd/nfs3proc.c                         |   18 ++++-
>>> fs/nfsd/nfs3xdr.c                          |   67 ++++-------------
>>> fs/nfsd/nfs4proc.c                         |   37 +++-------
>>> fs/nfsd/nfs4xdr.c                          |   11 ++-
>>> fs/nfsd/nfsproc.c                          |   23 ++++--
>>> fs/nfsd/nfsxdr.c                           |   63 ++++++++--------
>>> fs/nfsd/xdr.h                              |    3 +
>>> fs/nfsd/xdr3.h                             |    3 +
>>> fs/nfsd/xdr4.h                             |    3 +
>>> include/linux/sunrpc/svc.h                 |    4 +
>>> include/linux/sunrpc/svc_rdma.h            |    2 -
>>> net/sunrpc/svc.c                           |  109 =
++++++++++++++++++++++++++++
>>> net/sunrpc/xprtrdma/svc_rdma_backchannel.c |    5 -
>>> net/sunrpc/xprtrdma/svc_rdma_recvfrom.c    |    9 --
>>> net/sunrpc/xprtrdma/svc_rdma_sendto.c      |    6 --
>>> net/sunrpc/xprtrdma/svc_rdma_transport.c   |   25 ++----
>>> 16 files changed, 225 insertions(+), 163 deletions(-)
>>>=20
>>> --
>>> Chuck Lever
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" =
in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>=20
>> --
>> Chuck Lever
>>=20
>>=20
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" =
in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Chuck Lever




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 3/3] NFSD: Clean up symlink argument XDR decoders
  2018-01-03 20:42 ` [PATCH v2 3/3] NFSD: Clean up symlink " Chuck Lever
@ 2018-01-22 22:00   ` J. Bruce Fields
  2018-01-23  1:09     ` Chuck Lever
  0 siblings, 1 reply; 13+ messages in thread
From: J. Bruce Fields @ 2018-01-22 22:00 UTC (permalink / raw)
  To: Chuck Lever; +Cc: linux-rdma, linux-nfs

On Wed, Jan 03, 2018 at 03:42:35PM -0500, Chuck Lever wrote:
> Move common code in NFSD's symlink arg decoders into a helper. The
> immediate benefits include:
> 
>  - one fewer data copies on transports that support DDP
>  - no memory allocation in the NFSv4 XDR decoder
>  - consistent error checking across all versions
>  - reduction of code duplication
>  - support for both legal forms of SYMLINK requests on RDMA
>    transports for all versions of NFS (in particular, NFSv2, for
>    completeness)
> 
> In the long term, this helper is an appropriate spot to perform a
> per-transport call-out to fill the pathname argument using, say,
> RDMA Reads.
> 
> Filling the pathname in the proc function also means that eventually
> the incoming filehandle can be interpreted so that filesystem-
> specific memory can be allocated as a sink for the pathname
> argument, rather than using anonymous pages.
> 
> Wondering why the current code punts a zero-length SYMLINK. Is it
> illegal to create a zero-length SYMLINK on Linux?

SYMLINK(2) says

	ENOENT A directory component in linkpath does not exist or is a
	dangling symbolic link, or target or linkpath is an empty
	string.

That doesn't explain the INVAL, or why this is the right place to be
checking it.

I'm a little nervous about the NULL termination in
svc_fill_symlink_pathname; how do we know it's safe to write a zero
there?  I haven't checked it carefully yet.

--g.

> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  fs/nfsd/nfs3proc.c         |   10 +++++++
>  fs/nfsd/nfs3xdr.c          |   51 ++++++++-------------------------
>  fs/nfsd/nfs4proc.c         |    7 +++++
>  fs/nfsd/nfs4xdr.c          |   10 +++++--
>  fs/nfsd/nfsproc.c          |   14 +++++----
>  fs/nfsd/nfsxdr.c           |   49 +++++++++++++++++++-------------
>  fs/nfsd/xdr.h              |    1 +
>  fs/nfsd/xdr3.h             |    1 +
>  fs/nfsd/xdr4.h             |    2 +
>  include/linux/sunrpc/svc.h |    2 +
>  net/sunrpc/svc.c           |   67 ++++++++++++++++++++++++++++++++++++++++++++
>  11 files changed, 146 insertions(+), 68 deletions(-)
> 
> diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
> index 2dd95eb..6259a4b 100644
> --- a/fs/nfsd/nfs3proc.c
> +++ b/fs/nfsd/nfs3proc.c
> @@ -283,6 +283,16 @@
>  	struct nfsd3_diropres *resp = rqstp->rq_resp;
>  	__be32	nfserr;
>  
> +	if (argp->tlen == 0)
> +		RETURN_STATUS(nfserr_inval);
> +	if (argp->tlen > NFS3_MAXPATHLEN)
> +		RETURN_STATUS(nfserr_nametoolong);
> +
> +	argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
> +						argp->tlen);
> +	if (IS_ERR(argp->tname))
> +		RETURN_STATUS(nfserrno(PTR_ERR(argp->tname)));
> +
>  	dprintk("nfsd: SYMLINK(3)  %s %.*s -> %.*s\n",
>  				SVCFH_fmt(&argp->ffh),
>  				argp->flen, argp->fname,
> diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
> index 240cdb0e..78b555b 100644
> --- a/fs/nfsd/nfs3xdr.c
> +++ b/fs/nfsd/nfs3xdr.c
> @@ -452,51 +452,24 @@ void fill_post_wcc(struct svc_fh *fhp)
>  nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
>  {
>  	struct nfsd3_symlinkargs *args = rqstp->rq_argp;
> -	unsigned int len, avail;
> -	char *old, *new;
> -	struct kvec *vec;
> +	char *base = (char *)p;
> +	size_t dlen;
>  
>  	if (!(p = decode_fh(p, &args->ffh)) ||
> -	    !(p = decode_filename(p, &args->fname, &args->flen))
> -		)
> +	    !(p = decode_filename(p, &args->fname, &args->flen)))
>  		return 0;
>  	p = decode_sattr3(p, &args->attrs);
>  
> -	/* now decode the pathname, which might be larger than the first page.
> -	 * As we have to check for nul's anyway, we copy it into a new page
> -	 * This page appears in the rq_res.pages list, but as pages_len is always
> -	 * 0, it won't get in the way
> -	 */
> -	len = ntohl(*p++);
> -	if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE)
> -		return 0;
> -	args->tname = new = page_address(*(rqstp->rq_next_page++));
> -	args->tlen = len;
> -	/* first copy and check from the first page */
> -	old = (char*)p;
> -	vec = &rqstp->rq_arg.head[0];
> -	if ((void *)old > vec->iov_base + vec->iov_len)
> -		return 0;
> -	avail = vec->iov_len - (old - (char*)vec->iov_base);
> -	while (len && avail && *old) {
> -		*new++ = *old++;
> -		len--;
> -		avail--;
> -	}
> -	/* now copy next page if there is one */
> -	if (len && !avail && rqstp->rq_arg.page_len) {
> -		avail = min_t(unsigned int, rqstp->rq_arg.page_len, PAGE_SIZE);
> -		old = page_address(rqstp->rq_arg.pages[0]);
> -	}
> -	while (len && avail && *old) {
> -		*new++ = *old++;
> -		len--;
> -		avail--;
> -	}
> -	*new = '\0';
> -	if (len)
> -		return 0;
> +	args->tlen = ntohl(*p++);
>  
> +	args->first.iov_base = p;
> +	args->first.iov_len = rqstp->rq_arg.head[0].iov_len;
> +	args->first.iov_len -= (char *)p - base;
> +
> +	dlen = args->first.iov_len + rqstp->rq_arg.page_len +
> +	       rqstp->rq_arg.tail[0].iov_len;
> +	if (dlen < XDR_QUADLEN(args->tlen) << 2)
> +		return 0;
>  	return 1;
>  }
>  
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index 5029b96..36bd1f7 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -605,6 +605,13 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
>  
>  	switch (create->cr_type) {
>  	case NF4LNK:
> +		if (create->cr_datalen > NFS4_MAXPATHLEN)
> +			return nfserr_nametoolong;
> +		create->cr_data =
> +			svc_fill_symlink_pathname(rqstp, &create->cr_first,
> +						  create->cr_datalen);
> +		if (IS_ERR(create->cr_data))
> +			return nfserrno(PTR_ERR(create->cr_data));
>  		status = nfsd_symlink(rqstp, &cstate->current_fh,
>  				      create->cr_name, create->cr_namelen,
>  				      create->cr_data, &resfh);
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index bd25230..d05384e 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -648,6 +648,7 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,
>  static __be32
>  nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create)
>  {
> +	struct kvec *head;
>  	DECODE_HEAD;
>  
>  	READ_BUF(4);
> @@ -656,10 +657,13 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,
>  	case NF4LNK:
>  		READ_BUF(4);
>  		create->cr_datalen = be32_to_cpup(p++);
> +		if (create->cr_datalen == 0)
> +			return nfserr_inval;
> +		head = argp->rqstp->rq_arg.head;
> +		create->cr_first.iov_base = p;
> +		create->cr_first.iov_len = head->iov_len;
> +		create->cr_first.iov_len -= (char *)p - (char *)head->iov_base;
>  		READ_BUF(create->cr_datalen);
> -		create->cr_data = svcxdr_dupstr(argp, p, create->cr_datalen);
> -		if (!create->cr_data)
> -			return nfserr_jukebox;
>  		break;
>  	case NF4BLK:
>  	case NF4CHR:
> diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
> index 1995ea6..f107f9f 100644
> --- a/fs/nfsd/nfsproc.c
> +++ b/fs/nfsd/nfsproc.c
> @@ -449,17 +449,19 @@
>  	struct svc_fh	newfh;
>  	__be32		nfserr;
>  
> +	if (argp->tlen > NFS_MAXPATHLEN)
> +		return nfserr_nametoolong;
> +
> +	argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
> +						argp->tlen);
> +	if (IS_ERR(argp->tname))
> +		return nfserrno(PTR_ERR(argp->tname));
> +
>  	dprintk("nfsd: SYMLINK  %s %.*s -> %.*s\n",
>  		SVCFH_fmt(&argp->ffh), argp->flen, argp->fname,
>  		argp->tlen, argp->tname);
>  
>  	fh_init(&newfh, NFS_FHSIZE);
> -	/*
> -	 * Crazy hack: the request fits in a page, and already-decoded
> -	 * attributes follow argp->tname, so it's safe to just write a
> -	 * null to ensure it's null-terminated:
> -	 */
> -	argp->tname[argp->tlen] = '\0';
>  	nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
>  						 argp->tname, &newfh);
>  
> diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
> index 165e25e..8fcd047 100644
> --- a/fs/nfsd/nfsxdr.c
> +++ b/fs/nfsd/nfsxdr.c
> @@ -71,22 +71,6 @@ __be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp)
>  }
>  
>  static __be32 *
> -decode_pathname(__be32 *p, char **namp, unsigned int *lenp)
> -{
> -	char		*name;
> -	unsigned int	i;
> -
> -	if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN)) != NULL) {
> -		for (i = 0, name = *namp; i < *lenp; i++, name++) {
> -			if (*name == '\0')
> -				return NULL;
> -		}
> -	}
> -
> -	return p;
> -}
> -
> -static __be32 *
>  decode_sattr(__be32 *p, struct iattr *iap)
>  {
>  	u32	tmp, tmp1;
> @@ -383,14 +367,39 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *f
>  nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
>  {
>  	struct nfsd_symlinkargs *args = rqstp->rq_argp;
> +	char *base = (char *)p;
> +	size_t xdrlen;
>  
>  	if (   !(p = decode_fh(p, &args->ffh))
> -	    || !(p = decode_filename(p, &args->fname, &args->flen))
> -	    || !(p = decode_pathname(p, &args->tname, &args->tlen)))
> +	    || !(p = decode_filename(p, &args->fname, &args->flen)))
>  		return 0;
> -	p = decode_sattr(p, &args->attrs);
>  
> -	return xdr_argsize_check(rqstp, p);
> +	args->tlen = ntohl(*p++);
> +	if (args->tlen == 0)
> +		return 0;
> +
> +	args->first.iov_base = p;
> +	args->first.iov_len = rqstp->rq_arg.head[0].iov_len;
> +	args->first.iov_len -= (char *)p - base;
> +
> +	/* This request is never larger than a page. Therefore,
> +	 * transport will deliver either:
> +	 * 1. pathname in the pagelist -> sattr is in the tail.
> +	 * 2. everything in the head buffer -> sattr is in the head.
> +	 */
> +	if (rqstp->rq_arg.page_len) {
> +		if (args->tlen != rqstp->rq_arg.page_len)
> +			return 0;
> +		p = rqstp->rq_arg.tail[0].iov_base;
> +	} else {
> +		xdrlen = XDR_QUADLEN(args->tlen);
> +		if (xdrlen > args->first.iov_len - (8 * sizeof(__be32)))
> +			return 0;
> +		p += xdrlen;
> +	}
> +	decode_sattr(p, &args->attrs);
> +
> +	return 1;
>  }
>  
>  int
> diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
> index a765c41..ea7cca3 100644
> --- a/fs/nfsd/xdr.h
> +++ b/fs/nfsd/xdr.h
> @@ -72,6 +72,7 @@ struct nfsd_symlinkargs {
>  	char *			tname;
>  	unsigned int		tlen;
>  	struct iattr		attrs;
> +	struct kvec		first;
>  };
>  
>  struct nfsd_readdirargs {
> diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
> index deccf7f..2cb29e9 100644
> --- a/fs/nfsd/xdr3.h
> +++ b/fs/nfsd/xdr3.h
> @@ -90,6 +90,7 @@ struct nfsd3_symlinkargs {
>  	char *			tname;
>  	unsigned int		tlen;
>  	struct iattr		attrs;
> +	struct kvec		first;
>  };
>  
>  struct nfsd3_readdirargs {
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index d56219d..b485cd1 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -110,6 +110,7 @@ struct nfsd4_create {
>  		struct {
>  			u32 datalen;
>  			char *data;
> +			struct kvec first;
>  		} link;   /* NF4LNK */
>  		struct {
>  			u32 specdata1;
> @@ -124,6 +125,7 @@ struct nfsd4_create {
>  };
>  #define cr_datalen	u.link.datalen
>  #define cr_data		u.link.data
> +#define cr_first	u.link.first
>  #define cr_specdata1	u.dev.specdata1
>  #define cr_specdata2	u.dev.specdata2
>  
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index 238b9ae..fd5846e 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -495,6 +495,8 @@ int		   svc_register(const struct svc_serv *, struct net *, const int,
>  char *		   svc_print_addr(struct svc_rqst *, char *, size_t);
>  unsigned int	   svc_fill_write_vector(struct svc_rqst *rqstp,
>  					 struct kvec *first, size_t total);
> +char		  *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
> +					     struct kvec *first, size_t total);
>  
>  #define	RPC_MAX_ADDRBUFLEN	(63U)
>  
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index 759b668..fc93406 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -1578,3 +1578,70 @@ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first,
>  	return i;
>  }
>  EXPORT_SYMBOL_GPL(svc_fill_write_vector);
> +
> +/**
> + * svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call
> + * @rqstp: svc_rqst to operate on
> + * @first: buffer containing first section of pathname
> + * @total: total length of the pathname argument
> + *
> + * Returns pointer to a NUL-terminated string, or an ERR_PTR. The buffer is
> + * released automatically when @rqstp is recycled.
> + */
> +char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first,
> +				size_t total)
> +{
> +	struct xdr_buf *arg = &rqstp->rq_arg;
> +	struct page **pages;
> +	char *result;
> +
> +	/* VFS API demands a NUL-terminated pathname. This function
> +	 * uses a page from @rqstp as the pathname buffer, to enable
> +	 * direct placement. Thus the total buffer size is PAGE_SIZE.
> +	 * Space in this buffer for NUL-termination requires that we
> +	 * cap the size of the returned symlink pathname just a
> +	 * little early.
> +	 */
> +	if (total > PAGE_SIZE - 1)
> +		return ERR_PTR(-ENAMETOOLONG);
> +
> +	/* Some types of transport can present the pathname entirely
> +	 * in rq_arg.pages. If not, then copy the pathname into one
> +	 * page.
> +	 */
> +	pages = arg->pages;
> +	WARN_ON_ONCE(arg->page_base != 0);
> +	if (first->iov_base == 0) {
> +		result = page_address(*pages);
> +		result[total] = '\0';
> +	} else {
> +		size_t len, remaining;
> +		char *dst;
> +
> +		result = page_address(*(rqstp->rq_next_page++));
> +		dst = result;
> +		remaining = total;
> +
> +		len = min_t(size_t, total, first->iov_len);
> +		memcpy(dst, first->iov_base, len);
> +		dst += len;
> +		remaining -= len;
> +
> +		/* No more than one page left */
> +		if (remaining) {
> +			len = min_t(size_t, remaining, PAGE_SIZE);
> +			memcpy(dst, page_address(*pages), len);
> +			dst += len;
> +		}
> +
> +		*dst = '\0';
> +	}
> +
> +	/* Sanity check: we don't allow the pathname argument to
> +	 * contain a NUL byte.
> +	 */
> +	if (strlen(result) != total)
> +		return ERR_PTR(-EINVAL);
> +	return result;
> +}
> +EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname);

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 3/3] NFSD: Clean up symlink argument XDR decoders
  2018-01-22 22:00   ` J. Bruce Fields
@ 2018-01-23  1:09     ` Chuck Lever
  2018-01-23 20:52       ` Bruce Fields
  0 siblings, 1 reply; 13+ messages in thread
From: Chuck Lever @ 2018-01-23  1:09 UTC (permalink / raw)
  To: Bruce Fields; +Cc: linux-rdma, Linux NFS Mailing List



> On Jan 22, 2018, at 2:00 PM, J. Bruce Fields <bfields@fieldses.org> =
wrote:
>=20
> On Wed, Jan 03, 2018 at 03:42:35PM -0500, Chuck Lever wrote:
>> Move common code in NFSD's symlink arg decoders into a helper. The
>> immediate benefits include:
>>=20
>> - one fewer data copies on transports that support DDP
>> - no memory allocation in the NFSv4 XDR decoder
>> - consistent error checking across all versions
>> - reduction of code duplication
>> - support for both legal forms of SYMLINK requests on RDMA
>>   transports for all versions of NFS (in particular, NFSv2, for
>>   completeness)
>>=20
>> In the long term, this helper is an appropriate spot to perform a
>> per-transport call-out to fill the pathname argument using, say,
>> RDMA Reads.
>>=20
>> Filling the pathname in the proc function also means that eventually
>> the incoming filehandle can be interpreted so that filesystem-
>> specific memory can be allocated as a sink for the pathname
>> argument, rather than using anonymous pages.
>>=20
>> Wondering why the current code punts a zero-length SYMLINK. Is it
>> illegal to create a zero-length SYMLINK on Linux?
>=20
> SYMLINK(2) says
>=20
> 	ENOENT A directory component in linkpath does not exist or is a
> 	dangling symbolic link, or target or linkpath is an empty
> 	string.
>=20
> That doesn't explain the INVAL, or why this is the right place to be
> checking it.

RFC 1813:

NFS3ERR_IO
      NFS3ERR_ACCES
      NFS3ERR_EXIST
      NFS3ERR_NOTDIR
      NFS3ERR_NOSPC
      NFS3ERR_ROFS
      NFS3ERR_NAMETOOLONG
      NFS3ERR_DQUOT
      NFS3ERR_STALE
      NFS3ERR_BADHANDLE
      NFS3ERR_NOTSUPP
      NFS3ERR_SERVERFAULT

Interestingly, neither INVAL nor NOENT are valid
status codes for NFSv3 SYMLINK. NFS3ERR_NOTSUPP
might be closest, I suppose.

RFC 5661 says explicitly:
=20
If the objname has a length of zero, or if objname does not obey the
UTF-8 definition, the error NFS4ERR_INVAL will be returned.

And lists these as valid status codes for CREATE(NF4LNK):
=20
| NFS4ERR_ACCESS, NFS4ERR_ATTRNOTSUPP,       |
| NFS4ERR_BADCHAR, NFS4ERR_BADNAME,          |
| NFS4ERR_BADOWNER, NFS4ERR_BADTYPE,         |
| NFS4ERR_BADXDR, NFS4ERR_DEADSESSION,       |
| NFS4ERR_DELAY, NFS4ERR_DQUOT,              |
| NFS4ERR_EXIST, NFS4ERR_FHEXPIRED,          |
| NFS4ERR_INVAL, NFS4ERR_IO, NFS4ERR_MLINK,  |
| NFS4ERR_MOVED, NFS4ERR_NAMETOOLONG,        |
| NFS4ERR_NOFILEHANDLE, NFS4ERR_NOSPC,       |
| NFS4ERR_NOTDIR, NFS4ERR_OP_NOT_IN_SESSION, |
| NFS4ERR_PERM, NFS4ERR_REP_TOO_BIG,         |
| NFS4ERR_REP_TOO_BIG_TO_CACHE,              |
| NFS4ERR_REQ_TOO_BIG,                       |
| NFS4ERR_RETRY_UNCACHED_REP, NFS4ERR_ROFS,  |
| NFS4ERR_SERVERFAULT, NFS4ERR_STALE,        |
| NFS4ERR_TOO_MANY_OPS,                      |
| NFS4ERR_UNSAFE_COMPOUND                    |


> I'm a little nervous about the NULL termination in
> svc_fill_symlink_pathname; how do we know it's safe to write a zero
> there?  I haven't checked it carefully yet.

svc_fill_symlink_pathname grabs a whole fresh page
from @rqstp. It is safe to write bytes anywhere in
that page.


> --g.
>=20
>>=20
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>> fs/nfsd/nfs3proc.c         |   10 +++++++
>> fs/nfsd/nfs3xdr.c          |   51 ++++++++-------------------------
>> fs/nfsd/nfs4proc.c         |    7 +++++
>> fs/nfsd/nfs4xdr.c          |   10 +++++--
>> fs/nfsd/nfsproc.c          |   14 +++++----
>> fs/nfsd/nfsxdr.c           |   49 +++++++++++++++++++-------------
>> fs/nfsd/xdr.h              |    1 +
>> fs/nfsd/xdr3.h             |    1 +
>> fs/nfsd/xdr4.h             |    2 +
>> include/linux/sunrpc/svc.h |    2 +
>> net/sunrpc/svc.c           |   67 =
++++++++++++++++++++++++++++++++++++++++++++
>> 11 files changed, 146 insertions(+), 68 deletions(-)
>>=20
>> diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
>> index 2dd95eb..6259a4b 100644
>> --- a/fs/nfsd/nfs3proc.c
>> +++ b/fs/nfsd/nfs3proc.c
>> @@ -283,6 +283,16 @@
>> 	struct nfsd3_diropres *resp =3D rqstp->rq_resp;
>> 	__be32	nfserr;
>>=20
>> +	if (argp->tlen =3D=3D 0)
>> +		RETURN_STATUS(nfserr_inval);
>> +	if (argp->tlen > NFS3_MAXPATHLEN)
>> +		RETURN_STATUS(nfserr_nametoolong);
>> +
>> +	argp->tname =3D svc_fill_symlink_pathname(rqstp, &argp->first,
>> +						argp->tlen);
>> +	if (IS_ERR(argp->tname))
>> +		RETURN_STATUS(nfserrno(PTR_ERR(argp->tname)));
>> +
>> 	dprintk("nfsd: SYMLINK(3)  %s %.*s -> %.*s\n",
>> 				SVCFH_fmt(&argp->ffh),
>> 				argp->flen, argp->fname,
>> diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
>> index 240cdb0e..78b555b 100644
>> --- a/fs/nfsd/nfs3xdr.c
>> +++ b/fs/nfsd/nfs3xdr.c
>> @@ -452,51 +452,24 @@ void fill_post_wcc(struct svc_fh *fhp)
>> nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
>> {
>> 	struct nfsd3_symlinkargs *args =3D rqstp->rq_argp;
>> -	unsigned int len, avail;
>> -	char *old, *new;
>> -	struct kvec *vec;
>> +	char *base =3D (char *)p;
>> +	size_t dlen;
>>=20
>> 	if (!(p =3D decode_fh(p, &args->ffh)) ||
>> -	    !(p =3D decode_filename(p, &args->fname, &args->flen))
>> -		)
>> +	    !(p =3D decode_filename(p, &args->fname, &args->flen)))
>> 		return 0;
>> 	p =3D decode_sattr3(p, &args->attrs);
>>=20
>> -	/* now decode the pathname, which might be larger than the first =
page.
>> -	 * As we have to check for nul's anyway, we copy it into a new =
page
>> -	 * This page appears in the rq_res.pages list, but as pages_len =
is always
>> -	 * 0, it won't get in the way
>> -	 */
>> -	len =3D ntohl(*p++);
>> -	if (len =3D=3D 0 || len > NFS3_MAXPATHLEN || len >=3D PAGE_SIZE)
>> -		return 0;
>> -	args->tname =3D new =3D page_address(*(rqstp->rq_next_page++));
>> -	args->tlen =3D len;
>> -	/* first copy and check from the first page */
>> -	old =3D (char*)p;
>> -	vec =3D &rqstp->rq_arg.head[0];
>> -	if ((void *)old > vec->iov_base + vec->iov_len)
>> -		return 0;
>> -	avail =3D vec->iov_len - (old - (char*)vec->iov_base);
>> -	while (len && avail && *old) {
>> -		*new++ =3D *old++;
>> -		len--;
>> -		avail--;
>> -	}
>> -	/* now copy next page if there is one */
>> -	if (len && !avail && rqstp->rq_arg.page_len) {
>> -		avail =3D min_t(unsigned int, rqstp->rq_arg.page_len, =
PAGE_SIZE);
>> -		old =3D page_address(rqstp->rq_arg.pages[0]);
>> -	}
>> -	while (len && avail && *old) {
>> -		*new++ =3D *old++;
>> -		len--;
>> -		avail--;
>> -	}
>> -	*new =3D '\0';
>> -	if (len)
>> -		return 0;
>> +	args->tlen =3D ntohl(*p++);
>>=20
>> +	args->first.iov_base =3D p;
>> +	args->first.iov_len =3D rqstp->rq_arg.head[0].iov_len;
>> +	args->first.iov_len -=3D (char *)p - base;
>> +
>> +	dlen =3D args->first.iov_len + rqstp->rq_arg.page_len +
>> +	       rqstp->rq_arg.tail[0].iov_len;
>> +	if (dlen < XDR_QUADLEN(args->tlen) << 2)
>> +		return 0;
>> 	return 1;
>> }
>>=20
>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>> index 5029b96..36bd1f7 100644
>> --- a/fs/nfsd/nfs4proc.c
>> +++ b/fs/nfsd/nfs4proc.c
>> @@ -605,6 +605,13 @@ static void gen_boot_verifier(nfs4_verifier =
*verifier, struct net *net)
>>=20
>> 	switch (create->cr_type) {
>> 	case NF4LNK:
>> +		if (create->cr_datalen > NFS4_MAXPATHLEN)
>> +			return nfserr_nametoolong;
>> +		create->cr_data =3D
>> +			svc_fill_symlink_pathname(rqstp, =
&create->cr_first,
>> +						  create->cr_datalen);
>> +		if (IS_ERR(create->cr_data))
>> +			return nfserrno(PTR_ERR(create->cr_data));
>> 		status =3D nfsd_symlink(rqstp, &cstate->current_fh,
>> 				      create->cr_name, =
create->cr_namelen,
>> 				      create->cr_data, &resfh);
>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>> index bd25230..d05384e 100644
>> --- a/fs/nfsd/nfs4xdr.c
>> +++ b/fs/nfsd/nfs4xdr.c
>> @@ -648,6 +648,7 @@ static __be32 =
nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,
>> static __be32
>> nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct =
nfsd4_create *create)
>> {
>> +	struct kvec *head;
>> 	DECODE_HEAD;
>>=20
>> 	READ_BUF(4);
>> @@ -656,10 +657,13 @@ static __be32 =
nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,
>> 	case NF4LNK:
>> 		READ_BUF(4);
>> 		create->cr_datalen =3D be32_to_cpup(p++);
>> +		if (create->cr_datalen =3D=3D 0)
>> +			return nfserr_inval;
>> +		head =3D argp->rqstp->rq_arg.head;
>> +		create->cr_first.iov_base =3D p;
>> +		create->cr_first.iov_len =3D head->iov_len;
>> +		create->cr_first.iov_len -=3D (char *)p - (char =
*)head->iov_base;
>> 		READ_BUF(create->cr_datalen);
>> -		create->cr_data =3D svcxdr_dupstr(argp, p, =
create->cr_datalen);
>> -		if (!create->cr_data)
>> -			return nfserr_jukebox;
>> 		break;
>> 	case NF4BLK:
>> 	case NF4CHR:
>> diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
>> index 1995ea6..f107f9f 100644
>> --- a/fs/nfsd/nfsproc.c
>> +++ b/fs/nfsd/nfsproc.c
>> @@ -449,17 +449,19 @@
>> 	struct svc_fh	newfh;
>> 	__be32		nfserr;
>>=20
>> +	if (argp->tlen > NFS_MAXPATHLEN)
>> +		return nfserr_nametoolong;
>> +
>> +	argp->tname =3D svc_fill_symlink_pathname(rqstp, &argp->first,
>> +						argp->tlen);
>> +	if (IS_ERR(argp->tname))
>> +		return nfserrno(PTR_ERR(argp->tname));
>> +
>> 	dprintk("nfsd: SYMLINK  %s %.*s -> %.*s\n",
>> 		SVCFH_fmt(&argp->ffh), argp->flen, argp->fname,
>> 		argp->tlen, argp->tname);
>>=20
>> 	fh_init(&newfh, NFS_FHSIZE);
>> -	/*
>> -	 * Crazy hack: the request fits in a page, and already-decoded
>> -	 * attributes follow argp->tname, so it's safe to just write a
>> -	 * null to ensure it's null-terminated:
>> -	 */
>> -	argp->tname[argp->tlen] =3D '\0';
>> 	nfserr =3D nfsd_symlink(rqstp, &argp->ffh, argp->fname, =
argp->flen,
>> 						 argp->tname, &newfh);
>>=20
>> diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
>> index 165e25e..8fcd047 100644
>> --- a/fs/nfsd/nfsxdr.c
>> +++ b/fs/nfsd/nfsxdr.c
>> @@ -71,22 +71,6 @@ __be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh =
*fhp)
>> }
>>=20
>> static __be32 *
>> -decode_pathname(__be32 *p, char **namp, unsigned int *lenp)
>> -{
>> -	char		*name;
>> -	unsigned int	i;
>> -
>> -	if ((p =3D xdr_decode_string_inplace(p, namp, lenp, =
NFS_MAXPATHLEN)) !=3D NULL) {
>> -		for (i =3D 0, name =3D *namp; i < *lenp; i++, name++) {
>> -			if (*name =3D=3D '\0')
>> -				return NULL;
>> -		}
>> -	}
>> -
>> -	return p;
>> -}
>> -
>> -static __be32 *
>> decode_sattr(__be32 *p, struct iattr *iap)
>> {
>> 	u32	tmp, tmp1;
>> @@ -383,14 +367,39 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst =
*rqstp, __be32 *p, struct svc_fh *f
>> nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
>> {
>> 	struct nfsd_symlinkargs *args =3D rqstp->rq_argp;
>> +	char *base =3D (char *)p;
>> +	size_t xdrlen;
>>=20
>> 	if (   !(p =3D decode_fh(p, &args->ffh))
>> -	    || !(p =3D decode_filename(p, &args->fname, &args->flen))
>> -	    || !(p =3D decode_pathname(p, &args->tname, &args->tlen)))
>> +	    || !(p =3D decode_filename(p, &args->fname, &args->flen)))
>> 		return 0;
>> -	p =3D decode_sattr(p, &args->attrs);
>>=20
>> -	return xdr_argsize_check(rqstp, p);
>> +	args->tlen =3D ntohl(*p++);
>> +	if (args->tlen =3D=3D 0)
>> +		return 0;
>> +
>> +	args->first.iov_base =3D p;
>> +	args->first.iov_len =3D rqstp->rq_arg.head[0].iov_len;
>> +	args->first.iov_len -=3D (char *)p - base;
>> +
>> +	/* This request is never larger than a page. Therefore,
>> +	 * transport will deliver either:
>> +	 * 1. pathname in the pagelist -> sattr is in the tail.
>> +	 * 2. everything in the head buffer -> sattr is in the head.
>> +	 */
>> +	if (rqstp->rq_arg.page_len) {
>> +		if (args->tlen !=3D rqstp->rq_arg.page_len)
>> +			return 0;
>> +		p =3D rqstp->rq_arg.tail[0].iov_base;
>> +	} else {
>> +		xdrlen =3D XDR_QUADLEN(args->tlen);
>> +		if (xdrlen > args->first.iov_len - (8 * sizeof(__be32)))
>> +			return 0;
>> +		p +=3D xdrlen;
>> +	}
>> +	decode_sattr(p, &args->attrs);
>> +
>> +	return 1;
>> }
>>=20
>> int
>> diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
>> index a765c41..ea7cca3 100644
>> --- a/fs/nfsd/xdr.h
>> +++ b/fs/nfsd/xdr.h
>> @@ -72,6 +72,7 @@ struct nfsd_symlinkargs {
>> 	char *			tname;
>> 	unsigned int		tlen;
>> 	struct iattr		attrs;
>> +	struct kvec		first;
>> };
>>=20
>> struct nfsd_readdirargs {
>> diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
>> index deccf7f..2cb29e9 100644
>> --- a/fs/nfsd/xdr3.h
>> +++ b/fs/nfsd/xdr3.h
>> @@ -90,6 +90,7 @@ struct nfsd3_symlinkargs {
>> 	char *			tname;
>> 	unsigned int		tlen;
>> 	struct iattr		attrs;
>> +	struct kvec		first;
>> };
>>=20
>> struct nfsd3_readdirargs {
>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
>> index d56219d..b485cd1 100644
>> --- a/fs/nfsd/xdr4.h
>> +++ b/fs/nfsd/xdr4.h
>> @@ -110,6 +110,7 @@ struct nfsd4_create {
>> 		struct {
>> 			u32 datalen;
>> 			char *data;
>> +			struct kvec first;
>> 		} link;   /* NF4LNK */
>> 		struct {
>> 			u32 specdata1;
>> @@ -124,6 +125,7 @@ struct nfsd4_create {
>> };
>> #define cr_datalen	u.link.datalen
>> #define cr_data		u.link.data
>> +#define cr_first	u.link.first
>> #define cr_specdata1	u.dev.specdata1
>> #define cr_specdata2	u.dev.specdata2
>>=20
>> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
>> index 238b9ae..fd5846e 100644
>> --- a/include/linux/sunrpc/svc.h
>> +++ b/include/linux/sunrpc/svc.h
>> @@ -495,6 +495,8 @@ int		   svc_register(const struct =
svc_serv *, struct net *, const int,
>> char *		   svc_print_addr(struct svc_rqst *, char *, =
size_t);
>> unsigned int	   svc_fill_write_vector(struct svc_rqst *rqstp,
>> 					 struct kvec *first, size_t =
total);
>> +char		  *svc_fill_symlink_pathname(struct svc_rqst =
*rqstp,
>> +					     struct kvec *first, size_t =
total);
>>=20
>> #define	RPC_MAX_ADDRBUFLEN	(63U)
>>=20
>> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
>> index 759b668..fc93406 100644
>> --- a/net/sunrpc/svc.c
>> +++ b/net/sunrpc/svc.c
>> @@ -1578,3 +1578,70 @@ unsigned int svc_fill_write_vector(struct =
svc_rqst *rqstp, struct kvec *first,
>> 	return i;
>> }
>> EXPORT_SYMBOL_GPL(svc_fill_write_vector);
>> +
>> +/**
>> + * svc_fill_symlink_pathname - Construct pathname argument for VFS =
symlink call
>> + * @rqstp: svc_rqst to operate on
>> + * @first: buffer containing first section of pathname
>> + * @total: total length of the pathname argument
>> + *
>> + * Returns pointer to a NUL-terminated string, or an ERR_PTR. The =
buffer is
>> + * released automatically when @rqstp is recycled.
>> + */
>> +char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec =
*first,
>> +				size_t total)
>> +{
>> +	struct xdr_buf *arg =3D &rqstp->rq_arg;
>> +	struct page **pages;
>> +	char *result;
>> +
>> +	/* VFS API demands a NUL-terminated pathname. This function
>> +	 * uses a page from @rqstp as the pathname buffer, to enable
>> +	 * direct placement. Thus the total buffer size is PAGE_SIZE.
>> +	 * Space in this buffer for NUL-termination requires that we
>> +	 * cap the size of the returned symlink pathname just a
>> +	 * little early.
>> +	 */
>> +	if (total > PAGE_SIZE - 1)
>> +		return ERR_PTR(-ENAMETOOLONG);
>> +
>> +	/* Some types of transport can present the pathname entirely
>> +	 * in rq_arg.pages. If not, then copy the pathname into one
>> +	 * page.
>> +	 */
>> +	pages =3D arg->pages;
>> +	WARN_ON_ONCE(arg->page_base !=3D 0);
>> +	if (first->iov_base =3D=3D 0) {
>> +		result =3D page_address(*pages);
>> +		result[total] =3D '\0';
>> +	} else {
>> +		size_t len, remaining;
>> +		char *dst;
>> +
>> +		result =3D page_address(*(rqstp->rq_next_page++));
>> +		dst =3D result;
>> +		remaining =3D total;
>> +
>> +		len =3D min_t(size_t, total, first->iov_len);
>> +		memcpy(dst, first->iov_base, len);
>> +		dst +=3D len;
>> +		remaining -=3D len;
>> +
>> +		/* No more than one page left */
>> +		if (remaining) {
>> +			len =3D min_t(size_t, remaining, PAGE_SIZE);
>> +			memcpy(dst, page_address(*pages), len);
>> +			dst +=3D len;
>> +		}
>> +
>> +		*dst =3D '\0';
>> +	}
>> +
>> +	/* Sanity check: we don't allow the pathname argument to
>> +	 * contain a NUL byte.
>> +	 */
>> +	if (strlen(result) !=3D total)
>> +		return ERR_PTR(-EINVAL);
>> +	return result;
>> +}
>> +EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname);

--
Chuck Lever




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 3/3] NFSD: Clean up symlink argument XDR decoders
  2018-01-23  1:09     ` Chuck Lever
@ 2018-01-23 20:52       ` Bruce Fields
  2018-01-23 22:30         ` Chuck Lever
  0 siblings, 1 reply; 13+ messages in thread
From: Bruce Fields @ 2018-01-23 20:52 UTC (permalink / raw)
  To: Chuck Lever; +Cc: linux-rdma, Linux NFS Mailing List

On Mon, Jan 22, 2018 at 05:09:35PM -0800, Chuck Lever wrote:
> 
> 
> > On Jan 22, 2018, at 2:00 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> > 
> > On Wed, Jan 03, 2018 at 03:42:35PM -0500, Chuck Lever wrote:
> >> Move common code in NFSD's symlink arg decoders into a helper. The
> >> immediate benefits include:
> >> 
> >> - one fewer data copies on transports that support DDP
> >> - no memory allocation in the NFSv4 XDR decoder
> >> - consistent error checking across all versions
> >> - reduction of code duplication
> >> - support for both legal forms of SYMLINK requests on RDMA
> >>   transports for all versions of NFS (in particular, NFSv2, for
> >>   completeness)
> >> 
> >> In the long term, this helper is an appropriate spot to perform a
> >> per-transport call-out to fill the pathname argument using, say,
> >> RDMA Reads.
> >> 
> >> Filling the pathname in the proc function also means that eventually
> >> the incoming filehandle can be interpreted so that filesystem-
> >> specific memory can be allocated as a sink for the pathname
> >> argument, rather than using anonymous pages.
> >> 
> >> Wondering why the current code punts a zero-length SYMLINK. Is it
> >> illegal to create a zero-length SYMLINK on Linux?
> > 
> > SYMLINK(2) says
> > 
> > 	ENOENT A directory component in linkpath does not exist or is a
> > 	dangling symbolic link, or target or linkpath is an empty
> > 	string.
> > 
> > That doesn't explain the INVAL, or why this is the right place to be
> > checking it.
> 
> RFC 1813:
> 
> NFS3ERR_IO
>       NFS3ERR_ACCES
>       NFS3ERR_EXIST
>       NFS3ERR_NOTDIR
>       NFS3ERR_NOSPC
>       NFS3ERR_ROFS
>       NFS3ERR_NAMETOOLONG
>       NFS3ERR_DQUOT
>       NFS3ERR_STALE
>       NFS3ERR_BADHANDLE
>       NFS3ERR_NOTSUPP
>       NFS3ERR_SERVERFAULT
> 
> Interestingly, neither INVAL nor NOENT are valid
> status codes for NFSv3 SYMLINK. NFS3ERR_NOTSUPP
> might be closest, I suppose.
> 
> RFC 5661 says explicitly:
>  
> If the objname has a length of zero, or if objname does not obey the
> UTF-8 definition, the error NFS4ERR_INVAL will be returned.
> 
> And lists these as valid status codes for CREATE(NF4LNK):
>  
> | NFS4ERR_ACCESS, NFS4ERR_ATTRNOTSUPP,       |
> | NFS4ERR_BADCHAR, NFS4ERR_BADNAME,          |
> | NFS4ERR_BADOWNER, NFS4ERR_BADTYPE,         |
> | NFS4ERR_BADXDR, NFS4ERR_DEADSESSION,       |
> | NFS4ERR_DELAY, NFS4ERR_DQUOT,              |
> | NFS4ERR_EXIST, NFS4ERR_FHEXPIRED,          |
> | NFS4ERR_INVAL, NFS4ERR_IO, NFS4ERR_MLINK,  |
> | NFS4ERR_MOVED, NFS4ERR_NAMETOOLONG,        |
> | NFS4ERR_NOFILEHANDLE, NFS4ERR_NOSPC,       |
> | NFS4ERR_NOTDIR, NFS4ERR_OP_NOT_IN_SESSION, |
> | NFS4ERR_PERM, NFS4ERR_REP_TOO_BIG,         |
> | NFS4ERR_REP_TOO_BIG_TO_CACHE,              |
> | NFS4ERR_REQ_TOO_BIG,                       |
> | NFS4ERR_RETRY_UNCACHED_REP, NFS4ERR_ROFS,  |
> | NFS4ERR_SERVERFAULT, NFS4ERR_STALE,        |
> | NFS4ERR_TOO_MANY_OPS,                      |
> | NFS4ERR_UNSAFE_COMPOUND                    |
> 
> 
> > I'm a little nervous about the NULL termination in
> > svc_fill_symlink_pathname; how do we know it's safe to write a zero
> > there?  I haven't checked it carefully yet.
> 
> svc_fill_symlink_pathname grabs a whole fresh page
> from @rqstp. It is safe to write bytes anywhere in
> that page.

How do we know it's safe to grab another page?  (Could we run out of
pages?  I'd probably know the answer if I'd rewritten this code
recently, but I haven't and the details are swapped out....)

Also I don't think that's true in the first->iov_base == 0 case.

--b.

> 
> 
> > --g.
> > 
> >> 
> >> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> >> ---
> >> fs/nfsd/nfs3proc.c         |   10 +++++++
> >> fs/nfsd/nfs3xdr.c          |   51 ++++++++-------------------------
> >> fs/nfsd/nfs4proc.c         |    7 +++++
> >> fs/nfsd/nfs4xdr.c          |   10 +++++--
> >> fs/nfsd/nfsproc.c          |   14 +++++----
> >> fs/nfsd/nfsxdr.c           |   49 +++++++++++++++++++-------------
> >> fs/nfsd/xdr.h              |    1 +
> >> fs/nfsd/xdr3.h             |    1 +
> >> fs/nfsd/xdr4.h             |    2 +
> >> include/linux/sunrpc/svc.h |    2 +
> >> net/sunrpc/svc.c           |   67 ++++++++++++++++++++++++++++++++++++++++++++
> >> 11 files changed, 146 insertions(+), 68 deletions(-)
> >> 
> >> diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
> >> index 2dd95eb..6259a4b 100644
> >> --- a/fs/nfsd/nfs3proc.c
> >> +++ b/fs/nfsd/nfs3proc.c
> >> @@ -283,6 +283,16 @@
> >> 	struct nfsd3_diropres *resp = rqstp->rq_resp;
> >> 	__be32	nfserr;
> >> 
> >> +	if (argp->tlen == 0)
> >> +		RETURN_STATUS(nfserr_inval);
> >> +	if (argp->tlen > NFS3_MAXPATHLEN)
> >> +		RETURN_STATUS(nfserr_nametoolong);
> >> +
> >> +	argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
> >> +						argp->tlen);
> >> +	if (IS_ERR(argp->tname))
> >> +		RETURN_STATUS(nfserrno(PTR_ERR(argp->tname)));
> >> +
> >> 	dprintk("nfsd: SYMLINK(3)  %s %.*s -> %.*s\n",
> >> 				SVCFH_fmt(&argp->ffh),
> >> 				argp->flen, argp->fname,
> >> diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
> >> index 240cdb0e..78b555b 100644
> >> --- a/fs/nfsd/nfs3xdr.c
> >> +++ b/fs/nfsd/nfs3xdr.c
> >> @@ -452,51 +452,24 @@ void fill_post_wcc(struct svc_fh *fhp)
> >> nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
> >> {
> >> 	struct nfsd3_symlinkargs *args = rqstp->rq_argp;
> >> -	unsigned int len, avail;
> >> -	char *old, *new;
> >> -	struct kvec *vec;
> >> +	char *base = (char *)p;
> >> +	size_t dlen;
> >> 
> >> 	if (!(p = decode_fh(p, &args->ffh)) ||
> >> -	    !(p = decode_filename(p, &args->fname, &args->flen))
> >> -		)
> >> +	    !(p = decode_filename(p, &args->fname, &args->flen)))
> >> 		return 0;
> >> 	p = decode_sattr3(p, &args->attrs);
> >> 
> >> -	/* now decode the pathname, which might be larger than the first page.
> >> -	 * As we have to check for nul's anyway, we copy it into a new page
> >> -	 * This page appears in the rq_res.pages list, but as pages_len is always
> >> -	 * 0, it won't get in the way
> >> -	 */
> >> -	len = ntohl(*p++);
> >> -	if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE)
> >> -		return 0;
> >> -	args->tname = new = page_address(*(rqstp->rq_next_page++));
> >> -	args->tlen = len;
> >> -	/* first copy and check from the first page */
> >> -	old = (char*)p;
> >> -	vec = &rqstp->rq_arg.head[0];
> >> -	if ((void *)old > vec->iov_base + vec->iov_len)
> >> -		return 0;
> >> -	avail = vec->iov_len - (old - (char*)vec->iov_base);
> >> -	while (len && avail && *old) {
> >> -		*new++ = *old++;
> >> -		len--;
> >> -		avail--;
> >> -	}
> >> -	/* now copy next page if there is one */
> >> -	if (len && !avail && rqstp->rq_arg.page_len) {
> >> -		avail = min_t(unsigned int, rqstp->rq_arg.page_len, PAGE_SIZE);
> >> -		old = page_address(rqstp->rq_arg.pages[0]);
> >> -	}
> >> -	while (len && avail && *old) {
> >> -		*new++ = *old++;
> >> -		len--;
> >> -		avail--;
> >> -	}
> >> -	*new = '\0';
> >> -	if (len)
> >> -		return 0;
> >> +	args->tlen = ntohl(*p++);
> >> 
> >> +	args->first.iov_base = p;
> >> +	args->first.iov_len = rqstp->rq_arg.head[0].iov_len;
> >> +	args->first.iov_len -= (char *)p - base;
> >> +
> >> +	dlen = args->first.iov_len + rqstp->rq_arg.page_len +
> >> +	       rqstp->rq_arg.tail[0].iov_len;
> >> +	if (dlen < XDR_QUADLEN(args->tlen) << 2)
> >> +		return 0;
> >> 	return 1;
> >> }
> >> 
> >> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> >> index 5029b96..36bd1f7 100644
> >> --- a/fs/nfsd/nfs4proc.c
> >> +++ b/fs/nfsd/nfs4proc.c
> >> @@ -605,6 +605,13 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
> >> 
> >> 	switch (create->cr_type) {
> >> 	case NF4LNK:
> >> +		if (create->cr_datalen > NFS4_MAXPATHLEN)
> >> +			return nfserr_nametoolong;
> >> +		create->cr_data =
> >> +			svc_fill_symlink_pathname(rqstp, &create->cr_first,
> >> +						  create->cr_datalen);
> >> +		if (IS_ERR(create->cr_data))
> >> +			return nfserrno(PTR_ERR(create->cr_data));
> >> 		status = nfsd_symlink(rqstp, &cstate->current_fh,
> >> 				      create->cr_name, create->cr_namelen,
> >> 				      create->cr_data, &resfh);
> >> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> >> index bd25230..d05384e 100644
> >> --- a/fs/nfsd/nfs4xdr.c
> >> +++ b/fs/nfsd/nfs4xdr.c
> >> @@ -648,6 +648,7 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,
> >> static __be32
> >> nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create)
> >> {
> >> +	struct kvec *head;
> >> 	DECODE_HEAD;
> >> 
> >> 	READ_BUF(4);
> >> @@ -656,10 +657,13 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,
> >> 	case NF4LNK:
> >> 		READ_BUF(4);
> >> 		create->cr_datalen = be32_to_cpup(p++);
> >> +		if (create->cr_datalen == 0)
> >> +			return nfserr_inval;
> >> +		head = argp->rqstp->rq_arg.head;
> >> +		create->cr_first.iov_base = p;
> >> +		create->cr_first.iov_len = head->iov_len;
> >> +		create->cr_first.iov_len -= (char *)p - (char *)head->iov_base;
> >> 		READ_BUF(create->cr_datalen);
> >> -		create->cr_data = svcxdr_dupstr(argp, p, create->cr_datalen);
> >> -		if (!create->cr_data)
> >> -			return nfserr_jukebox;
> >> 		break;
> >> 	case NF4BLK:
> >> 	case NF4CHR:
> >> diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
> >> index 1995ea6..f107f9f 100644
> >> --- a/fs/nfsd/nfsproc.c
> >> +++ b/fs/nfsd/nfsproc.c
> >> @@ -449,17 +449,19 @@
> >> 	struct svc_fh	newfh;
> >> 	__be32		nfserr;
> >> 
> >> +	if (argp->tlen > NFS_MAXPATHLEN)
> >> +		return nfserr_nametoolong;
> >> +
> >> +	argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
> >> +						argp->tlen);
> >> +	if (IS_ERR(argp->tname))
> >> +		return nfserrno(PTR_ERR(argp->tname));
> >> +
> >> 	dprintk("nfsd: SYMLINK  %s %.*s -> %.*s\n",
> >> 		SVCFH_fmt(&argp->ffh), argp->flen, argp->fname,
> >> 		argp->tlen, argp->tname);
> >> 
> >> 	fh_init(&newfh, NFS_FHSIZE);
> >> -	/*
> >> -	 * Crazy hack: the request fits in a page, and already-decoded
> >> -	 * attributes follow argp->tname, so it's safe to just write a
> >> -	 * null to ensure it's null-terminated:
> >> -	 */
> >> -	argp->tname[argp->tlen] = '\0';
> >> 	nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
> >> 						 argp->tname, &newfh);
> >> 
> >> diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
> >> index 165e25e..8fcd047 100644
> >> --- a/fs/nfsd/nfsxdr.c
> >> +++ b/fs/nfsd/nfsxdr.c
> >> @@ -71,22 +71,6 @@ __be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp)
> >> }
> >> 
> >> static __be32 *
> >> -decode_pathname(__be32 *p, char **namp, unsigned int *lenp)
> >> -{
> >> -	char		*name;
> >> -	unsigned int	i;
> >> -
> >> -	if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN)) != NULL) {
> >> -		for (i = 0, name = *namp; i < *lenp; i++, name++) {
> >> -			if (*name == '\0')
> >> -				return NULL;
> >> -		}
> >> -	}
> >> -
> >> -	return p;
> >> -}
> >> -
> >> -static __be32 *
> >> decode_sattr(__be32 *p, struct iattr *iap)
> >> {
> >> 	u32	tmp, tmp1;
> >> @@ -383,14 +367,39 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *f
> >> nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
> >> {
> >> 	struct nfsd_symlinkargs *args = rqstp->rq_argp;
> >> +	char *base = (char *)p;
> >> +	size_t xdrlen;
> >> 
> >> 	if (   !(p = decode_fh(p, &args->ffh))
> >> -	    || !(p = decode_filename(p, &args->fname, &args->flen))
> >> -	    || !(p = decode_pathname(p, &args->tname, &args->tlen)))
> >> +	    || !(p = decode_filename(p, &args->fname, &args->flen)))
> >> 		return 0;
> >> -	p = decode_sattr(p, &args->attrs);
> >> 
> >> -	return xdr_argsize_check(rqstp, p);
> >> +	args->tlen = ntohl(*p++);
> >> +	if (args->tlen == 0)
> >> +		return 0;
> >> +
> >> +	args->first.iov_base = p;
> >> +	args->first.iov_len = rqstp->rq_arg.head[0].iov_len;
> >> +	args->first.iov_len -= (char *)p - base;
> >> +
> >> +	/* This request is never larger than a page. Therefore,
> >> +	 * transport will deliver either:
> >> +	 * 1. pathname in the pagelist -> sattr is in the tail.
> >> +	 * 2. everything in the head buffer -> sattr is in the head.
> >> +	 */
> >> +	if (rqstp->rq_arg.page_len) {
> >> +		if (args->tlen != rqstp->rq_arg.page_len)
> >> +			return 0;
> >> +		p = rqstp->rq_arg.tail[0].iov_base;
> >> +	} else {
> >> +		xdrlen = XDR_QUADLEN(args->tlen);
> >> +		if (xdrlen > args->first.iov_len - (8 * sizeof(__be32)))
> >> +			return 0;
> >> +		p += xdrlen;
> >> +	}
> >> +	decode_sattr(p, &args->attrs);
> >> +
> >> +	return 1;
> >> }
> >> 
> >> int
> >> diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
> >> index a765c41..ea7cca3 100644
> >> --- a/fs/nfsd/xdr.h
> >> +++ b/fs/nfsd/xdr.h
> >> @@ -72,6 +72,7 @@ struct nfsd_symlinkargs {
> >> 	char *			tname;
> >> 	unsigned int		tlen;
> >> 	struct iattr		attrs;
> >> +	struct kvec		first;
> >> };
> >> 
> >> struct nfsd_readdirargs {
> >> diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
> >> index deccf7f..2cb29e9 100644
> >> --- a/fs/nfsd/xdr3.h
> >> +++ b/fs/nfsd/xdr3.h
> >> @@ -90,6 +90,7 @@ struct nfsd3_symlinkargs {
> >> 	char *			tname;
> >> 	unsigned int		tlen;
> >> 	struct iattr		attrs;
> >> +	struct kvec		first;
> >> };
> >> 
> >> struct nfsd3_readdirargs {
> >> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> >> index d56219d..b485cd1 100644
> >> --- a/fs/nfsd/xdr4.h
> >> +++ b/fs/nfsd/xdr4.h
> >> @@ -110,6 +110,7 @@ struct nfsd4_create {
> >> 		struct {
> >> 			u32 datalen;
> >> 			char *data;
> >> +			struct kvec first;
> >> 		} link;   /* NF4LNK */
> >> 		struct {
> >> 			u32 specdata1;
> >> @@ -124,6 +125,7 @@ struct nfsd4_create {
> >> };
> >> #define cr_datalen	u.link.datalen
> >> #define cr_data		u.link.data
> >> +#define cr_first	u.link.first
> >> #define cr_specdata1	u.dev.specdata1
> >> #define cr_specdata2	u.dev.specdata2
> >> 
> >> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> >> index 238b9ae..fd5846e 100644
> >> --- a/include/linux/sunrpc/svc.h
> >> +++ b/include/linux/sunrpc/svc.h
> >> @@ -495,6 +495,8 @@ int		   svc_register(const struct svc_serv *, struct net *, const int,
> >> char *		   svc_print_addr(struct svc_rqst *, char *, size_t);
> >> unsigned int	   svc_fill_write_vector(struct svc_rqst *rqstp,
> >> 					 struct kvec *first, size_t total);
> >> +char		  *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
> >> +					     struct kvec *first, size_t total);
> >> 
> >> #define	RPC_MAX_ADDRBUFLEN	(63U)
> >> 
> >> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> >> index 759b668..fc93406 100644
> >> --- a/net/sunrpc/svc.c
> >> +++ b/net/sunrpc/svc.c
> >> @@ -1578,3 +1578,70 @@ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first,
> >> 	return i;
> >> }
> >> EXPORT_SYMBOL_GPL(svc_fill_write_vector);
> >> +
> >> +/**
> >> + * svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call
> >> + * @rqstp: svc_rqst to operate on
> >> + * @first: buffer containing first section of pathname
> >> + * @total: total length of the pathname argument
> >> + *
> >> + * Returns pointer to a NUL-terminated string, or an ERR_PTR. The buffer is
> >> + * released automatically when @rqstp is recycled.
> >> + */
> >> +char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first,
> >> +				size_t total)
> >> +{
> >> +	struct xdr_buf *arg = &rqstp->rq_arg;
> >> +	struct page **pages;
> >> +	char *result;
> >> +
> >> +	/* VFS API demands a NUL-terminated pathname. This function
> >> +	 * uses a page from @rqstp as the pathname buffer, to enable
> >> +	 * direct placement. Thus the total buffer size is PAGE_SIZE.
> >> +	 * Space in this buffer for NUL-termination requires that we
> >> +	 * cap the size of the returned symlink pathname just a
> >> +	 * little early.
> >> +	 */
> >> +	if (total > PAGE_SIZE - 1)
> >> +		return ERR_PTR(-ENAMETOOLONG);
> >> +
> >> +	/* Some types of transport can present the pathname entirely
> >> +	 * in rq_arg.pages. If not, then copy the pathname into one
> >> +	 * page.
> >> +	 */
> >> +	pages = arg->pages;
> >> +	WARN_ON_ONCE(arg->page_base != 0);
> >> +	if (first->iov_base == 0) {
> >> +		result = page_address(*pages);
> >> +		result[total] = '\0';
> >> +	} else {
> >> +		size_t len, remaining;
> >> +		char *dst;
> >> +
> >> +		result = page_address(*(rqstp->rq_next_page++));
> >> +		dst = result;
> >> +		remaining = total;
> >> +
> >> +		len = min_t(size_t, total, first->iov_len);
> >> +		memcpy(dst, first->iov_base, len);
> >> +		dst += len;
> >> +		remaining -= len;
> >> +
> >> +		/* No more than one page left */
> >> +		if (remaining) {
> >> +			len = min_t(size_t, remaining, PAGE_SIZE);
> >> +			memcpy(dst, page_address(*pages), len);
> >> +			dst += len;
> >> +		}
> >> +
> >> +		*dst = '\0';
> >> +	}
> >> +
> >> +	/* Sanity check: we don't allow the pathname argument to
> >> +	 * contain a NUL byte.
> >> +	 */
> >> +	if (strlen(result) != total)
> >> +		return ERR_PTR(-EINVAL);
> >> +	return result;
> >> +}
> >> +EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname);
> 
> --
> Chuck Lever
> 
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 3/3] NFSD: Clean up symlink argument XDR decoders
  2018-01-23 20:52       ` Bruce Fields
@ 2018-01-23 22:30         ` Chuck Lever
  2018-01-25 16:33           ` Bruce Fields
  0 siblings, 1 reply; 13+ messages in thread
From: Chuck Lever @ 2018-01-23 22:30 UTC (permalink / raw)
  To: Bruce Fields; +Cc: linux-rdma, Linux NFS Mailing List



> On Jan 23, 2018, at 12:52 PM, Bruce Fields <bfields@fieldses.org> =
wrote:
>=20
> On Mon, Jan 22, 2018 at 05:09:35PM -0800, Chuck Lever wrote:
>>=20
>>=20
>>> On Jan 22, 2018, at 2:00 PM, J. Bruce Fields <bfields@fieldses.org> =
wrote:
>>>=20
>>> On Wed, Jan 03, 2018 at 03:42:35PM -0500, Chuck Lever wrote:
>>>> Move common code in NFSD's symlink arg decoders into a helper. The
>>>> immediate benefits include:
>>>>=20
>>>> - one fewer data copies on transports that support DDP
>>>> - no memory allocation in the NFSv4 XDR decoder
>>>> - consistent error checking across all versions
>>>> - reduction of code duplication
>>>> - support for both legal forms of SYMLINK requests on RDMA
>>>>  transports for all versions of NFS (in particular, NFSv2, for
>>>>  completeness)
>>>>=20
>>>> In the long term, this helper is an appropriate spot to perform a
>>>> per-transport call-out to fill the pathname argument using, say,
>>>> RDMA Reads.
>>>>=20
>>>> Filling the pathname in the proc function also means that =
eventually
>>>> the incoming filehandle can be interpreted so that filesystem-
>>>> specific memory can be allocated as a sink for the pathname
>>>> argument, rather than using anonymous pages.
>>>>=20
>>>> Wondering why the current code punts a zero-length SYMLINK. Is it
>>>> illegal to create a zero-length SYMLINK on Linux?
>>>=20
>>> SYMLINK(2) says
>>>=20
>>> 	ENOENT A directory component in linkpath does not exist or is a
>>> 	dangling symbolic link, or target or linkpath is an empty
>>> 	string.
>>>=20
>>> That doesn't explain the INVAL, or why this is the right place to be
>>> checking it.
>>=20
>> RFC 1813:
>>=20
>> NFS3ERR_IO
>>      NFS3ERR_ACCES
>>      NFS3ERR_EXIST
>>      NFS3ERR_NOTDIR
>>      NFS3ERR_NOSPC
>>      NFS3ERR_ROFS
>>      NFS3ERR_NAMETOOLONG
>>      NFS3ERR_DQUOT
>>      NFS3ERR_STALE
>>      NFS3ERR_BADHANDLE
>>      NFS3ERR_NOTSUPP
>>      NFS3ERR_SERVERFAULT
>>=20
>> Interestingly, neither INVAL nor NOENT are valid
>> status codes for NFSv3 SYMLINK. NFS3ERR_NOTSUPP
>> might be closest, I suppose.
>>=20
>> RFC 5661 says explicitly:
>>=20
>> If the objname has a length of zero, or if objname does not obey the
>> UTF-8 definition, the error NFS4ERR_INVAL will be returned.
>>=20
>> And lists these as valid status codes for CREATE(NF4LNK):
>>=20
>> | NFS4ERR_ACCESS, NFS4ERR_ATTRNOTSUPP,       |
>> | NFS4ERR_BADCHAR, NFS4ERR_BADNAME,          |
>> | NFS4ERR_BADOWNER, NFS4ERR_BADTYPE,         |
>> | NFS4ERR_BADXDR, NFS4ERR_DEADSESSION,       |
>> | NFS4ERR_DELAY, NFS4ERR_DQUOT,              |
>> | NFS4ERR_EXIST, NFS4ERR_FHEXPIRED,          |
>> | NFS4ERR_INVAL, NFS4ERR_IO, NFS4ERR_MLINK,  |
>> | NFS4ERR_MOVED, NFS4ERR_NAMETOOLONG,        |
>> | NFS4ERR_NOFILEHANDLE, NFS4ERR_NOSPC,       |
>> | NFS4ERR_NOTDIR, NFS4ERR_OP_NOT_IN_SESSION, |
>> | NFS4ERR_PERM, NFS4ERR_REP_TOO_BIG,         |
>> | NFS4ERR_REP_TOO_BIG_TO_CACHE,              |
>> | NFS4ERR_REQ_TOO_BIG,                       |
>> | NFS4ERR_RETRY_UNCACHED_REP, NFS4ERR_ROFS,  |
>> | NFS4ERR_SERVERFAULT, NFS4ERR_STALE,        |
>> | NFS4ERR_TOO_MANY_OPS,                      |
>> | NFS4ERR_UNSAFE_COMPOUND                    |
>>=20
>>=20
>>> I'm a little nervous about the NULL termination in
>>> svc_fill_symlink_pathname; how do we know it's safe to write a zero
>>> there?  I haven't checked it carefully yet.
>>=20
>> svc_fill_symlink_pathname grabs a whole fresh page
>> from @rqstp. It is safe to write bytes anywhere in
>> that page.
>=20
> How do we know it's safe to grab another page?

It's safe in the v3 case because that's what the v3
decoder currently does. I think you are concerned
about the NFSv4 COMPOUND case on TCP, for arbitrarily
large compounds? Given that we have 257-ish pages
in the rqstp->rq_pages array (ie, that number is
bounded) I'm not sure how we can ever be sure there
are enough pages for all combinations of NFS
operations.


> (Could we run out of
> pages?  I'd probably know the answer if I'd rewritten this code
> recently, but I haven't and the details are swapped out....)
>=20
> Also I don't think that's true in the first->iov_base =3D=3D 0 case.

This case occurs when an RDMA transport has filled
in the first page of arg->pages with the symlink
contents. It's also a whole fresh page that was
obtained from rqstp->rq_pages.

Here, are you concerned about the client sending a
COMPOUND over RPC/RDMA that has more than one SYMLINK
operation? Our server currently supports only one Read
chunk per RPC.


> --b.
>=20
>>=20
>>=20
>>> --g.
>>>=20
>>>>=20
>>>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>>>> ---
>>>> fs/nfsd/nfs3proc.c         |   10 +++++++
>>>> fs/nfsd/nfs3xdr.c          |   51 ++++++++-------------------------
>>>> fs/nfsd/nfs4proc.c         |    7 +++++
>>>> fs/nfsd/nfs4xdr.c          |   10 +++++--
>>>> fs/nfsd/nfsproc.c          |   14 +++++----
>>>> fs/nfsd/nfsxdr.c           |   49 +++++++++++++++++++-------------
>>>> fs/nfsd/xdr.h              |    1 +
>>>> fs/nfsd/xdr3.h             |    1 +
>>>> fs/nfsd/xdr4.h             |    2 +
>>>> include/linux/sunrpc/svc.h |    2 +
>>>> net/sunrpc/svc.c           |   67 =
++++++++++++++++++++++++++++++++++++++++++++
>>>> 11 files changed, 146 insertions(+), 68 deletions(-)
>>>>=20
>>>> diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
>>>> index 2dd95eb..6259a4b 100644
>>>> --- a/fs/nfsd/nfs3proc.c
>>>> +++ b/fs/nfsd/nfs3proc.c
>>>> @@ -283,6 +283,16 @@
>>>> 	struct nfsd3_diropres *resp =3D rqstp->rq_resp;
>>>> 	__be32	nfserr;
>>>>=20
>>>> +	if (argp->tlen =3D=3D 0)
>>>> +		RETURN_STATUS(nfserr_inval);
>>>> +	if (argp->tlen > NFS3_MAXPATHLEN)
>>>> +		RETURN_STATUS(nfserr_nametoolong);
>>>> +
>>>> +	argp->tname =3D svc_fill_symlink_pathname(rqstp, &argp->first,
>>>> +						argp->tlen);
>>>> +	if (IS_ERR(argp->tname))
>>>> +		RETURN_STATUS(nfserrno(PTR_ERR(argp->tname)));
>>>> +
>>>> 	dprintk("nfsd: SYMLINK(3)  %s %.*s -> %.*s\n",
>>>> 				SVCFH_fmt(&argp->ffh),
>>>> 				argp->flen, argp->fname,
>>>> diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
>>>> index 240cdb0e..78b555b 100644
>>>> --- a/fs/nfsd/nfs3xdr.c
>>>> +++ b/fs/nfsd/nfs3xdr.c
>>>> @@ -452,51 +452,24 @@ void fill_post_wcc(struct svc_fh *fhp)
>>>> nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
>>>> {
>>>> 	struct nfsd3_symlinkargs *args =3D rqstp->rq_argp;
>>>> -	unsigned int len, avail;
>>>> -	char *old, *new;
>>>> -	struct kvec *vec;
>>>> +	char *base =3D (char *)p;
>>>> +	size_t dlen;
>>>>=20
>>>> 	if (!(p =3D decode_fh(p, &args->ffh)) ||
>>>> -	    !(p =3D decode_filename(p, &args->fname, &args->flen))
>>>> -		)
>>>> +	    !(p =3D decode_filename(p, &args->fname, &args->flen)))
>>>> 		return 0;
>>>> 	p =3D decode_sattr3(p, &args->attrs);
>>>>=20
>>>> -	/* now decode the pathname, which might be larger than the first =
page.
>>>> -	 * As we have to check for nul's anyway, we copy it into a new =
page
>>>> -	 * This page appears in the rq_res.pages list, but as pages_len =
is always
>>>> -	 * 0, it won't get in the way
>>>> -	 */
>>>> -	len =3D ntohl(*p++);
>>>> -	if (len =3D=3D 0 || len > NFS3_MAXPATHLEN || len >=3D PAGE_SIZE)
>>>> -		return 0;
>>>> -	args->tname =3D new =3D page_address(*(rqstp->rq_next_page++));
>>>> -	args->tlen =3D len;
>>>> -	/* first copy and check from the first page */
>>>> -	old =3D (char*)p;
>>>> -	vec =3D &rqstp->rq_arg.head[0];
>>>> -	if ((void *)old > vec->iov_base + vec->iov_len)
>>>> -		return 0;
>>>> -	avail =3D vec->iov_len - (old - (char*)vec->iov_base);
>>>> -	while (len && avail && *old) {
>>>> -		*new++ =3D *old++;
>>>> -		len--;
>>>> -		avail--;
>>>> -	}
>>>> -	/* now copy next page if there is one */
>>>> -	if (len && !avail && rqstp->rq_arg.page_len) {
>>>> -		avail =3D min_t(unsigned int, rqstp->rq_arg.page_len, =
PAGE_SIZE);
>>>> -		old =3D page_address(rqstp->rq_arg.pages[0]);
>>>> -	}
>>>> -	while (len && avail && *old) {
>>>> -		*new++ =3D *old++;
>>>> -		len--;
>>>> -		avail--;
>>>> -	}
>>>> -	*new =3D '\0';
>>>> -	if (len)
>>>> -		return 0;
>>>> +	args->tlen =3D ntohl(*p++);
>>>>=20
>>>> +	args->first.iov_base =3D p;
>>>> +	args->first.iov_len =3D rqstp->rq_arg.head[0].iov_len;
>>>> +	args->first.iov_len -=3D (char *)p - base;
>>>> +
>>>> +	dlen =3D args->first.iov_len + rqstp->rq_arg.page_len +
>>>> +	       rqstp->rq_arg.tail[0].iov_len;
>>>> +	if (dlen < XDR_QUADLEN(args->tlen) << 2)
>>>> +		return 0;
>>>> 	return 1;
>>>> }
>>>>=20
>>>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>>>> index 5029b96..36bd1f7 100644
>>>> --- a/fs/nfsd/nfs4proc.c
>>>> +++ b/fs/nfsd/nfs4proc.c
>>>> @@ -605,6 +605,13 @@ static void gen_boot_verifier(nfs4_verifier =
*verifier, struct net *net)
>>>>=20
>>>> 	switch (create->cr_type) {
>>>> 	case NF4LNK:
>>>> +		if (create->cr_datalen > NFS4_MAXPATHLEN)
>>>> +			return nfserr_nametoolong;
>>>> +		create->cr_data =3D
>>>> +			svc_fill_symlink_pathname(rqstp, =
&create->cr_first,
>>>> +						  create->cr_datalen);
>>>> +		if (IS_ERR(create->cr_data))
>>>> +			return nfserrno(PTR_ERR(create->cr_data));
>>>> 		status =3D nfsd_symlink(rqstp, &cstate->current_fh,
>>>> 				      create->cr_name, =
create->cr_namelen,
>>>> 				      create->cr_data, &resfh);
>>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>>>> index bd25230..d05384e 100644
>>>> --- a/fs/nfsd/nfs4xdr.c
>>>> +++ b/fs/nfsd/nfs4xdr.c
>>>> @@ -648,6 +648,7 @@ static __be32 =
nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,
>>>> static __be32
>>>> nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct =
nfsd4_create *create)
>>>> {
>>>> +	struct kvec *head;
>>>> 	DECODE_HEAD;
>>>>=20
>>>> 	READ_BUF(4);
>>>> @@ -656,10 +657,13 @@ static __be32 =
nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,
>>>> 	case NF4LNK:
>>>> 		READ_BUF(4);
>>>> 		create->cr_datalen =3D be32_to_cpup(p++);
>>>> +		if (create->cr_datalen =3D=3D 0)
>>>> +			return nfserr_inval;
>>>> +		head =3D argp->rqstp->rq_arg.head;
>>>> +		create->cr_first.iov_base =3D p;
>>>> +		create->cr_first.iov_len =3D head->iov_len;
>>>> +		create->cr_first.iov_len -=3D (char *)p - (char =
*)head->iov_base;
>>>> 		READ_BUF(create->cr_datalen);
>>>> -		create->cr_data =3D svcxdr_dupstr(argp, p, =
create->cr_datalen);
>>>> -		if (!create->cr_data)
>>>> -			return nfserr_jukebox;
>>>> 		break;
>>>> 	case NF4BLK:
>>>> 	case NF4CHR:
>>>> diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
>>>> index 1995ea6..f107f9f 100644
>>>> --- a/fs/nfsd/nfsproc.c
>>>> +++ b/fs/nfsd/nfsproc.c
>>>> @@ -449,17 +449,19 @@
>>>> 	struct svc_fh	newfh;
>>>> 	__be32		nfserr;
>>>>=20
>>>> +	if (argp->tlen > NFS_MAXPATHLEN)
>>>> +		return nfserr_nametoolong;
>>>> +
>>>> +	argp->tname =3D svc_fill_symlink_pathname(rqstp, &argp->first,
>>>> +						argp->tlen);
>>>> +	if (IS_ERR(argp->tname))
>>>> +		return nfserrno(PTR_ERR(argp->tname));
>>>> +
>>>> 	dprintk("nfsd: SYMLINK  %s %.*s -> %.*s\n",
>>>> 		SVCFH_fmt(&argp->ffh), argp->flen, argp->fname,
>>>> 		argp->tlen, argp->tname);
>>>>=20
>>>> 	fh_init(&newfh, NFS_FHSIZE);
>>>> -	/*
>>>> -	 * Crazy hack: the request fits in a page, and already-decoded
>>>> -	 * attributes follow argp->tname, so it's safe to just write a
>>>> -	 * null to ensure it's null-terminated:
>>>> -	 */
>>>> -	argp->tname[argp->tlen] =3D '\0';
>>>> 	nfserr =3D nfsd_symlink(rqstp, &argp->ffh, argp->fname, =
argp->flen,
>>>> 						 argp->tname, &newfh);
>>>>=20
>>>> diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
>>>> index 165e25e..8fcd047 100644
>>>> --- a/fs/nfsd/nfsxdr.c
>>>> +++ b/fs/nfsd/nfsxdr.c
>>>> @@ -71,22 +71,6 @@ __be32 *nfs2svc_decode_fh(__be32 *p, struct =
svc_fh *fhp)
>>>> }
>>>>=20
>>>> static __be32 *
>>>> -decode_pathname(__be32 *p, char **namp, unsigned int *lenp)
>>>> -{
>>>> -	char		*name;
>>>> -	unsigned int	i;
>>>> -
>>>> -	if ((p =3D xdr_decode_string_inplace(p, namp, lenp, =
NFS_MAXPATHLEN)) !=3D NULL) {
>>>> -		for (i =3D 0, name =3D *namp; i < *lenp; i++, name++) {
>>>> -			if (*name =3D=3D '\0')
>>>> -				return NULL;
>>>> -		}
>>>> -	}
>>>> -
>>>> -	return p;
>>>> -}
>>>> -
>>>> -static __be32 *
>>>> decode_sattr(__be32 *p, struct iattr *iap)
>>>> {
>>>> 	u32	tmp, tmp1;
>>>> @@ -383,14 +367,39 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst =
*rqstp, __be32 *p, struct svc_fh *f
>>>> nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
>>>> {
>>>> 	struct nfsd_symlinkargs *args =3D rqstp->rq_argp;
>>>> +	char *base =3D (char *)p;
>>>> +	size_t xdrlen;
>>>>=20
>>>> 	if (   !(p =3D decode_fh(p, &args->ffh))
>>>> -	    || !(p =3D decode_filename(p, &args->fname, &args->flen))
>>>> -	    || !(p =3D decode_pathname(p, &args->tname, &args->tlen)))
>>>> +	    || !(p =3D decode_filename(p, &args->fname, &args->flen)))
>>>> 		return 0;
>>>> -	p =3D decode_sattr(p, &args->attrs);
>>>>=20
>>>> -	return xdr_argsize_check(rqstp, p);
>>>> +	args->tlen =3D ntohl(*p++);
>>>> +	if (args->tlen =3D=3D 0)
>>>> +		return 0;
>>>> +
>>>> +	args->first.iov_base =3D p;
>>>> +	args->first.iov_len =3D rqstp->rq_arg.head[0].iov_len;
>>>> +	args->first.iov_len -=3D (char *)p - base;
>>>> +
>>>> +	/* This request is never larger than a page. Therefore,
>>>> +	 * transport will deliver either:
>>>> +	 * 1. pathname in the pagelist -> sattr is in the tail.
>>>> +	 * 2. everything in the head buffer -> sattr is in the head.
>>>> +	 */
>>>> +	if (rqstp->rq_arg.page_len) {
>>>> +		if (args->tlen !=3D rqstp->rq_arg.page_len)
>>>> +			return 0;
>>>> +		p =3D rqstp->rq_arg.tail[0].iov_base;
>>>> +	} else {
>>>> +		xdrlen =3D XDR_QUADLEN(args->tlen);
>>>> +		if (xdrlen > args->first.iov_len - (8 * sizeof(__be32)))
>>>> +			return 0;
>>>> +		p +=3D xdrlen;
>>>> +	}
>>>> +	decode_sattr(p, &args->attrs);
>>>> +
>>>> +	return 1;
>>>> }
>>>>=20
>>>> int
>>>> diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
>>>> index a765c41..ea7cca3 100644
>>>> --- a/fs/nfsd/xdr.h
>>>> +++ b/fs/nfsd/xdr.h
>>>> @@ -72,6 +72,7 @@ struct nfsd_symlinkargs {
>>>> 	char *			tname;
>>>> 	unsigned int		tlen;
>>>> 	struct iattr		attrs;
>>>> +	struct kvec		first;
>>>> };
>>>>=20
>>>> struct nfsd_readdirargs {
>>>> diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
>>>> index deccf7f..2cb29e9 100644
>>>> --- a/fs/nfsd/xdr3.h
>>>> +++ b/fs/nfsd/xdr3.h
>>>> @@ -90,6 +90,7 @@ struct nfsd3_symlinkargs {
>>>> 	char *			tname;
>>>> 	unsigned int		tlen;
>>>> 	struct iattr		attrs;
>>>> +	struct kvec		first;
>>>> };
>>>>=20
>>>> struct nfsd3_readdirargs {
>>>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
>>>> index d56219d..b485cd1 100644
>>>> --- a/fs/nfsd/xdr4.h
>>>> +++ b/fs/nfsd/xdr4.h
>>>> @@ -110,6 +110,7 @@ struct nfsd4_create {
>>>> 		struct {
>>>> 			u32 datalen;
>>>> 			char *data;
>>>> +			struct kvec first;
>>>> 		} link;   /* NF4LNK */
>>>> 		struct {
>>>> 			u32 specdata1;
>>>> @@ -124,6 +125,7 @@ struct nfsd4_create {
>>>> };
>>>> #define cr_datalen	u.link.datalen
>>>> #define cr_data		u.link.data
>>>> +#define cr_first	u.link.first
>>>> #define cr_specdata1	u.dev.specdata1
>>>> #define cr_specdata2	u.dev.specdata2
>>>>=20
>>>> diff --git a/include/linux/sunrpc/svc.h =
b/include/linux/sunrpc/svc.h
>>>> index 238b9ae..fd5846e 100644
>>>> --- a/include/linux/sunrpc/svc.h
>>>> +++ b/include/linux/sunrpc/svc.h
>>>> @@ -495,6 +495,8 @@ int		   svc_register(const struct =
svc_serv *, struct net *, const int,
>>>> char *		   svc_print_addr(struct svc_rqst *, char *, =
size_t);
>>>> unsigned int	   svc_fill_write_vector(struct svc_rqst *rqstp,
>>>> 					 struct kvec *first, size_t =
total);
>>>> +char		  *svc_fill_symlink_pathname(struct svc_rqst =
*rqstp,
>>>> +					     struct kvec *first, size_t =
total);
>>>>=20
>>>> #define	RPC_MAX_ADDRBUFLEN	(63U)
>>>>=20
>>>> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
>>>> index 759b668..fc93406 100644
>>>> --- a/net/sunrpc/svc.c
>>>> +++ b/net/sunrpc/svc.c
>>>> @@ -1578,3 +1578,70 @@ unsigned int svc_fill_write_vector(struct =
svc_rqst *rqstp, struct kvec *first,
>>>> 	return i;
>>>> }
>>>> EXPORT_SYMBOL_GPL(svc_fill_write_vector);
>>>> +
>>>> +/**
>>>> + * svc_fill_symlink_pathname - Construct pathname argument for VFS =
symlink call
>>>> + * @rqstp: svc_rqst to operate on
>>>> + * @first: buffer containing first section of pathname
>>>> + * @total: total length of the pathname argument
>>>> + *
>>>> + * Returns pointer to a NUL-terminated string, or an ERR_PTR. The =
buffer is
>>>> + * released automatically when @rqstp is recycled.
>>>> + */
>>>> +char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct =
kvec *first,
>>>> +				size_t total)
>>>> +{
>>>> +	struct xdr_buf *arg =3D &rqstp->rq_arg;
>>>> +	struct page **pages;
>>>> +	char *result;
>>>> +
>>>> +	/* VFS API demands a NUL-terminated pathname. This function
>>>> +	 * uses a page from @rqstp as the pathname buffer, to enable
>>>> +	 * direct placement. Thus the total buffer size is PAGE_SIZE.
>>>> +	 * Space in this buffer for NUL-termination requires that we
>>>> +	 * cap the size of the returned symlink pathname just a
>>>> +	 * little early.
>>>> +	 */
>>>> +	if (total > PAGE_SIZE - 1)
>>>> +		return ERR_PTR(-ENAMETOOLONG);
>>>> +
>>>> +	/* Some types of transport can present the pathname entirely
>>>> +	 * in rq_arg.pages. If not, then copy the pathname into one
>>>> +	 * page.
>>>> +	 */
>>>> +	pages =3D arg->pages;
>>>> +	WARN_ON_ONCE(arg->page_base !=3D 0);
>>>> +	if (first->iov_base =3D=3D 0) {
>>>> +		result =3D page_address(*pages);
>>>> +		result[total] =3D '\0';
>>>> +	} else {
>>>> +		size_t len, remaining;
>>>> +		char *dst;
>>>> +
>>>> +		result =3D page_address(*(rqstp->rq_next_page++));
>>>> +		dst =3D result;
>>>> +		remaining =3D total;
>>>> +
>>>> +		len =3D min_t(size_t, total, first->iov_len);
>>>> +		memcpy(dst, first->iov_base, len);
>>>> +		dst +=3D len;
>>>> +		remaining -=3D len;
>>>> +
>>>> +		/* No more than one page left */
>>>> +		if (remaining) {
>>>> +			len =3D min_t(size_t, remaining, PAGE_SIZE);
>>>> +			memcpy(dst, page_address(*pages), len);
>>>> +			dst +=3D len;
>>>> +		}
>>>> +
>>>> +		*dst =3D '\0';
>>>> +	}
>>>> +
>>>> +	/* Sanity check: we don't allow the pathname argument to
>>>> +	 * contain a NUL byte.
>>>> +	 */
>>>> +	if (strlen(result) !=3D total)
>>>> +		return ERR_PTR(-EINVAL);
>>>> +	return result;
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname);
>>=20
>> --
>> Chuck Lever

--
Chuck Lever




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 3/3] NFSD: Clean up symlink argument XDR decoders
  2018-01-23 22:30         ` Chuck Lever
@ 2018-01-25 16:33           ` Bruce Fields
  2018-01-25 16:40             ` Chuck Lever
  0 siblings, 1 reply; 13+ messages in thread
From: Bruce Fields @ 2018-01-25 16:33 UTC (permalink / raw)
  To: Chuck Lever; +Cc: linux-rdma, Linux NFS Mailing List

On Tue, Jan 23, 2018 at 02:30:57PM -0800, Chuck Lever wrote:
> 
> 
> > On Jan 23, 2018, at 12:52 PM, Bruce Fields <bfields@fieldses.org> wrote:
> > 
> > On Mon, Jan 22, 2018 at 05:09:35PM -0800, Chuck Lever wrote:
> >> 
> >> 
> >>> On Jan 22, 2018, at 2:00 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> >>> 
> >>> On Wed, Jan 03, 2018 at 03:42:35PM -0500, Chuck Lever wrote:
> >>>> Move common code in NFSD's symlink arg decoders into a helper. The
> >>>> immediate benefits include:
> >>>> 
> >>>> - one fewer data copies on transports that support DDP
> >>>> - no memory allocation in the NFSv4 XDR decoder
> >>>> - consistent error checking across all versions
> >>>> - reduction of code duplication
> >>>> - support for both legal forms of SYMLINK requests on RDMA
> >>>>  transports for all versions of NFS (in particular, NFSv2, for
> >>>>  completeness)
> >>>> 
> >>>> In the long term, this helper is an appropriate spot to perform a
> >>>> per-transport call-out to fill the pathname argument using, say,
> >>>> RDMA Reads.
> >>>> 
> >>>> Filling the pathname in the proc function also means that eventually
> >>>> the incoming filehandle can be interpreted so that filesystem-
> >>>> specific memory can be allocated as a sink for the pathname
> >>>> argument, rather than using anonymous pages.
> >>>> 
> >>>> Wondering why the current code punts a zero-length SYMLINK. Is it
> >>>> illegal to create a zero-length SYMLINK on Linux?
> >>> 
> >>> SYMLINK(2) says
> >>> 
> >>> 	ENOENT A directory component in linkpath does not exist or is a
> >>> 	dangling symbolic link, or target or linkpath is an empty
> >>> 	string.
> >>> 
> >>> That doesn't explain the INVAL, or why this is the right place to be
> >>> checking it.
> >> 
> >> RFC 1813:
> >> 
> >> NFS3ERR_IO
> >>      NFS3ERR_ACCES
> >>      NFS3ERR_EXIST
> >>      NFS3ERR_NOTDIR
> >>      NFS3ERR_NOSPC
> >>      NFS3ERR_ROFS
> >>      NFS3ERR_NAMETOOLONG
> >>      NFS3ERR_DQUOT
> >>      NFS3ERR_STALE
> >>      NFS3ERR_BADHANDLE
> >>      NFS3ERR_NOTSUPP
> >>      NFS3ERR_SERVERFAULT
> >> 
> >> Interestingly, neither INVAL nor NOENT are valid
> >> status codes for NFSv3 SYMLINK. NFS3ERR_NOTSUPP
> >> might be closest, I suppose.
> >> 
> >> RFC 5661 says explicitly:
> >> 
> >> If the objname has a length of zero, or if objname does not obey the
> >> UTF-8 definition, the error NFS4ERR_INVAL will be returned.
> >> 
> >> And lists these as valid status codes for CREATE(NF4LNK):
> >> 
> >> | NFS4ERR_ACCESS, NFS4ERR_ATTRNOTSUPP,       |
> >> | NFS4ERR_BADCHAR, NFS4ERR_BADNAME,          |
> >> | NFS4ERR_BADOWNER, NFS4ERR_BADTYPE,         |
> >> | NFS4ERR_BADXDR, NFS4ERR_DEADSESSION,       |
> >> | NFS4ERR_DELAY, NFS4ERR_DQUOT,              |
> >> | NFS4ERR_EXIST, NFS4ERR_FHEXPIRED,          |
> >> | NFS4ERR_INVAL, NFS4ERR_IO, NFS4ERR_MLINK,  |
> >> | NFS4ERR_MOVED, NFS4ERR_NAMETOOLONG,        |
> >> | NFS4ERR_NOFILEHANDLE, NFS4ERR_NOSPC,       |
> >> | NFS4ERR_NOTDIR, NFS4ERR_OP_NOT_IN_SESSION, |
> >> | NFS4ERR_PERM, NFS4ERR_REP_TOO_BIG,         |
> >> | NFS4ERR_REP_TOO_BIG_TO_CACHE,              |
> >> | NFS4ERR_REQ_TOO_BIG,                       |
> >> | NFS4ERR_RETRY_UNCACHED_REP, NFS4ERR_ROFS,  |
> >> | NFS4ERR_SERVERFAULT, NFS4ERR_STALE,        |
> >> | NFS4ERR_TOO_MANY_OPS,                      |
> >> | NFS4ERR_UNSAFE_COMPOUND                    |
> >> 
> >> 
> >>> I'm a little nervous about the NULL termination in
> >>> svc_fill_symlink_pathname; how do we know it's safe to write a zero
> >>> there?  I haven't checked it carefully yet.
> >> 
> >> svc_fill_symlink_pathname grabs a whole fresh page
> >> from @rqstp. It is safe to write bytes anywhere in
> >> that page.
> > 
> > How do we know it's safe to grab another page?
> 
> It's safe in the v3 case because that's what the v3
> decoder currently does. I think you are concerned
> about the NFSv4 COMPOUND case on TCP, for arbitrarily
> large compounds? Given that we have 257-ish pages
> in the rqstp->rq_pages array (ie, that number is
> bounded) I'm not sure how we can ever be sure there
> are enough pages for all combinations of NFS
> operations.

Right, I think it just depends on some maximum lengths being checked
earlier.  So my worry is that this my violate some assumptions about how
many pages we need to handle a request of a certain length.

I'll go remind myself how this works.  We need some documentation here
at least.  So I may end up postponing these after all....

> > (Could we run out of
> > pages?  I'd probably know the answer if I'd rewritten this code
> > recently, but I haven't and the details are swapped out....)
> > 
> > Also I don't think that's true in the first->iov_base == 0 case.
> 
> This case occurs when an RDMA transport has filled
> in the first page of arg->pages with the symlink
> contents. It's also a whole fresh page that was
> obtained from rqstp->rq_pages.

OK, I wasn't following that case.

--b.

> 
> Here, are you concerned about the client sending a
> COMPOUND over RPC/RDMA that has more than one SYMLINK
> operation? Our server currently supports only one Read
> chunk per RPC.
> 
> 
> > --b.
> > 
> >> 
> >> 
> >>> --g.
> >>> 
> >>>> 
> >>>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> >>>> ---
> >>>> fs/nfsd/nfs3proc.c         |   10 +++++++
> >>>> fs/nfsd/nfs3xdr.c          |   51 ++++++++-------------------------
> >>>> fs/nfsd/nfs4proc.c         |    7 +++++
> >>>> fs/nfsd/nfs4xdr.c          |   10 +++++--
> >>>> fs/nfsd/nfsproc.c          |   14 +++++----
> >>>> fs/nfsd/nfsxdr.c           |   49 +++++++++++++++++++-------------
> >>>> fs/nfsd/xdr.h              |    1 +
> >>>> fs/nfsd/xdr3.h             |    1 +
> >>>> fs/nfsd/xdr4.h             |    2 +
> >>>> include/linux/sunrpc/svc.h |    2 +
> >>>> net/sunrpc/svc.c           |   67 ++++++++++++++++++++++++++++++++++++++++++++
> >>>> 11 files changed, 146 insertions(+), 68 deletions(-)
> >>>> 
> >>>> diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
> >>>> index 2dd95eb..6259a4b 100644
> >>>> --- a/fs/nfsd/nfs3proc.c
> >>>> +++ b/fs/nfsd/nfs3proc.c
> >>>> @@ -283,6 +283,16 @@
> >>>> 	struct nfsd3_diropres *resp = rqstp->rq_resp;
> >>>> 	__be32	nfserr;
> >>>> 
> >>>> +	if (argp->tlen == 0)
> >>>> +		RETURN_STATUS(nfserr_inval);
> >>>> +	if (argp->tlen > NFS3_MAXPATHLEN)
> >>>> +		RETURN_STATUS(nfserr_nametoolong);
> >>>> +
> >>>> +	argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
> >>>> +						argp->tlen);
> >>>> +	if (IS_ERR(argp->tname))
> >>>> +		RETURN_STATUS(nfserrno(PTR_ERR(argp->tname)));
> >>>> +
> >>>> 	dprintk("nfsd: SYMLINK(3)  %s %.*s -> %.*s\n",
> >>>> 				SVCFH_fmt(&argp->ffh),
> >>>> 				argp->flen, argp->fname,
> >>>> diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
> >>>> index 240cdb0e..78b555b 100644
> >>>> --- a/fs/nfsd/nfs3xdr.c
> >>>> +++ b/fs/nfsd/nfs3xdr.c
> >>>> @@ -452,51 +452,24 @@ void fill_post_wcc(struct svc_fh *fhp)
> >>>> nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
> >>>> {
> >>>> 	struct nfsd3_symlinkargs *args = rqstp->rq_argp;
> >>>> -	unsigned int len, avail;
> >>>> -	char *old, *new;
> >>>> -	struct kvec *vec;
> >>>> +	char *base = (char *)p;
> >>>> +	size_t dlen;
> >>>> 
> >>>> 	if (!(p = decode_fh(p, &args->ffh)) ||
> >>>> -	    !(p = decode_filename(p, &args->fname, &args->flen))
> >>>> -		)
> >>>> +	    !(p = decode_filename(p, &args->fname, &args->flen)))
> >>>> 		return 0;
> >>>> 	p = decode_sattr3(p, &args->attrs);
> >>>> 
> >>>> -	/* now decode the pathname, which might be larger than the first page.
> >>>> -	 * As we have to check for nul's anyway, we copy it into a new page
> >>>> -	 * This page appears in the rq_res.pages list, but as pages_len is always
> >>>> -	 * 0, it won't get in the way
> >>>> -	 */
> >>>> -	len = ntohl(*p++);
> >>>> -	if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE)
> >>>> -		return 0;
> >>>> -	args->tname = new = page_address(*(rqstp->rq_next_page++));
> >>>> -	args->tlen = len;
> >>>> -	/* first copy and check from the first page */
> >>>> -	old = (char*)p;
> >>>> -	vec = &rqstp->rq_arg.head[0];
> >>>> -	if ((void *)old > vec->iov_base + vec->iov_len)
> >>>> -		return 0;
> >>>> -	avail = vec->iov_len - (old - (char*)vec->iov_base);
> >>>> -	while (len && avail && *old) {
> >>>> -		*new++ = *old++;
> >>>> -		len--;
> >>>> -		avail--;
> >>>> -	}
> >>>> -	/* now copy next page if there is one */
> >>>> -	if (len && !avail && rqstp->rq_arg.page_len) {
> >>>> -		avail = min_t(unsigned int, rqstp->rq_arg.page_len, PAGE_SIZE);
> >>>> -		old = page_address(rqstp->rq_arg.pages[0]);
> >>>> -	}
> >>>> -	while (len && avail && *old) {
> >>>> -		*new++ = *old++;
> >>>> -		len--;
> >>>> -		avail--;
> >>>> -	}
> >>>> -	*new = '\0';
> >>>> -	if (len)
> >>>> -		return 0;
> >>>> +	args->tlen = ntohl(*p++);
> >>>> 
> >>>> +	args->first.iov_base = p;
> >>>> +	args->first.iov_len = rqstp->rq_arg.head[0].iov_len;
> >>>> +	args->first.iov_len -= (char *)p - base;
> >>>> +
> >>>> +	dlen = args->first.iov_len + rqstp->rq_arg.page_len +
> >>>> +	       rqstp->rq_arg.tail[0].iov_len;
> >>>> +	if (dlen < XDR_QUADLEN(args->tlen) << 2)
> >>>> +		return 0;
> >>>> 	return 1;
> >>>> }
> >>>> 
> >>>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> >>>> index 5029b96..36bd1f7 100644
> >>>> --- a/fs/nfsd/nfs4proc.c
> >>>> +++ b/fs/nfsd/nfs4proc.c
> >>>> @@ -605,6 +605,13 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
> >>>> 
> >>>> 	switch (create->cr_type) {
> >>>> 	case NF4LNK:
> >>>> +		if (create->cr_datalen > NFS4_MAXPATHLEN)
> >>>> +			return nfserr_nametoolong;
> >>>> +		create->cr_data =
> >>>> +			svc_fill_symlink_pathname(rqstp, &create->cr_first,
> >>>> +						  create->cr_datalen);
> >>>> +		if (IS_ERR(create->cr_data))
> >>>> +			return nfserrno(PTR_ERR(create->cr_data));
> >>>> 		status = nfsd_symlink(rqstp, &cstate->current_fh,
> >>>> 				      create->cr_name, create->cr_namelen,
> >>>> 				      create->cr_data, &resfh);
> >>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> >>>> index bd25230..d05384e 100644
> >>>> --- a/fs/nfsd/nfs4xdr.c
> >>>> +++ b/fs/nfsd/nfs4xdr.c
> >>>> @@ -648,6 +648,7 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,
> >>>> static __be32
> >>>> nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create)
> >>>> {
> >>>> +	struct kvec *head;
> >>>> 	DECODE_HEAD;
> >>>> 
> >>>> 	READ_BUF(4);
> >>>> @@ -656,10 +657,13 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,
> >>>> 	case NF4LNK:
> >>>> 		READ_BUF(4);
> >>>> 		create->cr_datalen = be32_to_cpup(p++);
> >>>> +		if (create->cr_datalen == 0)
> >>>> +			return nfserr_inval;
> >>>> +		head = argp->rqstp->rq_arg.head;
> >>>> +		create->cr_first.iov_base = p;
> >>>> +		create->cr_first.iov_len = head->iov_len;
> >>>> +		create->cr_first.iov_len -= (char *)p - (char *)head->iov_base;
> >>>> 		READ_BUF(create->cr_datalen);
> >>>> -		create->cr_data = svcxdr_dupstr(argp, p, create->cr_datalen);
> >>>> -		if (!create->cr_data)
> >>>> -			return nfserr_jukebox;
> >>>> 		break;
> >>>> 	case NF4BLK:
> >>>> 	case NF4CHR:
> >>>> diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
> >>>> index 1995ea6..f107f9f 100644
> >>>> --- a/fs/nfsd/nfsproc.c
> >>>> +++ b/fs/nfsd/nfsproc.c
> >>>> @@ -449,17 +449,19 @@
> >>>> 	struct svc_fh	newfh;
> >>>> 	__be32		nfserr;
> >>>> 
> >>>> +	if (argp->tlen > NFS_MAXPATHLEN)
> >>>> +		return nfserr_nametoolong;
> >>>> +
> >>>> +	argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
> >>>> +						argp->tlen);
> >>>> +	if (IS_ERR(argp->tname))
> >>>> +		return nfserrno(PTR_ERR(argp->tname));
> >>>> +
> >>>> 	dprintk("nfsd: SYMLINK  %s %.*s -> %.*s\n",
> >>>> 		SVCFH_fmt(&argp->ffh), argp->flen, argp->fname,
> >>>> 		argp->tlen, argp->tname);
> >>>> 
> >>>> 	fh_init(&newfh, NFS_FHSIZE);
> >>>> -	/*
> >>>> -	 * Crazy hack: the request fits in a page, and already-decoded
> >>>> -	 * attributes follow argp->tname, so it's safe to just write a
> >>>> -	 * null to ensure it's null-terminated:
> >>>> -	 */
> >>>> -	argp->tname[argp->tlen] = '\0';
> >>>> 	nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
> >>>> 						 argp->tname, &newfh);
> >>>> 
> >>>> diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
> >>>> index 165e25e..8fcd047 100644
> >>>> --- a/fs/nfsd/nfsxdr.c
> >>>> +++ b/fs/nfsd/nfsxdr.c
> >>>> @@ -71,22 +71,6 @@ __be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp)
> >>>> }
> >>>> 
> >>>> static __be32 *
> >>>> -decode_pathname(__be32 *p, char **namp, unsigned int *lenp)
> >>>> -{
> >>>> -	char		*name;
> >>>> -	unsigned int	i;
> >>>> -
> >>>> -	if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN)) != NULL) {
> >>>> -		for (i = 0, name = *namp; i < *lenp; i++, name++) {
> >>>> -			if (*name == '\0')
> >>>> -				return NULL;
> >>>> -		}
> >>>> -	}
> >>>> -
> >>>> -	return p;
> >>>> -}
> >>>> -
> >>>> -static __be32 *
> >>>> decode_sattr(__be32 *p, struct iattr *iap)
> >>>> {
> >>>> 	u32	tmp, tmp1;
> >>>> @@ -383,14 +367,39 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *f
> >>>> nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
> >>>> {
> >>>> 	struct nfsd_symlinkargs *args = rqstp->rq_argp;
> >>>> +	char *base = (char *)p;
> >>>> +	size_t xdrlen;
> >>>> 
> >>>> 	if (   !(p = decode_fh(p, &args->ffh))
> >>>> -	    || !(p = decode_filename(p, &args->fname, &args->flen))
> >>>> -	    || !(p = decode_pathname(p, &args->tname, &args->tlen)))
> >>>> +	    || !(p = decode_filename(p, &args->fname, &args->flen)))
> >>>> 		return 0;
> >>>> -	p = decode_sattr(p, &args->attrs);
> >>>> 
> >>>> -	return xdr_argsize_check(rqstp, p);
> >>>> +	args->tlen = ntohl(*p++);
> >>>> +	if (args->tlen == 0)
> >>>> +		return 0;
> >>>> +
> >>>> +	args->first.iov_base = p;
> >>>> +	args->first.iov_len = rqstp->rq_arg.head[0].iov_len;
> >>>> +	args->first.iov_len -= (char *)p - base;
> >>>> +
> >>>> +	/* This request is never larger than a page. Therefore,
> >>>> +	 * transport will deliver either:
> >>>> +	 * 1. pathname in the pagelist -> sattr is in the tail.
> >>>> +	 * 2. everything in the head buffer -> sattr is in the head.
> >>>> +	 */
> >>>> +	if (rqstp->rq_arg.page_len) {
> >>>> +		if (args->tlen != rqstp->rq_arg.page_len)
> >>>> +			return 0;
> >>>> +		p = rqstp->rq_arg.tail[0].iov_base;
> >>>> +	} else {
> >>>> +		xdrlen = XDR_QUADLEN(args->tlen);
> >>>> +		if (xdrlen > args->first.iov_len - (8 * sizeof(__be32)))
> >>>> +			return 0;
> >>>> +		p += xdrlen;
> >>>> +	}
> >>>> +	decode_sattr(p, &args->attrs);
> >>>> +
> >>>> +	return 1;
> >>>> }
> >>>> 
> >>>> int
> >>>> diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
> >>>> index a765c41..ea7cca3 100644
> >>>> --- a/fs/nfsd/xdr.h
> >>>> +++ b/fs/nfsd/xdr.h
> >>>> @@ -72,6 +72,7 @@ struct nfsd_symlinkargs {
> >>>> 	char *			tname;
> >>>> 	unsigned int		tlen;
> >>>> 	struct iattr		attrs;
> >>>> +	struct kvec		first;
> >>>> };
> >>>> 
> >>>> struct nfsd_readdirargs {
> >>>> diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
> >>>> index deccf7f..2cb29e9 100644
> >>>> --- a/fs/nfsd/xdr3.h
> >>>> +++ b/fs/nfsd/xdr3.h
> >>>> @@ -90,6 +90,7 @@ struct nfsd3_symlinkargs {
> >>>> 	char *			tname;
> >>>> 	unsigned int		tlen;
> >>>> 	struct iattr		attrs;
> >>>> +	struct kvec		first;
> >>>> };
> >>>> 
> >>>> struct nfsd3_readdirargs {
> >>>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> >>>> index d56219d..b485cd1 100644
> >>>> --- a/fs/nfsd/xdr4.h
> >>>> +++ b/fs/nfsd/xdr4.h
> >>>> @@ -110,6 +110,7 @@ struct nfsd4_create {
> >>>> 		struct {
> >>>> 			u32 datalen;
> >>>> 			char *data;
> >>>> +			struct kvec first;
> >>>> 		} link;   /* NF4LNK */
> >>>> 		struct {
> >>>> 			u32 specdata1;
> >>>> @@ -124,6 +125,7 @@ struct nfsd4_create {
> >>>> };
> >>>> #define cr_datalen	u.link.datalen
> >>>> #define cr_data		u.link.data
> >>>> +#define cr_first	u.link.first
> >>>> #define cr_specdata1	u.dev.specdata1
> >>>> #define cr_specdata2	u.dev.specdata2
> >>>> 
> >>>> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> >>>> index 238b9ae..fd5846e 100644
> >>>> --- a/include/linux/sunrpc/svc.h
> >>>> +++ b/include/linux/sunrpc/svc.h
> >>>> @@ -495,6 +495,8 @@ int		   svc_register(const struct svc_serv *, struct net *, const int,
> >>>> char *		   svc_print_addr(struct svc_rqst *, char *, size_t);
> >>>> unsigned int	   svc_fill_write_vector(struct svc_rqst *rqstp,
> >>>> 					 struct kvec *first, size_t total);
> >>>> +char		  *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
> >>>> +					     struct kvec *first, size_t total);
> >>>> 
> >>>> #define	RPC_MAX_ADDRBUFLEN	(63U)
> >>>> 
> >>>> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> >>>> index 759b668..fc93406 100644
> >>>> --- a/net/sunrpc/svc.c
> >>>> +++ b/net/sunrpc/svc.c
> >>>> @@ -1578,3 +1578,70 @@ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first,
> >>>> 	return i;
> >>>> }
> >>>> EXPORT_SYMBOL_GPL(svc_fill_write_vector);
> >>>> +
> >>>> +/**
> >>>> + * svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call
> >>>> + * @rqstp: svc_rqst to operate on
> >>>> + * @first: buffer containing first section of pathname
> >>>> + * @total: total length of the pathname argument
> >>>> + *
> >>>> + * Returns pointer to a NUL-terminated string, or an ERR_PTR. The buffer is
> >>>> + * released automatically when @rqstp is recycled.
> >>>> + */
> >>>> +char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first,
> >>>> +				size_t total)
> >>>> +{
> >>>> +	struct xdr_buf *arg = &rqstp->rq_arg;
> >>>> +	struct page **pages;
> >>>> +	char *result;
> >>>> +
> >>>> +	/* VFS API demands a NUL-terminated pathname. This function
> >>>> +	 * uses a page from @rqstp as the pathname buffer, to enable
> >>>> +	 * direct placement. Thus the total buffer size is PAGE_SIZE.
> >>>> +	 * Space in this buffer for NUL-termination requires that we
> >>>> +	 * cap the size of the returned symlink pathname just a
> >>>> +	 * little early.
> >>>> +	 */
> >>>> +	if (total > PAGE_SIZE - 1)
> >>>> +		return ERR_PTR(-ENAMETOOLONG);
> >>>> +
> >>>> +	/* Some types of transport can present the pathname entirely
> >>>> +	 * in rq_arg.pages. If not, then copy the pathname into one
> >>>> +	 * page.
> >>>> +	 */
> >>>> +	pages = arg->pages;
> >>>> +	WARN_ON_ONCE(arg->page_base != 0);
> >>>> +	if (first->iov_base == 0) {
> >>>> +		result = page_address(*pages);
> >>>> +		result[total] = '\0';
> >>>> +	} else {
> >>>> +		size_t len, remaining;
> >>>> +		char *dst;
> >>>> +
> >>>> +		result = page_address(*(rqstp->rq_next_page++));
> >>>> +		dst = result;
> >>>> +		remaining = total;
> >>>> +
> >>>> +		len = min_t(size_t, total, first->iov_len);
> >>>> +		memcpy(dst, first->iov_base, len);
> >>>> +		dst += len;
> >>>> +		remaining -= len;
> >>>> +
> >>>> +		/* No more than one page left */
> >>>> +		if (remaining) {
> >>>> +			len = min_t(size_t, remaining, PAGE_SIZE);
> >>>> +			memcpy(dst, page_address(*pages), len);
> >>>> +			dst += len;
> >>>> +		}
> >>>> +
> >>>> +		*dst = '\0';
> >>>> +	}
> >>>> +
> >>>> +	/* Sanity check: we don't allow the pathname argument to
> >>>> +	 * contain a NUL byte.
> >>>> +	 */
> >>>> +	if (strlen(result) != total)
> >>>> +		return ERR_PTR(-EINVAL);
> >>>> +	return result;
> >>>> +}
> >>>> +EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname);
> >> 
> >> --
> >> Chuck Lever
> 
> --
> Chuck Lever
> 
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 3/3] NFSD: Clean up symlink argument XDR decoders
  2018-01-25 16:33           ` Bruce Fields
@ 2018-01-25 16:40             ` Chuck Lever
  0 siblings, 0 replies; 13+ messages in thread
From: Chuck Lever @ 2018-01-25 16:40 UTC (permalink / raw)
  To: Bruce Fields; +Cc: linux-rdma, Linux NFS Mailing List



> On Jan 25, 2018, at 8:33 AM, Bruce Fields <bfields@fieldses.org> wrote:
>=20
>> On Tue, Jan 23, 2018 at 02:30:57PM -0800, Chuck Lever wrote:
>>=20
>>=20
>>> On Jan 23, 2018, at 12:52 PM, Bruce Fields <bfields@fieldses.org> wrote:=

>>>=20
>>> On Mon, Jan 22, 2018 at 05:09:35PM -0800, Chuck Lever wrote:
>>>>=20
>>>>=20
>>>>> On Jan 22, 2018, at 2:00 PM, J. Bruce Fields <bfields@fieldses.org> wr=
ote:
>>>>>=20
>>>>> On Wed, Jan 03, 2018 at 03:42:35PM -0500, Chuck Lever wrote:
>>>>>> Move common code in NFSD's symlink arg decoders into a helper. The
>>>>>> immediate benefits include:
>>>>>>=20
>>>>>> - one fewer data copies on transports that support DDP
>>>>>> - no memory allocation in the NFSv4 XDR decoder
>>>>>> - consistent error checking across all versions
>>>>>> - reduction of code duplication
>>>>>> - support for both legal forms of SYMLINK requests on RDMA
>>>>>> transports for all versions of NFS (in particular, NFSv2, for
>>>>>> completeness)
>>>>>>=20
>>>>>> In the long term, this helper is an appropriate spot to perform a
>>>>>> per-transport call-out to fill the pathname argument using, say,
>>>>>> RDMA Reads.
>>>>>>=20
>>>>>> Filling the pathname in the proc function also means that eventually
>>>>>> the incoming filehandle can be interpreted so that filesystem-
>>>>>> specific memory can be allocated as a sink for the pathname
>>>>>> argument, rather than using anonymous pages.
>>>>>>=20
>>>>>> Wondering why the current code punts a zero-length SYMLINK. Is it
>>>>>> illegal to create a zero-length SYMLINK on Linux?
>>>>>=20
>>>>> SYMLINK(2) says
>>>>>=20
>>>>>    ENOENT A directory component in linkpath does not exist or is a
>>>>>    dangling symbolic link, or target or linkpath is an empty
>>>>>    string.
>>>>>=20
>>>>> That doesn't explain the INVAL, or why this is the right place to be
>>>>> checking it.
>>>>=20
>>>> RFC 1813:
>>>>=20
>>>> NFS3ERR_IO
>>>>     NFS3ERR_ACCES
>>>>     NFS3ERR_EXIST
>>>>     NFS3ERR_NOTDIR
>>>>     NFS3ERR_NOSPC
>>>>     NFS3ERR_ROFS
>>>>     NFS3ERR_NAMETOOLONG
>>>>     NFS3ERR_DQUOT
>>>>     NFS3ERR_STALE
>>>>     NFS3ERR_BADHANDLE
>>>>     NFS3ERR_NOTSUPP
>>>>     NFS3ERR_SERVERFAULT
>>>>=20
>>>> Interestingly, neither INVAL nor NOENT are valid
>>>> status codes for NFSv3 SYMLINK. NFS3ERR_NOTSUPP
>>>> might be closest, I suppose.
>>>>=20
>>>> RFC 5661 says explicitly:
>>>>=20
>>>> If the objname has a length of zero, or if objname does not obey the
>>>> UTF-8 definition, the error NFS4ERR_INVAL will be returned.
>>>>=20
>>>> And lists these as valid status codes for CREATE(NF4LNK):
>>>>=20
>>>> | NFS4ERR_ACCESS, NFS4ERR_ATTRNOTSUPP,       |
>>>> | NFS4ERR_BADCHAR, NFS4ERR_BADNAME,          |
>>>> | NFS4ERR_BADOWNER, NFS4ERR_BADTYPE,         |
>>>> | NFS4ERR_BADXDR, NFS4ERR_DEADSESSION,       |
>>>> | NFS4ERR_DELAY, NFS4ERR_DQUOT,              |
>>>> | NFS4ERR_EXIST, NFS4ERR_FHEXPIRED,          |
>>>> | NFS4ERR_INVAL, NFS4ERR_IO, NFS4ERR_MLINK,  |
>>>> | NFS4ERR_MOVED, NFS4ERR_NAMETOOLONG,        |
>>>> | NFS4ERR_NOFILEHANDLE, NFS4ERR_NOSPC,       |
>>>> | NFS4ERR_NOTDIR, NFS4ERR_OP_NOT_IN_SESSION, |
>>>> | NFS4ERR_PERM, NFS4ERR_REP_TOO_BIG,         |
>>>> | NFS4ERR_REP_TOO_BIG_TO_CACHE,              |
>>>> | NFS4ERR_REQ_TOO_BIG,                       |
>>>> | NFS4ERR_RETRY_UNCACHED_REP, NFS4ERR_ROFS,  |
>>>> | NFS4ERR_SERVERFAULT, NFS4ERR_STALE,        |
>>>> | NFS4ERR_TOO_MANY_OPS,                      |
>>>> | NFS4ERR_UNSAFE_COMPOUND                    |
>>>>=20
>>>>=20
>>>>> I'm a little nervous about the NULL termination in
>>>>> svc_fill_symlink_pathname; how do we know it's safe to write a zero
>>>>> there?  I haven't checked it carefully yet.
>>>>=20
>>>> svc_fill_symlink_pathname grabs a whole fresh page
>>>> from @rqstp. It is safe to write bytes anywhere in
>>>> that page.
>>>=20
>>> How do we know it's safe to grab another page?
>>=20
>> It's safe in the v3 case because that's what the v3
>> decoder currently does. I think you are concerned
>> about the NFSv4 COMPOUND case on TCP, for arbitrarily
>> large compounds? Given that we have 257-ish pages
>> in the rqstp->rq_pages array (ie, that number is
>> bounded) I'm not sure how we can ever be sure there
>> are enough pages for all combinations of NFS
>> operations.
>=20
> Right, I think it just depends on some maximum lengths being checked
> earlier.  So my worry is that this my violate some assumptions about how
> many pages we need to handle a request of a certain length.
>=20
> I'll go remind myself how this works.  We need some documentation here
> at least.

And/or some helper functions that manage the use of rq_pages,
while updating rq_respages as needed.


> So I may end up postponing these after all....
>=20
>>> (Could we run out of
>>> pages?  I'd probably know the answer if I'd rewritten this code
>>> recently, but I haven't and the details are swapped out....)
>>>=20
>>> Also I don't think that's true in the first->iov_base =3D=3D 0 case.
>>=20
>> This case occurs when an RDMA transport has filled
>> in the first page of arg->pages with the symlink
>> contents. It's also a whole fresh page that was
>> obtained from rqstp->rq_pages.
>=20
> OK, I wasn't following that case.
>=20
> --b.
>=20
>>=20
>> Here, are you concerned about the client sending a
>> COMPOUND over RPC/RDMA that has more than one SYMLINK
>> operation? Our server currently supports only one Read
>> chunk per RPC.
>>=20
>>=20
>>> --b.
>>>=20
>>>>=20
>>>>=20
>>>>> --g.
>>>>>=20
>>>>>>=20
>>>>>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>>>>>> ---
>>>>>> fs/nfsd/nfs3proc.c         |   10 +++++++
>>>>>> fs/nfsd/nfs3xdr.c          |   51 ++++++++-------------------------
>>>>>> fs/nfsd/nfs4proc.c         |    7 +++++
>>>>>> fs/nfsd/nfs4xdr.c          |   10 +++++--
>>>>>> fs/nfsd/nfsproc.c          |   14 +++++----
>>>>>> fs/nfsd/nfsxdr.c           |   49 +++++++++++++++++++-------------
>>>>>> fs/nfsd/xdr.h              |    1 +
>>>>>> fs/nfsd/xdr3.h             |    1 +
>>>>>> fs/nfsd/xdr4.h             |    2 +
>>>>>> include/linux/sunrpc/svc.h |    2 +
>>>>>> net/sunrpc/svc.c           |   67 +++++++++++++++++++++++++++++++++++=
+++++++++
>>>>>> 11 files changed, 146 insertions(+), 68 deletions(-)
>>>>>>=20
>>>>>> diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
>>>>>> index 2dd95eb..6259a4b 100644
>>>>>> --- a/fs/nfsd/nfs3proc.c
>>>>>> +++ b/fs/nfsd/nfs3proc.c
>>>>>> @@ -283,6 +283,16 @@
>>>>>>    struct nfsd3_diropres *resp =3D rqstp->rq_resp;
>>>>>>    __be32    nfserr;
>>>>>>=20
>>>>>> +    if (argp->tlen =3D=3D 0)
>>>>>> +        RETURN_STATUS(nfserr_inval);
>>>>>> +    if (argp->tlen > NFS3_MAXPATHLEN)
>>>>>> +        RETURN_STATUS(nfserr_nametoolong);
>>>>>> +
>>>>>> +    argp->tname =3D svc_fill_symlink_pathname(rqstp, &argp->first,
>>>>>> +                        argp->tlen);
>>>>>> +    if (IS_ERR(argp->tname))
>>>>>> +        RETURN_STATUS(nfserrno(PTR_ERR(argp->tname)));
>>>>>> +
>>>>>>    dprintk("nfsd: SYMLINK(3)  %s %.*s -> %.*s\n",
>>>>>>                SVCFH_fmt(&argp->ffh),
>>>>>>                argp->flen, argp->fname,
>>>>>> diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
>>>>>> index 240cdb0e..78b555b 100644
>>>>>> --- a/fs/nfsd/nfs3xdr.c
>>>>>> +++ b/fs/nfsd/nfs3xdr.c
>>>>>> @@ -452,51 +452,24 @@ void fill_post_wcc(struct svc_fh *fhp)
>>>>>> nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
>>>>>> {
>>>>>>    struct nfsd3_symlinkargs *args =3D rqstp->rq_argp;
>>>>>> -    unsigned int len, avail;
>>>>>> -    char *old, *new;
>>>>>> -    struct kvec *vec;
>>>>>> +    char *base =3D (char *)p;
>>>>>> +    size_t dlen;
>>>>>>=20
>>>>>>    if (!(p =3D decode_fh(p, &args->ffh)) ||
>>>>>> -        !(p =3D decode_filename(p, &args->fname, &args->flen))
>>>>>> -        )
>>>>>> +        !(p =3D decode_filename(p, &args->fname, &args->flen)))
>>>>>>        return 0;
>>>>>>    p =3D decode_sattr3(p, &args->attrs);
>>>>>>=20
>>>>>> -    /* now decode the pathname, which might be larger than the first=
 page.
>>>>>> -     * As we have to check for nul's anyway, we copy it into a new p=
age
>>>>>> -     * This page appears in the rq_res.pages list, but as pages_len i=
s always
>>>>>> -     * 0, it won't get in the way
>>>>>> -     */
>>>>>> -    len =3D ntohl(*p++);
>>>>>> -    if (len =3D=3D 0 || len > NFS3_MAXPATHLEN || len >=3D PAGE_SIZE)=

>>>>>> -        return 0;
>>>>>> -    args->tname =3D new =3D page_address(*(rqstp->rq_next_page++));
>>>>>> -    args->tlen =3D len;
>>>>>> -    /* first copy and check from the first page */
>>>>>> -    old =3D (char*)p;
>>>>>> -    vec =3D &rqstp->rq_arg.head[0];
>>>>>> -    if ((void *)old > vec->iov_base + vec->iov_len)
>>>>>> -        return 0;
>>>>>> -    avail =3D vec->iov_len - (old - (char*)vec->iov_base);
>>>>>> -    while (len && avail && *old) {
>>>>>> -        *new++ =3D *old++;
>>>>>> -        len--;
>>>>>> -        avail--;
>>>>>> -    }
>>>>>> -    /* now copy next page if there is one */
>>>>>> -    if (len && !avail && rqstp->rq_arg.page_len) {
>>>>>> -        avail =3D min_t(unsigned int, rqstp->rq_arg.page_len, PAGE_S=
IZE);
>>>>>> -        old =3D page_address(rqstp->rq_arg.pages[0]);
>>>>>> -    }
>>>>>> -    while (len && avail && *old) {
>>>>>> -        *new++ =3D *old++;
>>>>>> -        len--;
>>>>>> -        avail--;
>>>>>> -    }
>>>>>> -    *new =3D '\0';
>>>>>> -    if (len)
>>>>>> -        return 0;
>>>>>> +    args->tlen =3D ntohl(*p++);
>>>>>>=20
>>>>>> +    args->first.iov_base =3D p;
>>>>>> +    args->first.iov_len =3D rqstp->rq_arg.head[0].iov_len;
>>>>>> +    args->first.iov_len -=3D (char *)p - base;
>>>>>> +
>>>>>> +    dlen =3D args->first.iov_len + rqstp->rq_arg.page_len +
>>>>>> +           rqstp->rq_arg.tail[0].iov_len;
>>>>>> +    if (dlen < XDR_QUADLEN(args->tlen) << 2)
>>>>>> +        return 0;
>>>>>>    return 1;
>>>>>> }
>>>>>>=20
>>>>>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>>>>>> index 5029b96..36bd1f7 100644
>>>>>> --- a/fs/nfsd/nfs4proc.c
>>>>>> +++ b/fs/nfsd/nfs4proc.c
>>>>>> @@ -605,6 +605,13 @@ static void gen_boot_verifier(nfs4_verifier *ver=
ifier, struct net *net)
>>>>>>=20
>>>>>>    switch (create->cr_type) {
>>>>>>    case NF4LNK:
>>>>>> +        if (create->cr_datalen > NFS4_MAXPATHLEN)
>>>>>> +            return nfserr_nametoolong;
>>>>>> +        create->cr_data =3D
>>>>>> +            svc_fill_symlink_pathname(rqstp, &create->cr_first,
>>>>>> +                          create->cr_datalen);
>>>>>> +        if (IS_ERR(create->cr_data))
>>>>>> +            return nfserrno(PTR_ERR(create->cr_data));
>>>>>>        status =3D nfsd_symlink(rqstp, &cstate->current_fh,
>>>>>>                      create->cr_name, create->cr_namelen,
>>>>>>                      create->cr_data, &resfh);
>>>>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>>>>>> index bd25230..d05384e 100644
>>>>>> --- a/fs/nfsd/nfs4xdr.c
>>>>>> +++ b/fs/nfsd/nfs4xdr.c
>>>>>> @@ -648,6 +648,7 @@ static __be32 nfsd4_decode_bind_conn_to_session(s=
truct nfsd4_compoundargs *argp,
>>>>>> static __be32
>>>>>> nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_cre=
ate *create)
>>>>>> {
>>>>>> +    struct kvec *head;
>>>>>>    DECODE_HEAD;
>>>>>>=20
>>>>>>    READ_BUF(4);
>>>>>> @@ -656,10 +657,13 @@ static __be32 nfsd4_decode_bind_conn_to_session=
(struct nfsd4_compoundargs *argp,
>>>>>>    case NF4LNK:
>>>>>>        READ_BUF(4);
>>>>>>        create->cr_datalen =3D be32_to_cpup(p++);
>>>>>> +        if (create->cr_datalen =3D=3D 0)
>>>>>> +            return nfserr_inval;
>>>>>> +        head =3D argp->rqstp->rq_arg.head;
>>>>>> +        create->cr_first.iov_base =3D p;
>>>>>> +        create->cr_first.iov_len =3D head->iov_len;
>>>>>> +        create->cr_first.iov_len -=3D (char *)p - (char *)head->iov_=
base;
>>>>>>        READ_BUF(create->cr_datalen);
>>>>>> -        create->cr_data =3D svcxdr_dupstr(argp, p, create->cr_datale=
n);
>>>>>> -        if (!create->cr_data)
>>>>>> -            return nfserr_jukebox;
>>>>>>        break;
>>>>>>    case NF4BLK:
>>>>>>    case NF4CHR:
>>>>>> diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
>>>>>> index 1995ea6..f107f9f 100644
>>>>>> --- a/fs/nfsd/nfsproc.c
>>>>>> +++ b/fs/nfsd/nfsproc.c
>>>>>> @@ -449,17 +449,19 @@
>>>>>>    struct svc_fh    newfh;
>>>>>>    __be32        nfserr;
>>>>>>=20
>>>>>> +    if (argp->tlen > NFS_MAXPATHLEN)
>>>>>> +        return nfserr_nametoolong;
>>>>>> +
>>>>>> +    argp->tname =3D svc_fill_symlink_pathname(rqstp, &argp->first,
>>>>>> +                        argp->tlen);
>>>>>> +    if (IS_ERR(argp->tname))
>>>>>> +        return nfserrno(PTR_ERR(argp->tname));
>>>>>> +
>>>>>>    dprintk("nfsd: SYMLINK  %s %.*s -> %.*s\n",
>>>>>>        SVCFH_fmt(&argp->ffh), argp->flen, argp->fname,
>>>>>>        argp->tlen, argp->tname);
>>>>>>=20
>>>>>>    fh_init(&newfh, NFS_FHSIZE);
>>>>>> -    /*
>>>>>> -     * Crazy hack: the request fits in a page, and already-decoded
>>>>>> -     * attributes follow argp->tname, so it's safe to just write a
>>>>>> -     * null to ensure it's null-terminated:
>>>>>> -     */
>>>>>> -    argp->tname[argp->tlen] =3D '\0';
>>>>>>    nfserr =3D nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen=
,
>>>>>>                         argp->tname, &newfh);
>>>>>>=20
>>>>>> diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
>>>>>> index 165e25e..8fcd047 100644
>>>>>> --- a/fs/nfsd/nfsxdr.c
>>>>>> +++ b/fs/nfsd/nfsxdr.c
>>>>>> @@ -71,22 +71,6 @@ __be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh=
 *fhp)
>>>>>> }
>>>>>>=20
>>>>>> static __be32 *
>>>>>> -decode_pathname(__be32 *p, char **namp, unsigned int *lenp)
>>>>>> -{
>>>>>> -    char        *name;
>>>>>> -    unsigned int    i;
>>>>>> -
>>>>>> -    if ((p =3D xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHL=
EN)) !=3D NULL) {
>>>>>> -        for (i =3D 0, name =3D *namp; i < *lenp; i++, name++) {
>>>>>> -            if (*name =3D=3D '\0')
>>>>>> -                return NULL;
>>>>>> -        }
>>>>>> -    }
>>>>>> -
>>>>>> -    return p;
>>>>>> -}
>>>>>> -
>>>>>> -static __be32 *
>>>>>> decode_sattr(__be32 *p, struct iattr *iap)
>>>>>> {
>>>>>>    u32    tmp, tmp1;
>>>>>> @@ -383,14 +367,39 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst *r=
qstp, __be32 *p, struct svc_fh *f
>>>>>> nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
>>>>>> {
>>>>>>    struct nfsd_symlinkargs *args =3D rqstp->rq_argp;
>>>>>> +    char *base =3D (char *)p;
>>>>>> +    size_t xdrlen;
>>>>>>=20
>>>>>>    if (   !(p =3D decode_fh(p, &args->ffh))
>>>>>> -        || !(p =3D decode_filename(p, &args->fname, &args->flen))
>>>>>> -        || !(p =3D decode_pathname(p, &args->tname, &args->tlen)))
>>>>>> +        || !(p =3D decode_filename(p, &args->fname, &args->flen)))
>>>>>>        return 0;
>>>>>> -    p =3D decode_sattr(p, &args->attrs);
>>>>>>=20
>>>>>> -    return xdr_argsize_check(rqstp, p);
>>>>>> +    args->tlen =3D ntohl(*p++);
>>>>>> +    if (args->tlen =3D=3D 0)
>>>>>> +        return 0;
>>>>>> +
>>>>>> +    args->first.iov_base =3D p;
>>>>>> +    args->first.iov_len =3D rqstp->rq_arg.head[0].iov_len;
>>>>>> +    args->first.iov_len -=3D (char *)p - base;
>>>>>> +
>>>>>> +    /* This request is never larger than a page. Therefore,
>>>>>> +     * transport will deliver either:
>>>>>> +     * 1. pathname in the pagelist -> sattr is in the tail.
>>>>>> +     * 2. everything in the head buffer -> sattr is in the head.
>>>>>> +     */
>>>>>> +    if (rqstp->rq_arg.page_len) {
>>>>>> +        if (args->tlen !=3D rqstp->rq_arg.page_len)
>>>>>> +            return 0;
>>>>>> +        p =3D rqstp->rq_arg.tail[0].iov_base;
>>>>>> +    } else {
>>>>>> +        xdrlen =3D XDR_QUADLEN(args->tlen);
>>>>>> +        if (xdrlen > args->first.iov_len - (8 * sizeof(__be32)))
>>>>>> +            return 0;
>>>>>> +        p +=3D xdrlen;
>>>>>> +    }
>>>>>> +    decode_sattr(p, &args->attrs);
>>>>>> +
>>>>>> +    return 1;
>>>>>> }
>>>>>>=20
>>>>>> int
>>>>>> diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
>>>>>> index a765c41..ea7cca3 100644
>>>>>> --- a/fs/nfsd/xdr.h
>>>>>> +++ b/fs/nfsd/xdr.h
>>>>>> @@ -72,6 +72,7 @@ struct nfsd_symlinkargs {
>>>>>>    char *            tname;
>>>>>>    unsigned int        tlen;
>>>>>>    struct iattr        attrs;
>>>>>> +    struct kvec        first;
>>>>>> };
>>>>>>=20
>>>>>> struct nfsd_readdirargs {
>>>>>> diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
>>>>>> index deccf7f..2cb29e9 100644
>>>>>> --- a/fs/nfsd/xdr3.h
>>>>>> +++ b/fs/nfsd/xdr3.h
>>>>>> @@ -90,6 +90,7 @@ struct nfsd3_symlinkargs {
>>>>>>    char *            tname;
>>>>>>    unsigned int        tlen;
>>>>>>    struct iattr        attrs;
>>>>>> +    struct kvec        first;
>>>>>> };
>>>>>>=20
>>>>>> struct nfsd3_readdirargs {
>>>>>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
>>>>>> index d56219d..b485cd1 100644
>>>>>> --- a/fs/nfsd/xdr4.h
>>>>>> +++ b/fs/nfsd/xdr4.h
>>>>>> @@ -110,6 +110,7 @@ struct nfsd4_create {
>>>>>>        struct {
>>>>>>            u32 datalen;
>>>>>>            char *data;
>>>>>> +            struct kvec first;
>>>>>>        } link;   /* NF4LNK */
>>>>>>        struct {
>>>>>>            u32 specdata1;
>>>>>> @@ -124,6 +125,7 @@ struct nfsd4_create {
>>>>>> };
>>>>>> #define cr_datalen    u.link.datalen
>>>>>> #define cr_data        u.link.data
>>>>>> +#define cr_first    u.link.first
>>>>>> #define cr_specdata1    u.dev.specdata1
>>>>>> #define cr_specdata2    u.dev.specdata2
>>>>>>=20
>>>>>> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
>>>>>> index 238b9ae..fd5846e 100644
>>>>>> --- a/include/linux/sunrpc/svc.h
>>>>>> +++ b/include/linux/sunrpc/svc.h
>>>>>> @@ -495,6 +495,8 @@ int           svc_register(const struct svc_serv *=
, struct net *, const int,
>>>>>> char *           svc_print_addr(struct svc_rqst *, char *, size_t);
>>>>>> unsigned int       svc_fill_write_vector(struct svc_rqst *rqstp,
>>>>>>                     struct kvec *first, size_t total);
>>>>>> +char          *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
>>>>>> +                         struct kvec *first, size_t total);
>>>>>>=20
>>>>>> #define    RPC_MAX_ADDRBUFLEN    (63U)
>>>>>>=20
>>>>>> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
>>>>>> index 759b668..fc93406 100644
>>>>>> --- a/net/sunrpc/svc.c
>>>>>> +++ b/net/sunrpc/svc.c
>>>>>> @@ -1578,3 +1578,70 @@ unsigned int svc_fill_write_vector(struct svc_=
rqst *rqstp, struct kvec *first,
>>>>>>    return i;
>>>>>> }
>>>>>> EXPORT_SYMBOL_GPL(svc_fill_write_vector);
>>>>>> +
>>>>>> +/**
>>>>>> + * svc_fill_symlink_pathname - Construct pathname argument for VFS s=
ymlink call
>>>>>> + * @rqstp: svc_rqst to operate on
>>>>>> + * @first: buffer containing first section of pathname
>>>>>> + * @total: total length of the pathname argument
>>>>>> + *
>>>>>> + * Returns pointer to a NUL-terminated string, or an ERR_PTR. The bu=
ffer is
>>>>>> + * released automatically when @rqstp is recycled.
>>>>>> + */
>>>>>> +char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *=
first,
>>>>>> +                size_t total)
>>>>>> +{
>>>>>> +    struct xdr_buf *arg =3D &rqstp->rq_arg;
>>>>>> +    struct page **pages;
>>>>>> +    char *result;
>>>>>> +
>>>>>> +    /* VFS API demands a NUL-terminated pathname. This function
>>>>>> +     * uses a page from @rqstp as the pathname buffer, to enable
>>>>>> +     * direct placement. Thus the total buffer size is PAGE_SIZE.
>>>>>> +     * Space in this buffer for NUL-termination requires that we
>>>>>> +     * cap the size of the returned symlink pathname just a
>>>>>> +     * little early.
>>>>>> +     */
>>>>>> +    if (total > PAGE_SIZE - 1)
>>>>>> +        return ERR_PTR(-ENAMETOOLONG);
>>>>>> +
>>>>>> +    /* Some types of transport can present the pathname entirely
>>>>>> +     * in rq_arg.pages. If not, then copy the pathname into one
>>>>>> +     * page.
>>>>>> +     */
>>>>>> +    pages =3D arg->pages;
>>>>>> +    WARN_ON_ONCE(arg->page_base !=3D 0);
>>>>>> +    if (first->iov_base =3D=3D 0) {
>>>>>> +        result =3D page_address(*pages);
>>>>>> +        result[total] =3D '\0';
>>>>>> +    } else {
>>>>>> +        size_t len, remaining;
>>>>>> +        char *dst;
>>>>>> +
>>>>>> +        result =3D page_address(*(rqstp->rq_next_page++));
>>>>>> +        dst =3D result;
>>>>>> +        remaining =3D total;
>>>>>> +
>>>>>> +        len =3D min_t(size_t, total, first->iov_len);
>>>>>> +        memcpy(dst, first->iov_base, len);
>>>>>> +        dst +=3D len;
>>>>>> +        remaining -=3D len;
>>>>>> +
>>>>>> +        /* No more than one page left */
>>>>>> +        if (remaining) {
>>>>>> +            len =3D min_t(size_t, remaining, PAGE_SIZE);
>>>>>> +            memcpy(dst, page_address(*pages), len);
>>>>>> +            dst +=3D len;
>>>>>> +        }
>>>>>> +
>>>>>> +        *dst =3D '\0';
>>>>>> +    }
>>>>>> +
>>>>>> +    /* Sanity check: we don't allow the pathname argument to
>>>>>> +     * contain a NUL byte.
>>>>>> +     */
>>>>>> +    if (strlen(result) !=3D total)
>>>>>> +        return ERR_PTR(-EINVAL);
>>>>>> +    return result;
>>>>>> +}
>>>>>> +EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname);
>>>>=20
>>>> --
>>>> Chuck Lever
>>=20
>> --
>> Chuck Lever
>>=20
>>=20


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2018-01-25 16:50 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-03 20:42 [PATCH v2 0/3] NFS/RDMA server changes for v4.16 Chuck Lever
2018-01-03 20:42 ` [PATCH v2 1/3] svcrdma: Post Receives in the Receive completion handler Chuck Lever
2018-01-03 20:42 ` [PATCH v2 2/3] NFSD: Clean up write argument XDR decoders Chuck Lever
2018-01-03 20:42 ` [PATCH v2 3/3] NFSD: Clean up symlink " Chuck Lever
2018-01-22 22:00   ` J. Bruce Fields
2018-01-23  1:09     ` Chuck Lever
2018-01-23 20:52       ` Bruce Fields
2018-01-23 22:30         ` Chuck Lever
2018-01-25 16:33           ` Bruce Fields
2018-01-25 16:40             ` Chuck Lever
2018-01-16 21:31 ` [PATCH v2 0/3] NFS/RDMA server changes for v4.16 Chuck Lever
2018-01-16 21:33   ` Bruce Fields
2018-01-17 14:44     ` Chuck Lever

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).