* [PATCH 1/7] SUNRPC: Split out a function for setting current page
2020-01-10 22:34 [PATCH 0/7] NFS: Add support for the v4.2 READ_PLUS operation schumaker.anna
@ 2020-01-10 22:34 ` schumaker.anna
2020-01-10 22:34 ` [PATCH 2/7] SUNRPC: Add the ability to expand holes in data pages schumaker.anna
` (5 subsequent siblings)
6 siblings, 0 replies; 11+ messages in thread
From: schumaker.anna @ 2020-01-10 22:34 UTC (permalink / raw)
To: Trond.Myklebust, linux-nfs; +Cc: Anna.Schumaker
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
I'm going to need this bit of code in a few places for READ_PLUS
decoding, so let's make it a helper function.
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
net/sunrpc/xdr.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index e5497dc2475b..8bed0ec21563 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -825,6 +825,12 @@ static int xdr_set_page_base(struct xdr_stream *xdr,
return 0;
}
+static void xdr_set_page(struct xdr_stream *xdr, unsigned int base)
+{
+ if (xdr_set_page_base(xdr, base, PAGE_SIZE) < 0)
+ xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2);
+}
+
static void xdr_set_next_page(struct xdr_stream *xdr)
{
unsigned int newbase;
@@ -832,8 +838,7 @@ static void xdr_set_next_page(struct xdr_stream *xdr)
newbase = (1 + xdr->page_ptr - xdr->buf->pages) << PAGE_SHIFT;
newbase -= xdr->buf->page_base;
- if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0)
- xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2);
+ xdr_set_page(xdr, newbase);
}
static bool xdr_set_next_buffer(struct xdr_stream *xdr)
--
2.24.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 2/7] SUNRPC: Add the ability to expand holes in data pages
2020-01-10 22:34 [PATCH 0/7] NFS: Add support for the v4.2 READ_PLUS operation schumaker.anna
2020-01-10 22:34 ` [PATCH 1/7] SUNRPC: Split out a function for setting current page schumaker.anna
@ 2020-01-10 22:34 ` schumaker.anna
2020-01-10 22:34 ` [PATCH 3/7] SUNRPC: Add the ability to shift data to a specific offset schumaker.anna
` (4 subsequent siblings)
6 siblings, 0 replies; 11+ messages in thread
From: schumaker.anna @ 2020-01-10 22:34 UTC (permalink / raw)
To: Trond.Myklebust, linux-nfs; +Cc: Anna.Schumaker
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
This patch adds the ability to "read a hole" into a set of XDR data
pages by taking the following steps:
1) Shift all data after the current xdr->p to the right, possibly into
the tail,
2) Zero the specified range, and
3) Update xdr->p to point beyond the hole.
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
include/linux/sunrpc/xdr.h | 1 +
net/sunrpc/xdr.c | 100 +++++++++++++++++++++++++++++++++++++
2 files changed, 101 insertions(+)
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index b41f34977995..81a79099ed3b 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -263,6 +263,7 @@ extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data);
+extern size_t xdr_expand_hole(struct xdr_stream *, size_t, uint64_t);
/**
* xdr_stream_remaining - Return the number of bytes remaining in the stream
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 8bed0ec21563..bc9b9b0945f5 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -266,6 +266,40 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base,
} while ((len -= copy) != 0);
}
+static void
+_shift_data_right_tail(struct xdr_buf *buf, size_t pgfrom_base, size_t len)
+{
+ struct kvec *tail = buf->tail;
+
+ /* Make room for new data. */
+ if (tail->iov_len > 0)
+ memmove((char *)tail->iov_base + len, tail->iov_base, len);
+
+ _copy_from_pages((char *)tail->iov_base,
+ buf->pages,
+ buf->page_base + pgfrom_base,
+ len);
+
+ tail->iov_len += len;
+}
+
+static void
+_shift_data_right(struct xdr_buf *buf, size_t to, size_t from, size_t len)
+{
+ size_t shift = len;
+
+ if ((to + len) > buf->page_len) {
+ shift = (to + len) - buf->page_len;
+ _shift_data_right_tail(buf, (from + len) - shift, shift);
+ shift = len - shift;
+ }
+
+ _shift_data_right_pages(buf->pages,
+ buf->page_base + to,
+ buf->page_base + from,
+ shift);
+}
+
/**
* _copy_to_pages
* @pages: array of pages
@@ -350,6 +384,33 @@ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len)
}
EXPORT_SYMBOL_GPL(_copy_from_pages);
+/**
+ * _zero_data_pages
+ * @pages: array of pages
+ * @pgbase: beginning page vector address
+ * @len: length
+ */
+static void
+_zero_data_pages(struct page **pages, size_t pgbase, size_t len)
+{
+ struct page **page;
+ size_t zero;
+
+ page = pages + (pgbase >> PAGE_SHIFT);
+ pgbase &= ~PAGE_MASK;
+
+ do {
+ zero = len;
+ if (pgbase + zero > PAGE_SIZE)
+ zero = PAGE_SIZE - pgbase;
+
+ zero_user_segment(*page, pgbase, pgbase + zero);
+ page++;
+ pgbase = 0;
+
+ } while ((len -= zero) != 0);
+}
+
/**
* xdr_shrink_bufhead
* @buf: xdr_buf
@@ -505,6 +566,24 @@ unsigned int xdr_stream_pos(const struct xdr_stream *xdr)
}
EXPORT_SYMBOL_GPL(xdr_stream_pos);
+/**
+ * xdr_page_pos - Return the current offset from the start of the xdr->buf->pages
+ * @xdr: pointer to struct xdr_stream
+ */
+static size_t xdr_page_pos(const struct xdr_stream *xdr)
+{
+ unsigned int offset;
+ unsigned int base = xdr->buf->page_len;
+ void *kaddr = xdr->buf->tail->iov_base;;
+
+ if (xdr->page_ptr) {
+ base = (xdr->page_ptr - xdr->buf->pages) * PAGE_SIZE;
+ kaddr = page_address(*xdr->page_ptr);
+ }
+ offset = xdr->p - (__be32 *)kaddr;
+ return base + (offset * sizeof(__be32));
+}
+
/**
* xdr_init_encode - Initialize a struct xdr_stream for sending data.
* @xdr: pointer to xdr_stream struct
@@ -1062,6 +1141,27 @@ unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len)
}
EXPORT_SYMBOL_GPL(xdr_read_pages);
+size_t xdr_expand_hole(struct xdr_stream *xdr, size_t offset, uint64_t length)
+{
+ struct xdr_buf *buf = xdr->buf;
+ size_t from = 0;
+
+ if ((offset + length) < offset ||
+ (offset + length) > buf->page_len)
+ length = buf->page_len - offset;
+
+ if (offset == 0)
+ xdr_align_pages(xdr, xdr->nwords << 2);
+ else
+ from = xdr_page_pos(xdr);
+
+ _shift_data_right(buf, offset + length, from, xdr->nwords << 2);
+ _zero_data_pages(buf->pages, buf->page_base + offset, length);
+ xdr_set_page(xdr, offset + length);
+ return length;
+}
+EXPORT_SYMBOL_GPL(xdr_expand_hole);
+
/**
* xdr_enter_page - decode data from the XDR page
* @xdr: pointer to xdr_stream struct
--
2.24.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 3/7] SUNRPC: Add the ability to shift data to a specific offset
2020-01-10 22:34 [PATCH 0/7] NFS: Add support for the v4.2 READ_PLUS operation schumaker.anna
2020-01-10 22:34 ` [PATCH 1/7] SUNRPC: Split out a function for setting current page schumaker.anna
2020-01-10 22:34 ` [PATCH 2/7] SUNRPC: Add the ability to expand holes in data pages schumaker.anna
@ 2020-01-10 22:34 ` schumaker.anna
2020-01-10 22:34 ` [PATCH 4/7] NFS: Add READ_PLUS data segment support schumaker.anna
` (3 subsequent siblings)
6 siblings, 0 replies; 11+ messages in thread
From: schumaker.anna @ 2020-01-10 22:34 UTC (permalink / raw)
To: Trond.Myklebust, linux-nfs; +Cc: Anna.Schumaker
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Expanding holes tends to put the data content a few bytes to the right
of where we want it. This patch implements a left-shift operation to
line everything up properly.
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
include/linux/sunrpc/xdr.h | 1 +
net/sunrpc/xdr.c | 135 +++++++++++++++++++++++++++++++++++++
2 files changed, 136 insertions(+)
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 81a79099ed3b..90abf732c8ee 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -264,6 +264,7 @@ extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data);
extern size_t xdr_expand_hole(struct xdr_stream *, size_t, uint64_t);
+extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint64_t);
/**
* xdr_stream_remaining - Return the number of bytes remaining in the stream
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index bc9b9b0945f5..a857c2308ee1 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -19,6 +19,9 @@
#include <linux/bvec.h>
#include <trace/events/sunrpc.h>
+static void _copy_to_pages(struct page **, size_t, const char *, size_t);
+
+
/*
* XDR functions for basic NFS types
*/
@@ -300,6 +303,117 @@ _shift_data_right(struct xdr_buf *buf, size_t to, size_t from, size_t len)
shift);
}
+
+/**
+ * _shift_data_left_pages
+ * @pages: vector of pages containing both the source and dest memory area.
+ * @pgto_base: page vector address of destination
+ * @pgfrom_base: page vector address of source
+ * @len: number of bytes to copy
+ *
+ * Note: the addresses pgto_base and pgfrom_base are both calculated in
+ * the same way:
+ * if a memory area starts at byte 'base' in page 'pages[i]',
+ * then its address is given as (i << PAGE_CACHE_SHIFT) + base
+ * Alse note: pgto_base must be < pgfrom_base, but the memory areas
+ * they point to may overlap.
+ */
+static void
+_shift_data_left_pages(struct page **pages, size_t pgto_base,
+ size_t pgfrom_base, size_t len)
+{
+ struct page **pgfrom, **pgto;
+ char *vfrom, *vto;
+ size_t copy;
+
+ BUG_ON(pgfrom_base <= pgto_base);
+
+ pgto = pages + (pgto_base >> PAGE_SHIFT);
+ pgfrom = pages + (pgfrom_base >> PAGE_SHIFT);
+
+ pgto_base = pgto_base % PAGE_SIZE;
+ pgfrom_base = pgfrom_base % PAGE_SIZE;
+
+ do {
+ if (pgto_base >= PAGE_SIZE) {
+ pgto_base = 0;
+ pgto++;
+ }
+ if (pgfrom_base >= PAGE_SIZE){
+ pgfrom_base = 0;
+ pgfrom++;
+ }
+
+ copy = len;
+ if (copy > (PAGE_SIZE - pgto_base))
+ copy = PAGE_SIZE - pgto_base;
+ if (copy > (PAGE_SIZE - pgfrom_base))
+ copy = PAGE_SIZE - pgfrom_base;
+
+ if (pgto_base == 131056)
+ break;
+
+ vto = kmap_atomic(*pgto);
+ if (*pgto != *pgfrom) {
+ vfrom = kmap_atomic(*pgfrom);
+ memcpy(vto + pgto_base, vfrom + pgfrom_base, copy);
+ kunmap_atomic(vfrom);
+ } else
+ memmove(vto + pgto_base, vto + pgfrom_base, copy);
+ flush_dcache_page(*pgto);
+ kunmap_atomic(vto);
+
+ pgto_base += copy;
+ pgfrom_base += copy;
+
+ } while ((len -= copy) != 0);
+}
+
+static void
+_shift_data_left_tail(struct xdr_buf *buf, size_t pgto_base,
+ size_t tail_from, size_t len)
+{
+ struct kvec *tail = buf->tail;
+ size_t shift = len;
+
+ if (len == 0)
+ return;
+ if (pgto_base + len > buf->page_len)
+ shift = buf->page_len - pgto_base;
+
+ _copy_to_pages(buf->pages,
+ buf->page_base + pgto_base,
+ (char *)(tail->iov_base + tail_from),
+ shift);
+
+ memmove((char *)tail->iov_base, tail->iov_base + tail_from + shift, shift);
+ tail->iov_len -= (tail_from + shift);
+}
+
+static void
+_shift_data_left(struct xdr_buf *buf, size_t to, size_t from, size_t len)
+{
+ size_t shift = len;
+
+ if (from < buf->page_len) {
+ shift = min(len, buf->page_len - from);
+ _shift_data_left_pages(buf->pages,
+ buf->page_base + to,
+ buf->page_base + from,
+ shift);
+ to += shift;
+ from += shift;
+ shift = len - shift;
+ }
+
+ if (shift == 0)
+ return;
+ if (from >= buf->page_len)
+ from -= buf->page_len;
+
+ _shift_data_left_tail(buf, to, from, shift);
+}
+
/**
* _copy_to_pages
* @pages: array of pages
@@ -1162,6 +1276,27 @@ size_t xdr_expand_hole(struct xdr_stream *xdr, size_t offset, uint64_t length)
}
EXPORT_SYMBOL_GPL(xdr_expand_hole);
+uint64_t xdr_align_data(struct xdr_stream *xdr, uint64_t offset, uint64_t length)
+{
+ struct xdr_buf *buf = xdr->buf;
+ size_t from = offset;
+
+ if (offset + length > buf->page_len)
+ length = buf->page_len - offset;
+
+ if (offset == 0)
+ xdr_align_pages(xdr, xdr->nwords << 2);
+ else {
+ from = xdr_page_pos(xdr);
+ _shift_data_left(buf, offset, from, length);
+ }
+
+ xdr->nwords -= XDR_QUADLEN(length);
+ xdr_set_page(xdr, from + length);
+ return length;
+}
+EXPORT_SYMBOL_GPL(xdr_align_data);
+
/**
* xdr_enter_page - decode data from the XDR page
* @xdr: pointer to xdr_stream struct
--
2.24.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 4/7] NFS: Add READ_PLUS data segment support
2020-01-10 22:34 [PATCH 0/7] NFS: Add support for the v4.2 READ_PLUS operation schumaker.anna
` (2 preceding siblings ...)
2020-01-10 22:34 ` [PATCH 3/7] SUNRPC: Add the ability to shift data to a specific offset schumaker.anna
@ 2020-01-10 22:34 ` schumaker.anna
2020-01-10 22:34 ` [PATCH 5/7] NFS: Add READ_PLUS hole segment decoding schumaker.anna
` (2 subsequent siblings)
6 siblings, 0 replies; 11+ messages in thread
From: schumaker.anna @ 2020-01-10 22:34 UTC (permalink / raw)
To: Trond.Myklebust, linux-nfs; +Cc: Anna.Schumaker
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
This patch adds client support for decoding a single NFS4_CONTENT_DATA
segment returned by the server. This is the simplest implementation
possible, since it does not account for any hole segments in the reply.
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
fs/nfs/nfs42xdr.c | 138 ++++++++++++++++++++++++++++++++++++++
fs/nfs/nfs4proc.c | 32 ++++++++-
fs/nfs/nfs4xdr.c | 1 +
include/linux/nfs4.h | 2 +-
include/linux/nfs_fs_sb.h | 1 +
include/linux/nfs_xdr.h | 2 +-
6 files changed, 171 insertions(+), 5 deletions(-)
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index c03f3246d6c5..bf118ecabe2c 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -45,6 +45,15 @@
#define encode_deallocate_maxsz (op_encode_hdr_maxsz + \
encode_fallocate_maxsz)
#define decode_deallocate_maxsz (op_decode_hdr_maxsz)
+#define encode_read_plus_maxsz (op_encode_hdr_maxsz + \
+ encode_stateid_maxsz + 3)
+#define NFS42_READ_PLUS_SEGMENT_SIZE (1 /* data_content4 */ + \
+ 2 /* data_info4.di_offset */ + \
+ 2 /* data_info4.di_length */)
+#define decode_read_plus_maxsz (op_decode_hdr_maxsz + \
+ 1 /* rpr_eof */ + \
+ 1 /* rpr_contents count */ + \
+ NFS42_READ_PLUS_SEGMENT_SIZE)
#define encode_seek_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + \
2 /* offset */ + \
@@ -128,6 +137,14 @@
decode_putfh_maxsz + \
decode_deallocate_maxsz + \
decode_getattr_maxsz)
+#define NFS4_enc_read_plus_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_read_plus_maxsz)
+#define NFS4_dec_read_plus_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_read_plus_maxsz)
#define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
@@ -252,6 +269,16 @@ static void encode_deallocate(struct xdr_stream *xdr,
encode_fallocate(xdr, args);
}
+static void encode_read_plus(struct xdr_stream *xdr,
+ const struct nfs_pgio_args *args,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_READ_PLUS, decode_read_plus_maxsz, hdr);
+ encode_nfs4_stateid(xdr, &args->stateid);
+ encode_uint64(xdr, args->offset);
+ encode_uint32(xdr, args->count);
+}
+
static void encode_seek(struct xdr_stream *xdr,
const struct nfs42_seek_args *args,
struct compound_hdr *hdr)
@@ -446,6 +473,29 @@ static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
encode_nops(&hdr);
}
+/*
+ * Encode READ_PLUS request
+ */
+static void nfs4_xdr_enc_read_plus(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs_pgio_args *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_read_plus(xdr, args, &hdr);
+
+ rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+ args->count, hdr.replen);
+ req->rq_rcv_buf.flags |= XDRBUF_READ;
+ encode_nops(&hdr);
+}
+
/*
* Encode SEEK request
*/
@@ -694,6 +744,67 @@ static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *re
return decode_op_hdr(xdr, OP_DEALLOCATE);
}
+static uint32_t decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *res,
+ uint32_t *eof)
+{
+ __be32 *p;
+ uint32_t count, recvd;
+ uint64_t offset;
+
+ p = xdr_inline_decode(xdr, 8 + 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ p = xdr_decode_hyper(p, &offset);
+ count = be32_to_cpup(p);
+ if (count == 0)
+ return 0;
+
+ recvd = xdr_read_pages(xdr, count);
+ if (count > recvd) {
+ dprintk("NFS: server cheating in read reply: "
+ "count %u > recvd %u\n", count, recvd);
+ count = recvd;
+ *eof = 0;
+ }
+
+ return count;
+}
+
+static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
+{
+ __be32 *p;
+ uint32_t count, eof, segments, type;
+ int status;
+
+ status = decode_op_hdr(xdr, OP_READ_PLUS);
+ if (status)
+ return status;
+
+ p = xdr_inline_decode(xdr, 4 + 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ eof = be32_to_cpup(p++);
+ segments = be32_to_cpup(p++);
+ if (segments == 0)
+ return 0;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ type = be32_to_cpup(p++);
+ if (type == NFS4_CONTENT_DATA)
+ count = decode_read_plus_data(xdr, res, &eof);
+ else
+ return -EINVAL;
+
+ res->eof = eof;
+ res->count = count;
+ return 0;
+}
+
static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
{
int status;
@@ -870,6 +981,33 @@ static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp,
return status;
}
+/*
+ * Decode READ_PLUS request
+ */
+static int nfs4_xdr_dec_read_plus(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfs_pgio_res *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_read_plus(xdr, res);
+ if (!status)
+ status = res->count;
+out:
+ return status;
+}
+
/*
* Decode SEEK request
*/
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 64bbb0d0ff5a..60b1ec684751 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -69,6 +69,10 @@
#include "nfs4trace.h"
+#ifdef CONFIG_NFS_V4_2
+#include "nfs42.h"
+#endif /* CONFIG_NFS_V4_2 */
+
#define NFSDBG_FACILITY NFSDBG_PROC
#define NFS4_BITMASK_SZ 3
@@ -5195,9 +5199,15 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task,
static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
-
+ struct nfs_server *server = NFS_SERVER(hdr->inode);
dprintk("--> %s\n", __func__);
+ if ((server->caps & NFS_CAP_READ_PLUS) && (task->tk_status == -ENOTSUPP)) {
+ server->caps &= ~NFS_CAP_READ_PLUS;
+ if (rpc_restart_call_prepare(task))
+ task->tk_status = 0;
+ return -EAGAIN;
+ }
if (!nfs4_sequence_done(task, &hdr->res.seq_res))
return -EAGAIN;
if (nfs4_read_stateid_changed(task, &hdr->args))
@@ -5208,13 +5218,28 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
nfs4_read_done_cb(task, hdr);
}
+#ifdef CONFIG_NFS_V4_2
+static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
+{
+ if (server->caps & NFS_CAP_READ_PLUS)
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS];
+ else
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+}
+#else
+static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
+{
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+}
+#endif /* CONFIG_NFS_V4_2 */
+
static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
struct rpc_message *msg)
{
hdr->timestamp = jiffies;
if (!hdr->pgio_done_cb)
hdr->pgio_done_cb = nfs4_read_done_cb;
- msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+ nfs42_read_plus_support(NFS_SERVER(hdr->inode), msg);
nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0);
}
@@ -9957,7 +9982,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
| NFS_CAP_SEEK
| NFS_CAP_LAYOUTSTATS
| NFS_CAP_CLONE
- | NFS_CAP_LAYOUTERROR,
+ | NFS_CAP_LAYOUTERROR
+ | NFS_CAP_READ_PLUS,
.init_client = nfs41_init_client,
.shutdown_client = nfs41_shutdown_client,
.match_stateid = nfs41_match_stateid,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 02890606e14a..dd81c9ddd0fb 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7593,6 +7593,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
PROC42(COPY_NOTIFY, enc_copy_notify, dec_copy_notify),
PROC(LOOKUPP, enc_lookupp, dec_lookupp),
PROC42(LAYOUTERROR, enc_layouterror, dec_layouterror),
+ PROC42(READ_PLUS, enc_read_plus, dec_read_plus),
};
static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 82d8fb422092..c1eeef52545c 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -540,8 +540,8 @@ enum {
NFSPROC4_CLNT_LOOKUPP,
NFSPROC4_CLNT_LAYOUTERROR,
-
NFSPROC4_CLNT_COPY_NOTIFY,
+ NFSPROC4_CLNT_READ_PLUS,
};
/* nfs41 types */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 465fa98258a3..11248c5a7b24 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -281,5 +281,6 @@ struct nfs_server {
#define NFS_CAP_OFFLOAD_CANCEL (1U << 25)
#define NFS_CAP_LAYOUTERROR (1U << 26)
#define NFS_CAP_COPY_NOTIFY (1U << 27)
+#define NFS_CAP_READ_PLUS (1U << 28)
#endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index bb8652792b84..412297a10878 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -655,7 +655,7 @@ struct nfs_pgio_args {
struct nfs_pgio_res {
struct nfs4_sequence_res seq_res;
struct nfs_fattr * fattr;
- __u32 count;
+ __u64 count;
__u32 op_status;
union {
struct {
--
2.24.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 5/7] NFS: Add READ_PLUS hole segment decoding
2020-01-10 22:34 [PATCH 0/7] NFS: Add support for the v4.2 READ_PLUS operation schumaker.anna
` (3 preceding siblings ...)
2020-01-10 22:34 ` [PATCH 4/7] NFS: Add READ_PLUS data segment support schumaker.anna
@ 2020-01-10 22:34 ` schumaker.anna
2020-01-10 22:34 ` [PATCH 6/7] NFS: Decode multiple READ_PLUS segments schumaker.anna
2020-01-10 22:34 ` [PATCH 7/7] NFS: Add a mount option for READ_PLUS schumaker.anna
6 siblings, 0 replies; 11+ messages in thread
From: schumaker.anna @ 2020-01-10 22:34 UTC (permalink / raw)
To: Trond.Myklebust, linux-nfs; +Cc: Anna.Schumaker
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
We keep things simple for now by only decoding a single hole or data
segment returned by the server, even if they returned more to us.
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
fs/nfs/nfs42xdr.c | 32 ++++++++++++++++++++++++++++++--
1 file changed, 30 insertions(+), 2 deletions(-)
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index bf118ecabe2c..3407a3cf2e13 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -47,7 +47,7 @@
#define decode_deallocate_maxsz (op_decode_hdr_maxsz)
#define encode_read_plus_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + 3)
-#define NFS42_READ_PLUS_SEGMENT_SIZE (1 /* data_content4 */ + \
+#define NFS42_READ_PLUS_SEGMENT_SIZE (2 /* data_content4 */ + \
2 /* data_info4.di_offset */ + \
2 /* data_info4.di_length */)
#define decode_read_plus_maxsz (op_decode_hdr_maxsz + \
@@ -771,6 +771,31 @@ static uint32_t decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_re
return count;
}
+static uint32_t decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *res,
+ uint32_t *eof)
+{
+ __be32 *p;
+ uint64_t offset, length;
+ size_t recvd;
+
+ p = xdr_inline_decode(xdr, 8 + 8);
+ if (unlikely(!p))
+ return -EIO;
+
+ p = xdr_decode_hyper(p, &offset);
+ p = xdr_decode_hyper(p, &length);
+ if (length == 0)
+ return 0;
+
+ recvd = xdr_expand_hole(xdr, 0, length);
+ if (recvd < length) {
+ *eof = 0;
+ length = recvd;
+ }
+
+ return length;
+}
+
static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
{
__be32 *p;
@@ -798,7 +823,10 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
if (type == NFS4_CONTENT_DATA)
count = decode_read_plus_data(xdr, res, &eof);
else
- return -EINVAL;
+ count = decode_read_plus_hole(xdr, res, &eof);
+
+ if (segments > 1)
+ eof = 0;
res->eof = eof;
res->count = count;
--
2.24.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 6/7] NFS: Decode multiple READ_PLUS segments
2020-01-10 22:34 [PATCH 0/7] NFS: Add support for the v4.2 READ_PLUS operation schumaker.anna
` (4 preceding siblings ...)
2020-01-10 22:34 ` [PATCH 5/7] NFS: Add READ_PLUS hole segment decoding schumaker.anna
@ 2020-01-10 22:34 ` schumaker.anna
2020-01-10 22:34 ` [PATCH 7/7] NFS: Add a mount option for READ_PLUS schumaker.anna
6 siblings, 0 replies; 11+ messages in thread
From: schumaker.anna @ 2020-01-10 22:34 UTC (permalink / raw)
To: Trond.Myklebust, linux-nfs; +Cc: Anna.Schumaker
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
We now have everything we need to read holes and shift data to where
it's supposed to be. I switch over to using xdr_align_data() to put data
segments in the proper place.
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
fs/nfs/nfs42xdr.c | 43 +++++++++++++++++++++++--------------------
1 file changed, 23 insertions(+), 20 deletions(-)
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 3407a3cf2e13..b5c638bcab66 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -53,7 +53,7 @@
#define decode_read_plus_maxsz (op_decode_hdr_maxsz + \
1 /* rpr_eof */ + \
1 /* rpr_contents count */ + \
- NFS42_READ_PLUS_SEGMENT_SIZE)
+ (2 * NFS42_READ_PLUS_SEGMENT_SIZE))
#define encode_seek_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + \
2 /* offset */ + \
@@ -745,7 +745,7 @@ static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *re
}
static uint32_t decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *res,
- uint32_t *eof)
+ uint32_t *eof, uint64_t total)
{
__be32 *p;
uint32_t count, recvd;
@@ -760,7 +760,7 @@ static uint32_t decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_re
if (count == 0)
return 0;
- recvd = xdr_read_pages(xdr, count);
+ recvd = xdr_align_data(xdr, total, count);
if (count > recvd) {
dprintk("NFS: server cheating in read reply: "
"count %u > recvd %u\n", count, recvd);
@@ -772,7 +772,7 @@ static uint32_t decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_re
}
static uint32_t decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *res,
- uint32_t *eof)
+ uint32_t *eof, uint64_t total)
{
__be32 *p;
uint64_t offset, length;
@@ -787,7 +787,7 @@ static uint32_t decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_re
if (length == 0)
return 0;
- recvd = xdr_expand_hole(xdr, 0, length);
+ recvd = xdr_expand_hole(xdr, total, length);
if (recvd < length) {
*eof = 0;
length = recvd;
@@ -799,8 +799,9 @@ static uint32_t decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_re
static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
{
__be32 *p;
- uint32_t count, eof, segments, type;
- int status;
+ uint32_t eof, segments, type, total;
+ int32_t count;
+ int status, i;
status = decode_op_hdr(xdr, OP_READ_PLUS);
if (status)
@@ -810,26 +811,28 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
if (unlikely(!p))
return -EIO;
+ total = 0;
eof = be32_to_cpup(p++);
segments = be32_to_cpup(p++);
- if (segments == 0)
- return 0;
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- return -EIO;
+ for (i = 0; i < segments; i++) {
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
- type = be32_to_cpup(p++);
- if (type == NFS4_CONTENT_DATA)
- count = decode_read_plus_data(xdr, res, &eof);
- else
- count = decode_read_plus_hole(xdr, res, &eof);
+ type = be32_to_cpup(p);
+ if (type == NFS4_CONTENT_DATA)
+ count = decode_read_plus_data(xdr, res, &eof, total);
+ else
+ count = decode_read_plus_hole(xdr, res, &eof, total);
- if (segments > 1)
- eof = 0;
+ if (count < 0)
+ return count;
+ total += count;
+ }
res->eof = eof;
- res->count = count;
+ res->count = total;
return 0;
}
--
2.24.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 7/7] NFS: Add a mount option for READ_PLUS
2020-01-10 22:34 [PATCH 0/7] NFS: Add support for the v4.2 READ_PLUS operation schumaker.anna
` (5 preceding siblings ...)
2020-01-10 22:34 ` [PATCH 6/7] NFS: Decode multiple READ_PLUS segments schumaker.anna
@ 2020-01-10 22:34 ` schumaker.anna
2020-01-10 22:41 ` Chuck Lever
6 siblings, 1 reply; 11+ messages in thread
From: schumaker.anna @ 2020-01-10 22:34 UTC (permalink / raw)
To: Trond.Myklebust, linux-nfs; +Cc: Anna.Schumaker
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
There are some workloads where READ_PLUS might end up hurting
performance, so let's be nice to users and provide a way to disable this
operation similar to how READDIR_PLUS can be disabled.
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
fs/nfs/fs_context.c | 14 ++++++++++++++
fs/nfs/nfs4client.c | 3 +++
include/linux/nfs_fs_sb.h | 1 +
3 files changed, 18 insertions(+)
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 0247dcb7b316..82ba07c7c1ce 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -64,6 +64,7 @@ enum nfs_param {
Opt_proto,
Opt_rdirplus,
Opt_rdma,
+ Opt_readplus,
Opt_resvport,
Opt_retrans,
Opt_retry,
@@ -120,6 +121,7 @@ static const struct fs_parameter_spec nfs_param_specs[] = {
fsparam_string("proto", Opt_proto),
fsparam_flag_no("rdirplus", Opt_rdirplus),
fsparam_flag ("rdma", Opt_rdma),
+ fsparam_flag_no("readplus", Opt_readplus),
fsparam_flag_no("resvport", Opt_resvport),
fsparam_u32 ("retrans", Opt_retrans),
fsparam_string("retry", Opt_retry),
@@ -555,6 +557,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
else
ctx->options |= NFS_OPTION_MIGRATION;
break;
+ case Opt_readplus:
+ if (result.negated)
+ ctx->options |= NFS_OPTION_NO_READ_PLUS;
+ else
+ ctx->options &= ~NFS_OPTION_NO_READ_PLUS;
+ break;
/*
* options that take numeric values
@@ -1176,6 +1184,10 @@ static int nfs_fs_context_validate(struct fs_context *fc)
(ctx->version != 4 || ctx->minorversion != 0))
goto out_migration_misuse;
+ if (ctx->options & NFS_OPTION_NO_READ_PLUS &&
+ (ctx->version != 4 || ctx->minorversion < 2))
+ goto out_noreadplus_misuse;
+
/* Verify that any proto=/mountproto= options match the address
* families in the addr=/mountaddr= options.
*/
@@ -1254,6 +1266,8 @@ static int nfs_fs_context_validate(struct fs_context *fc)
ctx->version, ctx->minorversion);
out_migration_misuse:
return nfs_invalf(fc, "NFS: 'Migration' not supported for this NFS version");
+out_noreadplus_misuse:
+ return nfs_invalf(fc, "NFS: 'noreadplus' not supported for this NFS version\n");
out_version_unavailable:
nfs_errorf(fc, "NFS: Version unavailable");
return ret;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 0cd767e5c977..868dc3c36ba1 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -1016,6 +1016,9 @@ static int nfs4_server_common_setup(struct nfs_server *server,
server->caps |= server->nfs_client->cl_mvops->init_caps;
if (server->flags & NFS_MOUNT_NORDIRPLUS)
server->caps &= ~NFS_CAP_READDIRPLUS;
+ if (server->options & NFS_OPTION_NO_READ_PLUS)
+ server->caps &= ~NFS_CAP_READ_PLUS;
+
/*
* Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
* authentication.
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 11248c5a7b24..360e70c7bbb6 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -172,6 +172,7 @@ struct nfs_server {
unsigned int clone_blksize; /* granularity of a CLONE operation */
#define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */
#define NFS_OPTION_MIGRATION 0x00000002 /* - NFSv4 migration enabled */
+#define NFS_OPTION_NO_READ_PLUS 0x00000004 /* - NFSv4.2 READ_PLUS enabled */
struct nfs_fsid fsid;
__u64 maxfilesize; /* maximum file size */
--
2.24.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH 7/7] NFS: Add a mount option for READ_PLUS
2020-01-10 22:34 ` [PATCH 7/7] NFS: Add a mount option for READ_PLUS schumaker.anna
@ 2020-01-10 22:41 ` Chuck Lever
2020-01-10 22:43 ` Schumaker, Anna
0 siblings, 1 reply; 11+ messages in thread
From: Chuck Lever @ 2020-01-10 22:41 UTC (permalink / raw)
To: Anna Schumaker; +Cc: Trond.Myklebust, Linux NFS Mailing List, Anna Schumaker
Hi Anna-
> On Jan 10, 2020, at 5:34 PM, schumaker.anna@gmail.com wrote:
>
> From: Anna Schumaker <Anna.Schumaker@Netapp.com>
>
> There are some workloads where READ_PLUS might end up hurting
> performance,
Can you say more about this? Have you seen workloads that are
hurt by READ_PLUS? Nothing jumps out at me from the tables in
the cover letter.
> so let's be nice to users and provide a way to disable this
> operation similar to how READDIR_PLUS can be disabled.
Does it make sense to hold off on a mount option until there
is evidence that there is no other way to work around such a
performance regression?
- Attempt to address the regression directly
- Improve the heuristics about when READ_PLUS is used
- Document that dropping back to vers=4.1 will disable it
Any experience with NFS/RDMA?
> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
> ---
> fs/nfs/fs_context.c | 14 ++++++++++++++
> fs/nfs/nfs4client.c | 3 +++
> include/linux/nfs_fs_sb.h | 1 +
> 3 files changed, 18 insertions(+)
>
> diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> index 0247dcb7b316..82ba07c7c1ce 100644
> --- a/fs/nfs/fs_context.c
> +++ b/fs/nfs/fs_context.c
> @@ -64,6 +64,7 @@ enum nfs_param {
> Opt_proto,
> Opt_rdirplus,
> Opt_rdma,
> + Opt_readplus,
> Opt_resvport,
> Opt_retrans,
> Opt_retry,
> @@ -120,6 +121,7 @@ static const struct fs_parameter_spec nfs_param_specs[] = {
> fsparam_string("proto", Opt_proto),
> fsparam_flag_no("rdirplus", Opt_rdirplus),
> fsparam_flag ("rdma", Opt_rdma),
> + fsparam_flag_no("readplus", Opt_readplus),
> fsparam_flag_no("resvport", Opt_resvport),
> fsparam_u32 ("retrans", Opt_retrans),
> fsparam_string("retry", Opt_retry),
> @@ -555,6 +557,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
> else
> ctx->options |= NFS_OPTION_MIGRATION;
> break;
> + case Opt_readplus:
> + if (result.negated)
> + ctx->options |= NFS_OPTION_NO_READ_PLUS;
> + else
> + ctx->options &= ~NFS_OPTION_NO_READ_PLUS;
> + break;
>
> /*
> * options that take numeric values
> @@ -1176,6 +1184,10 @@ static int nfs_fs_context_validate(struct fs_context *fc)
> (ctx->version != 4 || ctx->minorversion != 0))
> goto out_migration_misuse;
>
> + if (ctx->options & NFS_OPTION_NO_READ_PLUS &&
> + (ctx->version != 4 || ctx->minorversion < 2))
> + goto out_noreadplus_misuse;
> +
> /* Verify that any proto=/mountproto= options match the address
> * families in the addr=/mountaddr= options.
> */
> @@ -1254,6 +1266,8 @@ static int nfs_fs_context_validate(struct fs_context *fc)
> ctx->version, ctx->minorversion);
> out_migration_misuse:
> return nfs_invalf(fc, "NFS: 'Migration' not supported for this NFS version");
> +out_noreadplus_misuse:
> + return nfs_invalf(fc, "NFS: 'noreadplus' not supported for this NFS version\n");
> out_version_unavailable:
> nfs_errorf(fc, "NFS: Version unavailable");
> return ret;
> diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
> index 0cd767e5c977..868dc3c36ba1 100644
> --- a/fs/nfs/nfs4client.c
> +++ b/fs/nfs/nfs4client.c
> @@ -1016,6 +1016,9 @@ static int nfs4_server_common_setup(struct nfs_server *server,
> server->caps |= server->nfs_client->cl_mvops->init_caps;
> if (server->flags & NFS_MOUNT_NORDIRPLUS)
> server->caps &= ~NFS_CAP_READDIRPLUS;
> + if (server->options & NFS_OPTION_NO_READ_PLUS)
> + server->caps &= ~NFS_CAP_READ_PLUS;
> +
> /*
> * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
> * authentication.
> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> index 11248c5a7b24..360e70c7bbb6 100644
> --- a/include/linux/nfs_fs_sb.h
> +++ b/include/linux/nfs_fs_sb.h
> @@ -172,6 +172,7 @@ struct nfs_server {
> unsigned int clone_blksize; /* granularity of a CLONE operation */
> #define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */
> #define NFS_OPTION_MIGRATION 0x00000002 /* - NFSv4 migration enabled */
> +#define NFS_OPTION_NO_READ_PLUS 0x00000004 /* - NFSv4.2 READ_PLUS enabled */
>
> struct nfs_fsid fsid;
> __u64 maxfilesize; /* maximum file size */
> --
> 2.24.1
>
--
Chuck Lever
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 7/7] NFS: Add a mount option for READ_PLUS
2020-01-10 22:41 ` Chuck Lever
@ 2020-01-10 22:43 ` Schumaker, Anna
2020-01-10 22:45 ` Chuck Lever
0 siblings, 1 reply; 11+ messages in thread
From: Schumaker, Anna @ 2020-01-10 22:43 UTC (permalink / raw)
To: chuck.lever; +Cc: linux-nfs, Trond.Myklebust
Hi Chuck,
On Fri, 2020-01-10 at 17:41 -0500, Chuck Lever wrote:
> NetApp Security WARNING: This is an external email. Do not click links or open
> attachments unless you recognize the sender and know the content is safe.
>
>
>
>
> Hi Anna-
>
> > On Jan 10, 2020, at 5:34 PM, schumaker.anna@gmail.com wrote:
> >
> > From: Anna Schumaker <Anna.Schumaker@Netapp.com>
> >
> > There are some workloads where READ_PLUS might end up hurting
> > performance,
>
> Can you say more about this? Have you seen workloads that are
> hurt by READ_PLUS? Nothing jumps out at me from the tables in
> the cover letter.
It's mostly something I've seen when using btrfs in a virtual machine (so
probably not a wide use case, and it's possible I need to change something in my
setup to get it working better).
>
>
> > so let's be nice to users and provide a way to disable this
> > operation similar to how READDIR_PLUS can be disabled.
>
> Does it make sense to hold off on a mount option until there
> is evidence that there is no other way to work around such a
> performance regression?
>
> - Attempt to address the regression directly
> - Improve the heuristics about when READ_PLUS is used
> - Document that dropping back to vers=4.1 will disable it
Yeah, we could hold off on the patch for now and see if anybody has any issues
first.
>
> Any experience with NFS/RDMA?
Not yet, but I can try to test that next week.
Anna
>
>
> > Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
> > ---
> > fs/nfs/fs_context.c | 14 ++++++++++++++
> > fs/nfs/nfs4client.c | 3 +++
> > include/linux/nfs_fs_sb.h | 1 +
> > 3 files changed, 18 insertions(+)
> >
> > diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> > index 0247dcb7b316..82ba07c7c1ce 100644
> > --- a/fs/nfs/fs_context.c
> > +++ b/fs/nfs/fs_context.c
> > @@ -64,6 +64,7 @@ enum nfs_param {
> > Opt_proto,
> > Opt_rdirplus,
> > Opt_rdma,
> > + Opt_readplus,
> > Opt_resvport,
> > Opt_retrans,
> > Opt_retry,
> > @@ -120,6 +121,7 @@ static const struct fs_parameter_spec nfs_param_specs[]
> > = {
> > fsparam_string("proto", Opt_proto),
> > fsparam_flag_no("rdirplus", Opt_rdirplus),
> > fsparam_flag ("rdma", Opt_rdma),
> > + fsparam_flag_no("readplus", Opt_readplus),
> > fsparam_flag_no("resvport", Opt_resvport),
> > fsparam_u32 ("retrans", Opt_retrans),
> > fsparam_string("retry", Opt_retry),
> > @@ -555,6 +557,12 @@ static int nfs_fs_context_parse_param(struct fs_context
> > *fc,
> > else
> > ctx->options |= NFS_OPTION_MIGRATION;
> > break;
> > + case Opt_readplus:
> > + if (result.negated)
> > + ctx->options |= NFS_OPTION_NO_READ_PLUS;
> > + else
> > + ctx->options &= ~NFS_OPTION_NO_READ_PLUS;
> > + break;
> >
> > /*
> > * options that take numeric values
> > @@ -1176,6 +1184,10 @@ static int nfs_fs_context_validate(struct fs_context
> > *fc)
> > (ctx->version != 4 || ctx->minorversion != 0))
> > goto out_migration_misuse;
> >
> > + if (ctx->options & NFS_OPTION_NO_READ_PLUS &&
> > + (ctx->version != 4 || ctx->minorversion < 2))
> > + goto out_noreadplus_misuse;
> > +
> > /* Verify that any proto=/mountproto= options match the address
> > * families in the addr=/mountaddr= options.
> > */
> > @@ -1254,6 +1266,8 @@ static int nfs_fs_context_validate(struct fs_context
> > *fc)
> > ctx->version, ctx->minorversion);
> > out_migration_misuse:
> > return nfs_invalf(fc, "NFS: 'Migration' not supported for this NFS
> > version");
> > +out_noreadplus_misuse:
> > + return nfs_invalf(fc, "NFS: 'noreadplus' not supported for this NFS
> > version\n");
> > out_version_unavailable:
> > nfs_errorf(fc, "NFS: Version unavailable");
> > return ret;
> > diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
> > index 0cd767e5c977..868dc3c36ba1 100644
> > --- a/fs/nfs/nfs4client.c
> > +++ b/fs/nfs/nfs4client.c
> > @@ -1016,6 +1016,9 @@ static int nfs4_server_common_setup(struct nfs_server
> > *server,
> > server->caps |= server->nfs_client->cl_mvops->init_caps;
> > if (server->flags & NFS_MOUNT_NORDIRPLUS)
> > server->caps &= ~NFS_CAP_READDIRPLUS;
> > + if (server->options & NFS_OPTION_NO_READ_PLUS)
> > + server->caps &= ~NFS_CAP_READ_PLUS;
> > +
> > /*
> > * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
> > * authentication.
> > diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> > index 11248c5a7b24..360e70c7bbb6 100644
> > --- a/include/linux/nfs_fs_sb.h
> > +++ b/include/linux/nfs_fs_sb.h
> > @@ -172,6 +172,7 @@ struct nfs_server {
> > unsigned int clone_blksize; /* granularity of a CLONE
> > operation */
> > #define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */
> > #define NFS_OPTION_MIGRATION 0x00000002 /* - NFSv4 migration enabled
> > */
> > +#define NFS_OPTION_NO_READ_PLUS 0x00000004 /* - NFSv4.2 READ_PLUS
> > enabled */
> >
> > struct nfs_fsid fsid;
> > __u64 maxfilesize; /* maximum file size */
> > --
> > 2.24.1
> >
>
> --
> Chuck Lever
>
>
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 7/7] NFS: Add a mount option for READ_PLUS
2020-01-10 22:43 ` Schumaker, Anna
@ 2020-01-10 22:45 ` Chuck Lever
0 siblings, 0 replies; 11+ messages in thread
From: Chuck Lever @ 2020-01-10 22:45 UTC (permalink / raw)
To: Anna Schumaker; +Cc: Linux NFS Mailing List, Trond.Myklebust
> On Jan 10, 2020, at 5:43 PM, Schumaker, Anna <Anna.Schumaker@netapp.com> wrote:
>
> Hi Chuck,
>
> On Fri, 2020-01-10 at 17:41 -0500, Chuck Lever wrote:
>> NetApp Security WARNING: This is an external email. Do not click links or open
>> attachments unless you recognize the sender and know the content is safe.
>>
>>
>>
>>
>> Hi Anna-
>>
>>> On Jan 10, 2020, at 5:34 PM, schumaker.anna@gmail.com wrote:
>>>
>>> From: Anna Schumaker <Anna.Schumaker@Netapp.com>
>>>
>>> There are some workloads where READ_PLUS might end up hurting
>>> performance,
>>
>> Can you say more about this? Have you seen workloads that are
>> hurt by READ_PLUS? Nothing jumps out at me from the tables in
>> the cover letter.
>
> It's mostly something I've seen when using btrfs in a virtual machine (so
> probably not a wide use case, and it's possible I need to change something in my
> setup to get it working better).
>
>>
>>
>>> so let's be nice to users and provide a way to disable this
>>> operation similar to how READDIR_PLUS can be disabled.
>>
>> Does it make sense to hold off on a mount option until there
>> is evidence that there is no other way to work around such a
>> performance regression?
>>
>> - Attempt to address the regression directly
>> - Improve the heuristics about when READ_PLUS is used
>> - Document that dropping back to vers=4.1 will disable it
>
> Yeah, we could hold off on the patch for now and see if anybody has any issues
> first.
>
>>
>> Any experience with NFS/RDMA?
>
> Not yet, but I can try to test that next week.
Thanks for your response!
> Anna
>
>>
>>
>>> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
>>> ---
>>> fs/nfs/fs_context.c | 14 ++++++++++++++
>>> fs/nfs/nfs4client.c | 3 +++
>>> include/linux/nfs_fs_sb.h | 1 +
>>> 3 files changed, 18 insertions(+)
>>>
>>> diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
>>> index 0247dcb7b316..82ba07c7c1ce 100644
>>> --- a/fs/nfs/fs_context.c
>>> +++ b/fs/nfs/fs_context.c
>>> @@ -64,6 +64,7 @@ enum nfs_param {
>>> Opt_proto,
>>> Opt_rdirplus,
>>> Opt_rdma,
>>> + Opt_readplus,
>>> Opt_resvport,
>>> Opt_retrans,
>>> Opt_retry,
>>> @@ -120,6 +121,7 @@ static const struct fs_parameter_spec nfs_param_specs[]
>>> = {
>>> fsparam_string("proto", Opt_proto),
>>> fsparam_flag_no("rdirplus", Opt_rdirplus),
>>> fsparam_flag ("rdma", Opt_rdma),
>>> + fsparam_flag_no("readplus", Opt_readplus),
>>> fsparam_flag_no("resvport", Opt_resvport),
>>> fsparam_u32 ("retrans", Opt_retrans),
>>> fsparam_string("retry", Opt_retry),
>>> @@ -555,6 +557,12 @@ static int nfs_fs_context_parse_param(struct fs_context
>>> *fc,
>>> else
>>> ctx->options |= NFS_OPTION_MIGRATION;
>>> break;
>>> + case Opt_readplus:
>>> + if (result.negated)
>>> + ctx->options |= NFS_OPTION_NO_READ_PLUS;
>>> + else
>>> + ctx->options &= ~NFS_OPTION_NO_READ_PLUS;
>>> + break;
>>>
>>> /*
>>> * options that take numeric values
>>> @@ -1176,6 +1184,10 @@ static int nfs_fs_context_validate(struct fs_context
>>> *fc)
>>> (ctx->version != 4 || ctx->minorversion != 0))
>>> goto out_migration_misuse;
>>>
>>> + if (ctx->options & NFS_OPTION_NO_READ_PLUS &&
>>> + (ctx->version != 4 || ctx->minorversion < 2))
>>> + goto out_noreadplus_misuse;
>>> +
>>> /* Verify that any proto=/mountproto= options match the address
>>> * families in the addr=/mountaddr= options.
>>> */
>>> @@ -1254,6 +1266,8 @@ static int nfs_fs_context_validate(struct fs_context
>>> *fc)
>>> ctx->version, ctx->minorversion);
>>> out_migration_misuse:
>>> return nfs_invalf(fc, "NFS: 'Migration' not supported for this NFS
>>> version");
>>> +out_noreadplus_misuse:
>>> + return nfs_invalf(fc, "NFS: 'noreadplus' not supported for this NFS
>>> version\n");
>>> out_version_unavailable:
>>> nfs_errorf(fc, "NFS: Version unavailable");
>>> return ret;
>>> diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
>>> index 0cd767e5c977..868dc3c36ba1 100644
>>> --- a/fs/nfs/nfs4client.c
>>> +++ b/fs/nfs/nfs4client.c
>>> @@ -1016,6 +1016,9 @@ static int nfs4_server_common_setup(struct nfs_server
>>> *server,
>>> server->caps |= server->nfs_client->cl_mvops->init_caps;
>>> if (server->flags & NFS_MOUNT_NORDIRPLUS)
>>> server->caps &= ~NFS_CAP_READDIRPLUS;
>>> + if (server->options & NFS_OPTION_NO_READ_PLUS)
>>> + server->caps &= ~NFS_CAP_READ_PLUS;
>>> +
>>> /*
>>> * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
>>> * authentication.
>>> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
>>> index 11248c5a7b24..360e70c7bbb6 100644
>>> --- a/include/linux/nfs_fs_sb.h
>>> +++ b/include/linux/nfs_fs_sb.h
>>> @@ -172,6 +172,7 @@ struct nfs_server {
>>> unsigned int clone_blksize; /* granularity of a CLONE
>>> operation */
>>> #define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */
>>> #define NFS_OPTION_MIGRATION 0x00000002 /* - NFSv4 migration enabled
>>> */
>>> +#define NFS_OPTION_NO_READ_PLUS 0x00000004 /* - NFSv4.2 READ_PLUS
>>> enabled */
>>>
>>> struct nfs_fsid fsid;
>>> __u64 maxfilesize; /* maximum file size */
>>> --
>>> 2.24.1
>>>
>>
>> --
>> Chuck Lever
>>
>>
>>
--
Chuck Lever
^ permalink raw reply [flat|nested] 11+ messages in thread