All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v6 00/10] NFSD support for asynchronous COPY
@ 2017-10-24 17:47 Olga Kornievskaia
  2017-10-24 17:47 ` [PATCH v6 01/10] NFSD CB_OFFLOAD xdr Olga Kornievskaia
                   ` (10 more replies)
  0 siblings, 11 replies; 48+ messages in thread
From: Olga Kornievskaia @ 2017-10-24 17:47 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

To do asynchronous copies, NFSD creates a new kthread to handle the request.
Upon receiving the COPY, it generates a unique copy stateid (stored in a
global list for keeping track of state for OFFLOAD_STATUS to be queried by),
starts the thread, and replies back to the client. nfsd4_copy arguments that
are allocated on the stack are copied for the kthread.

For the async copy handler, copy is done in the loop for the requested
number of bytes. If an error is encountered and partial copy has happened,
a successful partial copy is returned in the CB_OFFLOAD. vfs_copy_file_range
is called with 4MB chunks for both async and sync, allowing for 4MB
granularity of OFFLOAD_STATUS query. Once copy is done, the results are
queued for the callback workqueue and sent via CB_OFFLOAD.

When the server receives an OFFLOAD_CANCEL, it will find the kthread running
the copy and will send a SIGPENDING and kthread_stop(); this will interrupt
the ongoing do_splice(), and once vfs returns we are choosing not to send
the CB_OFFLOAD back to the client.

When the server receives an OFFLOAD_STATUS, it will find the kthread running
the copy and will locate within the copy state the current number of bytes
copied so far.

v6:
1. if an error occurred and partial bytes were copied, don't return the error
and instead return the successful partial copy.
2. OFFLOAD_STATUS locates the on-going copy and looks up the number of bytes
copied so far.

Olga Kornievskaia (10):
  NFSD CB_OFFLOAD xdr
  NFSD OFFLOAD_STATUS xdr
  NFSD OFFLOAD_CANCEL xdr
  NFSD xdr callback stateid in async COPY reply
  NFSD first draft of async copy
  NFSD return nfs4_stid in nfs4_preprocess_stateid_op
  NFSD create new stateid for async copy
  NFSD handle OFFLOAD_CANCEL op
  NFSD support OFFLOAD_STATUS
  NFSD stop queued async copies on client shutdown

 fs/nfsd/netns.h        |   8 ++
 fs/nfsd/nfs4callback.c |  97 +++++++++++++++
 fs/nfsd/nfs4proc.c     | 316 ++++++++++++++++++++++++++++++++++++++++++++-----
 fs/nfsd/nfs4state.c    |  77 +++++++++++-
 fs/nfsd/nfs4xdr.c      |  50 ++++++--
 fs/nfsd/nfsctl.c       |   1 +
 fs/nfsd/state.h        |  21 +++-
 fs/nfsd/xdr4.h         |  28 +++++
 fs/nfsd/xdr4cb.h       |  10 ++
 9 files changed, 571 insertions(+), 37 deletions(-)

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH v6 01/10] NFSD CB_OFFLOAD xdr
  2017-10-24 17:47 [PATCH v6 00/10] NFSD support for asynchronous COPY Olga Kornievskaia
@ 2017-10-24 17:47 ` Olga Kornievskaia
  2018-01-25 16:43   ` J. Bruce Fields
  2017-10-24 17:47 ` [PATCH v6 02/10] NFSD OFFLOAD_STATUS xdr Olga Kornievskaia
                   ` (9 subsequent siblings)
  10 siblings, 1 reply; 48+ messages in thread
From: Olga Kornievskaia @ 2017-10-24 17:47 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
 fs/nfsd/nfs4callback.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/state.h        |  1 +
 fs/nfsd/xdr4.h         |  6 ++++
 fs/nfsd/xdr4cb.h       | 10 ++++++
 4 files changed, 114 insertions(+)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 49b0a9e..d12d914 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -39,6 +39,7 @@
 #include "state.h"
 #include "netns.h"
 #include "xdr4cb.h"
+#include "xdr4.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
@@ -105,6 +106,7 @@ enum nfs_cb_opnum4 {
 	OP_CB_WANTS_CANCELLED		= 12,
 	OP_CB_NOTIFY_LOCK		= 13,
 	OP_CB_NOTIFY_DEVICEID		= 14,
+	OP_CB_OFFLOAD			= 15,
 	OP_CB_ILLEGAL			= 10044
 };
 
@@ -683,6 +685,100 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
 }
 
 /*
+ * struct write_response4 {
+ *	stateid4	wr_callback_id<1>;
+ *	length4		wr_count;
+ *	stable_how4	wr_committed;
+ *	verifier4	wr_writeverf;
+ * };
+ * union offload_info4 switch (nfsstat4 coa_status) {
+ *	case NFS4_OK:
+ *		write_response4	coa_resok4;
+ *	default:
+ *	length4		coa_bytes_copied;
+ * };
+ * struct CB_OFFLOAD4args {
+ *	nfs_fh4		coa_fh;
+ *	stateid4	coa_stateid;
+ *	offload_info4	coa_offload_info;
+ * };
+ */
+static void encode_offload_info4(struct xdr_stream *xdr,
+				 __be32 nfserr,
+				 const struct nfsd4_copy *cp)
+{
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 4);
+	*p++ = nfserr;
+	if (!nfserr) {
+		p = xdr_reserve_space(xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE);
+		p = xdr_encode_empty_array(p);
+		p = xdr_encode_hyper(p, cp->cp_res.wr_bytes_written);
+		*p++ = cpu_to_be32(cp->cp_res.wr_stable_how);
+		p = xdr_encode_opaque_fixed(p, cp->cp_res.wr_verifier.data,
+					    NFS4_VERIFIER_SIZE);
+	} else {
+		p = xdr_reserve_space(xdr, 8);
+		p = xdr_encode_hyper(p, cp->cp_res.wr_bytes_written);
+	}
+}
+
+static void encode_cb_offload4args(struct xdr_stream *xdr,
+				   __be32 nfserr,
+				   const struct knfsd_fh *fh,
+				   const struct nfsd4_copy *cp,
+				   struct nfs4_cb_compound_hdr *hdr)
+{
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 4);
+	*p++ = cpu_to_be32(OP_CB_OFFLOAD);
+	encode_nfs_fh4(xdr, fh);
+	encode_stateid4(xdr, &cp->cp_res.cb_stateid);
+	encode_offload_info4(xdr, nfserr, cp);
+
+	hdr->nops++;
+}
+
+static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const void *data)
+{
+	const struct nfsd4_callback *cb = data;
+	const struct nfsd4_copy *cp =
+		container_of(cb, struct nfsd4_copy, cp_cb);
+	struct nfs4_cb_compound_hdr hdr = {
+		.ident = 0,
+		.minorversion = cb->cb_clp->cl_minorversion,
+	};
+
+	encode_cb_compound4args(xdr, &hdr);
+	encode_cb_sequence4args(xdr, cb, &hdr);
+	encode_cb_offload4args(xdr, cp->nfserr, &cp->fh, cp, &hdr);
+	encode_cb_nops(&hdr);
+}
+
+static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp,
+				   struct xdr_stream *xdr,
+				   void *data)
+{
+	struct nfsd4_callback *cb = data;
+	struct nfs4_cb_compound_hdr hdr;
+	int status;
+
+	status = decode_cb_compound4res(xdr, &hdr);
+	if (unlikely(status))
+		return status;
+
+	if (cb) {
+		status = decode_cb_sequence4res(xdr, cb);
+		if (unlikely(status || cb->cb_seq_status))
+			return status;
+	}
+	return decode_cb_op_status(xdr, OP_CB_OFFLOAD, &cb->cb_status);
+}
+/*
  * RPC procedure tables
  */
 #define PROC(proc, call, argtype, restype)				\
@@ -703,6 +799,7 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
 	PROC(CB_LAYOUT,	COMPOUND,	cb_layout,	cb_layout),
 #endif
 	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
+	PROC(CB_OFFLOAD,	COMPOUND,	cb_offload,	cb_offload),
 };
 
 static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 005c911..f8b0210 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -570,6 +570,7 @@ enum nfsd4_cb_op {
 	NFSPROC4_CLNT_CB_NULL = 0,
 	NFSPROC4_CLNT_CB_RECALL,
 	NFSPROC4_CLNT_CB_LAYOUT,
+	NFSPROC4_CLNT_CB_OFFLOAD,
 	NFSPROC4_CLNT_CB_SEQUENCE,
 	NFSPROC4_CLNT_CB_NOTIFY_LOCK,
 };
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 1e4edbf..4ac2676 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -507,6 +507,7 @@ struct nfsd42_write_res {
 	u64			wr_bytes_written;
 	u32			wr_stable_how;
 	nfs4_verifier		wr_verifier;
+	stateid_t		cb_stateid;
 };
 
 struct nfsd4_copy {
@@ -523,6 +524,11 @@ struct nfsd4_copy {
 
 	/* response */
 	struct nfsd42_write_res	cp_res;
+
+	/* for cb_offload */
+	struct nfsd4_callback	cp_cb;
+	__be32			nfserr;
+	struct knfsd_fh		fh;
 };
 
 struct nfsd4_seek {
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index 49b719d..7e39913 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -37,3 +37,13 @@
 #define NFS4_dec_cb_notify_lock_sz	(cb_compound_dec_hdr_sz  +      \
 					cb_sequence_dec_sz +            \
 					op_dec_sz)
+#define enc_cb_offload_info_sz		(1 + 1 + 2 + 1 +		\
+					XDR_QUADLEN(NFS4_VERIFIER_SIZE))
+#define NFS4_enc_cb_offload_sz		(cb_compound_enc_hdr_sz +       \
+					cb_sequence_enc_sz +            \
+					enc_nfs4_fh_sz +		\
+					enc_stateid_sz +		\
+					enc_cb_offload_info_sz)
+#define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz  +      \
+					cb_sequence_dec_sz +            \
+					op_dec_sz)
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v6 02/10] NFSD OFFLOAD_STATUS xdr
  2017-10-24 17:47 [PATCH v6 00/10] NFSD support for asynchronous COPY Olga Kornievskaia
  2017-10-24 17:47 ` [PATCH v6 01/10] NFSD CB_OFFLOAD xdr Olga Kornievskaia
@ 2017-10-24 17:47 ` Olga Kornievskaia
  2017-10-24 17:47 ` [PATCH v6 03/10] NFSD OFFLOAD_CANCEL xdr Olga Kornievskaia
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 48+ messages in thread
From: Olga Kornievskaia @ 2017-10-24 17:47 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
 fs/nfsd/nfs4proc.c | 20 ++++++++++++++++++++
 fs/nfsd/nfs4xdr.c  | 27 +++++++++++++++++++++++++--
 fs/nfsd/xdr4.h     | 10 ++++++++++
 3 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 8487486..928f971 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1149,6 +1149,13 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
 	fput(file);
 	return status;
 }
+static __be32
+nfsd4_offload_status(struct svc_rqst *rqstp,
+		     struct nfsd4_compound_state *cstate,
+		     union nfsd4_op_u *u)
+{
+	return nfserr_notsupp;
+}
 
 static __be32
 nfsd4_allocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
@@ -2046,6 +2053,14 @@ static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 		1 /* cr_synchronous */) * sizeof(__be32);
 }
 
+static inline u32 nfsd4_offload_status_rsize(struct svc_rqst *rqstp,
+					     struct nfsd4_op *op)
+{
+	return (op_encode_hdr_size +
+		2 /* osr_count */ +
+		1 /* osr_complete<1> optional 0 for now */) * sizeof(__be32);
+}
+
 #ifdef CONFIG_NFSD_PNFS
 static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
@@ -2459,6 +2474,11 @@ static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 		.op_name = "OP_SEEK",
 		.op_rsize_bop = nfsd4_seek_rsize,
 	},
+	[OP_OFFLOAD_STATUS] = {
+		.op_func = nfsd4_offload_status,
+		.op_name = "OP_OFFLOAD_STATUS",
+		.op_rsize_bop = nfsd4_offload_status_rsize,
+	},
 };
 
 /**
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2c61c6b..f8111bb 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1768,6 +1768,13 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str
 }
 
 static __be32
+nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp,
+			    struct nfsd4_offload_status *os)
+{
+	return nfsd4_decode_stateid(argp, &os->stateid);
+}
+
+static __be32
 nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
 {
 	DECODE_HEAD;
@@ -1874,7 +1881,7 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str
 	[OP_LAYOUTERROR]	= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_LAYOUTSTATS]	= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_OFFLOAD_CANCEL]	= (nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_OFFLOAD_STATUS]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_OFFLOAD_STATUS]	= (nfsd4_dec)nfsd4_decode_offload_status,
 	[OP_READ_PLUS]		= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_SEEK]		= (nfsd4_dec)nfsd4_decode_seek,
 	[OP_WRITE_SAME]		= (nfsd4_dec)nfsd4_decode_notsupp,
@@ -4216,6 +4223,22 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 }
 
 static __be32
+nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
+			    struct nfsd4_offload_status *os)
+{
+	struct xdr_stream *xdr = &resp->xdr;
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 8 + 4);
+	if (!p)
+		return nfserr_resource;
+	p = xdr_encode_hyper(p, os->count);
+	*p++ = cpu_to_be32(0);
+
+	return nfserr;
+}
+
+static __be32
 nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
 		  struct nfsd4_seek *seek)
 {
@@ -4318,7 +4341,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 	[OP_LAYOUTERROR]	= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_LAYOUTSTATS]	= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_OFFLOAD_CANCEL]	= (nfsd4_enc)nfsd4_encode_noop,
-	[OP_OFFLOAD_STATUS]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_OFFLOAD_STATUS]	= (nfsd4_enc)nfsd4_encode_offload_status,
 	[OP_READ_PLUS]		= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_SEEK]		= (nfsd4_enc)nfsd4_encode_seek,
 	[OP_WRITE_SAME]		= (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 4ac2676..9b0c099 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -542,6 +542,15 @@ struct nfsd4_seek {
 	loff_t		seek_pos;
 };
 
+struct nfsd4_offload_status {
+	/* request */
+	stateid_t	stateid;
+
+	/* response */
+	u64		count;
+	u32		status;
+};
+
 struct nfsd4_op {
 	int					opnum;
 	const struct nfsd4_operation *		opdesc;
@@ -600,6 +609,7 @@ struct nfsd4_op {
 		struct nfsd4_fallocate		deallocate;
 		struct nfsd4_clone		clone;
 		struct nfsd4_copy		copy;
+		struct nfsd4_offload_status	offload_status;
 		struct nfsd4_seek		seek;
 	} u;
 	struct nfs4_replay *			replay;
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v6 03/10] NFSD OFFLOAD_CANCEL xdr
  2017-10-24 17:47 [PATCH v6 00/10] NFSD support for asynchronous COPY Olga Kornievskaia
  2017-10-24 17:47 ` [PATCH v6 01/10] NFSD CB_OFFLOAD xdr Olga Kornievskaia
  2017-10-24 17:47 ` [PATCH v6 02/10] NFSD OFFLOAD_STATUS xdr Olga Kornievskaia
@ 2017-10-24 17:47 ` Olga Kornievskaia
  2017-10-24 17:47 ` [PATCH v6 04/10] NFSD xdr callback stateid in async COPY reply Olga Kornievskaia
                   ` (7 subsequent siblings)
  10 siblings, 0 replies; 48+ messages in thread
From: Olga Kornievskaia @ 2017-10-24 17:47 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
 fs/nfsd/nfs4proc.c | 14 ++++++++++++++
 fs/nfsd/nfs4xdr.c  |  2 +-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 928f971..cb6e3ea 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1128,6 +1128,14 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
 }
 
 static __be32
+nfsd4_offload_cancel(struct svc_rqst *rqstp,
+		     struct nfsd4_compound_state *cstate,
+		     union nfsd4_op_u *u)
+{
+	return 0;
+}
+
+static __be32
 nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		struct nfsd4_fallocate *fallocate, int flags)
 {
@@ -2479,6 +2487,12 @@ static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 		.op_name = "OP_OFFLOAD_STATUS",
 		.op_rsize_bop = nfsd4_offload_status_rsize,
 	},
+	[OP_OFFLOAD_CANCEL] = {
+		.op_func = nfsd4_offload_cancel,
+		.op_flags = OP_MODIFIES_SOMETHING,
+		.op_name = "OP_OFFLOAD_CANCEL",
+		.op_rsize_bop = nfsd4_only_status_rsize,
+	},
 };
 
 /**
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index f8111bb..55db3c4 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1880,7 +1880,7 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str
 	[OP_IO_ADVISE]		= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_LAYOUTERROR]	= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_LAYOUTSTATS]	= (nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_OFFLOAD_CANCEL]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_OFFLOAD_CANCEL]	= (nfsd4_dec)nfsd4_decode_offload_status,
 	[OP_OFFLOAD_STATUS]	= (nfsd4_dec)nfsd4_decode_offload_status,
 	[OP_READ_PLUS]		= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_SEEK]		= (nfsd4_dec)nfsd4_decode_seek,
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v6 04/10] NFSD xdr callback stateid in async COPY reply
  2017-10-24 17:47 [PATCH v6 00/10] NFSD support for asynchronous COPY Olga Kornievskaia
                   ` (2 preceding siblings ...)
  2017-10-24 17:47 ` [PATCH v6 03/10] NFSD OFFLOAD_CANCEL xdr Olga Kornievskaia
@ 2017-10-24 17:47 ` Olga Kornievskaia
  2017-10-24 17:47 ` [PATCH v6 05/10] NFSD first draft of async copy Olga Kornievskaia
                   ` (6 subsequent siblings)
  10 siblings, 0 replies; 48+ messages in thread
From: Olga Kornievskaia @ 2017-10-24 17:47 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
 fs/nfsd/nfs4xdr.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 55db3c4..27c4509 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4190,15 +4190,27 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 #endif /* CONFIG_NFSD_PNFS */
 
 static __be32
-nfsd42_encode_write_res(struct nfsd4_compoundres *resp, struct nfsd42_write_res *write)
+nfsd42_encode_write_res(struct nfsd4_compoundres *resp,
+		struct nfsd42_write_res *write, bool sync)
 {
 	__be32 *p;
+	p = xdr_reserve_space(&resp->xdr, 4);
+	if (!p)
+		return nfserr_resource;
 
-	p = xdr_reserve_space(&resp->xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE);
+	if (sync)
+		*p++ = cpu_to_be32(0);
+	else {
+		__be32 nfserr;
+		*p++ = cpu_to_be32(1);
+		nfserr = nfsd4_encode_stateid(&resp->xdr, &write->cb_stateid);
+		if (nfserr)
+			return nfserr;
+	}
+	p = xdr_reserve_space(&resp->xdr, 8 + 4 + NFS4_VERIFIER_SIZE);
 	if (!p)
 		return nfserr_resource;
 
-	*p++ = cpu_to_be32(0);
 	p = xdr_encode_hyper(p, write->wr_bytes_written);
 	*p++ = cpu_to_be32(write->wr_stable_how);
 	p = xdr_encode_opaque_fixed(p, write->wr_verifier.data,
@@ -4212,7 +4224,8 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 {
 	__be32 *p;
 
-	nfserr = nfsd42_encode_write_res(resp, &copy->cp_res);
+	nfserr = nfsd42_encode_write_res(resp, &copy->cp_res,
+			copy->cp_synchronous);
 	if (nfserr)
 		return nfserr;
 
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v6 05/10] NFSD first draft of async copy
  2017-10-24 17:47 [PATCH v6 00/10] NFSD support for asynchronous COPY Olga Kornievskaia
                   ` (3 preceding siblings ...)
  2017-10-24 17:47 ` [PATCH v6 04/10] NFSD xdr callback stateid in async COPY reply Olga Kornievskaia
@ 2017-10-24 17:47 ` Olga Kornievskaia
  2018-01-25 22:04   ` J. Bruce Fields
                     ` (2 more replies)
  2017-10-24 17:47 ` [PATCH v6 06/10] NFSD return nfs4_stid in nfs4_preprocess_stateid_op Olga Kornievskaia
                   ` (5 subsequent siblings)
  10 siblings, 3 replies; 48+ messages in thread
From: Olga Kornievskaia @ 2017-10-24 17:47 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

Upon receiving a request for async copy, create a new kthread.
If we get an asynchronous request, make sure to copy the needed
arguments/state from the stack before starting the copy. Then
start the thread and reply back to the client indicating copy
is asynchronous.

nfsd_copy_file_range() will copy in a loop over the total
number of bytes that need to be copied. In case a failure happens
in the middle, we ignore the error and return how much we
copied so far. Once done, create a work item for the callback
workqueue and send CB_OFFLOAD with the results.
---
 fs/nfsd/nfs4proc.c  | 172 ++++++++++++++++++++++++++++++++++++++++++++++------
 fs/nfsd/nfs4state.c |   2 +
 fs/nfsd/state.h     |   2 +
 fs/nfsd/xdr4.h      |   9 +++
 4 files changed, 166 insertions(+), 19 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index cb6e3ea..bdccfa9 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -35,6 +35,7 @@
 #include <linux/file.h>
 #include <linux/falloc.h>
 #include <linux/slab.h>
+#include <linux/kthread.h>
 
 #include "idmap.h"
 #include "cache.h"
@@ -1092,39 +1093,172 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
 out:
 	return status;
 }
+static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
+{
+	struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb);
+
+	atomic_dec(&copy->cp_clp->cl_refcount);
+	kfree(copy);
+}
+
+static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
+				 struct rpc_task *task)
+{
+	return 1;
+}
+
+static const struct nfsd4_callback_ops nfsd4_cb_offload_ops = {
+	.release = nfsd4_cb_offload_release,
+	.done = nfsd4_cb_offload_done
+};
+
+static int nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
+{
+	memcpy(&copy->cp_res.cb_stateid, &copy->cp_dst_stateid,
+		sizeof(copy->cp_dst_stateid));
+	copy->cp_res.wr_stable_how = NFS_UNSTABLE;
+	copy->cp_consecutive = 1;
+	copy->cp_synchronous = sync;
+	gen_boot_verifier(&copy->cp_res.wr_verifier, copy->net);
+
+	return nfs_ok;
+}
+
+static int _nfsd_copy_file_range(struct nfsd4_copy *copy)
+{
+	ssize_t bytes_copied = 0;
+	size_t bytes_total = copy->cp_count;
+	u64 src_pos = copy->cp_src_pos;
+	u64 dst_pos = copy->cp_dst_pos;
+
+	do {
+		bytes_copied = nfsd_copy_file_range(copy->fh_src, src_pos,
+				copy->fh_dst, dst_pos, bytes_total);
+		if (bytes_copied <= 0)
+			break;
+		bytes_total -= bytes_copied;
+		copy->cp_res.wr_bytes_written += bytes_copied;
+		src_pos += bytes_copied;
+		dst_pos += bytes_copied;
+	} while (bytes_total > 0 && !copy->cp_synchronous);
+	return bytes_copied;
+}
+
+static int nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
+{
+	__be32 status;
+	ssize_t bytes;
+
+	bytes = _nfsd_copy_file_range(copy);
+	if (bytes < 0 && !copy->cp_res.wr_bytes_written)
+		status = nfserrno(bytes);
+	else
+		status = nfsd4_init_copy_res(copy, sync);
+
+	fput(copy->fh_src);
+	fput(copy->fh_dst);
+	return status;
+}
+
+static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
+{
+	memcpy(&dst->cp_src_stateid, &src->cp_src_stateid, sizeof(stateid_t));
+	memcpy(&dst->cp_dst_stateid, &src->cp_dst_stateid, sizeof(stateid_t));
+	dst->cp_src_pos = src->cp_src_pos;
+	dst->cp_dst_pos = src->cp_dst_pos;
+	dst->cp_count = src->cp_count;
+	dst->cp_consecutive = src->cp_consecutive;
+	dst->cp_synchronous = src->cp_synchronous;
+	memcpy(&dst->cp_res, &src->cp_res, sizeof(src->cp_res));
+	/* skipping nfsd4_callback */
+	memcpy(&dst->fh, &src->fh, sizeof(src->fh));
+	dst->net = src->net;
+	dst->cp_clp = src->cp_clp;
+	atomic_inc(&dst->cp_clp->cl_refcount);
+	dst->fh_dst = get_file(src->fh_dst);
+	dst->fh_src = get_file(src->fh_src);
+}
+
+static void cleanup_async_copy(struct nfsd4_copy *copy)
+{
+	fput(copy->fh_dst);
+	fput(copy->fh_src);
+	spin_lock(&copy->cp_clp->async_lock);
+	list_del(&copy->copies);
+	spin_unlock(&copy->cp_clp->async_lock);
+	atomic_dec(&copy->cp_clp->cl_refcount);
+	kfree(copy);
+}
+
+static int nfsd4_do_async_copy(void *data)
+{
+	struct nfsd4_copy *copy = (struct nfsd4_copy *)data;
+	struct nfsd4_copy *cb_copy;
+
+	copy->nfserr = nfsd4_do_copy(copy, 0);
+	cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
+	if (!cb_copy)
+		goto out;
+	memcpy(&cb_copy->cp_res, &copy->cp_res, sizeof(copy->cp_res));
+	cb_copy->cp_clp = copy->cp_clp;
+	atomic_inc(&cb_copy->cp_clp->cl_refcount);
+	cb_copy->nfserr = copy->nfserr;
+	memcpy(&cb_copy->fh, &copy->fh, sizeof(copy->fh));
+	nfsd4_init_cb(&cb_copy->cp_cb, cb_copy->cp_clp,
+			&nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD);
+	nfsd4_run_cb(&cb_copy->cp_cb);
+out:
+	cleanup_async_copy(copy);
+	return 0;
+}
 
 static __be32
 nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		union nfsd4_op_u *u)
 {
 	struct nfsd4_copy *copy = &u->copy;
-	struct file *src, *dst;
 	__be32 status;
-	ssize_t bytes;
+	struct nfsd4_copy *async_copy = NULL;
 
-	status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid, &src,
-				   &copy->cp_dst_stateid, &dst);
+	status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
+				   &copy->fh_src, &copy->cp_dst_stateid,
+				   &copy->fh_dst);
 	if (status)
 		goto out;
 
-	bytes = nfsd_copy_file_range(src, copy->cp_src_pos,
-			dst, copy->cp_dst_pos, copy->cp_count);
-
-	if (bytes < 0)
-		status = nfserrno(bytes);
-	else {
-		copy->cp_res.wr_bytes_written = bytes;
-		copy->cp_res.wr_stable_how = NFS_UNSTABLE;
-		copy->cp_consecutive = 1;
-		copy->cp_synchronous = 1;
-		gen_boot_verifier(&copy->cp_res.wr_verifier, SVC_NET(rqstp));
-		status = nfs_ok;
+	copy->cp_clp = cstate->clp;
+	memcpy(&copy->fh, &cstate->current_fh.fh_handle,
+		sizeof(struct knfsd_fh));
+	copy->net = SVC_NET(rqstp);
+	if (!copy->cp_synchronous) {
+		status = nfsd4_init_copy_res(copy, 0);
+		async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
+		if (!async_copy) {
+			status = nfserrno(-ENOMEM);
+			goto out;
+		}
+		dup_copy_fields(copy, async_copy);
+		memcpy(&copy->cp_res.cb_stateid, &copy->cp_dst_stateid,
+			sizeof(copy->cp_dst_stateid));
+		spin_lock(&async_copy->cp_clp->async_lock);
+		list_add(&async_copy->copies,
+				&async_copy->cp_clp->async_copies);
+		spin_unlock(&async_copy->cp_clp->async_lock);
+		async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
+				async_copy, "%s", "copy thread");
+		if (IS_ERR(async_copy->copy_task)) {
+			status = PTR_ERR(async_copy->copy_task);
+			goto out_err_dec;
+		}
+		wake_up_process(async_copy->copy_task);
+	} else {
+		status = nfsd4_do_copy(copy, 1);
 	}
-
-	fput(src);
-	fput(dst);
 out:
 	return status;
+out_err_dec:
+	cleanup_async_copy(async_copy);
+	goto out;
 }
 
 static __be32
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0c04f81..d7767a1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1774,6 +1774,8 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
 #ifdef CONFIG_NFSD_PNFS
 	INIT_LIST_HEAD(&clp->cl_lo_states);
 #endif
+	INIT_LIST_HEAD(&clp->async_copies);
+	spin_lock_init(&clp->async_lock);
 	spin_lock_init(&clp->cl_lock);
 	rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
 	return clp;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index f8b0210..9189062 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -352,6 +352,8 @@ struct nfs4_client {
 	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
 						/* wait here for slots */
 	struct net		*net;
+	struct list_head	async_copies;	/* list of async copies */
+	spinlock_t		async_lock;	/* lock for async copies */
 };
 
 /* struct nfs4_client_reset
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 9b0c099..0a19954 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -529,6 +529,15 @@ struct nfsd4_copy {
 	struct nfsd4_callback	cp_cb;
 	__be32			nfserr;
 	struct knfsd_fh		fh;
+
+	struct nfs4_client      *cp_clp;
+
+	struct file             *fh_src;
+	struct file             *fh_dst;
+	struct net              *net;
+
+	struct list_head	copies;
+	struct task_struct	*copy_task;
 };
 
 struct nfsd4_seek {
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v6 06/10] NFSD return nfs4_stid in nfs4_preprocess_stateid_op
  2017-10-24 17:47 [PATCH v6 00/10] NFSD support for asynchronous COPY Olga Kornievskaia
                   ` (4 preceding siblings ...)
  2017-10-24 17:47 ` [PATCH v6 05/10] NFSD first draft of async copy Olga Kornievskaia
@ 2017-10-24 17:47 ` Olga Kornievskaia
  2017-10-24 17:47 ` [PATCH v6 07/10] NFSD create new stateid for async copy Olga Kornievskaia
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 48+ messages in thread
From: Olga Kornievskaia @ 2017-10-24 17:47 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

Needed for copy to add nfs4_cp_state to the nfs4_stid.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
 fs/nfsd/nfs4proc.c  | 17 ++++++++++-------
 fs/nfsd/nfs4state.c |  8 ++++++--
 fs/nfsd/state.h     |  3 ++-
 3 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index bdccfa9..a020533 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -773,7 +773,8 @@ static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh)
 	/* check stateid */
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 					&read->rd_stateid, RD_STATE,
-					&read->rd_filp, &read->rd_tmp_file);
+					&read->rd_filp, &read->rd_tmp_file,
+					NULL);
 	if (status) {
 		dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
 		goto out;
@@ -946,7 +947,7 @@ static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh)
 	if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
 		status = nfs4_preprocess_stateid_op(rqstp, cstate,
 				&cstate->current_fh, &setattr->sa_stateid,
-				WR_STATE, NULL, NULL);
+				WR_STATE, NULL, NULL, NULL);
 		if (status) {
 			dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
 			return status;
@@ -1012,7 +1013,7 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
 		return nfserr_inval;
 
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
-						stateid, WR_STATE, &filp, NULL);
+					stateid, WR_STATE, &filp, NULL, NULL);
 	if (status) {
 		dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
 		return status;
@@ -1043,14 +1044,16 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
 	__be32 status;
 
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
-					    src_stateid, RD_STATE, src, NULL);
+					    src_stateid, RD_STATE, src, NULL,
+					    NULL);
 	if (status) {
 		dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
 		goto out;
 	}
 
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
-					    dst_stateid, WR_STATE, dst, NULL);
+					    dst_stateid, WR_STATE, dst, NULL,
+					    NULL);
 	if (status) {
 		dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
 		goto out_put_src;
@@ -1278,7 +1281,7 @@ static int nfsd4_do_async_copy(void *data)
 
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 					    &fallocate->falloc_stateid,
-					    WR_STATE, &file, NULL);
+					    WR_STATE, &file, NULL, NULL);
 	if (status != nfs_ok) {
 		dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
 		return status;
@@ -1325,7 +1328,7 @@ static int nfsd4_do_async_copy(void *data)
 
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 					    &seek->seek_stateid,
-					    RD_STATE, &file, NULL);
+					    RD_STATE, &file, NULL, NULL);
 	if (status) {
 		dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
 		return status;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d7767a1..af40762 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4947,7 +4947,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
 __be32
 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
-		stateid_t *stateid, int flags, struct file **filpp, bool *tmp_file)
+		stateid_t *stateid, int flags, struct file **filpp,
+		bool *tmp_file, struct nfs4_stid **cstid)
 {
 	struct inode *ino = d_inode(fhp->fh_dentry);
 	struct net *net = SVC_NET(rqstp);
@@ -4998,8 +4999,11 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
 	if (!status && filpp)
 		status = nfs4_check_file(rqstp, fhp, s, filpp, tmp_file, flags);
 out:
-	if (s)
+	if (s) {
+		if (!status && cstid)
+			*cstid = s;
 		nfs4_put_stid(s);
+	}
 	return status;
 }
 
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 9189062..d58507b 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -602,7 +602,8 @@ struct nfsd4_blocked_lock {
 
 extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
-		stateid_t *stateid, int flags, struct file **filp, bool *tmp_file);
+		stateid_t *stateid, int flags, struct file **filp,
+		bool *tmp_file, struct nfs4_stid **cstid);
 __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
 		     stateid_t *stateid, unsigned char typemask,
 		     struct nfs4_stid **s, struct nfsd_net *nn);
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v6 07/10] NFSD create new stateid for async copy
  2017-10-24 17:47 [PATCH v6 00/10] NFSD support for asynchronous COPY Olga Kornievskaia
                   ` (5 preceding siblings ...)
  2017-10-24 17:47 ` [PATCH v6 06/10] NFSD return nfs4_stid in nfs4_preprocess_stateid_op Olga Kornievskaia
@ 2017-10-24 17:47 ` Olga Kornievskaia
  2018-01-26 21:37   ` J. Bruce Fields
  2018-01-26 21:59   ` J. Bruce Fields
  2017-10-24 17:47 ` [PATCH v6 08/10] NFSD handle OFFLOAD_CANCEL op Olga Kornievskaia
                   ` (3 subsequent siblings)
  10 siblings, 2 replies; 48+ messages in thread
From: Olga Kornievskaia @ 2017-10-24 17:47 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

Generate a new stateid to be used for the reply to the asynchronous
COPY (this would also be used later by COPY_NOTIFY as well).
Associate the stateid with the parent OPEN/LOCK/DELEG stateid,
so that it can be freed when the parent stateid is freed. However,
right now we are choosing to bind its lifetime to when the vfs copy
is done. This way we don't need to keep the nfsd_net structure for
the callback. The drawback is that the time the copy state information
is available for query by OFFLOAD_STATUS is slightly shorter.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
 fs/nfsd/netns.h     |  8 +++++++
 fs/nfsd/nfs4proc.c  | 32 +++++++++++++++++++-------
 fs/nfsd/nfs4state.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/nfsctl.c    |  1 +
 fs/nfsd/state.h     | 14 ++++++++++++
 fs/nfsd/xdr4.h      |  2 ++
 6 files changed, 115 insertions(+), 8 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 3714231..2c88a95 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -119,6 +119,14 @@ struct nfsd_net {
 	u32 clverifier_counter;
 
 	struct svc_serv *nfsd_serv;
+
+	/*
+	 * clientid and stateid data for construction of net unique COPY
+	 * stateids.
+	 */
+	u32		s2s_cp_cl_id;
+	struct idr	s2s_cp_stateids;
+	spinlock_t	s2s_cp_lock;
 };
 
 /* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a020533..6876080 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1039,7 +1039,8 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
 static __be32
 nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		  stateid_t *src_stateid, struct file **src,
-		  stateid_t *dst_stateid, struct file **dst)
+		  stateid_t *dst_stateid, struct file **dst,
+		  struct nfs4_stid **stid)
 {
 	__be32 status;
 
@@ -1053,7 +1054,7 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
 
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 					    dst_stateid, WR_STATE, dst, NULL,
-					    NULL);
+					    stid);
 	if (status) {
 		dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
 		goto out_put_src;
@@ -1084,7 +1085,7 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
 	__be32 status;
 
 	status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
-				   &clone->cl_dst_stateid, &dst);
+				   &clone->cl_dst_stateid, &dst, NULL);
 	if (status)
 		goto out;
 
@@ -1117,8 +1118,6 @@ static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
 
 static int nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
 {
-	memcpy(&copy->cp_res.cb_stateid, &copy->cp_dst_stateid,
-		sizeof(copy->cp_dst_stateid));
 	copy->cp_res.wr_stable_how = NFS_UNSTABLE;
 	copy->cp_consecutive = 1;
 	copy->cp_synchronous = sync;
@@ -1180,10 +1179,15 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
 	atomic_inc(&dst->cp_clp->cl_refcount);
 	dst->fh_dst = get_file(src->fh_dst);
 	dst->fh_src = get_file(src->fh_src);
+	dst->stid = src->stid;
+	dst->cps = src->cps;
 }
 
 static void cleanup_async_copy(struct nfsd4_copy *copy)
 {
+	list_del(&copy->cps->cp_list);
+	nfs4_free_cp_state(copy->cps);
+	nfs4_put_stid(copy->stid);
 	fput(copy->fh_dst);
 	fput(copy->fh_src);
 	spin_lock(&copy->cp_clp->async_lock);
@@ -1225,7 +1229,7 @@ static int nfsd4_do_async_copy(void *data)
 
 	status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
 				   &copy->fh_src, &copy->cp_dst_stateid,
-				   &copy->fh_dst);
+				   &copy->fh_dst, &copy->stid);
 	if (status)
 		goto out;
 
@@ -1234,15 +1238,27 @@ static int nfsd4_do_async_copy(void *data)
 		sizeof(struct knfsd_fh));
 	copy->net = SVC_NET(rqstp);
 	if (!copy->cp_synchronous) {
+		struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
 		status = nfsd4_init_copy_res(copy, 0);
 		async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
 		if (!async_copy) {
 			status = nfserrno(-ENOMEM);
 			goto out;
 		}
+		copy->cps = nfs4_alloc_init_cp_state(nn, copy->stid);
+		if (!copy->cps) {
+			status = nfserrno(-ENOMEM);
+			kfree(async_copy);
+			goto out;
+		}
+		/* take a reference on the parent stateid so it's not
+		 * not freed by the copy compound
+		 */
+		atomic_inc(&copy->stid->sc_count);
+		memcpy(&copy->cp_res.cb_stateid, &copy->cps->cp_stateid,
+			sizeof(copy->cps->cp_stateid));
 		dup_copy_fields(copy, async_copy);
-		memcpy(&copy->cp_res.cb_stateid, &copy->cp_dst_stateid,
-			sizeof(copy->cp_dst_stateid));
 		spin_lock(&async_copy->cp_clp->async_lock);
 		list_add(&async_copy->copies,
 				&async_copy->cp_clp->async_copies);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index af40762..f9151f2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -658,6 +658,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
 	/* Will be incremented before return to client: */
 	atomic_set(&stid->sc_count, 1);
 	spin_lock_init(&stid->sc_lock);
+	INIT_LIST_HEAD(&stid->sc_cp_list);
 
 	/*
 	 * It shouldn't be a problem to reuse an opaque stateid value.
@@ -674,6 +675,66 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
 	return NULL;
 }
 
+/*
+ * Create a unique stateid_t to represent each COPY. Hang the copy
+ * stateids off the OPEN/LOCK/DELEG stateid from the client open
+ * the source file.
+ */
+struct nfs4_cp_state *nfs4_alloc_init_cp_state(struct nfsd_net *nn,
+					       struct nfs4_stid *p_stid)
+{
+	struct nfs4_cp_state *cps;
+	int new_id;
+
+	cps = kzalloc(sizeof(struct nfs4_cp_state), GFP_KERNEL);
+	if (!cps)
+		return NULL;
+	idr_preload(GFP_KERNEL);
+	spin_lock(&nn->s2s_cp_lock);
+	new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, cps, 0, 0, GFP_NOWAIT);
+	spin_unlock(&nn->s2s_cp_lock);
+	idr_preload_end();
+	if (new_id < 0)
+		goto out_free;
+	cps->cp_stateid.si_opaque.so_id = new_id;
+	cps->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
+	cps->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
+	cps->cp_p_stid = p_stid;
+	INIT_LIST_HEAD(&cps->cp_list);
+	list_add(&cps->cp_list, &p_stid->sc_cp_list);
+
+	return cps;
+out_free:
+	kfree(cps);
+	return NULL;
+}
+
+void nfs4_free_cp_state(struct nfs4_cp_state *cps)
+{
+	struct nfsd_net *nn;
+
+	nn = net_generic(cps->cp_p_stid->sc_client->net, nfsd_net_id);
+	spin_lock(&nn->s2s_cp_lock);
+	idr_remove(&nn->s2s_cp_stateids, cps->cp_stateid.si_opaque.so_id);
+	spin_unlock(&nn->s2s_cp_lock);
+
+	kfree(cps);
+}
+
+static void nfs4_free_cp_statelist(struct nfs4_stid *stid)
+{
+	struct nfs4_cp_state *cps;
+
+	might_sleep();
+
+	while (!list_empty(&stid->sc_cp_list)) {
+		cps = list_first_entry(&stid->sc_cp_list, struct nfs4_cp_state,
+				       cp_list);
+		list_del(&cps->cp_list);
+		nfs4_free_cp_state(cps);
+	}
+}
+
 static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
 {
 	struct nfs4_stid *stid;
@@ -819,6 +880,9 @@ static void block_delegations(struct knfsd_fh *fh)
 	}
 	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
 	spin_unlock(&clp->cl_lock);
+
+	nfs4_free_cp_statelist(s);
+
 	s->sc_free(s);
 	if (fp)
 		put_nfs4_file(fp);
@@ -6954,6 +7018,8 @@ static int nfs4_state_create_net(struct net *net)
 	INIT_LIST_HEAD(&nn->close_lru);
 	INIT_LIST_HEAD(&nn->del_recall_lru);
 	spin_lock_init(&nn->client_lock);
+	spin_lock_init(&nn->s2s_cp_lock);
+	idr_init(&nn->s2s_cp_stateids);
 
 	spin_lock_init(&nn->blocked_locks_lock);
 	INIT_LIST_HEAD(&nn->blocked_locks_lru);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 6493df6..232270d 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1241,6 +1241,7 @@ static __net_init int nfsd_init_net(struct net *net)
 	nn->nfsd4_grace = 90;
 	nn->clverifier_counter = prandom_u32();
 	nn->clientid_counter = prandom_u32();
+	nn->s2s_cp_cl_id = nn->clientid_counter++;
 	return 0;
 
 out_idmap_error:
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index d58507b..b4de8ae 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -93,6 +93,7 @@ struct nfs4_stid {
 #define NFS4_REVOKED_DELEG_STID 16
 #define NFS4_CLOSED_DELEG_STID 32
 #define NFS4_LAYOUT_STID 64
+	struct list_head	sc_cp_list;
 	unsigned char		sc_type;
 	stateid_t		sc_stateid;
 	spinlock_t		sc_lock;
@@ -102,6 +103,17 @@ struct nfs4_stid {
 };
 
 /*
+ * Keep a list of stateids issued by the COPY, associate it with the
+ * parent OPEN/LOCK/DELEG stateid. Used for lookup by
+ * OFFLOAD_CANCEL and OFFLOAD_STATUS (as well as COPY_NOTIFY)
+ */
+struct nfs4_cp_state {
+	stateid_t		cp_stateid;
+	struct list_head	cp_list;	/* per parent nfs4_stid */
+	struct nfs4_stid        *cp_p_stid;     /* pointer to parent */
+};
+
+/*
  * Represents a delegation stateid. The nfs4_client holds references to these
  * and they are put when it is being destroyed or when the delegation is
  * returned by the client:
@@ -609,6 +621,8 @@ __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
 		     struct nfs4_stid **s, struct nfsd_net *nn);
 struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
 				  void (*sc_free)(struct nfs4_stid *));
+struct nfs4_cp_state *nfs4_alloc_init_cp_state(struct nfsd_net *nn, struct nfs4_stid *p_stid);
+void nfs4_free_cp_state(struct nfs4_cp_state *cps);
 void nfs4_unhash_stid(struct nfs4_stid *s);
 void nfs4_put_stid(struct nfs4_stid *s);
 void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 0a19954..27bac6a 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -535,6 +535,8 @@ struct nfsd4_copy {
 	struct file             *fh_src;
 	struct file             *fh_dst;
 	struct net              *net;
+	struct nfs4_stid        *stid;
+	struct nfs4_cp_state    *cps;
 
 	struct list_head	copies;
 	struct task_struct	*copy_task;
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v6 08/10] NFSD handle OFFLOAD_CANCEL op
  2017-10-24 17:47 [PATCH v6 00/10] NFSD support for asynchronous COPY Olga Kornievskaia
                   ` (6 preceding siblings ...)
  2017-10-24 17:47 ` [PATCH v6 07/10] NFSD create new stateid for async copy Olga Kornievskaia
@ 2017-10-24 17:47 ` Olga Kornievskaia
  2018-02-16 17:28   ` Olga Kornievskaia
  2017-10-24 17:47 ` [PATCH v6 09/10] NFSD support OFFLOAD_STATUS Olga Kornievskaia
                   ` (2 subsequent siblings)
  10 siblings, 1 reply; 48+ messages in thread
From: Olga Kornievskaia @ 2017-10-24 17:47 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

Upon receiving OFFLOAD_CANCEL, search the list of copy stateids;
if one is found, set the SIGPENDING signal so that do_splice stops
copying, and also send kthread_stop to the copy thread to stop it
and wait for it. Take a reference on the copy from the
offload_cancel thread so that it won't go away while we are
trying to process it. The server won't be sending CB_OFFLOAD to the
client since it received a cancel.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
 fs/nfsd/nfs4proc.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
 fs/nfsd/xdr4.h     |  1 +
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 6876080..3b0bb54 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1097,6 +1097,14 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
 out:
 	return status;
 }
+
+static void nfs4_put_copy(struct nfsd4_copy *copy)
+{
+	if (!atomic_dec_and_test(&copy->refcount))
+		return;
+	kfree(copy);
+}
+
 static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
 {
 	struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb);
@@ -1134,6 +1142,8 @@ static int _nfsd_copy_file_range(struct nfsd4_copy *copy)
 	u64 dst_pos = copy->cp_dst_pos;
 
 	do {
+		if (signalled() || kthread_should_stop())
+			return -1;
 		bytes_copied = nfsd_copy_file_range(copy->fh_src, src_pos,
 				copy->fh_dst, dst_pos, bytes_total);
 		if (bytes_copied <= 0)
@@ -1152,11 +1162,16 @@ static int nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
 	ssize_t bytes;
 
 	bytes = _nfsd_copy_file_range(copy);
+	if (signalled() || kthread_should_stop()) {
+		status = -1;
+		goto cleanup;
+	}
 	if (bytes < 0 && !copy->cp_res.wr_bytes_written)
 		status = nfserrno(bytes);
 	else
 		status = nfsd4_init_copy_res(copy, sync);
 
+cleanup:
 	fput(copy->fh_src);
 	fput(copy->fh_dst);
 	return status;
@@ -1194,7 +1209,7 @@ static void cleanup_async_copy(struct nfsd4_copy *copy)
 	list_del(&copy->copies);
 	spin_unlock(&copy->cp_clp->async_lock);
 	atomic_dec(&copy->cp_clp->cl_refcount);
-	kfree(copy);
+	nfs4_put_copy(copy);
 }
 
 static int nfsd4_do_async_copy(void *data)
@@ -1203,6 +1218,9 @@ static int nfsd4_do_async_copy(void *data)
 	struct nfsd4_copy *cb_copy;
 
 	copy->nfserr = nfsd4_do_copy(copy, 0);
+	if (signalled() || kthread_should_stop())
+		goto out;
+
 	cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
 	if (!cb_copy)
 		goto out;
@@ -1259,6 +1277,7 @@ static int nfsd4_do_async_copy(void *data)
 		memcpy(&copy->cp_res.cb_stateid, &copy->cps->cp_stateid,
 			sizeof(copy->cps->cp_stateid));
 		dup_copy_fields(copy, async_copy);
+		atomic_set(&async_copy->refcount, 1);
 		spin_lock(&async_copy->cp_clp->async_lock);
 		list_add(&async_copy->copies,
 				&async_copy->cp_clp->async_copies);
@@ -1285,7 +1304,30 @@ static int nfsd4_do_async_copy(void *data)
 		     struct nfsd4_compound_state *cstate,
 		     union nfsd4_op_u *u)
 {
-	return 0;
+	struct nfsd4_offload_status *os = &u->offload_status;
+	__be32 status = 0;
+	struct nfsd4_copy *copy;
+	bool found = false;
+	struct nfs4_client *clp = cstate->clp;
+
+	spin_lock(&clp->async_lock);
+	list_for_each_entry(copy, &clp->async_copies, copies) {
+		if (memcmp(&copy->cps->cp_stateid, &os->stateid,
+				NFS4_STATEID_SIZE))
+			continue;
+		found = true;
+		atomic_inc(&copy->refcount);
+		break;
+	}
+	spin_unlock(&clp->async_lock);
+	if (found) {
+		set_tsk_thread_flag(copy->copy_task, TIF_SIGPENDING);
+		kthread_stop(copy->copy_task);
+		nfs4_put_copy(copy);
+	} else
+		status = nfserr_bad_stateid;
+
+	return status;
 }
 
 static __be32
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 27bac6a..3127b58 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -540,6 +540,7 @@ struct nfsd4_copy {
 
 	struct list_head	copies;
 	struct task_struct	*copy_task;
+	atomic_t		refcount;
 };
 
 struct nfsd4_seek {
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v6 09/10] NFSD support OFFLOAD_STATUS
  2017-10-24 17:47 [PATCH v6 00/10] NFSD support for asynchronous COPY Olga Kornievskaia
                   ` (7 preceding siblings ...)
  2017-10-24 17:47 ` [PATCH v6 08/10] NFSD handle OFFLOAD_CANCEL op Olga Kornievskaia
@ 2017-10-24 17:47 ` Olga Kornievskaia
  2017-10-24 17:47 ` [PATCH v6 10/10] NFSD stop queued async copies on client shutdown Olga Kornievskaia
  2017-11-03 19:57 ` [PATCH v6 00/10] NFSD support for asynchronous COPY Olga Kornievskaia
  10 siblings, 0 replies; 48+ messages in thread
From: Olga Kornievskaia @ 2017-10-24 17:47 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

Search the list for the asynchronous copy based on the stateid,
then lookup the number of bytes copied so far.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
 fs/nfsd/nfs4proc.c | 44 ++++++++++++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 3b0bb54..b2f6549 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1299,28 +1299,36 @@ static int nfsd4_do_async_copy(void *data)
 	goto out;
 }
 
-static __be32
-nfsd4_offload_cancel(struct svc_rqst *rqstp,
-		     struct nfsd4_compound_state *cstate,
-		     union nfsd4_op_u *u)
+static struct nfsd4_copy *
+find_async_copy(struct nfs4_client *clp, struct nfsd4_offload_status *os)
 {
-	struct nfsd4_offload_status *os = &u->offload_status;
-	__be32 status = 0;
 	struct nfsd4_copy *copy;
-	bool found = false;
-	struct nfs4_client *clp = cstate->clp;
 
 	spin_lock(&clp->async_lock);
 	list_for_each_entry(copy, &clp->async_copies, copies) {
 		if (memcmp(&copy->cps->cp_stateid, &os->stateid,
 				NFS4_STATEID_SIZE))
 			continue;
-		found = true;
 		atomic_inc(&copy->refcount);
-		break;
+		spin_unlock(&clp->async_lock);
+		return copy;
 	}
 	spin_unlock(&clp->async_lock);
-	if (found) {
+	return NULL;
+}
+
+static __be32
+nfsd4_offload_cancel(struct svc_rqst *rqstp,
+		     struct nfsd4_compound_state *cstate,
+		     union nfsd4_op_u *u)
+{
+	struct nfsd4_offload_status *os = &u->offload_status;
+	__be32 status = 0;
+	struct nfsd4_copy *copy;
+	struct nfs4_client *clp = cstate->clp;
+
+	copy = find_async_copy(clp, os);
+	if (copy) {
 		set_tsk_thread_flag(copy->copy_task, TIF_SIGPENDING);
 		kthread_stop(copy->copy_task);
 		nfs4_put_copy(copy);
@@ -1357,7 +1365,19 @@ static int nfsd4_do_async_copy(void *data)
 		     struct nfsd4_compound_state *cstate,
 		     union nfsd4_op_u *u)
 {
-	return nfserr_notsupp;
+	struct nfsd4_offload_status *os = &u->offload_status;
+	__be32 status = 0;
+	struct nfsd4_copy *copy;
+	struct nfs4_client *clp = cstate->clp;
+
+	copy = find_async_copy(clp, os);
+	if (copy) {
+		os->count = copy->cp_res.wr_bytes_written;
+		nfs4_put_copy(copy);
+	} else
+		status = nfserr_bad_stateid;
+
+	return status;
 }
 
 static __be32
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v6 10/10] NFSD stop queued async copies on client shutdown
  2017-10-24 17:47 [PATCH v6 00/10] NFSD support for asynchronous COPY Olga Kornievskaia
                   ` (8 preceding siblings ...)
  2017-10-24 17:47 ` [PATCH v6 09/10] NFSD support OFFLOAD_STATUS Olga Kornievskaia
@ 2017-10-24 17:47 ` Olga Kornievskaia
  2018-01-25 22:22   ` J. Bruce Fields
  2017-11-03 19:57 ` [PATCH v6 00/10] NFSD support for asynchronous COPY Olga Kornievskaia
  10 siblings, 1 reply; 48+ messages in thread
From: Olga Kornievskaia @ 2017-10-24 17:47 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

If client is shutting down and there are still async copies going
on, then stop queued async copies.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
 fs/nfsd/nfs4proc.c  | 13 +++++++++++++
 fs/nfsd/nfs4state.c |  1 +
 fs/nfsd/state.h     |  1 +
 3 files changed, 15 insertions(+)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index b2f6549..34042ff6 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1105,6 +1105,19 @@ static void nfs4_put_copy(struct nfsd4_copy *copy)
 	kfree(copy);
 }
 
+void nfsd4_shutdown_copy(struct nfs4_client *clp)
+{
+	struct nfsd4_copy *copy;
+
+	spin_lock(&clp->async_lock);
+	list_for_each_entry(copy, &clp->async_copies, copies) {
+		set_tsk_thread_flag(copy->copy_task, TIF_SIGPENDING);
+		kthread_stop(copy->copy_task);
+		nfs4_put_copy(copy);
+	}
+	spin_unlock(&clp->async_lock);
+}
+
 static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
 {
 	struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f9151f2..efac39d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1947,6 +1947,7 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp)
 	}
 	nfsd4_return_all_client_layouts(clp);
 	nfsd4_shutdown_callback(clp);
+	nfsd4_shutdown_copy(clp);
 	if (clp->cl_cb_conn.cb_xprt)
 		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
 	free_client(clp);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index b4de8ae..88869df 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -643,6 +643,7 @@ extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
 extern int nfsd4_create_callback_queue(void);
 extern void nfsd4_destroy_callback_queue(void);
 extern void nfsd4_shutdown_callback(struct nfs4_client *);
+extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
 extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp);
 extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name,
 							struct nfsd_net *nn);
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 00/10] NFSD support for asynchronous COPY
  2017-10-24 17:47 [PATCH v6 00/10] NFSD support for asynchronous COPY Olga Kornievskaia
                   ` (9 preceding siblings ...)
  2017-10-24 17:47 ` [PATCH v6 10/10] NFSD stop queued async copies on client shutdown Olga Kornievskaia
@ 2017-11-03 19:57 ` Olga Kornievskaia
  2017-11-10 15:01   ` Olga Kornievskaia
  10 siblings, 1 reply; 48+ messages in thread
From: Olga Kornievskaia @ 2017-11-03 19:57 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: J. Bruce Fields, linux-nfs

On Tue, Oct 24, 2017 at 1:47 PM, Olga Kornievskaia <kolga@netapp.com> wrote:
> To do asynchronous copies, NFSD creates a new kthread to handle the request.
> Upon receiving the COPY, it generates a unique copy stateid (stored in a
> global list for keeping track of state for OFFLOAD_STATUS to be queried by),
> starts the thread, and replies back to the client. nfsd4_copy arguments that
> are allocated on the stack are copies for the kthread.
>
> For the async copy handler, copy is done in the loop for the requested
> number of bytes. If an error is encountered and partial copy has happened,
> a successful partial copy is returned in the CB_OFFLOAD. vfs_copy_file_range
> is called with 4MB chunks for both async and sync, allowing for 4MB
> granularity of OFFLOAD_STATUS query. Once copy is done, the results are
> queued for the callback workqueue and sent via CB_OFFLOAD.
>
> When the server received an OFFLOAD_CANCEL, it will find the kthread running
> the copy and will send a SIGPENDING and kthread_stop() and it will interrupt
> the ongoing do_splice() and once vfs returns we are choosing not to send
> the CB_OFFLOAD back to the client.
>
> When the server receives an OFFLOAD_STATUS, it will find the kthread running
> the copy and will locate within the copy state the current number of bytes
> copied so far.
>
> v6:
> 1. if an error occurred and partial bytes were copied, don't return the error
> and instead return the successful partial copy.
> 2. OFFLOAD_STATUS locates the on-going copy and looks up the number of bytes
> copied so far.
>
> Olga Kornievskaia (10):
>   NFSD CB_OFFLOAD xdr
>   NFSD OFFLOAD_STATUS xdr
>   NFSD OFFLOAD_CANCEL xdr
>   NFSD xdr callback stateid in async COPY reply
>   NFSD first draft of async copy
>   NFSD return nfs4_stid in nfs4_preprocess_stateid_op
>   NFSD create new stateid for async copy
>   NFSD handle OFFLOAD_CANCEL op
>   NFSD support OFFLOAD_STATUS
>   NFSD stop queued async copies on client shutdown
>
>  fs/nfsd/netns.h        |   8 ++
>  fs/nfsd/nfs4callback.c |  97 +++++++++++++++
>  fs/nfsd/nfs4proc.c     | 316 ++++++++++++++++++++++++++++++++++++++++++++-----
>  fs/nfsd/nfs4state.c    |  77 +++++++++++-
>  fs/nfsd/nfs4xdr.c      |  50 ++++++--
>  fs/nfsd/nfsctl.c       |   1 +
>  fs/nfsd/state.h        |  21 +++-
>  fs/nfsd/xdr4.h         |  28 +++++
>  fs/nfsd/xdr4cb.h       |  10 ++
>  9 files changed, 571 insertions(+), 37 deletions(-)

Bruce, any comments on this version?

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 00/10] NFSD support for asynchronous COPY
  2017-11-03 19:57 ` [PATCH v6 00/10] NFSD support for asynchronous COPY Olga Kornievskaia
@ 2017-11-10 15:01   ` Olga Kornievskaia
  2017-11-14  0:48     ` J. Bruce Fields
  0 siblings, 1 reply; 48+ messages in thread
From: Olga Kornievskaia @ 2017-11-10 15:01 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: J. Bruce Fields, linux-nfs

On Fri, Nov 3, 2017 at 3:57 PM, Olga Kornievskaia <aglo@umich.edu> wrote:
> On Tue, Oct 24, 2017 at 1:47 PM, Olga Kornievskaia <kolga@netapp.com> wrote:
>> To do asynchronous copies, NFSD creates a new kthread to handle the request.
>> Upon receiving the COPY, it generates a unique copy stateid (stored in a
>> global list for keeping track of state for OFFLOAD_STATUS to be queried by),
>> starts the thread, and replies back to the client. nfsd4_copy arguments that
>> are allocated on the stack are copies for the kthread.
>>
>> For the async copy handler, copy is done in the loop for the requested
>> number of bytes. If an error is encountered and partial copy has happened,
>> a successful partial copy is returned in the CB_OFFLOAD. vfs_copy_file_range
>> is called with 4MB chunks for both async and sync, allowing for 4MB
>> granularity of OFFLOAD_STATUS query. Once copy is done, the results are
>> queued for the callback workqueue and sent via CB_OFFLOAD.
>>
>> When the server received an OFFLOAD_CANCEL, it will find the kthread running
>> the copy and will send a SIGPENDING and kthread_stop() and it will interrupt
>> the ongoing do_splice() and once vfs returns we are choosing not to send
>> the CB_OFFLOAD back to the client.
>>
>> When the server receives an OFFLOAD_STATUS, it will find the kthread running
>> the copy and will locate within the copy state the current number of bytes
>> copied so far.
>>
>> v6:
>> 1. if an error occurred and partial bytes were copied, don't return the error
>> and instead return the successful partial copy.
>> 2. OFFLOAD_STATUS locates the on-going copy and looks up the number of bytes
>> copied so far.
>>
>> Olga Kornievskaia (10):
>>   NFSD CB_OFFLOAD xdr
>>   NFSD OFFLOAD_STATUS xdr
>>   NFSD OFFLOAD_CANCEL xdr
>>   NFSD xdr callback stateid in async COPY reply
>>   NFSD first draft of async copy
>>   NFSD return nfs4_stid in nfs4_preprocess_stateid_op
>>   NFSD create new stateid for async copy
>>   NFSD handle OFFLOAD_CANCEL op
>>   NFSD support OFFLOAD_STATUS
>>   NFSD stop queued async copies on client shutdown
>>
>>  fs/nfsd/netns.h        |   8 ++
>>  fs/nfsd/nfs4callback.c |  97 +++++++++++++++
>>  fs/nfsd/nfs4proc.c     | 316 ++++++++++++++++++++++++++++++++++++++++++++-----
>>  fs/nfsd/nfs4state.c    |  77 +++++++++++-
>>  fs/nfsd/nfs4xdr.c      |  50 ++++++--
>>  fs/nfsd/nfsctl.c       |   1 +
>>  fs/nfsd/state.h        |  21 +++-
>>  fs/nfsd/xdr4.h         |  28 +++++
>>  fs/nfsd/xdr4cb.h       |  10 ++
>>  9 files changed, 571 insertions(+), 37 deletions(-)
>
> Bruce, any comments on this version?

Bruce, do you have any comments on this version?

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 00/10] NFSD support for asynchronous COPY
  2017-11-10 15:01   ` Olga Kornievskaia
@ 2017-11-14  0:48     ` J. Bruce Fields
  2017-11-28 20:28       ` Olga Kornievskaia
  0 siblings, 1 reply; 48+ messages in thread
From: J. Bruce Fields @ 2017-11-14  0:48 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: Olga Kornievskaia, linux-nfs

On Fri, Nov 10, 2017 at 10:01:02AM -0500, Olga Kornievskaia wrote:
> On Fri, Nov 3, 2017 at 3:57 PM, Olga Kornievskaia <aglo@umich.edu> wrote:
> > On Tue, Oct 24, 2017 at 1:47 PM, Olga Kornievskaia <kolga@netapp.com> wrote:
> >> To do asynchronous copies, NFSD creates a new kthread to handle the request.
> >> Upon receiving the COPY, it generates a unique copy stateid (stored in a
> >> global list for keeping track of state for OFFLOAD_STATUS to be queried by),
> >> starts the thread, and replies back to the client. nfsd4_copy arguments that
> >> are allocated on the stack are copies for the kthread.
> >>
> >> For the async copy handler, copy is done in the loop for the requested
> >> number of bytes. If an error is encountered and partial copy has happened,
> >> a successful partial copy is returned in the CB_OFFLOAD. vfs_copy_file_range
> >> is called with 4MB chunks for both async and sync, allowing for 4MB
> >> granularity of OFFLOAD_STATUS query. Once copy is done, the results are
> >> queued for the callback workqueue and sent via CB_OFFLOAD.
> >>
> >> When the server received an OFFLOAD_CANCEL, it will find the kthread running
> >> the copy and will send a SIGPENDING and kthread_stop() and it will interrupt
> >> the ongoing do_splice() and once vfs returns we are choosing not to send
> >> the CB_OFFLOAD back to the client.
> >>
> >> When the server receives an OFFLOAD_STATUS, it will find the kthread running
> >> the copy and will locate within the copy state the current number of bytes
> >> copied so far.
> >>
> >> v6:
> >> 1. if an error occurred and partial bytes were copied, don't return the error
> >> and instead return the successful partial copy.
> >> 2. OFFLOAD_STATUS locates the on-going copy and looks up the number of bytes
> >> copied so far.
> >>
> >> Olga Kornievskaia (10):
> >>   NFSD CB_OFFLOAD xdr
> >>   NFSD OFFLOAD_STATUS xdr
> >>   NFSD OFFLOAD_CANCEL xdr
> >>   NFSD xdr callback stateid in async COPY reply
> >>   NFSD first draft of async copy
> >>   NFSD return nfs4_stid in nfs4_preprocess_stateid_op
> >>   NFSD create new stateid for async copy
> >>   NFSD handle OFFLOAD_CANCEL op
> >>   NFSD support OFFLOAD_STATUS
> >>   NFSD stop queued async copies on client shutdown
> >>
> >>  fs/nfsd/netns.h        |   8 ++
> >>  fs/nfsd/nfs4callback.c |  97 +++++++++++++++
> >>  fs/nfsd/nfs4proc.c     | 316 ++++++++++++++++++++++++++++++++++++++++++++-----
> >>  fs/nfsd/nfs4state.c    |  77 +++++++++++-
> >>  fs/nfsd/nfs4xdr.c      |  50 ++++++--
> >>  fs/nfsd/nfsctl.c       |   1 +
> >>  fs/nfsd/state.h        |  21 +++-
> >>  fs/nfsd/xdr4.h         |  28 +++++
> >>  fs/nfsd/xdr4cb.h       |  10 ++
> >>  9 files changed, 571 insertions(+), 37 deletions(-)
> >
> > Bruce, any comments on this version?
> 
> Bruce, do you have any comments on this version?

I don't yet, apologies.  It is on my list to look at.

--b.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 00/10] NFSD support for asynchronous COPY
  2017-11-14  0:48     ` J. Bruce Fields
@ 2017-11-28 20:28       ` Olga Kornievskaia
  2017-11-30 20:18         ` J. Bruce Fields
  0 siblings, 1 reply; 48+ messages in thread
From: Olga Kornievskaia @ 2017-11-28 20:28 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Olga Kornievskaia, linux-nfs

On Mon, Nov 13, 2017 at 7:48 PM, J. Bruce Fields <bfields@redhat.com> wrote:
> On Fri, Nov 10, 2017 at 10:01:02AM -0500, Olga Kornievskaia wrote:
>> On Fri, Nov 3, 2017 at 3:57 PM, Olga Kornievskaia <aglo@umich.edu> wrote:
>> > On Tue, Oct 24, 2017 at 1:47 PM, Olga Kornievskaia <kolga@netapp.com> wrote:
>> >> To do asynchronous copies, NFSD creates a new kthread to handle the request.
>> >> Upon receiving the COPY, it generates a unique copy stateid (stored in a
>> >> global list for keeping track of state for OFFLOAD_STATUS to be queried by),
>> >> starts the thread, and replies back to the client. nfsd4_copy arguments that
>> >> are allocated on the stack are copies for the kthread.
>> >>
>> >> For the async copy handler, copy is done in the loop for the requested
>> >> number of bytes. If an error is encountered and partial copy has happened,
>> >> a successful partial copy is returned in the CB_OFFLOAD. vfs_copy_file_range
>> >> is called with 4MB chunks for both async and sync, allowing for 4MB
> >> granularity of OFFLOAD_STATUS query. Once copy is done, the results are
>> >> queued for the callback workqueue and sent via CB_OFFLOAD.
>> >>
>> >> When the server received an OFFLOAD_CANCEL, it will find the kthread running
>> >> the copy and will send a SIGPENDING and kthread_stop() and it will interrupt
>> >> the ongoing do_splice() and once vfs returns we are choosing not to send
>> >> the CB_OFFLOAD back to the client.
>> >>
>> >> When the server receives an OFFLOAD_STATUS, it will find the kthread running
>> >> the copy and will locate within the copy state the current number of bytes
>> >> copied so far.
>> >>
>> >> v6:
>> >> 1. if an error occurred and partial bytes were copied, don't return the error
>> >> and instead return the successful partial copy.
>> >> 2. OFFLOAD_STATUS locates the on-going copy and looks up the number of bytes
>> >> copied so far.
>> >>
>> >> Olga Kornievskaia (10):
>> >>   NFSD CB_OFFLOAD xdr
>> >>   NFSD OFFLOAD_STATUS xdr
>> >>   NFSD OFFLOAD_CANCEL xdr
>> >>   NFSD xdr callback stateid in async COPY reply
>> >>   NFSD first draft of async copy
>> >>   NFSD return nfs4_stid in nfs4_preprocess_stateid_op
>> >>   NFSD create new stateid for async copy
>> >>   NFSD handle OFFLOAD_CANCEL op
>> >>   NFSD support OFFLOAD_STATUS
>> >>   NFSD stop queued async copies on client shutdown
>> >>
>> >>  fs/nfsd/netns.h        |   8 ++
>> >>  fs/nfsd/nfs4callback.c |  97 +++++++++++++++
>> >>  fs/nfsd/nfs4proc.c     | 316 ++++++++++++++++++++++++++++++++++++++++++++-----
>> >>  fs/nfsd/nfs4state.c    |  77 +++++++++++-
>> >>  fs/nfsd/nfs4xdr.c      |  50 ++++++--
>> >>  fs/nfsd/nfsctl.c       |   1 +
>> >>  fs/nfsd/state.h        |  21 +++-
>> >>  fs/nfsd/xdr4.h         |  28 +++++
>> >>  fs/nfsd/xdr4cb.h       |  10 ++
>> >>  9 files changed, 571 insertions(+), 37 deletions(-)
>> >
>> > Bruce, any comments on this version?
>>
>> Bruce, do you have any comments on this version?
>
> I don't yet, apologies.  It is on my list to look at.
>

Any ideas as to when that would be?

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 00/10] NFSD support for asynchronous COPY
  2017-11-28 20:28       ` Olga Kornievskaia
@ 2017-11-30 20:18         ` J. Bruce Fields
  2017-11-30 23:03           ` Olga Kornievskaia
  0 siblings, 1 reply; 48+ messages in thread
From: J. Bruce Fields @ 2017-11-30 20:18 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: J. Bruce Fields, Olga Kornievskaia, linux-nfs

On Tue, Nov 28, 2017 at 03:28:46PM -0500, Olga Kornievskaia wrote:
> On Mon, Nov 13, 2017 at 7:48 PM, J. Bruce Fields <bfields@redhat.com> wrote:
> > On Fri, Nov 10, 2017 at 10:01:02AM -0500, Olga Kornievskaia wrote:
> >> On Fri, Nov 3, 2017 at 3:57 PM, Olga Kornievskaia <aglo@umich.edu> wrote:
> >> > Bruce, any comments on this version?
> >>
> >> Bruce, do you have any comments on this version?
> >
> > I don't yet, apologies.  It is on my list to look at.
> >
> 
> Any ideas as to when that would be?

Sorry for the silence.  I'll try to get it reviewed in time for 4.16.

But I'm worrying about async behavior.  Just the usual complaint:

	- A large copy could take anywhere from milliseconds to hours.

	- The only way the protocol gives to measure progress is
	  OFFLOAD_STATUS, but I'm pessimistic that we'll get a
	  corresponding copy_progress syscall interface that
	  applications will get patched to use.

So, currently the server's trying to avoid the problem by returning
short COPY results and hoping applications will handle that gracefully
(and that readahead and write behind will save performance).

But I guess that won't work in the server-to-server case.  Or would it?
I *think* you can just send one NOTIFY and then reuse the same stateid
in multiple COPYs, so maybe it's not any worse than in the single-server
case.

That would be a lot simpler if it worked.

--b.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 00/10] NFSD support for asynchronous COPY
  2017-11-30 20:18         ` J. Bruce Fields
@ 2017-11-30 23:03           ` Olga Kornievskaia
  2017-12-04 21:32             ` J. Bruce Fields
  0 siblings, 1 reply; 48+ messages in thread
From: Olga Kornievskaia @ 2017-11-30 23:03 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: J. Bruce Fields, Olga Kornievskaia, linux-nfs

On Thu, Nov 30, 2017 at 3:18 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> On Tue, Nov 28, 2017 at 03:28:46PM -0500, Olga Kornievskaia wrote:
>> On Mon, Nov 13, 2017 at 7:48 PM, J. Bruce Fields <bfields@redhat.com> wrote:
>> > On Fri, Nov 10, 2017 at 10:01:02AM -0500, Olga Kornievskaia wrote:
>> >> On Fri, Nov 3, 2017 at 3:57 PM, Olga Kornievskaia <aglo@umich.edu> wrote:
>> >> > Bruce, any comments on this version?
>> >>
>> >> Bruce, do you have any comments on this version?
>> >
>> > I don't yet, apologies.  It is on my list to look at.
>> >
>>
>> Any ideas as to when that would be?
>
> Sorry for the silence.  I'll try to get it reviewed in time for 4.16.

I hope I won't forget too much in 3months for this ;)

> But I'm worrying about async behavior.  Just the usual complaint:
>
>         - A large copy could take anywhere from milliseconds to hours.

With the sync copy your worry was with tying up the nfsd thread. Here
each copy gets its own thread and doesn't interfere with other
operations. So why worry?

>         - The only way the protocol gives to measure progress is
>           OFFLOAD_STATUS, but I'm pessimistic that we'll get a
>           corresponding copy_progress syscall interface that
>           applications will get patched to use.

Doing a cp, never had a status info so what's different now.
Interested folks can do as before ls -l dst_file to monitor progress.

> So, currently the server's trying to avoid the problem by returning
> short COPY results and hoping applications will handle that gracefully
> (and that readahead and write behind will save performance).

We have demonstrated that doing async copy performs significantly
better (given large enough file size which was different between intra
and inter copy but something like 16MB). Really there is no point in
introducing an asynchronous copy unless you are doing all of it.

> But I guess that won't work in the server-to-server case.  Or would it?
> I *think* you can just send one NOTIFY

NOTIFY is only for "inter" server-to-server copy and we are currently
reviewing "asynchronous intra" copy?

> and then reuse the same stateid
> in multiple COPYs, so maybe it's not any worse than in the single-server
> case.

No you can not reuse the same stateid in multiple COPY's (spec doesn't
> allow it). It has to uniquely identify the copy. Otherwise, when
client receives a reply from the async COPY it doesn't know which out
of those multiple copies to match it to.

> That would be a lot simpler if it worked.
>
> --b.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 00/10] NFSD support for asynchronous COPY
  2017-11-30 23:03           ` Olga Kornievskaia
@ 2017-12-04 21:32             ` J. Bruce Fields
       [not found]               ` <CAN-5tyEVSwBmPMtUBJYDdLi7FK2MNMGuDQrrsvp776zD3Jcw0w@mail.gmail.com>
  0 siblings, 1 reply; 48+ messages in thread
From: J. Bruce Fields @ 2017-12-04 21:32 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: J. Bruce Fields, Olga Kornievskaia, linux-nfs

On Thu, Nov 30, 2017 at 06:03:08PM -0500, Olga Kornievskaia wrote:
> On Thu, Nov 30, 2017 at 3:18 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> > On Tue, Nov 28, 2017 at 03:28:46PM -0500, Olga Kornievskaia wrote:
> >> On Mon, Nov 13, 2017 at 7:48 PM, J. Bruce Fields <bfields@redhat.com> wrote:
> >> > On Fri, Nov 10, 2017 at 10:01:02AM -0500, Olga Kornievskaia wrote:
> >> >> On Fri, Nov 3, 2017 at 3:57 PM, Olga Kornievskaia <aglo@umich.edu> wrote:
> >> >> > Bruce, any comments on this version?
> >> >>
> >> >> Bruce, do you have any comments on this version?
> >> >
> >> > I don't yet, apologies.  It is on my list to look at.
> >> >
> >>
> >> Any ideas as to when that would be?
> >
> > Sorry for the silence.  I'll try to get it reviewed in time for 4.16.
> 
> I hope I won't forget too much in 3months for this ;)
> 
> > But I'm worrying about async behavior.  Just the usual complaint:
> >
> >         - A large copy could take anywhere from milliseconds to hours.
> 
> With the sync copy your worry was with tying up the nfsd thread. Here
> each copy gets its own thread and doesn't interfere with other
> operations. So why worry?

I'm mainly just worried about the lack of feedback to the user.  "cp"
doesn't traditionally give any, but other programs that copy data do,
and they'll have to decide what to do instead.

> >         - The only way the protocol gives to measure progress is
> >           OFFLOAD_STATUS, but I'm pessimistic that we'll get a
> >           corresponding copy_progress syscall interface that
> >           applications will get patched to use.
> 
> Doing a cp, never had a status info so what's different now.
> Interested folks can do as before ls -l dst_file to monitor progress.

Maybe it'll be no big deal.  I'm not completely opposed to trying and
finding out, partly because it shouldn't be that hard to back out later
if necessary.

> > So, currently the server's trying to avoid the problem by returning
> > short COPY results and hoping applications will handle that gracefully
> > (and that readahead and write behind will save performance).
> 
> We have demonstrated that doing async copy performs significantly
> better (given large enough file size which was different between intra
> and inter copy but something like 16MB). Really there is no point in
> introducing an asynchronous copy unless you are doing all of it.

Sorry, I can't find that right now, do you have a URL or a message id?

> > But I guess that won't work in the server-to-server case.  Or would it?
> > I *think* you can just send one NOTIFY
> 
> NOTIFY is only for "inter" server-to-server copy and we are currently
> reviewing "asynchronous intra" copy?
> 
> > and then reuse the same stateid
> > in multiple COPYs, so maybe it's not any worse than in the single-server
> > case.
> 
> No you can not reuse the same stateid in multiple COPY's (spec doesn't
> allow it). It has to uniquely identify the copy. Otherwise, when
> client receives a reply from the async COPY it doesn't know which out
> of those multiple copies to match it to.

Why couldn't a client reuse the stateid after a synchronous COPY?

--b.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 00/10] NFSD support for asynchronous COPY
       [not found]               ` <CAN-5tyEVSwBmPMtUBJYDdLi7FK2MNMGuDQrrsvp776zD3Jcw0w@mail.gmail.com>
@ 2018-01-22 16:51                 ` Olga Kornievskaia
  2018-01-25 22:33                   ` J. Bruce Fields
  0 siblings, 1 reply; 48+ messages in thread
From: Olga Kornievskaia @ 2018-01-22 16:51 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: linux-nfs

On Fri, Dec 8, 2017 at 3:30 PM, Olga Kornievskaia <aglo@umich.edu> wrote:
> On Mon, Dec 4, 2017 at 4:32 PM, J. Bruce Fields <bfields@redhat.com> wrote:
>> On Thu, Nov 30, 2017 at 06:03:08PM -0500, Olga Kornievskaia wrote:
>>> On Thu, Nov 30, 2017 at 3:18 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
>>> > On Tue, Nov 28, 2017 at 03:28:46PM -0500, Olga Kornievskaia wrote:
>>> >> On Mon, Nov 13, 2017 at 7:48 PM, J. Bruce Fields <bfields@redhat.com> wrote:
>>> >> > On Fri, Nov 10, 2017 at 10:01:02AM -0500, Olga Kornievskaia wrote:
>>> >> >> On Fri, Nov 3, 2017 at 3:57 PM, Olga Kornievskaia <aglo@umich.edu> wrote:
>>> >> >> > Bruce, any comments on this version?
>>> >> >>
>>> >> >> Bruce, do you have any comments on this version?
>>> >> >
>>> >> > I don't yet, apologies.  It is on my list to look at.
>>> >> >
>>> >>
>>> >> Any ideas as to when that would be?
>>> >
>>> > Sorry for the silence.  I'll try to get it reviewed in time for 4.16.
>>>
>>> I hope I won't forget too much in 3months for this ;)
>>>
>>> > But I'm worrying about async behavior.  Just the usual complaint:
>>> >
>>> >         - A large copy could take anywhere from milliseconds to hours.
>>>
>>> With the sync copy your worry was with tying up the nfsd thread. Here
>>> each copy gets its own thread and doesn't interfere with other
>>> operations. So why worry?
>>
>> I'm mainly just worried about the lack of feedback to the user.  "cp"
>> doesn't traditionally give any, but other programs that copy data do,
>> and they'll have to decide what to do instead.
>>
>>> >         - The only way the protocol gives to measure progress is
>>> >           OFFLOAD_STATUS, but I'm pessimistic that we'll get a
>>> >           corresponding copy_progress syscall interface that
>>> >           applications will get patched to use.
>>>
>>> Doing a cp, never had a status info so what's different now.
>>> Interested folks can do as before ls -l dst_file to monitor progress.
>>
>> Maybe it'll be no big deal.  I'm not completely opposed to trying and
>> finding out, partly because it shouldn't be that hard to back out later
>> if necessary.
>>
>>> > So, currently the server's trying to avoid the problem by returning
>>> > short COPY results and hoping applications will handle that gracefully
>>> > (and that readahead and write behind will save performance).
>>>
>>> We have demonstrated that doing async copy performs significantly
>>> better (given large enough file size which was different between intra
>>> and inter copy but something like 16MB). Really there is no point in
>>> introducing an asynchronous copy unless you are doing all of it.
>>
>> Sorry, I can't find that right now, do you have a URL or a message id?
>
> I re-sent it to you.
>
>>> > But I guess that won't work in the server-to-server case.  Or would it?
>>> > I *think* you can just send one NOTIFY
>>>
>>> NOTIFY is only for "inter" server-to-server copy and we are currently
>>> reviewing "asynchronous intra" copy?
>>>
>>> > and then reuse the same stateid
>>> > in multiple COPYs, so maybe it's not any worse than in the single-server
>>> > case.
>>>
>>> No you can not reuse the same stateid in multiple COPY's (spec doesn't
>>> allow it). It has to uniquely identify the copy. Otherwise, when
>>> client receives a reply from the async COPY it doesn't know which out
>>> of those multiple copies to match it to.
>>
>> Why couldn't a client reuse the stateid after a synchronous COPY?
>
> This line of questions is confusing. Why are you asking about
> synchronous COPY? We don't have an implementation of a synchronous
> "inter" COPY. It's only async. NOTIFY is present only in an inter
> COPY. NOTIFY generates a unique copy stateid to be used by a COPY. I
> guess I always assumed it meant one time use even for a synchronous
> copy. You definitely can't reuse the copy stateid for the async copy.
> Given that COPY is specified in general states that a unique stateid
> must be used then I think we can't separate synchronous from
> asynchronous case to re-use in one and not in the other.

Hi Bruce,

Any update on the patch series review?

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 01/10] NFSD CB_OFFLOAD xdr
  2017-10-24 17:47 ` [PATCH v6 01/10] NFSD CB_OFFLOAD xdr Olga Kornievskaia
@ 2018-01-25 16:43   ` J. Bruce Fields
  2018-01-26 15:16     ` Olga Kornievskaia
  0 siblings, 1 reply; 48+ messages in thread
From: J. Bruce Fields @ 2018-01-25 16:43 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: bfields, linux-nfs

On Tue, Oct 24, 2017 at 01:47:43PM -0400, Olga Kornievskaia wrote:
> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> ---
>  fs/nfsd/nfs4callback.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/nfsd/state.h        |  1 +
>  fs/nfsd/xdr4.h         |  6 ++++
>  fs/nfsd/xdr4cb.h       | 10 ++++++
>  4 files changed, 114 insertions(+)
> 
> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
> index 49b0a9e..d12d914 100644
> --- a/fs/nfsd/nfs4callback.c
> +++ b/fs/nfsd/nfs4callback.c
> @@ -39,6 +39,7 @@
>  #include "state.h"
>  #include "netns.h"
>  #include "xdr4cb.h"
> +#include "xdr4.h"
>  
>  #define NFSDDBG_FACILITY                NFSDDBG_PROC
>  
> @@ -105,6 +106,7 @@ enum nfs_cb_opnum4 {
>  	OP_CB_WANTS_CANCELLED		= 12,
>  	OP_CB_NOTIFY_LOCK		= 13,
>  	OP_CB_NOTIFY_DEVICEID		= 14,
> +	OP_CB_OFFLOAD			= 15,
>  	OP_CB_ILLEGAL			= 10044
>  };
>  
> @@ -683,6 +685,100 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
>  }
>  
>  /*
> + * struct write_response4 {
> + *	stateid4	wr_callback_id<1>;
> + *	length4		wr_count;
> + *	stable_how4	wr_committed;
> + *	verifier4	wr_writeverf;
> + * };
> + * union offload_info4 switch (nfsstat4 coa_status) {
> + *	case NFS4_OK:
> + *		write_response4	coa_resok4;
> + *	default:
> + *	length4		coa_bytes_copied;
> + * };
> + * struct CB_OFFLOAD4args {
> + *	nfs_fh4		coa_fh;
> + *	stateid4	coa_stateid;
> + *	offload_info4	coa_offload_info;
> + * };
> + */
> +static void encode_offload_info4(struct xdr_stream *xdr,
> +				 __be32 nfserr,
> +				 const struct nfsd4_copy *cp)
> +{
> +	__be32 *p;
> +
> +	p = xdr_reserve_space(xdr, 4);
> +	*p++ = nfserr;
> +	if (!nfserr) {
> +		p = xdr_reserve_space(xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE);
> +		p = xdr_encode_empty_array(p);
> +		p = xdr_encode_hyper(p, cp->cp_res.wr_bytes_written);
> +		*p++ = cpu_to_be32(cp->cp_res.wr_stable_how);
> +		p = xdr_encode_opaque_fixed(p, cp->cp_res.wr_verifier.data,
> +					    NFS4_VERIFIER_SIZE);
> +	} else {
> +		p = xdr_reserve_space(xdr, 8);
> +		p = xdr_encode_hyper(p, cp->cp_res.wr_bytes_written);

Nit: since we're not using this any more, may as well just make that:

		/* We always return success if bytes were written: */
		p = xdr_encode_hyper(p, 0);

--b.

> +	}
> +}
> +
> +static void encode_cb_offload4args(struct xdr_stream *xdr,
> +				   __be32 nfserr,
> +				   const struct knfsd_fh *fh,
> +				   const struct nfsd4_copy *cp,
> +				   struct nfs4_cb_compound_hdr *hdr)
> +{
> +	__be32 *p;
> +
> +	p = xdr_reserve_space(xdr, 4);
> +	*p++ = cpu_to_be32(OP_CB_OFFLOAD);
> +	encode_nfs_fh4(xdr, fh);
> +	encode_stateid4(xdr, &cp->cp_res.cb_stateid);
> +	encode_offload_info4(xdr, nfserr, cp);
> +
> +	hdr->nops++;
> +}
> +
> +static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req,
> +				    struct xdr_stream *xdr,
> +				    const void *data)
> +{
> +	const struct nfsd4_callback *cb = data;
> +	const struct nfsd4_copy *cp =
> +		container_of(cb, struct nfsd4_copy, cp_cb);
> +	struct nfs4_cb_compound_hdr hdr = {
> +		.ident = 0,
> +		.minorversion = cb->cb_clp->cl_minorversion,
> +	};
> +
> +	encode_cb_compound4args(xdr, &hdr);
> +	encode_cb_sequence4args(xdr, cb, &hdr);
> +	encode_cb_offload4args(xdr, cp->nfserr, &cp->fh, cp, &hdr);
> +	encode_cb_nops(&hdr);
> +}
> +
> +static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp,
> +				   struct xdr_stream *xdr,
> +				   void *data)
> +{
> +	struct nfsd4_callback *cb = data;
> +	struct nfs4_cb_compound_hdr hdr;
> +	int status;
> +
> +	status = decode_cb_compound4res(xdr, &hdr);
> +	if (unlikely(status))
> +		return status;
> +
> +	if (cb) {
> +		status = decode_cb_sequence4res(xdr, cb);
> +		if (unlikely(status || cb->cb_seq_status))
> +			return status;
> +	}
> +	return decode_cb_op_status(xdr, OP_CB_OFFLOAD, &cb->cb_status);
> +}
> +/*
>   * RPC procedure tables
>   */
>  #define PROC(proc, call, argtype, restype)				\
> @@ -703,6 +799,7 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
>  	PROC(CB_LAYOUT,	COMPOUND,	cb_layout,	cb_layout),
>  #endif
>  	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
> +	PROC(CB_OFFLOAD,	COMPOUND,	cb_offload,	cb_offload),
>  };
>  
>  static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 005c911..f8b0210 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -570,6 +570,7 @@ enum nfsd4_cb_op {
>  	NFSPROC4_CLNT_CB_NULL = 0,
>  	NFSPROC4_CLNT_CB_RECALL,
>  	NFSPROC4_CLNT_CB_LAYOUT,
> +	NFSPROC4_CLNT_CB_OFFLOAD,
>  	NFSPROC4_CLNT_CB_SEQUENCE,
>  	NFSPROC4_CLNT_CB_NOTIFY_LOCK,
>  };
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 1e4edbf..4ac2676 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -507,6 +507,7 @@ struct nfsd42_write_res {
>  	u64			wr_bytes_written;
>  	u32			wr_stable_how;
>  	nfs4_verifier		wr_verifier;
> +	stateid_t		cb_stateid;
>  };
>  
>  struct nfsd4_copy {
> @@ -523,6 +524,11 @@ struct nfsd4_copy {
>  
>  	/* response */
>  	struct nfsd42_write_res	cp_res;
> +
> +	/* for cb_offload */
> +	struct nfsd4_callback	cp_cb;
> +	__be32			nfserr;
> +	struct knfsd_fh		fh;
>  };
>  
>  struct nfsd4_seek {
> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
> index 49b719d..7e39913 100644
> --- a/fs/nfsd/xdr4cb.h
> +++ b/fs/nfsd/xdr4cb.h
> @@ -37,3 +37,13 @@
>  #define NFS4_dec_cb_notify_lock_sz	(cb_compound_dec_hdr_sz  +      \
>  					cb_sequence_dec_sz +            \
>  					op_dec_sz)
> +#define enc_cb_offload_info_sz		(1 + 1 + 2 + 1 +		\
> +					XDR_QUADLEN(NFS4_VERIFIER_SIZE))
> +#define NFS4_enc_cb_offload_sz		(cb_compound_enc_hdr_sz +       \
> +					cb_sequence_enc_sz +            \
> +					enc_nfs4_fh_sz +		\
> +					enc_stateid_sz +		\
> +					enc_cb_offload_info_sz)
> +#define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz  +      \
> +					cb_sequence_dec_sz +            \
> +					op_dec_sz)
> -- 
> 1.8.3.1

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 05/10] NFSD first draft of async copy
  2017-10-24 17:47 ` [PATCH v6 05/10] NFSD first draft of async copy Olga Kornievskaia
@ 2018-01-25 22:04   ` J. Bruce Fields
  2018-01-26 15:17     ` Olga Kornievskaia
  2018-02-15 19:59     ` Olga Kornievskaia
  2018-01-25 22:29   ` J. Bruce Fields
  2018-01-26 21:34   ` J. Bruce Fields
  2 siblings, 2 replies; 48+ messages in thread
From: J. Bruce Fields @ 2018-01-25 22:04 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: bfields, linux-nfs

Nit: this could use a better subject line.

On Tue, Oct 24, 2017 at 01:47:47PM -0400, Olga Kornievskaia wrote:
...
> +	if (!copy->cp_synchronous) {
> +		status = nfsd4_init_copy_res(copy, 0);
> +		async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
> +		if (!async_copy) {
> +			status = nfserrno(-ENOMEM);
> +			goto out;
> +		}
> +		dup_copy_fields(copy, async_copy);
> +		memcpy(&copy->cp_res.cb_stateid, &copy->cp_dst_stateid,
> +			sizeof(copy->cp_dst_stateid));
> +		spin_lock(&async_copy->cp_clp->async_lock);
> +		list_add(&async_copy->copies,
> +				&async_copy->cp_clp->async_copies);
> +		spin_unlock(&async_copy->cp_clp->async_lock);

At this point other threads could in theory look up this async_copy, but
its copy_task field is not yet initialized.  I don't *think* that's a
problem for nfsd4_shutdown_copy, because I don't think the server could
be processing rpc's for this client any more at that point.  But I think
a malicious client might be able to trigger a NULL dereference in
nfsd4_offload_cancel.

Is there any reason not to assign copy_task before adding it to this
list?

--b.

> +		async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
> +				async_copy, "%s", "copy thread");
> +		if (IS_ERR(async_copy->copy_task)) {
> +			status = PTR_ERR(async_copy->copy_task);
> +			goto out_err_dec;
> +		}
> +		wake_up_process(async_copy->copy_task);
> +	} else {
> +		status = nfsd4_do_copy(copy, 1);
>  	}
> -
> -	fput(src);
> -	fput(dst);
>  out:
>  	return status;
> +out_err_dec:
> +	cleanup_async_copy(async_copy);
> +	goto out;
>  }
>  
>  static __be32
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 0c04f81..d7767a1 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1774,6 +1774,8 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
>  #ifdef CONFIG_NFSD_PNFS
>  	INIT_LIST_HEAD(&clp->cl_lo_states);
>  #endif
> +	INIT_LIST_HEAD(&clp->async_copies);
> +	spin_lock_init(&clp->async_lock);
>  	spin_lock_init(&clp->cl_lock);
>  	rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
>  	return clp;
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index f8b0210..9189062 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -352,6 +352,8 @@ struct nfs4_client {
>  	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
>  						/* wait here for slots */
>  	struct net		*net;
> +	struct list_head	async_copies;	/* list of async copies */
> +	spinlock_t		async_lock;	/* lock for async copies */
>  };
>  
>  /* struct nfs4_client_reset
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 9b0c099..0a19954 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -529,6 +529,15 @@ struct nfsd4_copy {
>  	struct nfsd4_callback	cp_cb;
>  	__be32			nfserr;
>  	struct knfsd_fh		fh;
> +
> +	struct nfs4_client      *cp_clp;
> +
> +	struct file             *fh_src;
> +	struct file             *fh_dst;
> +	struct net              *net;
> +
> +	struct list_head	copies;
> +	struct task_struct	*copy_task;
>  };
>  
>  struct nfsd4_seek {
> -- 
> 1.8.3.1

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 10/10] NFSD stop queued async copies on client shutdown
  2017-10-24 17:47 ` [PATCH v6 10/10] NFSD stop queued async copies on client shutdown Olga Kornievskaia
@ 2018-01-25 22:22   ` J. Bruce Fields
  2018-01-26 15:17     ` Olga Kornievskaia
  0 siblings, 1 reply; 48+ messages in thread
From: J. Bruce Fields @ 2018-01-25 22:22 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: bfields, linux-nfs

On Tue, Oct 24, 2017 at 01:47:52PM -0400, Olga Kornievskaia wrote:
> If client is shutting down and there are still async copies going
> on, then stop queued async copies.
> 
> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> ---
>  fs/nfsd/nfs4proc.c  | 13 +++++++++++++
>  fs/nfsd/nfs4state.c |  1 +
>  fs/nfsd/state.h     |  1 +
>  3 files changed, 15 insertions(+)
> 
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index b2f6549..34042ff6 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -1105,6 +1105,19 @@ static void nfs4_put_copy(struct nfsd4_copy *copy)
>  	kfree(copy);
>  }
>  
> +void nfsd4_shutdown_copy(struct nfs4_client *clp)
> +{
> +	struct nfsd4_copy *copy;
> +
> +	spin_lock(&clp->async_lock);
> +	list_for_each_entry(copy, &clp->async_copies, copies) {
> +		set_tsk_thread_flag(copy->copy_task, TIF_SIGPENDING);
> +		kthread_stop(copy->copy_task);
> +		nfs4_put_copy(copy);
> +	}
> +	spin_unlock(&clp->async_lock);
> +}
> +
>  static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
>  {
>  	struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb);
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index f9151f2..efac39d 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1947,6 +1947,7 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp)
>  	}
>  	nfsd4_return_all_client_layouts(clp);
>  	nfsd4_shutdown_callback(clp);
> +	nfsd4_shutdown_copy(clp);

The asynchronous copy threads can produce more callbacks, and I don't
think that's safe after we've called nfsd4_shutdown_callback(), so we
should move nfsd4_shutdown_copy() above nfsd4_shutdown_callback() if
possible.

--b.

>  	if (clp->cl_cb_conn.cb_xprt)
>  		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
>  	free_client(clp);
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index b4de8ae..88869df 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -643,6 +643,7 @@ extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
>  extern int nfsd4_create_callback_queue(void);
>  extern void nfsd4_destroy_callback_queue(void);
>  extern void nfsd4_shutdown_callback(struct nfs4_client *);
> +extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
>  extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp);
>  extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name,
>  							struct nfsd_net *nn);
> -- 
> 1.8.3.1

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 05/10] NFSD first draft of async copy
  2017-10-24 17:47 ` [PATCH v6 05/10] NFSD first draft of async copy Olga Kornievskaia
  2018-01-25 22:04   ` J. Bruce Fields
@ 2018-01-25 22:29   ` J. Bruce Fields
  2018-01-26 15:17     ` Olga Kornievskaia
  2018-01-26 21:34   ` J. Bruce Fields
  2 siblings, 1 reply; 48+ messages in thread
From: J. Bruce Fields @ 2018-01-25 22:29 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: bfields, linux-nfs

On Tue, Oct 24, 2017 at 01:47:47PM -0400, Olga Kornievskaia wrote:
> +	copy->cp_clp = cstate->clp;
> +	memcpy(&copy->fh, &cstate->current_fh.fh_handle,
> +		sizeof(struct knfsd_fh));
> +	copy->net = SVC_NET(rqstp);
> +	if (!copy->cp_synchronous) {

I was thinking we might want to do a synchronous copy anyway in some
cases: e.g. if the copy is relatively small or if the filesystem
supports clone.

But I guess that's a premature optimization; better to keep this as you
have it for now.

--b.

> +		status = nfsd4_init_copy_res(copy, 0);
> +		async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
> +		if (!async_copy) {
> +			status = nfserrno(-ENOMEM);
> +			goto out;
> +		}
> +		dup_copy_fields(copy, async_copy);
> +		memcpy(&copy->cp_res.cb_stateid, &copy->cp_dst_stateid,
> +			sizeof(copy->cp_dst_stateid));
> +		spin_lock(&async_copy->cp_clp->async_lock);
> +		list_add(&async_copy->copies,
> +				&async_copy->cp_clp->async_copies);
> +		spin_unlock(&async_copy->cp_clp->async_lock);
> +		async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
> +				async_copy, "%s", "copy thread");
> +		if (IS_ERR(async_copy->copy_task)) {
> +			status = PTR_ERR(async_copy->copy_task);
> +			goto out_err_dec;
> +		}
> +		wake_up_process(async_copy->copy_task);
> +	} else {
> +		status = nfsd4_do_copy(copy, 1);

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 00/10] NFSD support for asynchronous COPY
  2018-01-22 16:51                 ` Olga Kornievskaia
@ 2018-01-25 22:33                   ` J. Bruce Fields
  2018-01-26 15:16                     ` Olga Kornievskaia
  0 siblings, 1 reply; 48+ messages in thread
From: J. Bruce Fields @ 2018-01-25 22:33 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: linux-nfs

On Mon, Jan 22, 2018 at 11:51:08AM -0500, Olga Kornievskaia wrote:
> Any update on the patch series review?

Sorry for the slow response.  I feel like I'm about half-way through
reading the patches.  I'll try to wrap up tomorrow.

--b.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 00/10] NFSD support for asynchronous COPY
  2018-01-25 22:33                   ` J. Bruce Fields
@ 2018-01-26 15:16                     ` Olga Kornievskaia
  0 siblings, 0 replies; 48+ messages in thread
From: Olga Kornievskaia @ 2018-01-26 15:16 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: linux-nfs

On Thu, Jan 25, 2018 at 5:33 PM, J. Bruce Fields <bfields@redhat.com> wrote:
> On Mon, Jan 22, 2018 at 11:51:08AM -0500, Olga Kornievskaia wrote:
>> Any update on the patch series review?
>
> Sorry for the slow response.  I feel like I'm about half-way through
> reading the patches.  I'll try to wrap up tomorrow.

Thank you for the comments so far. I'm going thru them now and will
fix all for the next version.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 01/10] NFSD CB_OFFLOAD xdr
  2018-01-25 16:43   ` J. Bruce Fields
@ 2018-01-26 15:16     ` Olga Kornievskaia
  0 siblings, 0 replies; 48+ messages in thread
From: Olga Kornievskaia @ 2018-01-26 15:16 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Olga Kornievskaia, J. Bruce Fields, linux-nfs

On Thu, Jan 25, 2018 at 11:43 AM, J. Bruce Fields <bfields@fieldses.org> wrote:
> On Tue, Oct 24, 2017 at 01:47:43PM -0400, Olga Kornievskaia wrote:
>> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
>> ---
>>  fs/nfsd/nfs4callback.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>  fs/nfsd/state.h        |  1 +
>>  fs/nfsd/xdr4.h         |  6 ++++
>>  fs/nfsd/xdr4cb.h       | 10 ++++++
>>  4 files changed, 114 insertions(+)
>>
>> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
>> index 49b0a9e..d12d914 100644
>> --- a/fs/nfsd/nfs4callback.c
>> +++ b/fs/nfsd/nfs4callback.c
>> @@ -39,6 +39,7 @@
>>  #include "state.h"
>>  #include "netns.h"
>>  #include "xdr4cb.h"
>> +#include "xdr4.h"
>>
>>  #define NFSDDBG_FACILITY                NFSDDBG_PROC
>>
>> @@ -105,6 +106,7 @@ enum nfs_cb_opnum4 {
>>       OP_CB_WANTS_CANCELLED           = 12,
>>       OP_CB_NOTIFY_LOCK               = 13,
>>       OP_CB_NOTIFY_DEVICEID           = 14,
>> +     OP_CB_OFFLOAD                   = 15,
>>       OP_CB_ILLEGAL                   = 10044
>>  };
>>
>> @@ -683,6 +685,100 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
>>  }
>>
>>  /*
>> + * struct write_response4 {
>> + *   stateid4        wr_callback_id<1>;
>> + *   length4         wr_count;
>> + *   stable_how4     wr_committed;
>> + *   verifier4       wr_writeverf;
>> + * };
>> + * union offload_info4 switch (nfsstat4 coa_status) {
>> + *   case NFS4_OK:
>> + *           write_response4 coa_resok4;
>> + *   default:
>> + *   length4         coa_bytes_copied;
>> + * };
>> + * struct CB_OFFLOAD4args {
>> + *   nfs_fh4         coa_fh;
>> + *   stateid4        coa_stateid;
>> + *   offload_info4   coa_offload_info;
>> + * };
>> + */
>> +static void encode_offload_info4(struct xdr_stream *xdr,
>> +                              __be32 nfserr,
>> +                              const struct nfsd4_copy *cp)
>> +{
>> +     __be32 *p;
>> +
>> +     p = xdr_reserve_space(xdr, 4);
>> +     *p++ = nfserr;
>> +     if (!nfserr) {
>> +             p = xdr_reserve_space(xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE);
>> +             p = xdr_encode_empty_array(p);
>> +             p = xdr_encode_hyper(p, cp->cp_res.wr_bytes_written);
>> +             *p++ = cpu_to_be32(cp->cp_res.wr_stable_how);
>> +             p = xdr_encode_opaque_fixed(p, cp->cp_res.wr_verifier.data,
>> +                                         NFS4_VERIFIER_SIZE);
>> +     } else {
>> +             p = xdr_reserve_space(xdr, 8);
>> +             p = xdr_encode_hyper(p, cp->cp_res.wr_bytes_written);
>
> Nit: since we're not using this any more, may as well just make that:
>
>                 /* We always return success if bytes were written: */
>                 p = xdr_encode_hyper(p, 0);

Sure I can do this.

>
> --b.
>
>> +     }
>> +}
>> +
>> +static void encode_cb_offload4args(struct xdr_stream *xdr,
>> +                                __be32 nfserr,
>> +                                const struct knfsd_fh *fh,
>> +                                const struct nfsd4_copy *cp,
>> +                                struct nfs4_cb_compound_hdr *hdr)
>> +{
>> +     __be32 *p;
>> +
>> +     p = xdr_reserve_space(xdr, 4);
>> +     *p++ = cpu_to_be32(OP_CB_OFFLOAD);
>> +     encode_nfs_fh4(xdr, fh);
>> +     encode_stateid4(xdr, &cp->cp_res.cb_stateid);
>> +     encode_offload_info4(xdr, nfserr, cp);
>> +
>> +     hdr->nops++;
>> +}
>> +
>> +static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req,
>> +                                 struct xdr_stream *xdr,
>> +                                 const void *data)
>> +{
>> +     const struct nfsd4_callback *cb = data;
>> +     const struct nfsd4_copy *cp =
>> +             container_of(cb, struct nfsd4_copy, cp_cb);
>> +     struct nfs4_cb_compound_hdr hdr = {
>> +             .ident = 0,
>> +             .minorversion = cb->cb_clp->cl_minorversion,
>> +     };
>> +
>> +     encode_cb_compound4args(xdr, &hdr);
>> +     encode_cb_sequence4args(xdr, cb, &hdr);
>> +     encode_cb_offload4args(xdr, cp->nfserr, &cp->fh, cp, &hdr);
>> +     encode_cb_nops(&hdr);
>> +}
>> +
>> +static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp,
>> +                                struct xdr_stream *xdr,
>> +                                void *data)
>> +{
>> +     struct nfsd4_callback *cb = data;
>> +     struct nfs4_cb_compound_hdr hdr;
>> +     int status;
>> +
>> +     status = decode_cb_compound4res(xdr, &hdr);
>> +     if (unlikely(status))
>> +             return status;
>> +
>> +     if (cb) {
>> +             status = decode_cb_sequence4res(xdr, cb);
>> +             if (unlikely(status || cb->cb_seq_status))
>> +                     return status;
>> +     }
>> +     return decode_cb_op_status(xdr, OP_CB_OFFLOAD, &cb->cb_status);
>> +}
>> +/*
>>   * RPC procedure tables
>>   */
>>  #define PROC(proc, call, argtype, restype)                           \
>> @@ -703,6 +799,7 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
>>       PROC(CB_LAYOUT, COMPOUND,       cb_layout,      cb_layout),
>>  #endif
>>       PROC(CB_NOTIFY_LOCK,    COMPOUND,       cb_notify_lock, cb_notify_lock),
>> +     PROC(CB_OFFLOAD,        COMPOUND,       cb_offload,     cb_offload),
>>  };
>>
>>  static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>> index 005c911..f8b0210 100644
>> --- a/fs/nfsd/state.h
>> +++ b/fs/nfsd/state.h
>> @@ -570,6 +570,7 @@ enum nfsd4_cb_op {
>>       NFSPROC4_CLNT_CB_NULL = 0,
>>       NFSPROC4_CLNT_CB_RECALL,
>>       NFSPROC4_CLNT_CB_LAYOUT,
>> +     NFSPROC4_CLNT_CB_OFFLOAD,
>>       NFSPROC4_CLNT_CB_SEQUENCE,
>>       NFSPROC4_CLNT_CB_NOTIFY_LOCK,
>>  };
>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
>> index 1e4edbf..4ac2676 100644
>> --- a/fs/nfsd/xdr4.h
>> +++ b/fs/nfsd/xdr4.h
>> @@ -507,6 +507,7 @@ struct nfsd42_write_res {
>>       u64                     wr_bytes_written;
>>       u32                     wr_stable_how;
>>       nfs4_verifier           wr_verifier;
>> +     stateid_t               cb_stateid;
>>  };
>>
>>  struct nfsd4_copy {
>> @@ -523,6 +524,11 @@ struct nfsd4_copy {
>>
>>       /* response */
>>       struct nfsd42_write_res cp_res;
>> +
>> +     /* for cb_offload */
>> +     struct nfsd4_callback   cp_cb;
>> +     __be32                  nfserr;
>> +     struct knfsd_fh         fh;
>>  };
>>
>>  struct nfsd4_seek {
>> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
>> index 49b719d..7e39913 100644
>> --- a/fs/nfsd/xdr4cb.h
>> +++ b/fs/nfsd/xdr4cb.h
>> @@ -37,3 +37,13 @@
>>  #define NFS4_dec_cb_notify_lock_sz   (cb_compound_dec_hdr_sz  +      \
>>                                       cb_sequence_dec_sz +            \
>>                                       op_dec_sz)
>> +#define enc_cb_offload_info_sz               (1 + 1 + 2 + 1 +                \
>> +                                     XDR_QUADLEN(NFS4_VERIFIER_SIZE))
>> +#define NFS4_enc_cb_offload_sz               (cb_compound_enc_hdr_sz +       \
>> +                                     cb_sequence_enc_sz +            \
>> +                                     enc_nfs4_fh_sz +                \
>> +                                     enc_stateid_sz +                \
>> +                                     enc_cb_offload_info_sz)
>> +#define NFS4_dec_cb_offload_sz               (cb_compound_dec_hdr_sz  +      \
>> +                                     cb_sequence_dec_sz +            \
>> +                                     op_dec_sz)
>> --
>> 1.8.3.1
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 10/10] NFSD stop queued async copies on client shutdown
  2018-01-25 22:22   ` J. Bruce Fields
@ 2018-01-26 15:17     ` Olga Kornievskaia
  0 siblings, 0 replies; 48+ messages in thread
From: Olga Kornievskaia @ 2018-01-26 15:17 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Olga Kornievskaia, J. Bruce Fields, linux-nfs

On Thu, Jan 25, 2018 at 5:22 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> On Tue, Oct 24, 2017 at 01:47:52PM -0400, Olga Kornievskaia wrote:
>> If client is shutting down and there are still async copies going
>> on, then stop queued async copies.
>>
>> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
>> ---
>>  fs/nfsd/nfs4proc.c  | 13 +++++++++++++
>>  fs/nfsd/nfs4state.c |  1 +
>>  fs/nfsd/state.h     |  1 +
>>  3 files changed, 15 insertions(+)
>>
>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>> index b2f6549..34042ff6 100644
>> --- a/fs/nfsd/nfs4proc.c
>> +++ b/fs/nfsd/nfs4proc.c
>> @@ -1105,6 +1105,19 @@ static void nfs4_put_copy(struct nfsd4_copy *copy)
>>       kfree(copy);
>>  }
>>
>> +void nfsd4_shutdown_copy(struct nfs4_client *clp)
>> +{
>> +     struct nfsd4_copy *copy;
>> +
>> +     spin_lock(&clp->async_lock);
>> +     list_for_each_entry(copy, &clp->async_copies, copies) {
>> +             set_tsk_thread_flag(copy->copy_task, TIF_SIGPENDING);
>> +             kthread_stop(copy->copy_task);
>> +             nfs4_put_copy(copy);
>> +     }
>> +     spin_unlock(&clp->async_lock);
>> +}
>> +
>>  static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
>>  {
>>       struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb);
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index f9151f2..efac39d 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -1947,6 +1947,7 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp)
>>       }
>>       nfsd4_return_all_client_layouts(clp);
>>       nfsd4_shutdown_callback(clp);
>> +     nfsd4_shutdown_copy(clp);
>
> The asynchronous copy threads can produce more callbacks, and I don't
> think that's safe after we've called nfsd4_shutdown_callback(), so we
> should move nfsd4_shutdown_copy() above nfsd4_shutdown_callback() if
> possible.

Yes shutdown_copy could be moved before the shutdown_callback. I will do that.

> --b.
>
>>       if (clp->cl_cb_conn.cb_xprt)
>>               svc_xprt_put(clp->cl_cb_conn.cb_xprt);
>>       free_client(clp);
>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>> index b4de8ae..88869df 100644
>> --- a/fs/nfsd/state.h
>> +++ b/fs/nfsd/state.h
>> @@ -643,6 +643,7 @@ extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
>>  extern int nfsd4_create_callback_queue(void);
>>  extern void nfsd4_destroy_callback_queue(void);
>>  extern void nfsd4_shutdown_callback(struct nfs4_client *);
>> +extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
>>  extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp);
>>  extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name,
>>                                                       struct nfsd_net *nn);
>> --
>> 1.8.3.1
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 05/10] NFSD first draft of async copy
  2018-01-25 22:04   ` J. Bruce Fields
@ 2018-01-26 15:17     ` Olga Kornievskaia
  2018-02-15 19:59     ` Olga Kornievskaia
  1 sibling, 0 replies; 48+ messages in thread
From: Olga Kornievskaia @ 2018-01-26 15:17 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Olga Kornievskaia, J. Bruce Fields, linux-nfs

On Thu, Jan 25, 2018 at 5:04 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> Nit: this could use a better subject line.

Will change it.

> On Tue, Oct 24, 2017 at 01:47:47PM -0400, Olga Kornievskaia wrote:
> ...
>> +     if (!copy->cp_synchronous) {
>> +             status = nfsd4_init_copy_res(copy, 0);
>> +             async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
>> +             if (!async_copy) {
>> +                     status = nfserrno(-ENOMEM);
>> +                     goto out;
>> +             }
>> +             dup_copy_fields(copy, async_copy);
>> +             memcpy(&copy->cp_res.cb_stateid, &copy->cp_dst_stateid,
>> +                     sizeof(copy->cp_dst_stateid));
>> +             spin_lock(&async_copy->cp_clp->async_lock);
>> +             list_add(&async_copy->copies,
>> +                             &async_copy->cp_clp->async_copies);
>> +             spin_unlock(&async_copy->cp_clp->async_lock);
>
> At this point other threads could in theory look up this async_copy, but
> its copy_task field is not yet initialized.  I don't *think* that's a
> problem for nfsd4_shutdown_copy, because I don't think the server could
> be processing rpc's for this client any more at that point.  But I think
> a malicious client might be able to trigger a NULL dereference in
> nfsd4_offload_cancel.
>
> Is there any reason not to assign copy_task before adding it to this
> list?

Good idea. I'll move the addition to after the copy_task assignment.

>
> --b.
>
>> +             async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
>> +                             async_copy, "%s", "copy thread");
>> +             if (IS_ERR(async_copy->copy_task)) {
>> +                     status = PTR_ERR(async_copy->copy_task);
>> +                     goto out_err_dec;
>> +             }
>> +             wake_up_process(async_copy->copy_task);
>> +     } else {
>> +             status = nfsd4_do_copy(copy, 1);
>>       }
>> -
>> -     fput(src);
>> -     fput(dst);
>>  out:
>>       return status;
>> +out_err_dec:
>> +     cleanup_async_copy(async_copy);
>> +     goto out;
>>  }
>>
>>  static __be32
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 0c04f81..d7767a1 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -1774,6 +1774,8 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
>>  #ifdef CONFIG_NFSD_PNFS
>>       INIT_LIST_HEAD(&clp->cl_lo_states);
>>  #endif
>> +     INIT_LIST_HEAD(&clp->async_copies);
>> +     spin_lock_init(&clp->async_lock);
>>       spin_lock_init(&clp->cl_lock);
>>       rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
>>       return clp;
>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>> index f8b0210..9189062 100644
>> --- a/fs/nfsd/state.h
>> +++ b/fs/nfsd/state.h
>> @@ -352,6 +352,8 @@ struct nfs4_client {
>>       struct rpc_wait_queue   cl_cb_waitq;    /* backchannel callers may */
>>                                               /* wait here for slots */
>>       struct net              *net;
>> +     struct list_head        async_copies;   /* list of async copies */
>> +     spinlock_t              async_lock;     /* lock for async copies */
>>  };
>>
>>  /* struct nfs4_client_reset
>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
>> index 9b0c099..0a19954 100644
>> --- a/fs/nfsd/xdr4.h
>> +++ b/fs/nfsd/xdr4.h
>> @@ -529,6 +529,15 @@ struct nfsd4_copy {
>>       struct nfsd4_callback   cp_cb;
>>       __be32                  nfserr;
>>       struct knfsd_fh         fh;
>> +
>> +     struct nfs4_client      *cp_clp;
>> +
>> +     struct file             *fh_src;
>> +     struct file             *fh_dst;
>> +     struct net              *net;
>> +
>> +     struct list_head        copies;
>> +     struct task_struct      *copy_task;
>>  };
>>
>>  struct nfsd4_seek {
>> --
>> 1.8.3.1
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 05/10] NFSD first draft of async copy
  2018-01-25 22:29   ` J. Bruce Fields
@ 2018-01-26 15:17     ` Olga Kornievskaia
  0 siblings, 0 replies; 48+ messages in thread
From: Olga Kornievskaia @ 2018-01-26 15:17 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Olga Kornievskaia, J. Bruce Fields, linux-nfs

On Thu, Jan 25, 2018 at 5:29 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> On Tue, Oct 24, 2017 at 01:47:47PM -0400, Olga Kornievskaia wrote:
>> +     copy->cp_clp = cstate->clp;
>> +     memcpy(&copy->fh, &cstate->current_fh.fh_handle,
>> +             sizeof(struct knfsd_fh));
>> +     copy->net = SVC_NET(rqstp);
>> +     if (!copy->cp_synchronous) {
>
> I was thinking we might want to do a synchronous copy anyway in some
> cases: e.g. if the copy is relatively small or if the filesystem
> supports clone.
>
> But I guess that's a premature optimization; better to keep this as you
> have it for now.

Let's keep it as is for now and complicate it later :-)

>
> --b.
>
>> +             status = nfsd4_init_copy_res(copy, 0);
>> +             async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
>> +             if (!async_copy) {
>> +                     status = nfserrno(-ENOMEM);
>> +                     goto out;
>> +             }
>> +             dup_copy_fields(copy, async_copy);
>> +             memcpy(&copy->cp_res.cb_stateid, &copy->cp_dst_stateid,
>> +                     sizeof(copy->cp_dst_stateid));
>> +             spin_lock(&async_copy->cp_clp->async_lock);
>> +             list_add(&async_copy->copies,
>> +                             &async_copy->cp_clp->async_copies);
>> +             spin_unlock(&async_copy->cp_clp->async_lock);
>> +             async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
>> +                             async_copy, "%s", "copy thread");
>> +             if (IS_ERR(async_copy->copy_task)) {
>> +                     status = PTR_ERR(async_copy->copy_task);
>> +                     goto out_err_dec;
>> +             }
>> +             wake_up_process(async_copy->copy_task);
>> +     } else {
>> +             status = nfsd4_do_copy(copy, 1);
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 05/10] NFSD first draft of async copy
  2017-10-24 17:47 ` [PATCH v6 05/10] NFSD first draft of async copy Olga Kornievskaia
  2018-01-25 22:04   ` J. Bruce Fields
  2018-01-25 22:29   ` J. Bruce Fields
@ 2018-01-26 21:34   ` J. Bruce Fields
  2018-02-02 19:50     ` Olga Kornievskaia
  2 siblings, 1 reply; 48+ messages in thread
From: J. Bruce Fields @ 2018-01-26 21:34 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: bfields, linux-nfs

If I understand correctly (I may be wrong), once this patch is applied a
COPY may fail that previously worked--because we're switching over to
the asynchronous copy implementation before it's actually complete.

Of course, that's fixed by the end of this series.  But we try to avoid
that situation, where functionality is temporarily broken in the middle
of a patch series and then fixed later.

Options might be to squash this patch together with some of the later
patches.  Or go ahead and add this code but don't actually enable it
till later.  (E.g. arrange that the "if (!copy->cp_synchronous)" case
won't be taken till the last patch.  Maybe it already works that way, I
can't tell.)  Or maybe there's some slicker way that I don't see right
now.

--b.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 07/10] NFSD create new stateid for async copy
  2017-10-24 17:47 ` [PATCH v6 07/10] NFSD create new stateid for async copy Olga Kornievskaia
@ 2018-01-26 21:37   ` J. Bruce Fields
  2018-01-26 21:59   ` J. Bruce Fields
  1 sibling, 0 replies; 48+ messages in thread
From: J. Bruce Fields @ 2018-01-26 21:37 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: bfields, linux-nfs

On Tue, Oct 24, 2017 at 01:47:49PM -0400, Olga Kornievskaia wrote:
> +/*
> + * Create a unique stateid_t to represent each COPY. Hang the copy
> + * stateids off the OPEN/LOCK/DELEG stateid from the client open
> + * the source file.
> + */
> +struct nfs4_cp_state *nfs4_alloc_init_cp_state(struct nfsd_net *nn,
> +					       struct nfs4_stid *p_stid)
> +{
> +	struct nfs4_cp_state *cps;
> +	int new_id;
> +
> +	cps = kzalloc(sizeof(struct nfs4_cp_state), GFP_KERNEL);
> +	if (!cps)
> +		return NULL;
> +	idr_preload(GFP_KERNEL);
> +	spin_lock(&nn->s2s_cp_lock);
> +	new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, cps, 0, 0, GFP_NOWAIT);
> +	spin_unlock(&nn->s2s_cp_lock);
> +	idr_preload_end();
> +	if (new_id < 0)
> +		goto out_free;
> +	cps->cp_stateid.si_opaque.so_id = new_id;
> +	cps->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
> +	cps->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
> +	cps->cp_p_stid = p_stid;
> +	INIT_LIST_HEAD(&cps->cp_list);

The INIT_LIST_HEAD is redundant here, it'll just be overridden by the
list_add below.

> +	list_add(&cps->cp_list, &p_stid->sc_cp_list);

p_stid is a preexisting stateid that's visible to other nfsd threads
too, right?  So in theory another COPY using the same parent stateid
could be running concurrently with this one?

Maybe you could just use p_stid->sc_lock  for this.

Ditto for any other manipulations of that list.

--b.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 07/10] NFSD create new stateid for async copy
  2017-10-24 17:47 ` [PATCH v6 07/10] NFSD create new stateid for async copy Olga Kornievskaia
  2018-01-26 21:37   ` J. Bruce Fields
@ 2018-01-26 21:59   ` J. Bruce Fields
  2018-02-02 20:45     ` Olga Kornievskaia
  1 sibling, 1 reply; 48+ messages in thread
From: J. Bruce Fields @ 2018-01-26 21:59 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: bfields, linux-nfs

On Tue, Oct 24, 2017 at 01:47:49PM -0400, Olga Kornievskaia wrote:
> Generate a new stateid to be used for reply to the asynchronous
> COPY (this would also be used later by COPY_NOTIFY as well).
> Associate the stateid with the parent OPEN/LOCK/DELEG stateid
> that can be freed during the free of the parent stateid. However,
> right now deciding to bind the lifetime to when the vfs copy
> is done. This way don't need to keep the nfsd_net structure for
> the callback. The drawback is that time copy state information
> is available for query by OFFLOAD_STATUS is slightly less.
> 
> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> ---
>  fs/nfsd/netns.h     |  8 +++++++
>  fs/nfsd/nfs4proc.c  | 32 +++++++++++++++++++-------
>  fs/nfsd/nfs4state.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/nfsd/nfsctl.c    |  1 +
>  fs/nfsd/state.h     | 14 ++++++++++++
>  fs/nfsd/xdr4.h      |  2 ++
>  6 files changed, 115 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index 3714231..2c88a95 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -119,6 +119,14 @@ struct nfsd_net {
>  	u32 clverifier_counter;
>  
>  	struct svc_serv *nfsd_serv;
> +
> +	/*
> +	 * clientid and stateid data for construction of net unique COPY
> +	 * stateids.
> +	 */
> +	u32		s2s_cp_cl_id;
> +	struct idr	s2s_cp_stateids;
> +	spinlock_t	s2s_cp_lock;
>  };
>  
>  /* Simple check to find out if a given net was properly initialized */
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index a020533..6876080 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -1039,7 +1039,8 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
>  static __be32
>  nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  		  stateid_t *src_stateid, struct file **src,
> -		  stateid_t *dst_stateid, struct file **dst)
> +		  stateid_t *dst_stateid, struct file **dst,
> +		  struct nfs4_stid **stid)
>  {
>  	__be32 status;
>  
> @@ -1053,7 +1054,7 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
>  
>  	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
>  					    dst_stateid, WR_STATE, dst, NULL,
> -					    NULL);
> +					    stid);
>  	if (status) {
>  		dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
>  		goto out_put_src;
> @@ -1084,7 +1085,7 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
>  	__be32 status;
>  
>  	status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
> -				   &clone->cl_dst_stateid, &dst);
> +				   &clone->cl_dst_stateid, &dst, NULL);
>  	if (status)
>  		goto out;
>  
> @@ -1117,8 +1118,6 @@ static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
>  
>  static int nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
>  {
> -	memcpy(&copy->cp_res.cb_stateid, &copy->cp_dst_stateid,
> -		sizeof(copy->cp_dst_stateid));
>  	copy->cp_res.wr_stable_how = NFS_UNSTABLE;
>  	copy->cp_consecutive = 1;
>  	copy->cp_synchronous = sync;
> @@ -1180,10 +1179,15 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
>  	atomic_inc(&dst->cp_clp->cl_refcount);
>  	dst->fh_dst = get_file(src->fh_dst);
>  	dst->fh_src = get_file(src->fh_src);
> +	dst->stid = src->stid;
> +	dst->cps = src->cps;
>  }
>  
>  static void cleanup_async_copy(struct nfsd4_copy *copy)
>  {
> +	list_del(&copy->cps->cp_list);
> +	nfs4_free_cp_state(copy->cps);
> +	nfs4_put_stid(copy->stid);
>  	fput(copy->fh_dst);
>  	fput(copy->fh_src);
>  	spin_lock(&copy->cp_clp->async_lock);
> @@ -1225,7 +1229,7 @@ static int nfsd4_do_async_copy(void *data)
>  
>  	status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
>  				   &copy->fh_src, &copy->cp_dst_stateid,
> -				   &copy->fh_dst);
> +				   &copy->fh_dst, &copy->stid);
>  	if (status)
>  		goto out;
>  
> @@ -1234,15 +1238,27 @@ static int nfsd4_do_async_copy(void *data)
>  		sizeof(struct knfsd_fh));
>  	copy->net = SVC_NET(rqstp);
>  	if (!copy->cp_synchronous) {
> +		struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
> +
>  		status = nfsd4_init_copy_res(copy, 0);
>  		async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
>  		if (!async_copy) {
>  			status = nfserrno(-ENOMEM);
>  			goto out;
>  		}
> +		copy->cps = nfs4_alloc_init_cp_state(nn, copy->stid);
> +		if (!copy->cps) {
> +			status = nfserrno(-ENOMEM);
> +			kfree(async_copy);
> +			goto out;
> +		}
>> +		/* take a reference on the parent stateid so it's not
>> +		 * freed by the copy compound
> +		 */
> +		atomic_inc(&copy->stid->sc_count);
> +		memcpy(&copy->cp_res.cb_stateid, &copy->cps->cp_stateid,
> +			sizeof(copy->cps->cp_stateid));
>  		dup_copy_fields(copy, async_copy);
> -		memcpy(&copy->cp_res.cb_stateid, &copy->cp_dst_stateid,
> -			sizeof(copy->cp_dst_stateid));
>  		spin_lock(&async_copy->cp_clp->async_lock);
>  		list_add(&async_copy->copies,
>  				&async_copy->cp_clp->async_copies);
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index af40762..f9151f2 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -658,6 +658,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
>  	/* Will be incremented before return to client: */
>  	atomic_set(&stid->sc_count, 1);
>  	spin_lock_init(&stid->sc_lock);
> +	INIT_LIST_HEAD(&stid->sc_cp_list);
>  
>  	/*
>  	 * It shouldn't be a problem to reuse an opaque stateid value.
> @@ -674,6 +675,66 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
>  	return NULL;
>  }
>  
> +/*
> + * Create a unique stateid_t to represent each COPY. Hang the copy
> + * stateids off the OPEN/LOCK/DELEG stateid from the client open
> + * the source file.
> + */
> +struct nfs4_cp_state *nfs4_alloc_init_cp_state(struct nfsd_net *nn,
> +					       struct nfs4_stid *p_stid)
> +{
> +	struct nfs4_cp_state *cps;
> +	int new_id;
> +
> +	cps = kzalloc(sizeof(struct nfs4_cp_state), GFP_KERNEL);
> +	if (!cps)
> +		return NULL;
> +	idr_preload(GFP_KERNEL);
> +	spin_lock(&nn->s2s_cp_lock);
> +	new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, cps, 0, 0, GFP_NOWAIT);
> +	spin_unlock(&nn->s2s_cp_lock);
> +	idr_preload_end();
> +	if (new_id < 0)
> +		goto out_free;
> +	cps->cp_stateid.si_opaque.so_id = new_id;
> +	cps->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
> +	cps->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
> +	cps->cp_p_stid = p_stid;
> +	INIT_LIST_HEAD(&cps->cp_list);
> +	list_add(&cps->cp_list, &p_stid->sc_cp_list);
> +
> +	return cps;
> +out_free:
> +	kfree(cps);
> +	return NULL;
> +}
> +
> +void nfs4_free_cp_state(struct nfs4_cp_state *cps)
> +{
> +	struct nfsd_net *nn;
> +
> +	nn = net_generic(cps->cp_p_stid->sc_client->net, nfsd_net_id);
> +	spin_lock(&nn->s2s_cp_lock);
> +	idr_remove(&nn->s2s_cp_stateids, cps->cp_stateid.si_opaque.so_id);
> +	spin_unlock(&nn->s2s_cp_lock);
> +
> +	kfree(cps);
> +}
> +
> +static void nfs4_free_cp_statelist(struct nfs4_stid *stid)
> +{
> +	struct nfs4_cp_state *cps;
> +
> +	might_sleep();
> +
> +	while (!list_empty(&stid->sc_cp_list)) {
> +		cps = list_first_entry(&stid->sc_cp_list, struct nfs4_cp_state,
> +				       cp_list);
> +		list_del(&cps->cp_list);
> +		nfs4_free_cp_state(cps);
> +	}
> +}
> +
>  static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
>  {
>  	struct nfs4_stid *stid;
> @@ -819,6 +880,9 @@ static void block_delegations(struct knfsd_fh *fh)
>  	}
>  	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
>  	spin_unlock(&clp->cl_lock);
> +
> +	nfs4_free_cp_statelist(s);

Is this really safe if there are still ongoing copies?

I think the copies take a reference count on the parent stateid, so I
think we should never actually get here: perhaps this could just be a
WARN_ON(!list_empty(&s->s_cp_list)).  Though nfsd4_copy puts things on
this list before bumping the sc_count, so there may be a race there.

What is supposed to happen, by the way, if a close arrives for a stateid
with a copy in progress?  Do we cancel the copy before returning from
the close, or do we fail the close?  (Same question for unlock and
delegation return, I guess.)

--b.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 05/10] NFSD first draft of async copy
  2018-01-26 21:34   ` J. Bruce Fields
@ 2018-02-02 19:50     ` Olga Kornievskaia
  2018-02-02 19:55       ` J. Bruce Fields
  0 siblings, 1 reply; 48+ messages in thread
From: Olga Kornievskaia @ 2018-02-02 19:50 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Olga Kornievskaia, J. Bruce Fields, linux-nfs

On Fri, Jan 26, 2018 at 4:34 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> If I understand correctly (I may be wrong), once this patch is applied a
> COPY may fail that previously worked--because we're switching over to
> the asynchronous copy implementation before it's actually complete.

I will have to double check this with testing but I think after this
patch the asynchronous copy is functional but doesn't comply with the
spec (e.g., doesn't generate the unique stateid).

> Of course, that's fixed by the end of this series.  But we try to avoid
> that situation, where functionality is temporarily broken in the middle
> of a patch series and then fixed later.
>
> Options might be to squash this patch together with some of the later
> patches.  Or go ahead and add this code but don't actually enable it
> till later.  (E.g. arrange that the "if (!copy->cp_synchronous)" case
> won't be taken till the last patch.  Maybe it already works that way, I
> can't tell.)  Or maybe there's some slicker way that I don't see right
> now.

I could do if (!copy->cp_synchronous && 0) and then add a patch that removes 0.

>
> --b.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 05/10] NFSD first draft of async copy
  2018-02-02 19:50     ` Olga Kornievskaia
@ 2018-02-02 19:55       ` J. Bruce Fields
  0 siblings, 0 replies; 48+ messages in thread
From: J. Bruce Fields @ 2018-02-02 19:55 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: J. Bruce Fields, Olga Kornievskaia, linux-nfs

On Fri, Feb 02, 2018 at 02:50:01PM -0500, Olga Kornievskaia wrote:
> On Fri, Jan 26, 2018 at 4:34 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> > If I understand correctly (I may be wrong), once this patch is applied a
> > COPY may fail that previously worked--because we're switching over to
> > the asynchronous copy implementation before it's actually complete.
> 
> I will have to double check this with testing but I think after this
> patch the asynchronous copy is functional but doesn't comply with the
> spec (eg., doesn't generate the unique stateid).
> 
> > Of course, that's fixed by the end of this series.  But we try to avoid
> > that situation, where functionality is temporarily broken in the middle
> > of a patch series and then fixed later.
> >
> > Options might be to squash this patch together with some of the later
> > patches.  Or go ahead and add this code but don't actually enable it
> > till later.  (E.g. arrange that the "if (!copy->cp_synchronous)" case
> > won't be taken till the last patch.  Maybe it already works that way, I
> > can't tell.)  Or maybe there's some slicker way that I don't see right
> > now.
> 
> I could do if (!copy->cp_synchronous && 0) and then add a patch that removes 0.

OK.  I still don't see the "slick" solution, so let's go with that.

--b.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 07/10] NFSD create new stateid for async copy
  2018-01-26 21:59   ` J. Bruce Fields
@ 2018-02-02 20:45     ` Olga Kornievskaia
  2018-02-02 21:45       ` J. Bruce Fields
  0 siblings, 1 reply; 48+ messages in thread
From: Olga Kornievskaia @ 2018-02-02 20:45 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Olga Kornievskaia, J. Bruce Fields, linux-nfs

On Fri, Jan 26, 2018 at 4:59 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> On Tue, Oct 24, 2017 at 01:47:49PM -0400, Olga Kornievskaia wrote:
>> Generate a new stateid to be used for reply to the asynchronous
>> COPY (this would also be used later by COPY_NOTIFY as well).
>> Associate the stateid with the parent OPEN/LOCK/DELEG stateid
>> that can be freed during the free of the parent stateid. However,
>> right now deciding to bind the lifetime to when the vfs copy
>> is done. This way we don't need to keep the nfsd_net structure for
>> the callback. The drawback is that the time copy state information
>> is available for query by OFFLOAD_STATUS is slightly shorter.
>>
>> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
>> ---
>>  fs/nfsd/netns.h     |  8 +++++++
>>  fs/nfsd/nfs4proc.c  | 32 +++++++++++++++++++-------
>>  fs/nfsd/nfs4state.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  fs/nfsd/nfsctl.c    |  1 +
>>  fs/nfsd/state.h     | 14 ++++++++++++
>>  fs/nfsd/xdr4.h      |  2 ++
>>  6 files changed, 115 insertions(+), 8 deletions(-)
>>
>> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
>> index 3714231..2c88a95 100644
>> --- a/fs/nfsd/netns.h
>> +++ b/fs/nfsd/netns.h
>> @@ -119,6 +119,14 @@ struct nfsd_net {
>>       u32 clverifier_counter;
>>
>>       struct svc_serv *nfsd_serv;
>> +
>> +     /*
>> +      * clientid and stateid data for construction of net unique COPY
>> +      * stateids.
>> +      */
>> +     u32             s2s_cp_cl_id;
>> +     struct idr      s2s_cp_stateids;
>> +     spinlock_t      s2s_cp_lock;
>>  };
>>
>>  /* Simple check to find out if a given net was properly initialized */
>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>> index a020533..6876080 100644
>> --- a/fs/nfsd/nfs4proc.c
>> +++ b/fs/nfsd/nfs4proc.c
>> @@ -1039,7 +1039,8 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
>>  static __be32
>>  nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>                 stateid_t *src_stateid, struct file **src,
>> -               stateid_t *dst_stateid, struct file **dst)
>> +               stateid_t *dst_stateid, struct file **dst,
>> +               struct nfs4_stid **stid)
>>  {
>>       __be32 status;
>>
>> @@ -1053,7 +1054,7 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
>>
>>       status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
>>                                           dst_stateid, WR_STATE, dst, NULL,
>> -                                         NULL);
>> +                                         stid);
>>       if (status) {
>>               dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
>>               goto out_put_src;
>> @@ -1084,7 +1085,7 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
>>       __be32 status;
>>
>>       status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
>> -                                &clone->cl_dst_stateid, &dst);
>> +                                &clone->cl_dst_stateid, &dst, NULL);
>>       if (status)
>>               goto out;
>>
>> @@ -1117,8 +1118,6 @@ static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
>>
>>  static int nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
>>  {
>> -     memcpy(&copy->cp_res.cb_stateid, &copy->cp_dst_stateid,
>> -             sizeof(copy->cp_dst_stateid));
>>       copy->cp_res.wr_stable_how = NFS_UNSTABLE;
>>       copy->cp_consecutive = 1;
>>       copy->cp_synchronous = sync;
>> @@ -1180,10 +1179,15 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
>>       atomic_inc(&dst->cp_clp->cl_refcount);
>>       dst->fh_dst = get_file(src->fh_dst);
>>       dst->fh_src = get_file(src->fh_src);
>> +     dst->stid = src->stid;
>> +     dst->cps = src->cps;
>>  }
>>
>>  static void cleanup_async_copy(struct nfsd4_copy *copy)
>>  {
>> +     list_del(&copy->cps->cp_list);
>> +     nfs4_free_cp_state(copy->cps);
>> +     nfs4_put_stid(copy->stid);
>>       fput(copy->fh_dst);
>>       fput(copy->fh_src);
>>       spin_lock(&copy->cp_clp->async_lock);
>> @@ -1225,7 +1229,7 @@ static int nfsd4_do_async_copy(void *data)
>>
>>       status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
>>                                  &copy->fh_src, &copy->cp_dst_stateid,
>> -                                &copy->fh_dst);
>> +                                &copy->fh_dst, &copy->stid);
>>       if (status)
>>               goto out;
>>
>> @@ -1234,15 +1238,27 @@ static int nfsd4_do_async_copy(void *data)
>>               sizeof(struct knfsd_fh));
>>       copy->net = SVC_NET(rqstp);
>>       if (!copy->cp_synchronous) {
>> +             struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
>> +
>>               status = nfsd4_init_copy_res(copy, 0);
>>               async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
>>               if (!async_copy) {
>>                       status = nfserrno(-ENOMEM);
>>                       goto out;
>>               }
>> +             copy->cps = nfs4_alloc_init_cp_state(nn, copy->stid);
>> +             if (!copy->cps) {
>> +                     status = nfserrno(-ENOMEM);
>> +                     kfree(async_copy);
>> +                     goto out;
>> +             }
>> +             /* take a reference on the parent stateid so it's
>> +              * not freed by the copy compound
>> +              */
>> +             atomic_inc(&copy->stid->sc_count);
>> +             memcpy(&copy->cp_res.cb_stateid, &copy->cps->cp_stateid,
>> +                     sizeof(copy->cps->cp_stateid));
>>               dup_copy_fields(copy, async_copy);
>> -             memcpy(&copy->cp_res.cb_stateid, &copy->cp_dst_stateid,
>> -                     sizeof(copy->cp_dst_stateid));
>>               spin_lock(&async_copy->cp_clp->async_lock);
>>               list_add(&async_copy->copies,
>>                               &async_copy->cp_clp->async_copies);
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index af40762..f9151f2 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -658,6 +658,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
>>       /* Will be incremented before return to client: */
>>       atomic_set(&stid->sc_count, 1);
>>       spin_lock_init(&stid->sc_lock);
>> +     INIT_LIST_HEAD(&stid->sc_cp_list);
>>
>>       /*
>>        * It shouldn't be a problem to reuse an opaque stateid value.
>> @@ -674,6 +675,66 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
>>       return NULL;
>>  }
>>
>> +/*
>> + * Create a unique stateid_t to represent each COPY. Hang the copy
>> + * stateids off the OPEN/LOCK/DELEG stateid from the client open
>> + * the source file.
>> + */
>> +struct nfs4_cp_state *nfs4_alloc_init_cp_state(struct nfsd_net *nn,
>> +                                            struct nfs4_stid *p_stid)
>> +{
>> +     struct nfs4_cp_state *cps;
>> +     int new_id;
>> +
>> +     cps = kzalloc(sizeof(struct nfs4_cp_state), GFP_KERNEL);
>> +     if (!cps)
>> +             return NULL;
>> +     idr_preload(GFP_KERNEL);
>> +     spin_lock(&nn->s2s_cp_lock);
>> +     new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, cps, 0, 0, GFP_NOWAIT);
>> +     spin_unlock(&nn->s2s_cp_lock);
>> +     idr_preload_end();
>> +     if (new_id < 0)
>> +             goto out_free;
>> +     cps->cp_stateid.si_opaque.so_id = new_id;
>> +     cps->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
>> +     cps->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
>> +     cps->cp_p_stid = p_stid;
>> +     INIT_LIST_HEAD(&cps->cp_list);
>> +     list_add(&cps->cp_list, &p_stid->sc_cp_list);
>> +
>> +     return cps;
>> +out_free:
>> +     kfree(cps);
>> +     return NULL;
>> +}
>> +
>> +void nfs4_free_cp_state(struct nfs4_cp_state *cps)
>> +{
>> +     struct nfsd_net *nn;
>> +
>> +     nn = net_generic(cps->cp_p_stid->sc_client->net, nfsd_net_id);
>> +     spin_lock(&nn->s2s_cp_lock);
>> +     idr_remove(&nn->s2s_cp_stateids, cps->cp_stateid.si_opaque.so_id);
>> +     spin_unlock(&nn->s2s_cp_lock);
>> +
>> +     kfree(cps);
>> +}
>> +
>> +static void nfs4_free_cp_statelist(struct nfs4_stid *stid)
>> +{
>> +     struct nfs4_cp_state *cps;
>> +
>> +     might_sleep();
>> +
>> +     while (!list_empty(&stid->sc_cp_list)) {
>> +             cps = list_first_entry(&stid->sc_cp_list, struct nfs4_cp_state,
>> +                                    cp_list);
>> +             list_del(&cps->cp_list);
>> +             nfs4_free_cp_state(cps);
>> +     }
>> +}
>> +
>>  static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
>>  {
>>       struct nfs4_stid *stid;
>> @@ -819,6 +880,9 @@ static void block_delegations(struct knfsd_fh *fh)
>>       }
>>       idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
>>       spin_unlock(&clp->cl_lock);
>> +
>> +     nfs4_free_cp_statelist(s);
>
> Is this really safe if there are still ongoing copies?

You are right. It is not.

> I think the copies take a reference count on the parent stateid, so I
> think we should never actually get here: perhaps this could just be a
> WARN_ON(!list_empty(&s->s_cp_list)).  Though nfsd4_copy puts things on
> this list before bumping the sc_count, so there may be a race there.
>
> What is supposed to happen, by the way, if a close arrives for a stateid
> with a copy in progress?  Do we cancel the copy before returning from
> the close, or do we fail the close?  (Same question for unlock and
> delegation return, I guess.)

Good questions. I think a normal client shouldn't send CLOSE/UNLOCK
while there is an on-going copy. I could see how a DELEGRETURN could
be coming in the middle of the copy (because the client received a
CB_RECALL).

For an easy solution my proposal is to do the following. Upon
receiving close, unlock, delegreturn, if the copy list is non-empty,
then we send an ERR_DELAY back until the copy is done.

Another proposal is to kill the copy thread but that seems too drastic
(especially in the case of a delegreturn). Right now when the copy
thread is killed, it does not send a reply back to the client (as it
assumes the client sent the OFFLOAD_CANCEL and doesn't need a reply).
I could change that to send a reply back. In case the copy used a
delegation stateid, then the next copy would choose a different
stateid. This could be done for the delegation stateid and ERR_DELAY
for the close/unlock.

Is either one acceptable and if so which one sounds better to you?


>
> --b.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 07/10] NFSD create new stateid for async copy
  2018-02-02 20:45     ` Olga Kornievskaia
@ 2018-02-02 21:45       ` J. Bruce Fields
  2018-02-15 22:18         ` Olga Kornievskaia
  0 siblings, 1 reply; 48+ messages in thread
From: J. Bruce Fields @ 2018-02-02 21:45 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: J. Bruce Fields, Olga Kornievskaia, linux-nfs

On Fri, Feb 02, 2018 at 03:45:28PM -0500, Olga Kornievskaia wrote:
> On Fri, Jan 26, 2018 at 4:59 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> > What is supposed to happen, by the way, if a close arrives for a stateid
> > with a copy in progress?  Do we cancel the copy before returning from
> > the close, or do we fail the close?  (Same question for unlock and
> > delegation return, I guess.)
> 
> Good questions. I think a normal client shouldn't send CLOSE/UNLOCK
> while there is an on-going copy. I could see how a DELEGRETURN could
> be coming in the middle of the copy (because the client received a
> CB_RECALL).
> 
> For an easy solution my proposal is to do the following. Upon
> receiving close, unlock, delegreturn, if the copy list is non-empty,
> then we send an ERR_DELAY back until the copy is done.
> 
> Another proposal is to kill the copy thread but that seems too drastic
> (specially in the case of a delegreturn). Right now when the copy
> thread is killed, it does not send a reply back to the client (as it
> assumes the client sent the OFFLOAD_CANCEL and doesn't need a reply).
> I could change that to send a reply back. In case the copy used a
> delegation stateid, then the next copy would choose a different
> stateid. This could be done for the delegation stateid and ERR_DELAY
> for the close/unlock.
> 
> Is either one acceptable and if so which one sounds better to you?

If we're confident this is something that only a misbehaving client
would do, then I'm OK with choosing whatever's simplest.  I guess we
should double-check with the spec to make sure this case hasn't already
been covered there.  Returning an error on the conflicting operation
sounds good to me.

Is it really OK to do a COPY with a delegation stateid?  That sounds
messy if the delegation is revoked partway through the copy.

--b.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 05/10] NFSD first draft of async copy
  2018-01-25 22:04   ` J. Bruce Fields
  2018-01-26 15:17     ` Olga Kornievskaia
@ 2018-02-15 19:59     ` Olga Kornievskaia
  2018-02-15 20:06       ` J. Bruce Fields
  1 sibling, 1 reply; 48+ messages in thread
From: Olga Kornievskaia @ 2018-02-15 19:59 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Olga Kornievskaia, J. Bruce Fields, linux-nfs

On Thu, Jan 25, 2018 at 5:04 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> Nit: this could use a better subject line.
>
> On Tue, Oct 24, 2017 at 01:47:47PM -0400, Olga Kornievskaia wrote:
> ...
>> +     if (!copy->cp_synchronous) {
>> +             status = nfsd4_init_copy_res(copy, 0);
>> +             async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
>> +             if (!async_copy) {
>> +                     status = nfserrno(-ENOMEM);
>> +                     goto out;
>> +             }
>> +             dup_copy_fields(copy, async_copy);
>> +             memcpy(&copy->cp_res.cb_stateid, &copy->cp_dst_stateid,
>> +                     sizeof(copy->cp_dst_stateid));
>> +             spin_lock(&async_copy->cp_clp->async_lock);
>> +             list_add(&async_copy->copies,
>> +                             &async_copy->cp_clp->async_copies);
>> +             spin_unlock(&async_copy->cp_clp->async_lock);
>
> At this point other threads could in theory look up this async_copy, but
> its copy_task field is not yet initialized.  I don't *think* that's a
> problem for nfsd4_shutdown_copy, because I don't think the server could
> be processing rpc's for this client any more at that point.  But I think
> a malicious client might be able to trigger a NULL dereference in
> nfsd4_offload_cancel.
>
> Is there any reason not to assign copy_task before adding it to this
> list?

Now that I'm making changes I don't believe this is an issue. A client
can't send nfsd4_offload_cancel() because it needs a copy stateid to
send it with. And at this point the copy has not been replied to.

>
> --b.
>
>> +             async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
>> +                             async_copy, "%s", "copy thread");
>> +             if (IS_ERR(async_copy->copy_task)) {
>> +                     status = PTR_ERR(async_copy->copy_task);
>> +                     goto out_err_dec;
>> +             }
>> +             wake_up_process(async_copy->copy_task);
>> +     } else {
>> +             status = nfsd4_do_copy(copy, 1);
>>       }
>> -
>> -     fput(src);
>> -     fput(dst);
>>  out:
>>       return status;
>> +out_err_dec:
>> +     cleanup_async_copy(async_copy);
>> +     goto out;
>>  }
>>
>>  static __be32
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 0c04f81..d7767a1 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -1774,6 +1774,8 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
>>  #ifdef CONFIG_NFSD_PNFS
>>       INIT_LIST_HEAD(&clp->cl_lo_states);
>>  #endif
>> +     INIT_LIST_HEAD(&clp->async_copies);
>> +     spin_lock_init(&clp->async_lock);
>>       spin_lock_init(&clp->cl_lock);
>>       rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
>>       return clp;
>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>> index f8b0210..9189062 100644
>> --- a/fs/nfsd/state.h
>> +++ b/fs/nfsd/state.h
>> @@ -352,6 +352,8 @@ struct nfs4_client {
>>       struct rpc_wait_queue   cl_cb_waitq;    /* backchannel callers may */
>>                                               /* wait here for slots */
>>       struct net              *net;
>> +     struct list_head        async_copies;   /* list of async copies */
>> +     spinlock_t              async_lock;     /* lock for async copies */
>>  };
>>
>>  /* struct nfs4_client_reset
>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
>> index 9b0c099..0a19954 100644
>> --- a/fs/nfsd/xdr4.h
>> +++ b/fs/nfsd/xdr4.h
>> @@ -529,6 +529,15 @@ struct nfsd4_copy {
>>       struct nfsd4_callback   cp_cb;
>>       __be32                  nfserr;
>>       struct knfsd_fh         fh;
>> +
>> +     struct nfs4_client      *cp_clp;
>> +
>> +     struct file             *fh_src;
>> +     struct file             *fh_dst;
>> +     struct net              *net;
>> +
>> +     struct list_head        copies;
>> +     struct task_struct      *copy_task;
>>  };
>>
>>  struct nfsd4_seek {
>> --
>> 1.8.3.1
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 05/10] NFSD first draft of async copy
  2018-02-15 19:59     ` Olga Kornievskaia
@ 2018-02-15 20:06       ` J. Bruce Fields
  0 siblings, 0 replies; 48+ messages in thread
From: J. Bruce Fields @ 2018-02-15 20:06 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: Olga Kornievskaia, J. Bruce Fields, linux-nfs

On Thu, Feb 15, 2018 at 02:59:14PM -0500, Olga Kornievskaia wrote:
> On Thu, Jan 25, 2018 at 5:04 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> > Nit: this could use a better subject line.
> >
> > On Tue, Oct 24, 2017 at 01:47:47PM -0400, Olga Kornievskaia wrote:
> > ...
> >> +     if (!copy->cp_synchronous) {
> >> +             status = nfsd4_init_copy_res(copy, 0);
> >> +             async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
> >> +             if (!async_copy) {
> >> +                     status = nfserrno(-ENOMEM);
> >> +                     goto out;
> >> +             }
> >> +             dup_copy_fields(copy, async_copy);
> >> +             memcpy(&copy->cp_res.cb_stateid, &copy->cp_dst_stateid,
> >> +                     sizeof(copy->cp_dst_stateid));
> >> +             spin_lock(&async_copy->cp_clp->async_lock);
> >> +             list_add(&async_copy->copies,
> >> +                             &async_copy->cp_clp->async_copies);
> >> +             spin_unlock(&async_copy->cp_clp->async_lock);
> >
> > At this point other threads could in theory look up this async_copy, but
> > its copy_task field is not yet initialized.  I don't *think* that's a
> > problem for nfsd4_shutdown_copy, because I don't think the server could
> > be processing rpc's for this client any more at that point.  But I think
> > a malicious client might be able to trigger a NULL dereference in
> > nfsd4_offload_cancel.
> >
> > Is there any reason not to assign copy_task before adding it to this
> > list?
> 
> Now that I'm making changes I don't believe this is an issue. A client
> can't send nfsd4_offload_cancel() because it needs a copy stateid to
> send it with. And at this point the copy has not been replied to.

Right, but a malicious client might guess that copy stateid before it
gets the reply.

We want to make sure we're safe from crashing even on input that is very
unlikely.

--b.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 07/10] NFSD create new stateid for async copy
  2018-02-02 21:45       ` J. Bruce Fields
@ 2018-02-15 22:18         ` Olga Kornievskaia
  2018-02-16  1:43           ` J. Bruce Fields
  0 siblings, 1 reply; 48+ messages in thread
From: Olga Kornievskaia @ 2018-02-15 22:18 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: J. Bruce Fields, Olga Kornievskaia, linux-nfs

On Fri, Feb 2, 2018 at 4:45 PM, J. Bruce Fields <bfields@redhat.com> wrote:
> On Fri, Feb 02, 2018 at 03:45:28PM -0500, Olga Kornievskaia wrote:
>> On Fri, Jan 26, 2018 at 4:59 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
>> > What is supposed to happen, by the way, if a close arrives for a stateid
>> > with a copy in progress?  Do we cancel the copy before returning from
>> > the close, or do we fail the close?  (Same question for unlock and
>> > delegation return, I guess.)
>>
>> Good questions. I think a normal client shouldn't send CLOSE/UNLOCK
>> while there is an on-going copy. I could see how a DELEGRETURN could
>> be coming in the middle of the copy (because the client received a
>> CB_RECALL).
>>
>> For an easy solution my proposal is to do the following. Upon
>> receiving close, unlock, delegreturn, if the copy list is non-empty,
>> then we send an ERR_DELAY back until the copy is done.
>>
>> Another proposal is to kill the copy thread but that seems too drastic
>> (specially in the case of a delegreturn). Right now when the copy
>> thread is killed, it does not send a reply back to the client (as it
>> assumes the client sent the OFFLOAD_CANCEL and doesn't need a reply).
>> I could change that to send a reply back. In case the copy used a
>> delegation stateid, then the next copy would choose a different
>> stateid. This could be done for the delegation stateid and ERR_DELAY
>> for the close/unlock.
>>
>> Is either one acceptable and if so which one sounds better to you?
>
> If we're confident this is something that only a misbehaving client
> would do, then I'm OK with choosing whatever's simplest.  I guess we
> should double-check with the spec to make sure this case hasn't already
> been covered there.  Returning an error on the conflicting operation
> sounds good to me.
>
> Is it really OK to do a COPY with a delegation stateid?  That sounds
> messy if the delegation is revoked partway through the copy.

Yes, according to the spec, it is ok to use any of the stateids.

RFC 7862 15.2.3
The ca_dst_stateid MUST refer to a stateid that is valid for a WRITE
   operation and follows the rules for stateids in Sections 8.2.5 and
   18.32.3 of [RFC5661].... while for an intra-server copy
ca_src_stateid MUST refer
   to a stateid that is valid for a READ operation and follows the rules
   for stateids in Sections 8.2.5 and 18.22.3 of [RFC5661].

8.2.5 is the section that says in the decreasing priority to choose
delegation stateid, locking stateid, open stateid.

Trying to deal with the delegation stateid seems to be rather complex.
Here's what I could do (which I mentioned before already):
1. possibly change the client to never use a delegation stateid.
2. for CLOSE/UNLOCK send back ERR_DELAY if copy list is not empty.
3. for DELEGRETURN (or when the server initiates CB_RECALL?) kill any
on-going copies. Change the nfsd copy to send a reply if it was killed
with a signal. What I'm not sure about is whether, when it's killed, I get
any partial bytes from the VFS; I don't think so. I think in that case, the
only thing that I might reply with is a 0-byte copy. And whatever client
did send a copy with a delegation stateid would deal with
restarting it with a different stateid.

Having said all that, I'd like to ask why do you think handling
CLOSE/LOCKU/DELEGRETURN should be any different for async copy vs what
is currently there for the synchronous copy. Right now if the server
receives those operations it does nothing to the on-going synchronous
copy.

As you noted, the async copy takes a reference on the parent stateid
so the state won't go away while the async copy is going on. Why not
keep async copy same as the synchronous one?

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 07/10] NFSD create new stateid for async copy
  2018-02-15 22:18         ` Olga Kornievskaia
@ 2018-02-16  1:43           ` J. Bruce Fields
  2018-02-16 16:06             ` Olga Kornievskaia
  0 siblings, 1 reply; 48+ messages in thread
From: J. Bruce Fields @ 2018-02-16  1:43 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: J. Bruce Fields, Olga Kornievskaia, linux-nfs

On Thu, Feb 15, 2018 at 05:18:55PM -0500, Olga Kornievskaia wrote:
> On Fri, Feb 2, 2018 at 4:45 PM, J. Bruce Fields <bfields@redhat.com> wrote:
> > Is it really OK to do a COPY with a delegation stateid?  That sounds
> > messy if the delegation is revoked partway through the copy.
> 
> Yes, according to the spec, it is ok to use any of the stateids.
> 
> RFC 7862 15.2.3
> The ca_dst_stateid MUST refer to a stateid that is valid for a WRITE
>    operation and follows the rules for stateids in Sections 8.2.5 and
>    18.32.3 of [RFC5661].... while for an intra-server copy
> ca_src_stateid MUST refer
>    to a stateid that is valid for a READ operation and follows the rules
>    for stateids in Sections 8.2.5 and 18.22.3 of [RFC5661].
> 
> 8.2.5 is the section that says in the decreasing priority to choose
> delegation stateid, locking stateid, open stateid.
> 
> Trying to deal with the delegation stateid seems to be rather complex.
> Here's what I could do (which I mentioned before already):
> 1. possibly change the client to never use a delegation stateid.
> 2. for CLOSE/UNLOCK send back ERR_DELAY if copy list is not empty.
> 3. for DELEGRETURN (or when the server initiates CB_RECALL?) kill any
> on-going copies. Change the nfsd copy to send a reply if it was killed
> with a signal. What I'm not sure if when it's killed I get any partial
> bytes from the VFS, I don't think so. I think in that case, the only
> thing that I might reply with is a 0bytes copy. And whatever client
> that did sent a copy with a delegation stateid would deal with
> restarting it with a different stateid.
> 
> Having said all that, I'd like to ask why do you think handling
> CLOSE/LOCKU/DELEGRETURN should be any different for async copy vs what
> is currently there for the synchronous copy. Right now if the server
> receives those operations it does nothing to the on-going synchronous
> copy.

Or for that matter, why should it be different from READ and
WRITE--looks like those can also continue executing after a close.
So, good point.

> As you noted, the async copy takes a reference on the parent stateid
> so the state won't go away while the async copy is going on. Why not
> keep async copy same as the synchronous one?

OK, so maybe it's not such a big deal.

I'm still confused about the delegation case, though: a copy can take a
very long time, so we can't just wait for the copy to end before
revoking the delegation.  So do we allow the copy to continue
indefinitely even after the delegation stateid it was initiated with has
long ago been revoked?

Maybe it's not a problem.  I guess I'm having trouble remembering why
copies (or IO in general) uses stateids in the first place....

--b.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 07/10] NFSD create new stateid for async copy
  2018-02-16  1:43           ` J. Bruce Fields
@ 2018-02-16 16:06             ` Olga Kornievskaia
  2018-02-16 18:12               ` J. Bruce Fields
  0 siblings, 1 reply; 48+ messages in thread
From: Olga Kornievskaia @ 2018-02-16 16:06 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: J. Bruce Fields, Olga Kornievskaia, linux-nfs

On Thu, Feb 15, 2018 at 8:43 PM, J. Bruce Fields <bfields@redhat.com> wrote:
> On Thu, Feb 15, 2018 at 05:18:55PM -0500, Olga Kornievskaia wrote:
>> On Fri, Feb 2, 2018 at 4:45 PM, J. Bruce Fields <bfields@redhat.com> wrote:
>> > Is it really OK to do a COPY with a delegation stateid?  That sounds
>> > messy if the delegation is revoked partway through the copy.
>>
>> Yes, according to the spec, it is ok to use any of the stateids.
>>
>> RFC 7862 15.2.3
>> The ca_dst_stateid MUST refer to a stateid that is valid for a WRITE
>>    operation and follows the rules for stateids in Sections 8.2.5 and
>>    18.32.3 of [RFC5661].... while for an intra-server copy
>> ca_src_stateid MUST refer
>>    to a stateid that is valid for a READ operation and follows the rules
>>    for stateids in Sections 8.2.5 and 18.22.3 of [RFC5661].
>>
>> 8.2.5 is the section that says in the decreasing priority to choose
>> delegation stateid, locking stateid, open stateid.
>>
>> Trying to deal with the delegation stateid seems to be rather complex.
>> Here's what I could do (which I mentioned before already):
>> 1. possibly change the client to never use a delegation stateid.
>> 2. for CLOSE/UNLOCK send back ERR_DELAY if copy list is not empty.
>> 3. for DELEGRETURN (or when the server initiates CB_RECALL?) kill any
>> on-going copies. Change the nfsd copy to send a reply if it was killed
>> with a signal. What I'm not sure if when it's killed I get any partial
>> bytes from the VFS, I don't think so. I think in that case, the only
>> thing that I might reply with is a 0bytes copy. And whatever client
>> that did sent a copy with a delegation stateid would deal with
>> restarting it with a different stateid.
>>
>> Having said all that, I'd like to ask why do you think handling
>> CLOSE/LOCKU/DELEGRETURN should be any different for async copy vs what
>> is currently there for the synchronous copy. Right now if the server
>> receives those operations it does nothing to the on-going synchronous
>> copy.
>
> Or for that matter, why should it be different from READ and
> WRITE--looks like those can also continue executing after a close, too.
> So, good point.
>
>> As you noted, the async copy takes a reference on the parent stateid
>> so the state won't go away while the async copy is going on. Why not
>> keep async copy same as the synchronous one?
>
> OK, so maybe it's not such a big deal.
>
> I'm still confused about the delegation case, though: a copy can take a
> very long time, so we can't just wait for the copy to end before
> revoking the delegation.  So do we allow the copy to continue
> indefinitely even after the delegation stateid it was initiated with has
> long ago been revoked?
>
> Maybe it's not a problem.  I guess I'm having trouble remembering why
> copies (or IO in general) uses stateids in the first place....

Sigh. No you are right, we shouldn't let it run long past the
revocation. With IO operations, once one starts, an operation like
CLOSE/UNLOCK/DELEGRETURN can arrive at the server and it's ok because
IO isn't long, and once it's done the client will notice a change in
stateid and will choose a different stateid to do the next IO
operation. I guess that's why a synchronous copy is still bordering on ok
because it's relatively short and the next copy will choose a
different stateid.

Unless there are any big objections, I'll make the following changes
1. CLOSE/UNLOCK will check for a non-empty list and return ERR_DELAY.
2. change the copy code so that, once it receives a signal, it returns
the partial copy back.
3. DELEGRETURN will stop/kill any on-going copies, which would trigger
a partial copy reply and the next copy should choose a different
stateid.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 08/10] NFSD handle OFFLOAD_CANCEL op
  2017-10-24 17:47 ` [PATCH v6 08/10] NFSD handle OFFLOAD_CANCEL op Olga Kornievskaia
@ 2018-02-16 17:28   ` Olga Kornievskaia
  2018-02-16 18:10     ` J. Bruce Fields
  0 siblings, 1 reply; 48+ messages in thread
From: Olga Kornievskaia @ 2018-02-16 17:28 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: J. Bruce Fields, linux-nfs

On Tue, Oct 24, 2017 at 1:47 PM, Olga Kornievskaia <kolga@netapp.com> wrote:
> Upon receiving OFFLOAD_CANCEL search the list of copy stateids,
> if found then set the SIGPENDING signal so that do_splice stops
> copying and also send kthread_stop to the copy thread to stop
> and wait for it. Take a reference on the copy from the
> offload_cancel thread so that it won't go away while we are
> trying to process it. Server won't be sending CB_OFFLOAD to the
> client since it received a cancel.
>
> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> ---
>  fs/nfsd/nfs4proc.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
>  fs/nfsd/xdr4.h     |  1 +
>  2 files changed, 45 insertions(+), 2 deletions(-)
>
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index 6876080..3b0bb54 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -1097,6 +1097,14 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
>  out:
>         return status;
>  }
> +
> +static void nfs4_put_copy(struct nfsd4_copy *copy)
> +{
> +       if (!atomic_dec_and_test(&copy->refcount))
> +               return;
> +       kfree(copy);
> +}
> +
>  static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
>  {
>         struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb);
> @@ -1134,6 +1142,8 @@ static int _nfsd_copy_file_range(struct nfsd4_copy *copy)
>         u64 dst_pos = copy->cp_dst_pos;
>
>         do {
> +               if (signalled() || kthread_should_stop())
> +                       return -1;
>                 bytes_copied = nfsd_copy_file_range(copy->fh_src, src_pos,
>                                 copy->fh_dst, dst_pos, bytes_total);
>                 if (bytes_copied <= 0)
> @@ -1152,11 +1162,16 @@ static int nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
>         ssize_t bytes;
>
>         bytes = _nfsd_copy_file_range(copy);
> +       if (signalled() || kthread_should_stop()) {
> +               status = -1;
> +               goto cleanup;
> +       }
>         if (bytes < 0 && !copy->cp_res.wr_bytes_written)
>                 status = nfserrno(bytes);
>         else
>                 status = nfsd4_init_copy_res(copy, sync);
>
> +cleanup:
>         fput(copy->fh_src);
>         fput(copy->fh_dst);
>         return status;
> @@ -1194,7 +1209,7 @@ static void cleanup_async_copy(struct nfsd4_copy *copy)
>         list_del(&copy->copies);
>         spin_unlock(&copy->cp_clp->async_lock);
>         atomic_dec(&copy->cp_clp->cl_refcount);
> -       kfree(copy);
> +       nfs4_put_copy(copy);
>  }
>
>  static int nfsd4_do_async_copy(void *data)
> @@ -1203,6 +1218,9 @@ static int nfsd4_do_async_copy(void *data)
>         struct nfsd4_copy *cb_copy;
>
>         copy->nfserr = nfsd4_do_copy(copy, 0);
> +       if (signalled() || kthread_should_stop())
> +               goto out;
> +
>         cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
>         if (!cb_copy)
>                 goto out;
> @@ -1259,6 +1277,7 @@ static int nfsd4_do_async_copy(void *data)
>                 memcpy(&copy->cp_res.cb_stateid, &copy->cps->cp_stateid,
>                         sizeof(copy->cps->cp_stateid));
>                 dup_copy_fields(copy, async_copy);
> +               atomic_set(&async_copy->refcount, 1);
>                 spin_lock(&async_copy->cp_clp->async_lock);
>                 list_add(&async_copy->copies,
>                                 &async_copy->cp_clp->async_copies);
> @@ -1285,7 +1304,30 @@ static int nfsd4_do_async_copy(void *data)
>                      struct nfsd4_compound_state *cstate,
>                      union nfsd4_op_u *u)
>  {
> -       return 0;
> +       struct nfsd4_offload_status *os = &u->offload_status;
> +       __be32 status = 0;
> +       struct nfsd4_copy *copy;
> +       bool found = false;
> +       struct nfs4_client *clp = cstate->clp;
> +
> +       spin_lock(&clp->async_lock);
> +       list_for_each_entry(copy, &clp->async_copies, copies) {
> +               if (memcmp(&copy->cps->cp_stateid, &os->stateid,
> +                               NFS4_STATEID_SIZE))
> +                       continue;
> +               found = true;
> +               atomic_inc(&copy->refcount);
> +               break;
> +       }
> +       spin_unlock(&clp->async_lock);
> +       if (found) {
> +               set_tsk_thread_flag(copy->copy_task, TIF_SIGPENDING);
> +               kthread_stop(copy->copy_task);
> +               nfs4_put_copy(copy);
> +       } else
> +               status = nfserr_bad_stateid;
> +
> +       return status;
>  }
>
>  static __be32
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 27bac6a..3127b58 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -540,6 +540,7 @@ struct nfsd4_copy {
>
>         struct list_head        copies;
>         struct task_struct      *copy_task;
> +       atomic_t                refcount;

Hi Bruce,

Is the code moving away from using atomic_t and using refcount_t
instead. Should I be changing this as well?

>  };
>
>  struct nfsd4_seek {
> --
> 1.8.3.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 08/10] NFSD handle OFFLOAD_CANCEL op
  2018-02-16 17:28   ` Olga Kornievskaia
@ 2018-02-16 18:10     ` J. Bruce Fields
  0 siblings, 0 replies; 48+ messages in thread
From: J. Bruce Fields @ 2018-02-16 18:10 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: Olga Kornievskaia, linux-nfs

On Fri, Feb 16, 2018 at 12:28:19PM -0500, Olga Kornievskaia wrote:
> On Tue, Oct 24, 2017 at 1:47 PM, Olga Kornievskaia <kolga@netapp.com> wrote:
> > Upon receiving OFFLOAD_CANCEL search the list of copy stateids,
> > if found then set the SIGPENDING signal so that do_splice stops
> > copying and also send kthread_stop to the copy thread to stop
> > and wait for it. Take a reference on the copy from the
> > offload_cancel thread so that it won't go away while we are
> > trying to process it. Server won't be sending CB_OFFLOAD to the
> > client since it received a cancel.
> >
> > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > ---
> >  fs/nfsd/nfs4proc.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
> >  fs/nfsd/xdr4.h     |  1 +
> >  2 files changed, 45 insertions(+), 2 deletions(-)
> >
> > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > index 6876080..3b0bb54 100644
> > --- a/fs/nfsd/nfs4proc.c
> > +++ b/fs/nfsd/nfs4proc.c
> > @@ -1097,6 +1097,14 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
> >  out:
> >         return status;
> >  }
> > +
> > +static void nfs4_put_copy(struct nfsd4_copy *copy)
> > +{
> > +       if (!atomic_dec_and_test(&copy->refcount))
> > +               return;
> > +       kfree(copy);
> > +}
> > +
> >  static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
> >  {
> >         struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb);
> > @@ -1134,6 +1142,8 @@ static int _nfsd_copy_file_range(struct nfsd4_copy *copy)
> >         u64 dst_pos = copy->cp_dst_pos;
> >
> >         do {
> > +               if (signalled() || kthread_should_stop())
> > +                       return -1;
> >                 bytes_copied = nfsd_copy_file_range(copy->fh_src, src_pos,
> >                                 copy->fh_dst, dst_pos, bytes_total);
> >                 if (bytes_copied <= 0)
> > @@ -1152,11 +1162,16 @@ static int nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
> >         ssize_t bytes;
> >
> >         bytes = _nfsd_copy_file_range(copy);
> > +       if (signalled() || kthread_should_stop()) {
> > +               status = -1;
> > +               goto cleanup;
> > +       }
> >         if (bytes < 0 && !copy->cp_res.wr_bytes_written)
> >                 status = nfserrno(bytes);
> >         else
> >                 status = nfsd4_init_copy_res(copy, sync);
> >
> > +cleanup:
> >         fput(copy->fh_src);
> >         fput(copy->fh_dst);
> >         return status;
> > @@ -1194,7 +1209,7 @@ static void cleanup_async_copy(struct nfsd4_copy *copy)
> >         list_del(&copy->copies);
> >         spin_unlock(&copy->cp_clp->async_lock);
> >         atomic_dec(&copy->cp_clp->cl_refcount);
> > -       kfree(copy);
> > +       nfs4_put_copy(copy);
> >  }
> >
> >  static int nfsd4_do_async_copy(void *data)
> > @@ -1203,6 +1218,9 @@ static int nfsd4_do_async_copy(void *data)
> >         struct nfsd4_copy *cb_copy;
> >
> >         copy->nfserr = nfsd4_do_copy(copy, 0);
> > +       if (signalled() || kthread_should_stop())
> > +               goto out;
> > +
> >         cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
> >         if (!cb_copy)
> >                 goto out;
> > @@ -1259,6 +1277,7 @@ static int nfsd4_do_async_copy(void *data)
> >                 memcpy(&copy->cp_res.cb_stateid, &copy->cps->cp_stateid,
> >                         sizeof(copy->cps->cp_stateid));
> >                 dup_copy_fields(copy, async_copy);
> > +               atomic_set(&async_copy->refcount, 1);
> >                 spin_lock(&async_copy->cp_clp->async_lock);
> >                 list_add(&async_copy->copies,
> >                                 &async_copy->cp_clp->async_copies);
> > @@ -1285,7 +1304,30 @@ static int nfsd4_do_async_copy(void *data)
> >                      struct nfsd4_compound_state *cstate,
> >                      union nfsd4_op_u *u)
> >  {
> > -       return 0;
> > +       struct nfsd4_offload_status *os = &u->offload_status;
> > +       __be32 status = 0;
> > +       struct nfsd4_copy *copy;
> > +       bool found = false;
> > +       struct nfs4_client *clp = cstate->clp;
> > +
> > +       spin_lock(&clp->async_lock);
> > +       list_for_each_entry(copy, &clp->async_copies, copies) {
> > +               if (memcmp(&copy->cps->cp_stateid, &os->stateid,
> > +                               NFS4_STATEID_SIZE))
> > +                       continue;
> > +               found = true;
> > +               atomic_inc(&copy->refcount);
> > +               break;
> > +       }
> > +       spin_unlock(&clp->async_lock);
> > +       if (found) {
> > +               set_tsk_thread_flag(copy->copy_task, TIF_SIGPENDING);
> > +               kthread_stop(copy->copy_task);
> > +               nfs4_put_copy(copy);
> > +       } else
> > +               status = nfserr_bad_stateid;
> > +
> > +       return status;
> >  }
> >
> >  static __be32
> > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> > index 27bac6a..3127b58 100644
> > --- a/fs/nfsd/xdr4.h
> > +++ b/fs/nfsd/xdr4.h
> > @@ -540,6 +540,7 @@ struct nfsd4_copy {
> >
> >         struct list_head        copies;
> >         struct task_struct      *copy_task;
> > +       atomic_t                refcount;
> 
> Hi Bruce,
> 
> Is the code moving away from using atomic_t and using refcount_t
> instead. Should I be changing this as well?

Yeah, that would be a good idea, thanks.

--b.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 07/10] NFSD create new stateid for async copy
  2018-02-16 16:06             ` Olga Kornievskaia
@ 2018-02-16 18:12               ` J. Bruce Fields
  2018-02-16 20:53                 ` Olga Kornievskaia
  0 siblings, 1 reply; 48+ messages in thread
From: J. Bruce Fields @ 2018-02-16 18:12 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: J. Bruce Fields, Olga Kornievskaia, linux-nfs

On Fri, Feb 16, 2018 at 11:06:07AM -0500, Olga Kornievskaia wrote:
> On Thu, Feb 15, 2018 at 8:43 PM, J. Bruce Fields <bfields@redhat.com> wrote:
> > On Thu, Feb 15, 2018 at 05:18:55PM -0500, Olga Kornievskaia wrote:
> >> On Fri, Feb 2, 2018 at 4:45 PM, J. Bruce Fields <bfields@redhat.com> wrote:
> >> > Is it really OK to do a COPY with a delegation stateid?  That sounds
> >> > messy if the delegation is revoked partway through the copy.
> >>
> >> Yes, according to the spec, it is ok to use any of the stateids.
> >>
> >> RFC 7862 15.2.3
> >> The ca_dst_stateid MUST refer to a stateid that is valid for a WRITE
> >>    operation and follows the rules for stateids in Sections 8.2.5 and
> >>    18.32.3 of [RFC5661].... while for an intra-server copy
> >> ca_src_stateid MUST refer
> >>    to a stateid that is valid for a READ operation and follows the rules
> >>    for stateids in Sections 8.2.5 and 18.22.3 of [RFC5661].
> >>
> >> 8.2.5 is the section that says in the decreasing priority to choose
> >> delegation stateid, locking stateid, open stateid.
> >>
> >> Trying to deal with the delegation stateid seems to be rather complex.
> >> Here's what I could do (which I mentioned before already):
> >> 1. possibly change the client to never use a delegation stateid.
> >> 2. for CLOSE/UNLOCK send back ERR_DELAY if copy list is not empty.
> >> 3. for DELEGRETURN (or when the server initiates CB_RECALL?) kill any
> >> on-going copies. Change the nfsd copy to send a reply if it was killed
> >> with a signal. What I'm not sure if when it's killed I get any partial
> >> bytes from the VFS, I don't think so. I think in that case, the only
> >> thing that I might reply with is a 0bytes copy. And whatever client
> >> that did sent a copy with a delegation stateid would deal with
> >> restarting it with a different stateid.
> >>
> >> Having said all that, I'd like to ask why do you think handling
> >> CLOSE/LOCKU/DELEGRETURN should be any different for async copy vs what
> >> is currently there for the synchronous copy. Right now if the server
> >> receives those operations it does nothing to the on-going synchronous
> >> copy.
> >
> > Or for that matter, why should it be different from READ and
> > WRITE--looks like those can also continue executing after a close, too.
> > So, good point.
> >
> >> As you noted, the async copy takes a reference on the parent stateid
> >> so the state won't go away while the async copy is going on. Why not
> >> keep async copy same as the synchronous one?
> >
> > OK, so maybe it's not such a big deal.
> >
> > I'm still confused about the delegation case, though: a copy can take a
> > very long time, so we can't just wait for the copy to end before
> > revoking the delegation.  So do we allow the copy to continue
> > indefinitely even after the delegation stateid it was initiated with has
> > long ago been revoked?
> >
> > Maybe it's not a problem.  I guess I'm having trouble remembering why
> > copies (or IO in general) uses stateids in the first place....
> 
> Sigh. No you are right, we shouldn't let it run long past the
> revocation.

I'm worried by it running long past the revocation, but I'm also not
managing to come up with a case where it causes an actual problem.

--b.

> With IO operations, once it starts an operation like
> CLOSE/UNLOCK/DELEGRETURN can arrive at the server and it's ok because
> IO isn't long, and once it's done the client will notice a change in
> stateid and will choose a different stateid to do the next IO
> operation. I guess that's why a synchronous copy is still bordering ok
> because it's relatively short and the next copy will choose a
> different stateid.
> 
> Unless there are any big objections, I'll make the following changes
> 1. CLOSE/UNLOCK will check for none-empty list and return ERR_DELAY.
> 2. change the copy code to once received a signal to return the
> partial copy back.
> 3. DELEGRETURN will stop/kill any on-going copies, which would trigger
> a partial copy reply and the next copy should choose a different
> stateid.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 07/10] NFSD create new stateid for async copy
  2018-02-16 18:12               ` J. Bruce Fields
@ 2018-02-16 20:53                 ` Olga Kornievskaia
  2018-02-20 18:48                   ` J. Bruce Fields
  0 siblings, 1 reply; 48+ messages in thread
From: Olga Kornievskaia @ 2018-02-16 20:53 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: J. Bruce Fields, Olga Kornievskaia, linux-nfs

On Fri, Feb 16, 2018 at 1:12 PM, J. Bruce Fields <bfields@redhat.com> wrote:
> On Fri, Feb 16, 2018 at 11:06:07AM -0500, Olga Kornievskaia wrote:
>> On Thu, Feb 15, 2018 at 8:43 PM, J. Bruce Fields <bfields@redhat.com> wrote:
>> > On Thu, Feb 15, 2018 at 05:18:55PM -0500, Olga Kornievskaia wrote:
>> >> On Fri, Feb 2, 2018 at 4:45 PM, J. Bruce Fields <bfields@redhat.com> wrote:
>> >> > Is it really OK to do a COPY with a delegation stateid?  That sounds
>> >> > messy if the delegation is revoked partway through the copy.
>> >>
>> >> Yes, according to the spec, it is ok to use any of the stateids.
>> >>
>> >> RFC 7862 15.2.3
>> >> The ca_dst_stateid MUST refer to a stateid that is valid for a WRITE
>> >>    operation and follows the rules for stateids in Sections 8.2.5 and
>> >>    18.32.3 of [RFC5661].... while for an intra-server copy
>> >> ca_src_stateid MUST refer
>> >>    to a stateid that is valid for a READ operation and follows the rules
>> >>    for stateids in Sections 8.2.5 and 18.22.3 of [RFC5661].
>> >>
>> >> 8.2.5 is the section that says in the decreasing priority to choose
>> >> delegation stateid, locking stateid, open stateid.
>> >>
>> >> Trying to deal with the delegation stateid seems to be rather complex.
>> >> Here's what I could do (which I mentioned before already):
>> >> 1. possibly change the client to never use a delegation stateid.
>> >> 2. for CLOSE/UNLOCK send back ERR_DELAY if copy list is not empty.
>> >> 3. for DELEGRETURN (or when the server initiates CB_RECALL?) kill any
>> >> on-going copies. Change the nfsd copy to send a reply if it was killed
>> >> with a signal. What I'm not sure if when it's killed I get any partial
>> >> bytes from the VFS, I don't think so. I think in that case, the only
>> >> thing that I might reply with is a 0bytes copy. And whatever client
>> >> that did sent a copy with a delegation stateid would deal with
>> >> restarting it with a different stateid.
>> >>
>> >> Having said all that, I'd like to ask why do you think handling
>> >> CLOSE/LOCKU/DELEGRETURN should be any different for async copy vs what
>> >> is currently there for the synchronous copy. Right now if the server
>> >> receives those operations it does nothing to the on-going synchronous
>> >> copy.
>> >
>> > Or for that matter, why should it be different from READ and
>> > WRITE--looks like those can also continue executing after a close, too.
>> > So, good point.
>> >
>> >> As you noted, the async copy takes a reference on the parent stateid
>> >> so the state won't go away while the async copy is going on. Why not
>> >> keep async copy same as the synchronous one?
>> >
>> > OK, so maybe it's not such a big deal.
>> >
>> > I'm still confused about the delegation case, though: a copy can take a
>> > very long time, so we can't just wait for the copy to end before
>> > revoking the delegation.  So do we allow the copy to continue
>> > indefinitely even after the delegation stateid it was initiated with has
>> > long ago been revoked?
>> >
>> > Maybe it's not a problem.  I guess I'm having trouble remembering why
>> > copies (or IO in general) uses stateids in the first place....
>>
>> Sigh. No you are right, we shouldn't let it run long past the
>> revocation.
>
> I'm worried by it running long past the revocation, but I'm also not
> managing to come with a case where it causes an actual problem.

It seems wrong but I'm not sure exactly why. I'm thinking: what if the
server receives all state-closing operations (delegreturn, unlock,
close) and the copy is still going? A file being changed after the
file is closed seems weird. But as I mentioned, a properly working client
should not be unlocking/closing the file until after the copy is done (or
until offload_cancel is sent after ctrl-c).

Who is responsible for making sure that the file isn't being accessed
after it's closed? Is it server's or client's? Like client is making
sure all the writes are done before it does the close. Should it be
client's responsibility as well to make sure there are no on-going
copies before doing the close?

Is the question here about just delegations or are you questioning
whether or not CLOSE/UNLOCK need to be delayed until the copy is
finished?


> --b.
>
>> With IO operations, once it starts an operation like
>> CLOSE/UNLOCK/DELEGRETURN can arrive at the server and it's ok because
>> IO isn't long, and once it's done the client will notice a change in
>> stateid and will choose a different stateid to do the next IO
>> operation. I guess that's why a synchronous copy is still bordering ok
>> because it's relatively short and the next copy will choose a
>> different stateid.
>>
>> Unless there are any big objections, I'll make the following changes
>> 1. CLOSE/UNLOCK will check for none-empty list and return ERR_DELAY.
>> 2. change the copy code to once received a signal to return the
>> partial copy back.
>> 3. DELEGRETURN will stop/kill any on-going copies, which would trigger
>> a partial copy reply and the next copy should choose a different
>> stateid.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 07/10] NFSD create new stateid for async copy
  2018-02-16 20:53                 ` Olga Kornievskaia
@ 2018-02-20 18:48                   ` J. Bruce Fields
  2018-03-06 17:15                     ` Olga Kornievskaia
  0 siblings, 1 reply; 48+ messages in thread
From: J. Bruce Fields @ 2018-02-20 18:48 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: J. Bruce Fields, Olga Kornievskaia, linux-nfs

On Fri, Feb 16, 2018 at 03:53:05PM -0500, Olga Kornievskaia wrote:
> It seems wrong but I'm not sure exactly why. I thinking what if the
> server receives all state closing operations (delegreturn, unlock,
> close) and the copy is still going. A file is being changed after the
> file is closed seem weird. But as I mentioned, properly working client
> should be unlocking/closing the file until after the copy is done (or
> until offload_cancel is sent after ctrl-c).
> 
> Who is responsible for making sure that the file isn't being accessed
> after it's closed? Is it server's or client's? Like client is making
> sure all the writes are done before it does the close. Should it be
> client's responsibility as well to make sure there are no on-going
> copies before doing the close?
> 
> Is the question here about just delegations or are you questioning
> whether or not CLOSE/UNLOCK need to be delayed until the copy is
> finished?

Yeah, I kinda suspect there's a bug in the CLOSE and UNLOCK case too.

I don't know if the client really causes trouble for anyone but itself
if it allows IO to go on past close, unlock, or delegreturn.

If it's still going on after another client acquires a conflicting open,
lock, or delegation, that could be a problem: a client that holds a
mandatory lock, or an open with DENY_WRITE, or a delegation, has a right
not to expect the file to change underneath it.  We should already be
safe against that in the delegation case thanks to the vfs checks in
fs/locks.c:check_conflicting_open().

In the DENY_WRITE case I bet there's still a bug.  Maybe in the mandatory
lock case, too.  But those cases are rare and already have other
problems.

So, I guess I don't care too much unless someone's seeing another
problem.

Still, killing off a long-running copy is probably a good precaution?

--b.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 07/10] NFSD create new stateid for async copy
  2018-02-20 18:48                   ` J. Bruce Fields
@ 2018-03-06 17:15                     ` Olga Kornievskaia
  2018-03-06 19:33                       ` J. Bruce Fields
  0 siblings, 1 reply; 48+ messages in thread
From: Olga Kornievskaia @ 2018-03-06 17:15 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: J. Bruce Fields, Olga Kornievskaia, linux-nfs

On Tue, Feb 20, 2018 at 1:48 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> On Fri, Feb 16, 2018 at 03:53:05PM -0500, Olga Kornievskaia wrote:
>> It seems wrong but I'm not sure exactly why. I thinking what if the
>> server receives all state closing operations (delegreturn, unlock,
>> close) and the copy is still going. A file is being changed after the
>> file is closed seem weird. But as I mentioned, properly working client
>> should be unlocking/closing the file until after the copy is done (or
>> until offload_cancel is sent after ctrl-c).
>>
>> Who is responsible for making sure that the file isn't being accessed
>> after it's closed? Is it server's or client's? Like client is making
>> sure all the writes are done before it does the close. Should it be
>> client's responsibility as well to make sure there are no on-going
>> copies before doing the close?
>>
>> Is the question here about just delegations or are you questioning
>> whether or not CLOSE/UNLOCK need to be delayed until the copy is
>> finished?
>
> Yeah, I kinda suspect there's a bug in the CLOSE and UNLOCK case too.
>
> I don't know if the client really causes trouble for anyone but itself
> if it allows IO to go on past close, unlock, or delegreturn.
>
> If it's still going on after another client acquires a conflicting open,
> lock, or delegation, that could be a problem: a client that holds a
> mandatory lock, or an open with DENY_WRITE, or a delegation, has a right
> not to expect the file to change underneath it.  We should already be
> safe against that in the delegation case thanks to the vfs checks in
> fs/locks.c:check_conflicting_open().
>
> In the DENY_WRITE case I bet there's still bug.  Maybe in the mandatory
> lock case, too.  But those cases are rare and already have other
> problems.
>
> So, I guess I don't care too much unless someone's seeing another
> problem.
>
> Still, killing off a long-running copy is probably a good precaution?

Now that I got back into the code and remembered it again, copy
stateid is never associated with a delegation stateid. Copy is
associated with the destination file's stateid which is opened for
write. Since linux server does not give out write delegations, then
the stateid is either an open stateid or lock stateid (in my testing I
varied not locking and locking the destination file so as to get open
stateid as well as lock stateid to be used in the copy). If you want
me to keep the code for killing off the copy in the delegreturn because in
the future there might be write delegations, I can but I recall the
rule of thumb is to keep it simple, thus I'm inclined to remove it.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v6 07/10] NFSD create new stateid for async copy
  2018-03-06 17:15                     ` Olga Kornievskaia
@ 2018-03-06 19:33                       ` J. Bruce Fields
  0 siblings, 0 replies; 48+ messages in thread
From: J. Bruce Fields @ 2018-03-06 19:33 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: J. Bruce Fields, Olga Kornievskaia, linux-nfs

On Tue, Mar 06, 2018 at 12:15:59PM -0500, Olga Kornievskaia wrote:
> On Tue, Feb 20, 2018 at 1:48 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> > Yeah, I kinda suspect there's a bug in the CLOSE and UNLOCK case too.
> >
> > I don't know if the client really causes trouble for anyone but itself
> > if it allows IO to go on past close, unlock, or delegreturn.
> >
> > If it's still going on after another client acquires a conflicting open,
> > lock, or delegation, that could be a problem: a client that holds a
> > mandatory lock, or an open with DENY_WRITE, or a delegation, has a right
> > not to expect the file to change underneath it.  We should already be
> > safe against that in the delegation case thanks to the vfs checks in
> > fs/locks.c:check_conflicting_open().
> >
> > In the DENY_WRITE case I bet there's still a bug.  Maybe in the mandatory
> > lock case, too.  But those cases are rare and already have other
> > problems.
> >
> > So, I guess I don't care too much unless someone's seeing another
> > problem.
> >
> > Still, killing off a long-running copy is probably a good precaution?
> 
> Now that I got back into the code and remembered it again, copy
> stateid is never associated with a delegation stateid. Copy is
> associated with the destination file's stateid which is opened for
> write. Since the Linux server does not give out write delegations,
> the stateid is either an open stateid or lock stateid (in my testing I
> varied not locking and locking the destination file so as to get open
> stateid as well as lock stateid to be used in the copy). If you want
> me to keep the code for killing off copy in the delegreturn because in
> the future there might be write delegations, I can but I recall the
> rule of thumb is to keep it simple, thus I'm inclined to remove it.

I'm hoping to get to write delegations soon....  And you can actually
write while holding a read delegation.  (Currently knfsd breaks the
delegation in that case, but I'm fixing that.)

That said, I still haven't convinced myself there's any real issue here.
So, I'm fine with leaving that out.

--b.

^ permalink raw reply	[flat|nested] 48+ messages in thread

end of thread, other threads:[~2018-03-06 19:33 UTC | newest]

Thread overview: 48+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-24 17:47 [PATCH v6 00/10] NFSD support for asynchronous COPY Olga Kornievskaia
2017-10-24 17:47 ` [PATCH v6 01/10] NFSD CB_OFFLOAD xdr Olga Kornievskaia
2018-01-25 16:43   ` J. Bruce Fields
2018-01-26 15:16     ` Olga Kornievskaia
2017-10-24 17:47 ` [PATCH v6 02/10] NFSD OFFLOAD_STATUS xdr Olga Kornievskaia
2017-10-24 17:47 ` [PATCH v6 03/10] NFSD OFFLOAD_CANCEL xdr Olga Kornievskaia
2017-10-24 17:47 ` [PATCH v6 04/10] NFSD xdr callback stateid in async COPY reply Olga Kornievskaia
2017-10-24 17:47 ` [PATCH v6 05/10] NFSD first draft of async copy Olga Kornievskaia
2018-01-25 22:04   ` J. Bruce Fields
2018-01-26 15:17     ` Olga Kornievskaia
2018-02-15 19:59     ` Olga Kornievskaia
2018-02-15 20:06       ` J. Bruce Fields
2018-01-25 22:29   ` J. Bruce Fields
2018-01-26 15:17     ` Olga Kornievskaia
2018-01-26 21:34   ` J. Bruce Fields
2018-02-02 19:50     ` Olga Kornievskaia
2018-02-02 19:55       ` J. Bruce Fields
2017-10-24 17:47 ` [PATCH v6 06/10] NFSD return nfs4_stid in nfs4_preprocess_stateid_op Olga Kornievskaia
2017-10-24 17:47 ` [PATCH v6 07/10] NFSD create new stateid for async copy Olga Kornievskaia
2018-01-26 21:37   ` J. Bruce Fields
2018-01-26 21:59   ` J. Bruce Fields
2018-02-02 20:45     ` Olga Kornievskaia
2018-02-02 21:45       ` J. Bruce Fields
2018-02-15 22:18         ` Olga Kornievskaia
2018-02-16  1:43           ` J. Bruce Fields
2018-02-16 16:06             ` Olga Kornievskaia
2018-02-16 18:12               ` J. Bruce Fields
2018-02-16 20:53                 ` Olga Kornievskaia
2018-02-20 18:48                   ` J. Bruce Fields
2018-03-06 17:15                     ` Olga Kornievskaia
2018-03-06 19:33                       ` J. Bruce Fields
2017-10-24 17:47 ` [PATCH v6 08/10] NFSD handle OFFLOAD_CANCEL op Olga Kornievskaia
2018-02-16 17:28   ` Olga Kornievskaia
2018-02-16 18:10     ` J. Bruce Fields
2017-10-24 17:47 ` [PATCH v6 09/10] NFSD support OFFLOAD_STATUS Olga Kornievskaia
2017-10-24 17:47 ` [PATCH v6 10/10] NFSD stop queued async copies on client shutdown Olga Kornievskaia
2018-01-25 22:22   ` J. Bruce Fields
2018-01-26 15:17     ` Olga Kornievskaia
2017-11-03 19:57 ` [PATCH v6 00/10] NFSD support for asynchronous COPY Olga Kornievskaia
2017-11-10 15:01   ` Olga Kornievskaia
2017-11-14  0:48     ` J. Bruce Fields
2017-11-28 20:28       ` Olga Kornievskaia
2017-11-30 20:18         ` J. Bruce Fields
2017-11-30 23:03           ` Olga Kornievskaia
2017-12-04 21:32             ` J. Bruce Fields
     [not found]               ` <CAN-5tyEVSwBmPMtUBJYDdLi7FK2MNMGuDQrrsvp776zD3Jcw0w@mail.gmail.com>
2018-01-22 16:51                 ` Olga Kornievskaia
2018-01-25 22:33                   ` J. Bruce Fields
2018-01-26 15:16                     ` Olga Kornievskaia

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.