All of lore.kernel.org
 help / color / mirror / Atom feed
From: Bryan Schumaker <bjschuma@netapp.com>
To: "J. Bruce Fields" <bfields@fieldses.org>
Cc: <Trond.Myklebust@netapp.com>, <linux-nfs@vger.kernel.org>
Subject: Re: [RFC 4/5] NFSD: Defer copying
Date: Mon, 22 Jul 2013 15:17:29 -0400	[thread overview]
Message-ID: <51ED8549.3040308@netapp.com> (raw)
In-Reply-To: <20130722185002.GB10109@fieldses.org>

On 07/22/2013 02:50 PM, J. Bruce Fields wrote:
> On Fri, Jul 19, 2013 at 05:03:49PM -0400, bjschuma@netapp.com wrote:
>> From: Bryan Schumaker <bjschuma@netapp.com>
>>
>> Rather than performing the copy right away, schedule it to run later and
>> reply to the client.  Later, send a callback to notify the client that
>> the copy has finished.
> 
> I believe you need to implement the referring triple support described
> in http://tools.ietf.org/html/rfc5661#section-2.10.6.3 to fix the race
> described in
> http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3
> .

I'll re-read and re-write.

> 
> I see cb_delay initialized below, but not otherwise used.  Am I missing
> anything?

Whoops!  I was using that earlier to try to fake up a callback, but I eventually decided it's easier to just do the copy asynchronously.  I must have forgotten to take it out :(

> 
> What about OFFLOAD_STATUS and OFFLOAD_ABORT?

I haven't thought out those too much... I haven't thought about a use for them on the client yet.

> 
> In some common cases the reply will be very quick, and we might be
> better off handling it synchronously.  Could we implement a heuristic
> like "copy synchronously if the filesystem has special support or the
> range is less than the maximum iosize, otherwise copy asynchronously"?

I'm sure that can be done, I'm just not sure how to do it yet...

> 
> --b.
> 
> 
>> Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
>> ---
>>  fs/nfsd/nfs4callback.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++++
>>  fs/nfsd/nfs4proc.c     |  59 +++++++++++++++------
>>  fs/nfsd/nfs4state.c    |  11 ++++
>>  fs/nfsd/state.h        |  21 ++++++++
>>  fs/nfsd/xdr4cb.h       |   9 ++++
>>  5 files changed, 221 insertions(+), 15 deletions(-)
>>
>> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
>> index 7f05cd1..8f797e1 100644
>> --- a/fs/nfsd/nfs4callback.c
>> +++ b/fs/nfsd/nfs4callback.c
>> @@ -52,6 +52,9 @@ enum {
>>  	NFSPROC4_CLNT_CB_NULL = 0,
>>  	NFSPROC4_CLNT_CB_RECALL,
>>  	NFSPROC4_CLNT_CB_SEQUENCE,
>> +
>> +	/* NFS v4.2 callback */
>> +	NFSPROC4_CLNT_CB_OFFLOAD,
>>  };
>>  
>>  struct nfs4_cb_compound_hdr {
>> @@ -110,6 +113,7 @@ enum nfs_cb_opnum4 {
>>  	OP_CB_WANTS_CANCELLED		= 12,
>>  	OP_CB_NOTIFY_LOCK		= 13,
>>  	OP_CB_NOTIFY_DEVICEID		= 14,
>> +	OP_CB_OFFLOAD			= 15,
>>  	OP_CB_ILLEGAL			= 10044
>>  };
>>  
>> @@ -469,6 +473,31 @@ out_default:
>>  	return nfs_cb_stat_to_errno(nfserr);
>>  }
>>  
>> +static void encode_cb_offload4args(struct xdr_stream *xdr,
>> +				   const struct nfs4_cb_offload *offload,
>> +				   struct nfs4_cb_compound_hdr *hdr)
>> +{
>> +	__be32 *p;
>> +
>> +	if (hdr->minorversion < 2)
>> +		return;
>> +
>> +	encode_nfs_cb_opnum4(xdr, OP_CB_OFFLOAD);
>> +	encode_nfs_fh4(xdr, &offload->co_dst_fh);
>> +	encode_stateid4(xdr, &offload->co_stid->sc_stateid);
>> +
>> +	p = xdr_reserve_space(xdr, 4);
>> +	*p = cpu_to_be32(1);
>> +	encode_stateid4(xdr, &offload->co_stid->sc_stateid);
>> +
>> +	p = xdr_reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE);
>> +	p = xdr_encode_hyper(p, offload->co_count);
>> +	*p++ = cpu_to_be32(offload->co_stable_how);
>> +	xdr_encode_opaque_fixed(p, offload->co_verifier.data, NFS4_VERIFIER_SIZE);
>> +
>> +	hdr->nops++;
>> +}
>> +
>>  /*
>>   * NFSv4.0 and NFSv4.1 XDR encode functions
>>   *
>> @@ -505,6 +534,23 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
>>  	encode_cb_nops(&hdr);
>>  }
>>  
>> +/*
>> + * CB_OFFLOAD
>> + */
>> +static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req, struct xdr_stream *xdr,
>> +				    const struct nfsd4_callback *cb)
>> +{
>> +	const struct nfs4_cb_offload *args = cb->cb_op;
>> +	struct nfs4_cb_compound_hdr hdr = {
>> +		.ident = cb->cb_clp->cl_cb_ident,
>> +		.minorversion = cb->cb_minorversion,
>> +	};
>> +
>> +	encode_cb_compound4args(xdr, &hdr);
>> +	encode_cb_sequence4args(xdr, cb, &hdr);
>> +	encode_cb_offload4args(xdr, args, &hdr);
>> +	encode_cb_nops(&hdr);
>> +}
>>  
>>  /*
>>   * NFSv4.0 and NFSv4.1 XDR decode functions
>> @@ -552,6 +598,36 @@ out:
>>  }
>>  
>>  /*
>> + * CB_OFFLOAD
>> + */
>> +static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
>> +				   struct nfsd4_callback *cb)
>> +{
>> +	struct nfs4_cb_compound_hdr hdr;
>> +	enum nfsstat4 nfserr;
>> +	int status;
>> +
>> +	status = decode_cb_compound4res(xdr, &hdr);
>> +	if (unlikely(status))
>> +		goto out;
>> +
>> +	if (cb != NULL) {
>> +		status = decode_cb_sequence4res(xdr, cb);
>> +		if (unlikely(status))
>> +			goto out;
>> +	}
>> +
>> +	status = decode_cb_op_status(xdr, OP_CB_OFFLOAD, &nfserr);
>> +	if (unlikely(status))
>> +		goto out;
>> +	if (unlikely(nfserr != NFS4_OK))
>> +		status = nfs_cb_stat_to_errno(nfserr);
>> +
>> +out:
>> +	return status;
>> +}
>> +
>> +/*
>>   * RPC procedure tables
>>   */
>>  #define PROC(proc, call, argtype, restype)				\
>> @@ -568,6 +644,7 @@ out:
>>  static struct rpc_procinfo nfs4_cb_procedures[] = {
>>  	PROC(CB_NULL,	NULL,		cb_null,	cb_null),
>>  	PROC(CB_RECALL,	COMPOUND,	cb_recall,	cb_recall),
>> +	PROC(CB_OFFLOAD, COMPOUND,	cb_offload,	cb_offload),
>>  };
>>  
>>  static struct rpc_version nfs_cb_version4 = {
>> @@ -1017,6 +1094,11 @@ void nfsd4_init_callback(struct nfsd4_callback *cb)
>>  	INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc);
>>  }
>>  
>> +void nfsd4_init_delayed_callback(struct nfsd4_callback *cb)
>> +{
>> +	INIT_DELAYED_WORK(&cb->cb_delay, nfsd4_do_callback_rpc);
>> +}
>> +
>>  void nfsd4_cb_recall(struct nfs4_delegation *dp)
>>  {
>>  	struct nfsd4_callback *cb = &dp->dl_recall;
>> @@ -1036,3 +1118,57 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp)
>>  
>>  	run_nfsd4_cb(&dp->dl_recall);
>>  }
>> +
>> +static void nfsd4_cb_offload_done(struct rpc_task *task, void *calldata)
>> +{
>> +	struct nfsd4_callback *cb = calldata;
>> +	struct nfs4_client *clp = cb->cb_clp;
>> +	struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
>> +
>> +	nfsd4_cb_done(task, calldata);
>> +
>> +	if (current_rpc_client != task->tk_client)
>> +		return;
>> +
>> +	if (cb->cb_done)
>> +		return;
>> +
>> +	if (task->tk_status != 0)
>> +		nfsd4_mark_cb_down(clp, task->tk_status);
>> +	cb->cb_done = true;
>> +}
>> +
>> +static void nfsd4_cb_offload_release(void *calldata)
>> +{
>> +	struct nfsd4_callback *cb = calldata;
>> +	struct nfs4_cb_offload *offload = container_of(cb, struct nfs4_cb_offload, co_callback);
>> +
>> +	if (cb->cb_done) {
>> +		nfs4_free_offload_stateid(offload->co_stid);
>> +		kfree(offload);
>> +	}
>> +}
>> +
>> +static const struct rpc_call_ops nfsd4_cb_offload_ops = {
>> +	.rpc_call_prepare = nfsd4_cb_prepare,
>> +	.rpc_call_done    = nfsd4_cb_offload_done,
>> +	.rpc_release      = nfsd4_cb_offload_release,
>> +};
>> +
>> +void nfsd4_cb_offload(struct nfs4_cb_offload *offload)
>> +{
>> +	struct nfsd4_callback *cb = &offload->co_callback;
>> +
>> +	cb->cb_op = offload;
>> +	cb->cb_clp = offload->co_stid->sc_client;
>> +	cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_OFFLOAD];
>> +	cb->cb_msg.rpc_argp = cb;
>> +	cb->cb_msg.rpc_resp = cb;
>> +
>> +	cb->cb_ops = &nfsd4_cb_offload_ops;
>> +
>> +	INIT_LIST_HEAD(&cb->cb_per_client);
>> +	cb->cb_done = true;
>> +
>> +	run_nfsd4_cb(cb);
>> +}
>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>> index d4584ea..66a787f 100644
>> --- a/fs/nfsd/nfs4proc.c
>> +++ b/fs/nfsd/nfs4proc.c
>> @@ -35,6 +35,7 @@
>>  #include <linux/file.h>
>>  #include <linux/slab.h>
>>  
>> +#include "state.h"
>>  #include "idmap.h"
>>  #include "cache.h"
>>  #include "xdr4.h"
>> @@ -1062,29 +1063,57 @@ out:
>>  	return status;
>>  }
>>  
>> +static void
>> +nfsd4_copy_async(struct work_struct *w)
>> +{
>> +	__be32 status;
>> +	struct nfs4_cb_offload *offload;
>> +
>> +	offload = container_of(w, struct nfs4_cb_offload, co_work);
>> +	status  = nfsd_copy_range(offload->co_src_file, offload->co_src_pos,
>> +				  offload->co_dst_file, offload->co_dst_pos,
>> +				  offload->co_count);
>> +
>> +	if (status == nfs_ok) {
>> +		offload->co_stable_how = NFS_FILE_SYNC;
>> +		gen_boot_verifier(&offload->co_verifier, offload->co_net);
>> +		fput(offload->co_src_file);
>> +		fput(offload->co_dst_file);
>> +	}
>> +	nfsd4_cb_offload(offload);
>> +}
>> +
>>  static __be32
>>  nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>  		struct nfsd4_copy *copy)
>>  {
>> -	__be32 status;
>>  	struct file *src = NULL, *dst = NULL;
>> +	struct nfs4_cb_offload *offload;
>>  
>> -	status = nfsd4_verify_copy(rqstp, cstate, copy, &src, &dst);
>> -	if (status)
>> -		return status;
>> -
>> -	status = nfsd_copy_range(src, copy->cp_src_pos,
>> -				 dst, copy->cp_dst_pos,
>> -				 copy->cp_count);
>> +	if (nfsd4_verify_copy(rqstp, cstate, copy, &src, &dst))
>> +		return nfserr_jukebox;
>>  
>> -	if (status == nfs_ok) {
>> -		copy->cp_res.wr_stateid = NULL;
>> -		copy->cp_res.wr_bytes_written = copy->cp_count;
>> -		copy->cp_res.wr_stable_how = NFS_FILE_SYNC;
>> -		gen_boot_verifier(&copy->cp_res.wr_verifier, SVC_NET(rqstp));
>> -	}
>> +	offload = kmalloc(sizeof(struct nfs4_cb_offload), GFP_KERNEL);
>> +	if (!offload)
>> +		return nfserr_jukebox;
>>  
>> -	return status;
>> +	offload->co_src_file = get_file(src);
>> +	offload->co_dst_file = get_file(dst);
>> +	offload->co_src_pos  = copy->cp_src_pos;
>> +	offload->co_dst_pos  = copy->cp_dst_pos;
>> +	offload->co_count    = copy->cp_count;
>> +	offload->co_stid     = nfs4_alloc_offload_stateid(cstate->session->se_client);
>> +	offload->co_net      = SVC_NET(rqstp);
>> +	INIT_WORK(&offload->co_work, nfsd4_copy_async);
>> +	nfsd4_init_callback(&offload->co_callback);
>> +	memcpy(&offload->co_dst_fh, &cstate->current_fh, sizeof(struct knfsd_fh));
>> +
>> +	copy->cp_res.wr_stateid = &offload->co_stid->sc_stateid;
>> +	copy->cp_res.wr_bytes_written = 0;
>> +	copy->cp_res.wr_stable_how = NFS_UNSTABLE;
>> +
>> +	schedule_work(&offload->co_work);
>> +	return nfs_ok;
>>  }
>>  
>>  /* This routine never returns NFS_OK!  If there are no other errors, it
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index c4e270e..582edb5 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -364,6 +364,11 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp)
>>  	return openlockstateid(nfs4_alloc_stid(clp, stateid_slab));
>>  }
>>  
>> +struct nfs4_stid *nfs4_alloc_offload_stateid(struct nfs4_client *clp)
>> +{
>> +	return nfs4_alloc_stid(clp, stateid_slab);
>> +}
>> +
>>  static struct nfs4_delegation *
>>  alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh)
>>  {
>> @@ -617,6 +622,12 @@ static void free_generic_stateid(struct nfs4_ol_stateid *stp)
>>  	kmem_cache_free(stateid_slab, stp);
>>  }
>>  
>> +void nfs4_free_offload_stateid(struct nfs4_stid *stid)
>> +{
>> +	remove_stid(stid);
>> +	kmem_cache_free(stateid_slab, stid);
>> +}
>> +
>>  static void release_lock_stateid(struct nfs4_ol_stateid *stp)
>>  {
>>  	struct file *file;
>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>> index 2478805..56682fb 100644
>> --- a/fs/nfsd/state.h
>> +++ b/fs/nfsd/state.h
>> @@ -70,6 +70,7 @@ struct nfsd4_callback {
>>  	struct rpc_message cb_msg;
>>  	const struct rpc_call_ops *cb_ops;
>>  	struct work_struct cb_work;
>> +	struct delayed_work cb_delay;
>>  	bool cb_done;
>>  };
>>  
>> @@ -101,6 +102,22 @@ struct nfs4_delegation {
>>  	struct nfsd4_callback	dl_recall;
>>  };
>>  
>> +struct nfs4_cb_offload {
>> +	struct file	*co_src_file;
>> +	struct file	*co_dst_file;
>> +	u64		co_src_pos;
>> +	u64		co_dst_pos;
>> +	u64		co_count;
>> +	u32		co_stable_how;
>> +	struct knfsd_fh	co_dst_fh;
>> +	nfs4_verifier	co_verifier;
>> +	struct net	*co_net;
>> +
>> +	struct nfs4_stid		*co_stid;
>> +	struct work_struct		co_work;
>> +	struct nfsd4_callback		co_callback;
>> +};
>> +
>>  /* client delegation callback info */
>>  struct nfs4_cb_conn {
>>  	/* SETCLIENTID info */
>> @@ -468,10 +485,12 @@ extern void nfs4_free_openowner(struct nfs4_openowner *);
>>  extern void nfs4_free_lockowner(struct nfs4_lockowner *);
>>  extern int set_callback_cred(void);
>>  extern void nfsd4_init_callback(struct nfsd4_callback *);
>> +extern void nfsd4_init_delayed_callback(struct nfsd4_callback *);
>>  extern void nfsd4_probe_callback(struct nfs4_client *clp);
>>  extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
>>  extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
>>  extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
>> +extern void nfsd4_cb_offload(struct nfs4_cb_offload *);
>>  extern int nfsd4_create_callback_queue(void);
>>  extern void nfsd4_destroy_callback_queue(void);
>>  extern void nfsd4_shutdown_callback(struct nfs4_client *);
>> @@ -480,6 +499,8 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name,
>>  							struct nfsd_net *nn);
>>  extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn);
>>  extern void put_client_renew(struct nfs4_client *clp);
>> +extern struct nfs4_stid *nfs4_alloc_offload_stateid(struct nfs4_client *);
>> +extern void nfs4_free_offload_stateid(struct nfs4_stid *);
>>  
>>  /* nfs4recover operations */
>>  extern int nfsd4_client_tracking_init(struct net *net);
>> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
>> index c5c55df..75b0ef7 100644
>> --- a/fs/nfsd/xdr4cb.h
>> +++ b/fs/nfsd/xdr4cb.h
>> @@ -21,3 +21,12 @@
>>  #define NFS4_dec_cb_recall_sz		(cb_compound_dec_hdr_sz  +      \
>>  					cb_sequence_dec_sz +            \
>>  					op_dec_sz)
>> +
>> +#define NFS4_enc_cb_offload_sz		(cb_compound_enc_hdr_sz +      \
>> +					 cb_sequence_enc_sz +          \
>> +					 1 + enc_stateid_sz + 2 + 1 +  \
>> +					 XDR_QUADLEN(NFS4_VERIFIER_SIZE))
>> +
>> +#define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz +  \
>> +					 cb_sequence_dec_sz +      \
>> +					 op_dec_sz)
>> -- 
>> 1.8.3.3
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html


  reply	other threads:[~2013-07-22 19:17 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-07-19 21:03 [RFC 0/5] NFS Server Side Copy bjschuma
2013-07-19 21:03 ` [RFC 1/5] Improve on the copyfile systemcall bjschuma
2013-07-19 21:03 ` [RFC 2/5] NFSD: Implement the COPY call bjschuma
2013-07-22 18:05   ` J. Bruce Fields
2013-07-22 18:59     ` Bryan Schumaker
2013-07-19 21:03 ` [RFC 3/5] NFS: Add COPY nfs operation bjschuma
2013-07-24 14:21   ` Myklebust, Trond
2013-07-19 21:03 ` [RFC 4/5] NFSD: Defer copying bjschuma
2013-07-22 18:50   ` J. Bruce Fields
2013-07-22 19:17     ` Bryan Schumaker [this message]
2013-07-22 19:30       ` J. Bruce Fields
2013-07-22 19:37         ` Bryan Schumaker
2013-07-22 19:43           ` J. Bruce Fields
2013-07-22 19:54             ` Bryan Schumaker
2013-07-22 19:55               ` J. Bruce Fields
2013-08-05  8:38                 ` Ric Wheeler
2013-08-05 14:41                   ` J. Bruce Fields
2013-08-05 14:44                     ` Ric Wheeler
2013-08-05 14:56                       ` J. Bruce Fields
2013-08-05 14:44                     ` Bryan Schumaker
2013-08-05 14:50                     ` Myklebust, Trond
2013-08-05 18:11                       ` J. Bruce Fields
2013-08-05 18:17                         ` Chuck Lever
2013-08-05 18:24                           ` Myklebust, Trond
2013-08-05 18:30                         ` J. Bruce Fields
2013-07-19 21:03 ` [RFC 5/5] NFS: Change copy to support async servers bjschuma
2013-07-24 14:28   ` Myklebust, Trond
2013-07-22 18:53 ` [RFC 0/5] NFS Server Side Copy J. Bruce Fields
2013-07-22 19:38   ` Bryan Schumaker
2013-07-22 19:42     ` J. Bruce Fields

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=51ED8549.3040308@netapp.com \
    --to=bjschuma@netapp.com \
    --cc=Trond.Myklebust@netapp.com \
    --cc=bfields@fieldses.org \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.