All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 0/2] add support for CB_RECALL_ANY and the delegation shrinker
@ 2022-10-28  2:16 Dai Ngo
  2022-10-28  2:16 ` [PATCH v3 1/2] NFSD: add support for sending CB_RECALL_ANY Dai Ngo
  2022-10-28  2:16 ` [PATCH v3 2/2] NFSD: add delegation shrinker to react to low memory condition Dai Ngo
  0 siblings, 2 replies; 6+ messages in thread
From: Dai Ngo @ 2022-10-28  2:16 UTC (permalink / raw)
  To: chuck.lever, jlayton; +Cc: linux-nfs

This patch series adds:

    . support for sending the CB_RECALL_ANY op.
      There is only one nfsd4_callback, cl_recall_any, added for each
      nfs4_client. Access to it must be serialized. For now it's done
      by the cl_recall_any_busy flag since it's used only by the
      delegation shrinker. If there is another consumer of cl_recall_any
      then a spinlock must be used.

    . the delegation shrinker that sends the advisory CB_RECALL_ANY 
      to the clients to release unused delegations.

v2:
    . modify deleg_reaper to check and send CB_RECALL_ANY to client
      only once per 5 secs.
v3:
    . modify nfsd4_cb_recall_any_release to use nn->client_lock to
      protect cl_recall_any_busy and call put_client_renew_locked
      to decrement cl_rpc_users.
---

Dai Ngo (2):
     NFSD: add support for sending CB_RECALL_ANY
     NFSD: add delegation shrinker to react to low memory condition

 fs/nfsd/netns.h        |   3 ++
 fs/nfsd/nfs4callback.c |  64 ++++++++++++++++++++++++++
 fs/nfsd/nfs4state.c    | 106 +++++++++++++++++++++++++++++++++++++++++++-
 fs/nfsd/state.h        |  10 +++++
 fs/nfsd/xdr4cb.h       |   6 +++
 5 files changed, 188 insertions(+), 1 deletion(-)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v3 1/2] NFSD: add support for sending CB_RECALL_ANY
  2022-10-28  2:16 [PATCH v3 0/2] add support for CB_RECALL_ANY and the delegation shrinker Dai Ngo
@ 2022-10-28  2:16 ` Dai Ngo
  2022-10-28 14:11   ` Chuck Lever III
  2022-10-28  2:16 ` [PATCH v3 2/2] NFSD: add delegation shrinker to react to low memory condition Dai Ngo
  1 sibling, 1 reply; 6+ messages in thread
From: Dai Ngo @ 2022-10-28  2:16 UTC (permalink / raw)
  To: chuck.lever, jlayton; +Cc: linux-nfs

There is only one nfsd4_callback, cl_recall_any, added for each
nfs4_client. Access to it must be serialized. For now it's done
by the cl_recall_any_busy flag since it's used only by the
delegation shrinker. If there is another consumer of CB_RECALL_ANY
then a spinlock must be used.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfsd/nfs4callback.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/nfs4state.c    | 32 +++++++++++++++++++++++++
 fs/nfsd/state.h        |  8 +++++++
 fs/nfsd/xdr4cb.h       |  6 +++++
 4 files changed, 110 insertions(+)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index f0e69edf5f0f..03587e1397f4 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -329,6 +329,29 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
 }
 
 /*
+ * CB_RECALLANY4args
+ *
+ *	struct CB_RECALLANY4args {
+ *		uint32_t	craa_objects_to_keep;
+ *		bitmap4		craa_type_mask;
+ *	};
+ */
+static void
+encode_cb_recallany4args(struct xdr_stream *xdr,
+			struct nfs4_cb_compound_hdr *hdr, uint32_t bmval)
+{
+	__be32 *p;
+
+	encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
+	p = xdr_reserve_space(xdr, 4);
+	*p++ = xdr_zero;	/* craa_objects_to_keep */
+	p = xdr_reserve_space(xdr, 8);
+	*p++ = cpu_to_be32(1);
+	*p++ = cpu_to_be32(bmval);
+	hdr->nops++;
+}
+
+/*
  * CB_SEQUENCE4args
  *
  *	struct CB_SEQUENCE4args {
@@ -482,6 +505,24 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_cb_nops(&hdr);
 }
 
+/*
+ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
+ */
+static void
+nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
+		struct xdr_stream *xdr, const void *data)
+{
+	const struct nfsd4_callback *cb = data;
+	struct nfs4_cb_compound_hdr hdr = {
+		.ident = cb->cb_clp->cl_cb_ident,
+		.minorversion = cb->cb_clp->cl_minorversion,
+	};
+
+	encode_cb_compound4args(xdr, &hdr);
+	encode_cb_sequence4args(xdr, cb, &hdr);
+	encode_cb_recallany4args(xdr, &hdr, cb->cb_clp->cl_recall_any_bm);
+	encode_cb_nops(&hdr);
+}
 
 /*
  * NFSv4.0 and NFSv4.1 XDR decode functions
@@ -520,6 +561,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
 	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
 }
 
+/*
+ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
+ */
+static int
+nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
+				  struct xdr_stream *xdr,
+				  void *data)
+{
+	struct nfsd4_callback *cb = data;
+	struct nfs4_cb_compound_hdr hdr;
+	int status;
+
+	status = decode_cb_compound4res(xdr, &hdr);
+	if (unlikely(status))
+		return status;
+	status = decode_cb_sequence4res(xdr, cb);
+	if (unlikely(status || cb->cb_seq_status))
+		return status;
+	status =  decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
+	return status;
+}
+
 #ifdef CONFIG_NFSD_PNFS
 /*
  * CB_LAYOUTRECALL4args
@@ -783,6 +846,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
 #endif
 	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
 	PROC(CB_OFFLOAD,	COMPOUND,	cb_offload,	cb_offload),
+	PROC(CB_RECALL_ANY,	COMPOUND,	cb_recall_any,	cb_recall_any),
 };
 
 static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4e718500a00c..68d049973ce3 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2854,6 +2854,36 @@ static const struct tree_descr client_files[] = {
 	[3] = {""},
 };
 
+static int
+nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
+			struct rpc_task *task)
+{
+	switch (task->tk_status) {
+	case -NFS4ERR_DELAY:
+		rpc_delay(task, 2 * HZ);
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+static void
+nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
+{
+	struct nfs4_client *clp = cb->cb_clp;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+	spin_lock(&nn->client_lock);
+	clp->cl_recall_any_busy = false;
+	put_client_renew_locked(clp);
+	spin_unlock(&nn->client_lock);
+}
+
+static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
+	.done		= nfsd4_cb_recall_any_done,
+	.release	= nfsd4_cb_recall_any_release,
+};
+
 static struct nfs4_client *create_client(struct xdr_netobj name,
 		struct svc_rqst *rqstp, nfs4_verifier *verf)
 {
@@ -2891,6 +2921,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
 		free_client(clp);
 		return NULL;
 	}
+	nfsd4_init_cb(&clp->cl_recall_any, clp, &nfsd4_cb_recall_any_ops,
+			NFSPROC4_CLNT_CB_RECALL_ANY);
 	return clp;
 }
 
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e2daef3cc003..49ca06169642 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -411,6 +411,10 @@ struct nfs4_client {
 
 	unsigned int		cl_state;
 	atomic_t		cl_delegs_in_recall;
+
+	bool			cl_recall_any_busy;
+	uint32_t		cl_recall_any_bm;
+	struct nfsd4_callback	cl_recall_any;
 };
 
 /* struct nfs4_client_reset
@@ -639,8 +643,12 @@ enum nfsd4_cb_op {
 	NFSPROC4_CLNT_CB_OFFLOAD,
 	NFSPROC4_CLNT_CB_SEQUENCE,
 	NFSPROC4_CLNT_CB_NOTIFY_LOCK,
+	NFSPROC4_CLNT_CB_RECALL_ANY,
 };
 
+#define RCA4_TYPE_MASK_RDATA_DLG	0
+#define RCA4_TYPE_MASK_WDATA_DLG	1
+
 /* Returns true iff a is later than b: */
 static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
 {
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index 547cf07cf4e0..0d39af1b00a0 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -48,3 +48,9 @@
 #define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz  +      \
 					cb_sequence_dec_sz +            \
 					op_dec_sz)
+#define NFS4_enc_cb_recall_any_sz	(cb_compound_enc_hdr_sz +       \
+					cb_sequence_enc_sz +            \
+					1 + 1 + 1)
+#define NFS4_dec_cb_recall_any_sz	(cb_compound_dec_hdr_sz  +      \
+					cb_sequence_dec_sz +            \
+					op_dec_sz)
-- 
2.9.5


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v3 2/2] NFSD: add delegation shrinker to react to low memory condition
  2022-10-28  2:16 [PATCH v3 0/2] add support for CB_RECALL_ANY and the delegation shrinker Dai Ngo
  2022-10-28  2:16 ` [PATCH v3 1/2] NFSD: add support for sending CB_RECALL_ANY Dai Ngo
@ 2022-10-28  2:16 ` Dai Ngo
  1 sibling, 0 replies; 6+ messages in thread
From: Dai Ngo @ 2022-10-28  2:16 UTC (permalink / raw)
  To: chuck.lever, jlayton; +Cc: linux-nfs

The delegation shrinker is scheduled to run on every shrinker's
'count' callback. It scans the client list and sends the
courtesy CB_RECALL_ANY to the clients that hold delegations.

To avoid flooding the clients with CB_RECALL_ANY requests, the
delegation shrinker is scheduled to run after a 5-second delay.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfsd/netns.h     |  3 +++
 fs/nfsd/nfs4state.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/nfsd/state.h     |  2 ++
 3 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 8c854ba3285b..394a05fb46d8 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -196,6 +196,9 @@ struct nfsd_net {
 	atomic_t		nfsd_courtesy_clients;
 	struct shrinker		nfsd_client_shrinker;
 	struct delayed_work	nfsd_shrinker_work;
+
+	struct shrinker		nfsd_deleg_shrinker;
+	struct delayed_work	nfsd_deleg_shrinker_work;
 };
 
 /* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 68d049973ce3..c01006340912 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2921,6 +2921,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
 		free_client(clp);
 		return NULL;
 	}
+	clp->cl_recall_any_time = 0;
 	nfsd4_init_cb(&clp->cl_recall_any, clp, &nfsd4_cb_recall_any_ops,
 			NFSPROC4_CLNT_CB_RECALL_ANY);
 	return clp;
@@ -4397,11 +4398,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
 	return SHRINK_STOP;
 }
 
+static unsigned long
+nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+	unsigned long cnt;
+	struct nfsd_net *nn = container_of(shrink,
+				struct nfsd_net, nfsd_deleg_shrinker);
+
+	cnt = atomic_long_read(&num_delegations);
+	if (cnt)
+		mod_delayed_work(laundry_wq,
+			&nn->nfsd_deleg_shrinker_work, 0);
+	return cnt;
+}
+
+static unsigned long
+nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+	return SHRINK_STOP;
+}
+
 int
 nfsd4_init_leases_net(struct nfsd_net *nn)
 {
 	struct sysinfo si;
 	u64 max_clients;
+	int retval;
 
 	nn->nfsd4_lease = 90;	/* default lease time */
 	nn->nfsd4_grace = 90;
@@ -4422,13 +4444,23 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
 	nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
 	nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
 	nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
-	return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
+	retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
+	if (retval)
+		return retval;
+	nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
+	nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
+	nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
+	retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
+	if (retval)
+		unregister_shrinker(&nn->nfsd_client_shrinker);
+	return retval;
 }
 
 void
 nfsd4_leases_net_shutdown(struct nfsd_net *nn)
 {
 	unregister_shrinker(&nn->nfsd_client_shrinker);
+	unregister_shrinker(&nn->nfsd_deleg_shrinker);
 }
 
 static void init_nfs4_replay(struct nfs4_replay *rp)
@@ -6167,6 +6199,45 @@ courtesy_client_reaper(struct work_struct *reaper)
 	nfs4_process_client_reaplist(&reaplist);
 }
 
+static void
+deleg_reaper(struct work_struct *deleg_work)
+{
+	struct list_head *pos, *next;
+	struct nfs4_client *clp;
+	struct list_head cblist;
+	struct delayed_work *dwork = to_delayed_work(deleg_work);
+	struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+					nfsd_deleg_shrinker_work);
+
+	INIT_LIST_HEAD(&cblist);
+	spin_lock(&nn->client_lock);
+	list_for_each_safe(pos, next, &nn->client_lru) {
+		clp = list_entry(pos, struct nfs4_client, cl_lru);
+		if (clp->cl_state != NFSD4_ACTIVE ||
+				list_empty(&clp->cl_delegations) ||
+				atomic_read(&clp->cl_delegs_in_recall) ||
+				clp->cl_recall_any_busy ||
+				(ktime_get_boottime_seconds() -
+					clp->cl_recall_any_time < 5)) {
+			continue;
+		}
+		clp->cl_recall_any_busy = true;
+		list_add(&clp->cl_recall_any_cblist, &cblist);
+
+		/* release in nfsd4_cb_recall_any_release */
+		atomic_inc(&clp->cl_rpc_users);
+	}
+	spin_unlock(&nn->client_lock);
+	list_for_each_safe(pos, next, &cblist) {
+		clp = list_entry(pos, struct nfs4_client, cl_recall_any_cblist);
+		list_del_init(&clp->cl_recall_any_cblist);
+		clp->cl_recall_any_bm = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
+						BIT(RCA4_TYPE_MASK_WDATA_DLG);
+		clp->cl_recall_any_time = ktime_get_boottime_seconds();
+		nfsd4_run_cb(&clp->cl_recall_any);
+	}
+}
+
 static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
 {
 	if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
@@ -7990,6 +8061,7 @@ static int nfs4_state_create_net(struct net *net)
 
 	INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
 	INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
+	INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
 	get_net(net);
 
 	return 0;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 49ca06169642..7d68d9a50901 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -415,6 +415,8 @@ struct nfs4_client {
 	bool			cl_recall_any_busy;
 	uint32_t		cl_recall_any_bm;
 	struct nfsd4_callback	cl_recall_any;
+	struct list_head	cl_recall_any_cblist;
+	time64_t		cl_recall_any_time;
 };
 
 /* struct nfs4_client_reset
-- 
2.9.5


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 1/2] NFSD: add support for sending CB_RECALL_ANY
  2022-10-28  2:16 ` [PATCH v3 1/2] NFSD: add support for sending CB_RECALL_ANY Dai Ngo
@ 2022-10-28 14:11   ` Chuck Lever III
  2022-10-30 16:16     ` dai.ngo
  0 siblings, 1 reply; 6+ messages in thread
From: Chuck Lever III @ 2022-10-28 14:11 UTC (permalink / raw)
  To: Dai Ngo; +Cc: jlayton, Linux NFS Mailing List



> On Oct 27, 2022, at 10:16 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
> 
> There is only one nfsd4_callback, cl_recall_any, added for each
> nfs4_client.

Is only one needed at a time, or might NFSD need to send more
than one concurrently? So far it looks like the only reason to
limit it to one at a time is the way the cb_recall_any arguments
are passed to the XDR layer.


> Access to it must be serialized. For now it's done
> by the cl_recall_any_busy flag since it's used only by the
> delegation shrinker. If there is another consumer of CB_RECALL_ANY
> then a spinlock must be used.

The usual arrangement is to add the XDR infrastructure for a new
operation in one patch, and then add consumers in subsequent
patches. Can you move the hunks that change fs/nfsd/nfs4state.c
to 2/2 and update the above description accordingly?

In a separate patch you should add a trace_nfsd_cb_recall_any and
a trace_nfsd_cb_recall_any_done tracepoint. There are already nice
examples in fs/nfsd/trace.h for the other callback operations.


A little more below.


> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
> fs/nfsd/nfs4callback.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++
> fs/nfsd/nfs4state.c    | 32 +++++++++++++++++++++++++
> fs/nfsd/state.h        |  8 +++++++
> fs/nfsd/xdr4cb.h       |  6 +++++
> 4 files changed, 110 insertions(+)
> 
> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
> index f0e69edf5f0f..03587e1397f4 100644
> --- a/fs/nfsd/nfs4callback.c
> +++ b/fs/nfsd/nfs4callback.c
> @@ -329,6 +329,29 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
> }
> 
> /*
> + * CB_RECALLANY4args
> + *
> + *	struct CB_RECALLANY4args {
> + *		uint32_t	craa_objects_to_keep;
> + *		bitmap4		craa_type_mask;
> + *	};
> + */
> +static void
> +encode_cb_recallany4args(struct xdr_stream *xdr,
> +			struct nfs4_cb_compound_hdr *hdr, uint32_t bmval)
> +{
> +	__be32 *p;
> +
> +	encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
> +	p = xdr_reserve_space(xdr, 4);
> +	*p++ = xdr_zero;	/* craa_objects_to_keep */

Let's use xdr_stream_encode_u32() here. Would it be reasonable
for the upper layer to provide this value, or will NFSD always
want it to be zero?


> +	p = xdr_reserve_space(xdr, 8);

Let's use xdr_stream_encode_uint32_array() here. encode_cb_recallany4args's
caller should pass a u32 * and a length, not just a simple u32.


> +	*p++ = cpu_to_be32(1);
> +	*p++ = cpu_to_be32(bmval);
> +	hdr->nops++;
> +}
> +
> +/*
>  * CB_SEQUENCE4args
>  *
>  *	struct CB_SEQUENCE4args {
> @@ -482,6 +505,24 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
> 	encode_cb_nops(&hdr);
> }
> 
> +/*
> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
> + */
> +static void
> +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
> +		struct xdr_stream *xdr, const void *data)
> +{
> +	const struct nfsd4_callback *cb = data;
> +	struct nfs4_cb_compound_hdr hdr = {
> +		.ident = cb->cb_clp->cl_cb_ident,
> +		.minorversion = cb->cb_clp->cl_minorversion,
> +	};
> +
> +	encode_cb_compound4args(xdr, &hdr);
> +	encode_cb_sequence4args(xdr, cb, &hdr);
> +	encode_cb_recallany4args(xdr, &hdr, cb->cb_clp->cl_recall_any_bm);
> +	encode_cb_nops(&hdr);
> +}
> 
> /*
>  * NFSv4.0 and NFSv4.1 XDR decode functions
> @@ -520,6 +561,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
> 	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
> }
> 
> +/*
> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
> + */
> +static int
> +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
> +				  struct xdr_stream *xdr,
> +				  void *data)
> +{
> +	struct nfsd4_callback *cb = data;
> +	struct nfs4_cb_compound_hdr hdr;
> +	int status;
> +
> +	status = decode_cb_compound4res(xdr, &hdr);
> +	if (unlikely(status))
> +		return status;
> +	status = decode_cb_sequence4res(xdr, cb);
> +	if (unlikely(status || cb->cb_seq_status))
> +		return status;
> +	status =  decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
> +	return status;
> +}
> +
> #ifdef CONFIG_NFSD_PNFS
> /*
>  * CB_LAYOUTRECALL4args
> @@ -783,6 +846,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
> #endif
> 	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
> 	PROC(CB_OFFLOAD,	COMPOUND,	cb_offload,	cb_offload),
> +	PROC(CB_RECALL_ANY,	COMPOUND,	cb_recall_any,	cb_recall_any),
> };
> 
> static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 4e718500a00c..68d049973ce3 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2854,6 +2854,36 @@ static const struct tree_descr client_files[] = {
> 	[3] = {""},
> };
> 
> +static int
> +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> +			struct rpc_task *task)
> +{
> +	switch (task->tk_status) {
> +	case -NFS4ERR_DELAY:
> +		rpc_delay(task, 2 * HZ);
> +		return 0;
> +	default:
> +		return 1;
> +	}
> +}
> +
> +static void
> +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> +{
> +	struct nfs4_client *clp = cb->cb_clp;
> +	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
> +
> +	spin_lock(&nn->client_lock);
> +	clp->cl_recall_any_busy = false;
> +	put_client_renew_locked(clp);
> +	spin_unlock(&nn->client_lock);
> +}
> +
> +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> +	.done		= nfsd4_cb_recall_any_done,
> +	.release	= nfsd4_cb_recall_any_release,
> +};
> +
> static struct nfs4_client *create_client(struct xdr_netobj name,
> 		struct svc_rqst *rqstp, nfs4_verifier *verf)
> {
> @@ -2891,6 +2921,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
> 		free_client(clp);
> 		return NULL;
> 	}
> +	nfsd4_init_cb(&clp->cl_recall_any, clp, &nfsd4_cb_recall_any_ops,
> +			NFSPROC4_CLNT_CB_RECALL_ANY);
> 	return clp;
> }
> 
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index e2daef3cc003..49ca06169642 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -411,6 +411,10 @@ struct nfs4_client {
> 
> 	unsigned int		cl_state;
> 	atomic_t		cl_delegs_in_recall;
> +
> +	bool			cl_recall_any_busy;

Rather than adding a boolean field, you could add a bit to
cl_flags.

I'm not convinced you need to add the argument fields here...
I think kmalloc'ing the arguments and then freeing them in
nfsd4_cb_recall_any_release() would be sufficient.


> +	uint32_t		cl_recall_any_bm;
> +	struct nfsd4_callback	cl_recall_any;
> };
> 
> /* struct nfs4_client_reset
> @@ -639,8 +643,12 @@ enum nfsd4_cb_op {
> 	NFSPROC4_CLNT_CB_OFFLOAD,
> 	NFSPROC4_CLNT_CB_SEQUENCE,
> 	NFSPROC4_CLNT_CB_NOTIFY_LOCK,
> +	NFSPROC4_CLNT_CB_RECALL_ANY,
> };
> 
> +#define RCA4_TYPE_MASK_RDATA_DLG	0
> +#define RCA4_TYPE_MASK_WDATA_DLG	1
> +
> /* Returns true iff a is later than b: */
> static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
> {
> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
> index 547cf07cf4e0..0d39af1b00a0 100644
> --- a/fs/nfsd/xdr4cb.h
> +++ b/fs/nfsd/xdr4cb.h
> @@ -48,3 +48,9 @@
> #define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz  +      \
> 					cb_sequence_dec_sz +            \
> 					op_dec_sz)
> +#define NFS4_enc_cb_recall_any_sz	(cb_compound_enc_hdr_sz +       \
> +					cb_sequence_enc_sz +            \
> +					1 + 1 + 1)
> +#define NFS4_dec_cb_recall_any_sz	(cb_compound_dec_hdr_sz  +      \
> +					cb_sequence_dec_sz +            \
> +					op_dec_sz)
> -- 
> 2.9.5
> 

--
Chuck Lever




^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 1/2] NFSD: add support for sending CB_RECALL_ANY
  2022-10-28 14:11   ` Chuck Lever III
@ 2022-10-30 16:16     ` dai.ngo
  2022-10-30 17:12       ` Chuck Lever III
  0 siblings, 1 reply; 6+ messages in thread
From: dai.ngo @ 2022-10-30 16:16 UTC (permalink / raw)
  To: Chuck Lever III; +Cc: jlayton, Linux NFS Mailing List

On 10/28/22 7:11 AM, Chuck Lever III wrote:
>
>> On Oct 27, 2022, at 10:16 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
>>
>> There is only one nfsd4_callback, cl_recall_any, added for each
>> nfs4_client.
> Is only one needed at a time, or might NFSD need to send more
> than one concurrently? So far it looks like the only reason to
> limit it to one at a time is the way the cb_recall_any arguments
> are passed to the XDR layer.

We only need to send one CB_RECALL_ANY for each client that holds
delegations, so just one nfsd4_callback is needed for this purpose.
Do you see a need to support multiple nfsd4_callback's per client
for cl_recall_any?

>
>
>> Access to it must be serialized. For now it's done
>> by the cl_recall_any_busy flag since it's used only by the
>> delegation shrinker. If there is another consumer of CB_RECALL_ANY
>> then a spinlock must be used.
> The usual arrangement is to add the XDR infrastructure for a new
> operation in one patch, and then add consumers in subsequent
> patches. Can you move the hunks that change fs/nfsd/nfs4state.c
> to 2/2 and update the above description accordingly?

fix in v4.

>
> In a separate patch you should add a trace_nfsd_cb_recall_any and
> a trace_nfsd_cb_recall_any_done tracepoint. There are already nice
> examples in fs/nfsd/trace.h for the other callback operations.

fix in v4.

>
>
> A little more below.
>
>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>> fs/nfsd/nfs4callback.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++
>> fs/nfsd/nfs4state.c    | 32 +++++++++++++++++++++++++
>> fs/nfsd/state.h        |  8 +++++++
>> fs/nfsd/xdr4cb.h       |  6 +++++
>> 4 files changed, 110 insertions(+)
>>
>> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
>> index f0e69edf5f0f..03587e1397f4 100644
>> --- a/fs/nfsd/nfs4callback.c
>> +++ b/fs/nfsd/nfs4callback.c
>> @@ -329,6 +329,29 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
>> }
>>
>> /*
>> + * CB_RECALLANY4args
>> + *
>> + *	struct CB_RECALLANY4args {
>> + *		uint32_t	craa_objects_to_keep;
>> + *		bitmap4		craa_type_mask;
>> + *	};
>> + */
>> +static void
>> +encode_cb_recallany4args(struct xdr_stream *xdr,
>> +			struct nfs4_cb_compound_hdr *hdr, uint32_t bmval)
>> +{
>> +	__be32 *p;
>> +
>> +	encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
>> +	p = xdr_reserve_space(xdr, 4);
>> +	*p++ = xdr_zero;	/* craa_objects_to_keep */
> Let's use xdr_stream_encode_u32() here.

Yes, we can use xdr_stream_encode_u32() here. However, xdr_stream_encode_u32
can return an error, and currently all the xdr encoding functions,
nfs4_xdr_enc_xxxx, are defined to return void, so rpcauth_wrap_req_encode
just ignores encode errors and always returns 0. Note that gss_wrap_req_integ
and gss_wrap_req_priv actually return encoding errors.

>   Would it be reasonable
> for the upper layer to provide this value, or will NFSD always
> want it to be zero?

Ideally the XDR encode routine should not make any decision regarding
how many objects the client can keep; it's up to the consumer of the
CB_RECALL_ANY to decide. However, in this case NFSD always wants to set
this to 0, so instead of passing this in as another argument to the
encoder, I just did the short cut here. Do you want to pass this in
as an argument?

>
>
>> +	p = xdr_reserve_space(xdr, 8);
> Let's use xdr_stream_encode_uint32_array() here. encode_cb_recallany4args's
> caller should pass a u32 * and a length, not just a simple u32.

fix in v4.

>
>
>> +	*p++ = cpu_to_be32(1);
>> +	*p++ = cpu_to_be32(bmval);
>> +	hdr->nops++;
>> +}
>> +
>> +/*
>>   * CB_SEQUENCE4args
>>   *
>>   *	struct CB_SEQUENCE4args {
>> @@ -482,6 +505,24 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
>> 	encode_cb_nops(&hdr);
>> }
>>
>> +/*
>> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
>> + */
>> +static void
>> +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
>> +		struct xdr_stream *xdr, const void *data)
>> +{
>> +	const struct nfsd4_callback *cb = data;
>> +	struct nfs4_cb_compound_hdr hdr = {
>> +		.ident = cb->cb_clp->cl_cb_ident,
>> +		.minorversion = cb->cb_clp->cl_minorversion,
>> +	};
>> +
>> +	encode_cb_compound4args(xdr, &hdr);
>> +	encode_cb_sequence4args(xdr, cb, &hdr);
>> +	encode_cb_recallany4args(xdr, &hdr, cb->cb_clp->cl_recall_any_bm);
>> +	encode_cb_nops(&hdr);
>> +}
>>
>> /*
>>   * NFSv4.0 and NFSv4.1 XDR decode functions
>> @@ -520,6 +561,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
>> 	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
>> }
>>
>> +/*
>> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
>> + */
>> +static int
>> +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
>> +				  struct xdr_stream *xdr,
>> +				  void *data)
>> +{
>> +	struct nfsd4_callback *cb = data;
>> +	struct nfs4_cb_compound_hdr hdr;
>> +	int status;
>> +
>> +	status = decode_cb_compound4res(xdr, &hdr);
>> +	if (unlikely(status))
>> +		return status;
>> +	status = decode_cb_sequence4res(xdr, cb);
>> +	if (unlikely(status || cb->cb_seq_status))
>> +		return status;
>> +	status =  decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
>> +	return status;
>> +}
>> +
>> #ifdef CONFIG_NFSD_PNFS
>> /*
>>   * CB_LAYOUTRECALL4args
>> @@ -783,6 +846,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
>> #endif
>> 	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
>> 	PROC(CB_OFFLOAD,	COMPOUND,	cb_offload,	cb_offload),
>> +	PROC(CB_RECALL_ANY,	COMPOUND,	cb_recall_any,	cb_recall_any),
>> };
>>
>> static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 4e718500a00c..68d049973ce3 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -2854,6 +2854,36 @@ static const struct tree_descr client_files[] = {
>> 	[3] = {""},
>> };
>>
>> +static int
>> +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>> +			struct rpc_task *task)
>> +{
>> +	switch (task->tk_status) {
>> +	case -NFS4ERR_DELAY:
>> +		rpc_delay(task, 2 * HZ);
>> +		return 0;
>> +	default:
>> +		return 1;
>> +	}
>> +}
>> +
>> +static void
>> +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
>> +{
>> +	struct nfs4_client *clp = cb->cb_clp;
>> +	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
>> +
>> +	spin_lock(&nn->client_lock);
>> +	clp->cl_recall_any_busy = false;
>> +	put_client_renew_locked(clp);
>> +	spin_unlock(&nn->client_lock);
>> +}
>> +
>> +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
>> +	.done		= nfsd4_cb_recall_any_done,
>> +	.release	= nfsd4_cb_recall_any_release,
>> +};
>> +
>> static struct nfs4_client *create_client(struct xdr_netobj name,
>> 		struct svc_rqst *rqstp, nfs4_verifier *verf)
>> {
>> @@ -2891,6 +2921,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
>> 		free_client(clp);
>> 		return NULL;
>> 	}
>> +	nfsd4_init_cb(&clp->cl_recall_any, clp, &nfsd4_cb_recall_any_ops,
>> +			NFSPROC4_CLNT_CB_RECALL_ANY);
>> 	return clp;
>> }
>>
>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>> index e2daef3cc003..49ca06169642 100644
>> --- a/fs/nfsd/state.h
>> +++ b/fs/nfsd/state.h
>> @@ -411,6 +411,10 @@ struct nfs4_client {
>>
>> 	unsigned int		cl_state;
>> 	atomic_t		cl_delegs_in_recall;
>> +
>> +	bool			cl_recall_any_busy;
> Rather than adding a boolean field, you could add a bit to
> cl_flags.

fix in v4.

>
> I'm not convinced you need to add the argument fields here...
> I think kmalloc'ing the arguments and then freeing them in
> nfsd4_cb_recall_any_release() would be sufficient.

Since cb_recall_any is sent when we're running low on system memory,
I'm trying not to use kmalloc, to avoid potential deadlock or adding
more stress to the system.

Thanks!
-Dai

>
>
>> +	uint32_t		cl_recall_any_bm;
>> +	struct nfsd4_callback	cl_recall_any;
>> };
>>
>> /* struct nfs4_client_reset
>> @@ -639,8 +643,12 @@ enum nfsd4_cb_op {
>> 	NFSPROC4_CLNT_CB_OFFLOAD,
>> 	NFSPROC4_CLNT_CB_SEQUENCE,
>> 	NFSPROC4_CLNT_CB_NOTIFY_LOCK,
>> +	NFSPROC4_CLNT_CB_RECALL_ANY,
>> };
>>
>> +#define RCA4_TYPE_MASK_RDATA_DLG	0
>> +#define RCA4_TYPE_MASK_WDATA_DLG	1
>> +
>> /* Returns true iff a is later than b: */
>> static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
>> {
>> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
>> index 547cf07cf4e0..0d39af1b00a0 100644
>> --- a/fs/nfsd/xdr4cb.h
>> +++ b/fs/nfsd/xdr4cb.h
>> @@ -48,3 +48,9 @@
>> #define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz  +      \
>> 					cb_sequence_dec_sz +            \
>> 					op_dec_sz)
>> +#define NFS4_enc_cb_recall_any_sz	(cb_compound_enc_hdr_sz +       \
>> +					cb_sequence_enc_sz +            \
>> +					1 + 1 + 1)
>> +#define NFS4_dec_cb_recall_any_sz	(cb_compound_dec_hdr_sz  +      \
>> +					cb_sequence_dec_sz +            \
>> +					op_dec_sz)
>> -- 
>> 2.9.5
>>
> --
> Chuck Lever
>
>
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 1/2] NFSD: add support for sending CB_RECALL_ANY
  2022-10-30 16:16     ` dai.ngo
@ 2022-10-30 17:12       ` Chuck Lever III
  0 siblings, 0 replies; 6+ messages in thread
From: Chuck Lever III @ 2022-10-30 17:12 UTC (permalink / raw)
  To: Dai Ngo; +Cc: jlayton, Linux NFS Mailing List



> On Oct 30, 2022, at 12:16 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
> 
> On 10/28/22 7:11 AM, Chuck Lever III wrote:
>> 
>>> On Oct 27, 2022, at 10:16 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
>>> 
>>> There is only one nfsd4_callback, cl_recall_any, added for each
>>> nfs4_client.
>> Is only one needed at a time, or might NFSD need to send more
>> than one concurrently? So far it looks like the only reason to
>> limit it to one at a time is the way the cb_recall_any arguments
>> are passed to the XDR layer.
> 
> We only need to send one CB_RECALL_ANY for each client that holds
> delegations, so just one nfsd4_callback is needed for this purpose.
> Do you see a need to support multiple nfsd4_callback's per client
> for cl_recall_any?

CB_RECALL_ANY has a bitmap argument so that it can be used for
recalling many different types of state. I don't pretend to
understand all of it, but it seems like eventually NFSD might
want to send more than one of these at a time, and not just in
low memory scenarios. I suggest a way below of doing this
without adding complexity.


>>> Access to it must be serialized. For now it's done
>>> by the cl_recall_any_busy flag since it's used only by the
>>> delegation shrinker. If there is another consumer of CB_RECALL_ANY
>>> then a spinlock must be used.
>> The usual arrangement is to add the XDR infrastructure for a new
>> operation in one patch, and then add consumers in subsequent
>> patches. Can you move the hunks that change fs/nfsd/nfs4state.c
>> to 2/2 and update the above description accordingly?
> 
> fix in v4.
> 
>> 
>> In a separate patch you should add a trace_nfsd_cb_recall_any and
>> a trace_nfsd_cb_recall_any_done tracepoint. There are already nice
>> examples in fs/nfsd/trace.h for the other callback operations.
> 
> fix in v4.
> 
>> 
>> 
>> A little more below.
>> 
>> 
>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>> ---
>>> fs/nfsd/nfs4callback.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>> fs/nfsd/nfs4state.c    | 32 +++++++++++++++++++++++++
>>> fs/nfsd/state.h        |  8 +++++++
>>> fs/nfsd/xdr4cb.h       |  6 +++++
>>> 4 files changed, 110 insertions(+)
>>> 
>>> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
>>> index f0e69edf5f0f..03587e1397f4 100644
>>> --- a/fs/nfsd/nfs4callback.c
>>> +++ b/fs/nfsd/nfs4callback.c
>>> @@ -329,6 +329,29 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
>>> }
>>> 
>>> /*
>>> + * CB_RECALLANY4args
>>> + *
>>> + *	struct CB_RECALLANY4args {
>>> + *		uint32_t	craa_objects_to_keep;
>>> + *		bitmap4		craa_type_mask;
>>> + *	};
>>> + */
>>> +static void
>>> +encode_cb_recallany4args(struct xdr_stream *xdr,
>>> +			struct nfs4_cb_compound_hdr *hdr, uint32_t bmval)
>>> +{
>>> +	__be32 *p;
>>> +
>>> +	encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
>>> +	p = xdr_reserve_space(xdr, 4);
>>> +	*p++ = xdr_zero;	/* craa_objects_to_keep */
>> Let's use xdr_stream_encode_u32() here.
> 
> Yes, we can use xdr_stream_encode_u32() here. However xdr_stream_encode_u32
> can return an error, and currently all the xdr encoding functions,
> nfs4_xdr_enc_xxxx, are defined to return void, so rpcauth_wrap_req_encode
> just ignores encode errors and always returns 0. Note that gss_wrap_req_integ
> and gss_wrap_req_priv actually return encoding errors.

The convention it seems is to ignore errors in this code and assume
that the upper layer has set up the send buffer properly. Anything
else would be a local software bug, so it is probably a safe
assumption.


>>  Would it be reasonable
>> for the upper layer to provide this value, or will NFSD always
>> want it to be zero?
> 
> Ideally the XDR encode routine should not make any decision regarding
> how many objects the client can keep; it's up to the consumer of the
> CB_RECALL_ANY to decide. However, in this case NFSD always wants to set
> this to 0

"in this case" I assume means in the low-memory scenario. I don't
know enough to say if other scenarios might want to choose a
different value.


> so instead of passing this in as another argument to the
> encoder, I just took the shortcut here. Do you want to pass this in
> as an argument?

I don't think it would be difficult to parametrize this, so give it
a shot. Suggestion way below.


>>> +	p = xdr_reserve_space(xdr, 8);
>> Let's use xdr_stream_encode_uint32_array() here. encode_cb_recallany4args's
>> caller should pass a u32 * and a length, not just a simple u32.
> 
> fix in v4.
> 
>> 
>> 
>>> +	*p++ = cpu_to_be32(1);
>>> +	*p++ = cpu_to_be32(bmval);
>>> +	hdr->nops++;
>>> +}
>>> +
>>> +/*
>>>  * CB_SEQUENCE4args
>>>  *
>>>  *	struct CB_SEQUENCE4args {
>>> @@ -482,6 +505,24 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
>>> 	encode_cb_nops(&hdr);
>>> }
>>> 
>>> +/*
>>> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
>>> + */
>>> +static void
>>> +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
>>> +		struct xdr_stream *xdr, const void *data)
>>> +{
>>> +	const struct nfsd4_callback *cb = data;
>>> +	struct nfs4_cb_compound_hdr hdr = {
>>> +		.ident = cb->cb_clp->cl_cb_ident,
>>> +		.minorversion = cb->cb_clp->cl_minorversion,
>>> +	};
>>> +
>>> +	encode_cb_compound4args(xdr, &hdr);
>>> +	encode_cb_sequence4args(xdr, cb, &hdr);
>>> +	encode_cb_recallany4args(xdr, &hdr, cb->cb_clp->cl_recall_any_bm);
>>> +	encode_cb_nops(&hdr);
>>> +}
>>> 
>>> /*
>>>  * NFSv4.0 and NFSv4.1 XDR decode functions
>>> @@ -520,6 +561,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
>>> 	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
>>> }
>>> 
>>> +/*
>>> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
>>> + */
>>> +static int
>>> +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
>>> +				  struct xdr_stream *xdr,
>>> +				  void *data)
>>> +{
>>> +	struct nfsd4_callback *cb = data;
>>> +	struct nfs4_cb_compound_hdr hdr;
>>> +	int status;
>>> +
>>> +	status = decode_cb_compound4res(xdr, &hdr);
>>> +	if (unlikely(status))
>>> +		return status;
>>> +	status = decode_cb_sequence4res(xdr, cb);
>>> +	if (unlikely(status || cb->cb_seq_status))
>>> +		return status;
>>> +	status =  decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
>>> +	return status;
>>> +}
>>> +
>>> #ifdef CONFIG_NFSD_PNFS
>>> /*
>>>  * CB_LAYOUTRECALL4args
>>> @@ -783,6 +846,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
>>> #endif
>>> 	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
>>> 	PROC(CB_OFFLOAD,	COMPOUND,	cb_offload,	cb_offload),
>>> +	PROC(CB_RECALL_ANY,	COMPOUND,	cb_recall_any,	cb_recall_any),
>>> };
>>> 
>>> static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>> index 4e718500a00c..68d049973ce3 100644
>>> --- a/fs/nfsd/nfs4state.c
>>> +++ b/fs/nfsd/nfs4state.c
>>> @@ -2854,6 +2854,36 @@ static const struct tree_descr client_files[] = {
>>> 	[3] = {""},
>>> };
>>> 
>>> +static int
>>> +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>>> +			struct rpc_task *task)
>>> +{
>>> +	switch (task->tk_status) {
>>> +	case -NFS4ERR_DELAY:
>>> +		rpc_delay(task, 2 * HZ);
>>> +		return 0;
>>> +	default:
>>> +		return 1;
>>> +	}
>>> +}
>>> +
>>> +static void
>>> +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
>>> +{
>>> +	struct nfs4_client *clp = cb->cb_clp;
>>> +	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
>>> +
>>> +	spin_lock(&nn->client_lock);
>>> +	clp->cl_recall_any_busy = false;
>>> +	put_client_renew_locked(clp);
>>> +	spin_unlock(&nn->client_lock);
>>> +}
>>> +
>>> +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
>>> +	.done		= nfsd4_cb_recall_any_done,
>>> +	.release	= nfsd4_cb_recall_any_release,
>>> +};
>>> +
>>> static struct nfs4_client *create_client(struct xdr_netobj name,
>>> 		struct svc_rqst *rqstp, nfs4_verifier *verf)
>>> {
>>> @@ -2891,6 +2921,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
>>> 		free_client(clp);
>>> 		return NULL;
>>> 	}
>>> +	nfsd4_init_cb(&clp->cl_recall_any, clp, &nfsd4_cb_recall_any_ops,
>>> +			NFSPROC4_CLNT_CB_RECALL_ANY);
>>> 	return clp;
>>> }
>>> 
>>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>>> index e2daef3cc003..49ca06169642 100644
>>> --- a/fs/nfsd/state.h
>>> +++ b/fs/nfsd/state.h
>>> @@ -411,6 +411,10 @@ struct nfs4_client {
>>> 
>>> 	unsigned int		cl_state;
>>> 	atomic_t		cl_delegs_in_recall;
>>> +
>>> +	bool			cl_recall_any_busy;
>> Rather than adding a boolean field, you could add a bit to
>> cl_flags.
> 
> fix in v4.
> 
>> 
>> I'm not convinced you need to add the argument fields here...
>> I think kmalloc'ing the arguments and then freeing them in
>> nfsd4_cb_recall_any_release() would be sufficient.
> 
> Since cb_recall_any is sent when we're running low on system memory,
> I'm trying not to use kmalloc, to avoid a potential deadlock or adding
> more stress to the system.

Fair. Well, again, there are potentially other use cases that
aren't concerned about memory pressure.

Construct something like this (in fs/nfsd/xdr4.h):

struct nfsd4_cb_recall_any {
	struct nfsd4_callback	ra_cb;
	u32			ra_keep;
	u32			ra_bmval[1];
};

Then either add it to another long-term structure like nfs4_client
or the upper layer can kmalloc it on demand. That way the XDR code
supports both ways and does not have to care which the upper layer
wants to use.


> Thanks!
> -Dai
> 
>> 
>> 
>>> +	uint32_t		cl_recall_any_bm;
>>> +	struct nfsd4_callback	cl_recall_any;
>>> };
>>> 
>>> /* struct nfs4_client_reset
>>> @@ -639,8 +643,12 @@ enum nfsd4_cb_op {
>>> 	NFSPROC4_CLNT_CB_OFFLOAD,
>>> 	NFSPROC4_CLNT_CB_SEQUENCE,
>>> 	NFSPROC4_CLNT_CB_NOTIFY_LOCK,
>>> +	NFSPROC4_CLNT_CB_RECALL_ANY,
>>> };
>>> 
>>> +#define RCA4_TYPE_MASK_RDATA_DLG	0
>>> +#define RCA4_TYPE_MASK_WDATA_DLG	1
>>> +
>>> /* Returns true iff a is later than b: */
>>> static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
>>> {
>>> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
>>> index 547cf07cf4e0..0d39af1b00a0 100644
>>> --- a/fs/nfsd/xdr4cb.h
>>> +++ b/fs/nfsd/xdr4cb.h
>>> @@ -48,3 +48,9 @@
>>> #define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz  +      \
>>> 					cb_sequence_dec_sz +            \
>>> 					op_dec_sz)
>>> +#define NFS4_enc_cb_recall_any_sz	(cb_compound_enc_hdr_sz +       \
>>> +					cb_sequence_enc_sz +            \
>>> +					1 + 1 + 1)
>>> +#define NFS4_dec_cb_recall_any_sz	(cb_compound_dec_hdr_sz  +      \
>>> +					cb_sequence_dec_sz +            \
>>> +					op_dec_sz)
>>> -- 
>>> 2.9.5
>>> 
>> --
>> Chuck Lever

--
Chuck Lever




^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-10-30 17:12 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-28  2:16 [PATCH v3 0/2] add support for CB_RECALL_ANY and the delegation shrinker Dai Ngo
2022-10-28  2:16 ` [PATCH v3 1/2] NFSD: add support for sending CB_RECALL_ANY Dai Ngo
2022-10-28 14:11   ` Chuck Lever III
2022-10-30 16:16     ` dai.ngo
2022-10-30 17:12       ` Chuck Lever III
2022-10-28  2:16 ` [PATCH v3 2/2] NFSD: add delegation shrinker to react to low memory condition Dai Ngo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.