All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v5 0/2] NFSD: delay unmount source's export after inter-server copy completed.
@ 2021-05-17 22:43 Dai Ngo
  2021-05-17 22:43 ` [PATCH v5 1/2] " Dai Ngo
  2021-05-17 22:43 ` [PATCH v5 2/2] NFSv4.2: remove restriction of copy size for inter-server copy Dai Ngo
  0 siblings, 2 replies; 15+ messages in thread
From: Dai Ngo @ 2021-05-17 22:43 UTC (permalink / raw)
  To: olga.kornievskaia, bfields; +Cc: linux-nfs, trondmy, chuck.lever

Hi Olga, Bruce,

Currently the source's export is mounted and unmounted on every
inter-server copy operation. This causes unnecessary overhead
for each copy.

This patch series is an enhancement to allow the export to remain
mounted for a configurable period (default to 15 minutes). If the 
export is not being used for the configured time it will be unmounted
by a delayed task. If it's used again then its expiration time is
extended for another period.

Since mount and unmount are no longer done on every copy request,
the restriction of copy size (14*rsize), in __nfs4_copy_file_range,
is removed.

-Dai

v2: fix compiler warning of missing prototype.
v3: remove the used of semaphore.
    eliminated all RPC calls for subsequence mount by allowing
       all exports from one server to share one vfsmount.
    make inter-server threshold a module configuration parameter.
v4: convert nsui_refcnt to use refcount_t.
    add note about 20secs wait in nfsd4_interssc_connect.
    removed (14*rsize) restriction from __nfs4_copy_file_range.
v5: make use of the laundomat thread to service delayed unmount list.
    destroy delayed unmount list when nfsd is shutdown.
    make delayed unmount list per nfsd_net to support container.



^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v5 1/2] NFSD: delay unmount source's export after inter-server copy completed.
  2021-05-17 22:43 [PATCH v5 0/2] NFSD: delay unmount source's export after inter-server copy completed Dai Ngo
@ 2021-05-17 22:43 ` Dai Ngo
  2021-05-18 16:28   ` Olga Kornievskaia
                     ` (2 more replies)
  2021-05-17 22:43 ` [PATCH v5 2/2] NFSv4.2: remove restriction of copy size for inter-server copy Dai Ngo
  1 sibling, 3 replies; 15+ messages in thread
From: Dai Ngo @ 2021-05-17 22:43 UTC (permalink / raw)
  To: olga.kornievskaia, bfields; +Cc: linux-nfs, trondmy, chuck.lever

Currently the source's export is mounted and unmounted on every
inter-server copy operation. This patch is an enhancement to delay
the unmount of the source export for a certain period of time to
eliminate the mount and unmount overhead on subsequent copy operations.

After a copy operation completes, a delayed task is scheduled to
unmount the export after a configurable idle time. Each time the
export is being used again, its expire time is extended to allow
the export to remain mounted.

The unmount task and the mount operation of the copy request are
synced to make sure the export is not unmounted while it's being
used.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfsd/netns.h         |   5 ++
 fs/nfsd/nfs4proc.c      | 216 +++++++++++++++++++++++++++++++++++++++++++++++-
 fs/nfsd/nfs4state.c     |   8 ++
 fs/nfsd/nfsd.h          |   6 ++
 fs/nfsd/nfssvc.c        |   3 +
 include/linux/nfs_ssc.h |  16 ++++
 6 files changed, 250 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index c330f5bd0cf3..6018e5050cb4 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -21,6 +21,7 @@
 
 struct cld_net;
 struct nfsd4_client_tracking_ops;
+struct nfsd4_ssc_umount;
 
 enum {
 	/* cache misses due only to checksum comparison failures */
@@ -176,6 +177,10 @@ struct nfsd_net {
 	unsigned int             longest_chain_cachesize;
 
 	struct shrinker		nfsd_reply_cache_shrinker;
+
+	spinlock_t              nfsd_ssc_lock;
+	struct nfsd4_ssc_umount	*nfsd_ssc_umount;
+
 	/* utsname taken from the process that starts the server */
 	char			nfsd_name[UNX_MAXNODENAME+1];
 };
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index dd9f38d072dd..892ad72d87ae 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -55,6 +55,99 @@ module_param(inter_copy_offload_enable, bool, 0644);
 MODULE_PARM_DESC(inter_copy_offload_enable,
 		 "Enable inter server to server copy offload. Default: false");
 
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+static int nfsd4_ssc_umount_timeout = 900000;		/* default to 15 mins */
+module_param(nfsd4_ssc_umount_timeout, int, 0644);
+MODULE_PARM_DESC(nfsd4_ssc_umount_timeout,
+		"idle msecs before unmount export from source server");
+
+void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
+{
+	bool do_wakeup = false;
+	struct nfsd4_ssc_umount_item *ni = 0;
+	struct nfsd4_ssc_umount_item *tmp;
+	struct nfsd4_ssc_umount *nu;
+
+	spin_lock(&nn->nfsd_ssc_lock);
+	if (!nn->nfsd_ssc_umount) {
+		spin_unlock(&nn->nfsd_ssc_lock);
+		return;
+	}
+	nu = nn->nfsd_ssc_umount;
+	list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
+		if (time_after(jiffies, ni->nsui_expire)) {
+			if (refcount_read(&ni->nsui_refcnt) > 0)
+				continue;
+
+			/* mark being unmount */
+			ni->nsui_busy = true;
+			spin_unlock(&nn->nfsd_ssc_lock);
+			mntput(ni->nsui_vfsmount);
+			spin_lock(&nn->nfsd_ssc_lock);
+
+			/* waiters need to start from begin of list */
+			list_del(&ni->nsui_list);
+			kfree(ni);
+
+			/* wakeup ssc_connect waiters */
+			do_wakeup = true;
+			continue;
+		}
+		break;
+	}
+	if (!list_empty(&nu->nsu_list)) {
+		ni = list_first_entry(&nu->nsu_list,
+			struct nfsd4_ssc_umount_item, nsui_list);
+		nu->nsu_expire = ni->nsui_expire;
+	} else
+		nu->nsu_expire = 0;
+
+	if (do_wakeup)
+		wake_up_all(&nu->nsu_waitq);
+	spin_unlock(&nn->nfsd_ssc_lock);
+}
+
+/*
+ * This is called when nfsd is being shutdown, after all inter_ssc
+ * cleanup were done, to destroy the ssc delayed unmount list.
+ */
+void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn)
+{
+	struct nfsd4_ssc_umount_item *ni = 0;
+	struct nfsd4_ssc_umount_item *tmp;
+	struct nfsd4_ssc_umount *nu;
+
+	spin_lock(&nn->nfsd_ssc_lock);
+	if (!nn->nfsd_ssc_umount) {
+		spin_unlock(&nn->nfsd_ssc_lock);
+		return;
+	}
+	nu = nn->nfsd_ssc_umount;
+	nn->nfsd_ssc_umount = 0;
+	list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
+		list_del(&ni->nsui_list);
+		spin_unlock(&nn->nfsd_ssc_lock);
+		mntput(ni->nsui_vfsmount);
+		kfree(ni);
+		spin_lock(&nn->nfsd_ssc_lock);
+	}
+	spin_unlock(&nn->nfsd_ssc_lock);
+	kfree(nu);
+}
+
+void nfsd4_ssc_init_umount_work(struct nfsd_net *nn)
+{
+	nn->nfsd_ssc_umount = kzalloc(sizeof(struct nfsd4_ssc_umount),
+					GFP_KERNEL);
+	if (!nn->nfsd_ssc_umount)
+		return;
+	spin_lock_init(&nn->nfsd_ssc_lock);
+	INIT_LIST_HEAD(&nn->nfsd_ssc_umount->nsu_list);
+	init_waitqueue_head(&nn->nfsd_ssc_umount->nsu_waitq);
+}
+EXPORT_SYMBOL_GPL(nfsd4_ssc_init_umount_work);
+#endif
+
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
 #include <linux/security.h>
 
@@ -1181,6 +1274,12 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
 	char *ipaddr, *dev_name, *raw_data;
 	int len, raw_len;
 	__be32 status = nfserr_inval;
+	struct nfsd4_ssc_umount_item *ni = 0;
+	struct nfsd4_ssc_umount_item *work = NULL;
+	struct nfsd4_ssc_umount_item *tmp;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+	struct nfsd4_ssc_umount *nu;
+	DEFINE_WAIT(wait);
 
 	naddr = &nss->u.nl4_addr;
 	tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr,
@@ -1229,12 +1328,76 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
 		goto out_free_rawdata;
 	snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
 
+	work = kzalloc(sizeof(*work), GFP_KERNEL);
+try_again:
+	spin_lock(&nn->nfsd_ssc_lock);
+	if (!nn->nfsd_ssc_umount) {
+		spin_unlock(&nn->nfsd_ssc_lock);
+		kfree(work);
+		work = NULL;
+		goto skip_dul;
+	}
+	nu = nn->nfsd_ssc_umount;
+	list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
+		if (strncmp(ni->nsui_ipaddr, ipaddr, sizeof(ni->nsui_ipaddr)))
+			continue;
+		/* found a match */
+		if (ni->nsui_busy) {
+			/*  wait - and try again */
+			prepare_to_wait(&nu->nsu_waitq, &wait,
+				TASK_INTERRUPTIBLE);
+			spin_unlock(&nn->nfsd_ssc_lock);
+
+			/* allow 20secs for mount/unmount for now - revisit */
+			if (signal_pending(current) ||
+					(schedule_timeout(20*HZ) == 0)) {
+				status = nfserr_eagain;
+				kfree(work);
+				goto out_free_devname;
+			}
+			finish_wait(&nu->nsu_waitq, &wait);
+			goto try_again;
+		}
+		ss_mnt = ni->nsui_vfsmount;
+		if (refcount_read(&ni->nsui_refcnt) == 0)
+			refcount_set(&ni->nsui_refcnt, 1);
+		else
+			refcount_inc(&ni->nsui_refcnt);
+		spin_unlock(&nn->nfsd_ssc_lock);
+		kfree(work);
+		goto out_done;
+	}
+	/* create new entry, set busy, insert list, clear busy after mount */
+	if (work) {
+		strncpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr));
+		refcount_set(&work->nsui_refcnt, 1);
+		work->nsui_busy = true;
+		list_add_tail(&work->nsui_list, &nu->nsu_list);
+	}
+	spin_unlock(&nn->nfsd_ssc_lock);
+skip_dul:
+
 	/* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */
 	ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data);
 	module_put(type->owner);
-	if (IS_ERR(ss_mnt))
+	if (IS_ERR(ss_mnt)) {
+		if (work) {
+			spin_lock(&nn->nfsd_ssc_lock);
+			list_del(&work->nsui_list);
+			wake_up_all(&nu->nsu_waitq);
+			spin_unlock(&nn->nfsd_ssc_lock);
+			kfree(work);
+		}
 		goto out_free_devname;
-
+	}
+	if (work) {
+		spin_lock(&nn->nfsd_ssc_lock);
+		work->nsui_vfsmount = ss_mnt;
+		work->nsui_busy = false;
+		wake_up_all(&nu->nsu_waitq);
+		spin_unlock(&nn->nfsd_ssc_lock);
+	}
+out_done:
 	status = 0;
 	*mount = ss_mnt;
 
@@ -1301,10 +1464,55 @@ static void
 nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
 			struct nfsd_file *dst)
 {
+	bool found = false;
+	bool resched = false;
+	long timeout;
+	struct nfsd4_ssc_umount_item *tmp;
+	struct nfsd4_ssc_umount_item *ni = 0;
+	struct nfsd4_ssc_umount *nu;
+	struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id);
+
 	nfs42_ssc_close(src->nf_file);
-	fput(src->nf_file);
 	nfsd_file_put(dst);
-	mntput(ss_mnt);
+	fput(src->nf_file);
+
+	if (!nn) {
+		mntput(ss_mnt);
+		return;
+	}
+	spin_lock(&nn->nfsd_ssc_lock);
+	if (!nn->nfsd_ssc_umount) {
+		/* delayed unmount list not setup */
+		spin_unlock(&nn->nfsd_ssc_lock);
+		mntput(ss_mnt);
+		return;
+	}
+	nu = nn->nfsd_ssc_umount;
+	timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout);
+	list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
+		if (ni->nsui_vfsmount->mnt_sb == ss_mnt->mnt_sb) {
+			list_del(&ni->nsui_list);
+			/*
+			 * vfsmount can be shared by multiple exports,
+			 * decrement refcnt and schedule delayed task
+			 * if it drops to 0.
+			 */
+			if (refcount_dec_and_test(&ni->nsui_refcnt))
+				resched = true;
+			ni->nsui_expire = jiffies + timeout;
+			list_add_tail(&ni->nsui_list, &nu->nsu_list);
+			found = true;
+			break;
+		}
+	}
+	if (!found) {
+		spin_unlock(&nn->nfsd_ssc_lock);
+		mntput(ss_mnt);
+		return;
+	}
+	if (resched && !nu->nsu_expire)
+		nu->nsu_expire = ni->nsui_expire;
+	spin_unlock(&nn->nfsd_ssc_lock);
 }
 
 #else /* CONFIG_NFSD_V4_2_INTER_SSC */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 97447a64bad0..0cdc898f06c9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -5459,6 +5459,11 @@ nfs4_laundromat(struct nfsd_net *nn)
 		list_del_init(&nbl->nbl_lru);
 		free_blocked_lock(nbl);
 	}
+
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+	/* service the inter-copy delayed unmount list */
+	nfsd4_ssc_expire_umount(nn);
+#endif
 out:
 	new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
 	return new_timeo;
@@ -7398,6 +7403,9 @@ nfs4_state_shutdown_net(struct net *net)
 
 	nfsd4_client_tracking_exit(net);
 	nfs4_state_destroy_net(net);
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+	nfsd4_ssc_shutdown_umount(nn);
+#endif
 	mntput(nn->nfsd_mnt);
 }
 
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 8bdc37aa2c2e..cf86d9010974 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -483,6 +483,12 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
 extern int nfsd4_is_junction(struct dentry *dentry);
 extern int register_cld_notifier(void);
 extern void unregister_cld_notifier(void);
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
+extern void nfsd4_ssc_expire_umount(struct nfsd_net *nn);
+extern void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn);
+#endif
+
 #else /* CONFIG_NFSD_V4 */
 static inline int nfsd4_is_junction(struct dentry *dentry)
 {
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 6de406322106..ce89a8fe07ff 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -403,6 +403,9 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre
 	if (ret)
 		goto out_filecache;
 
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+	nfsd4_ssc_init_umount_work(nn);
+#endif
 	nn->nfsd_net_up = true;
 	return 0;
 
diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h
index f5ba0fbff72f..18afe62988b5 100644
--- a/include/linux/nfs_ssc.h
+++ b/include/linux/nfs_ssc.h
@@ -8,6 +8,7 @@
  */
 
 #include <linux/nfs_fs.h>
+#include <linux/sunrpc/svc.h>
 
 extern struct nfs_ssc_client_ops_tbl nfs_ssc_client_tbl;
 
@@ -52,6 +53,21 @@ static inline void nfs42_ssc_close(struct file *filep)
 	if (nfs_ssc_client_tbl.ssc_nfs4_ops)
 		(*nfs_ssc_client_tbl.ssc_nfs4_ops->sco_close)(filep);
 }
+
+struct nfsd4_ssc_umount_item {
+	struct list_head nsui_list;
+	bool nsui_busy;
+	refcount_t nsui_refcnt;
+	unsigned long nsui_expire;
+	struct vfsmount *nsui_vfsmount;
+	char nsui_ipaddr[RPC_MAX_ADDRBUFLEN];
+};
+
+struct nfsd4_ssc_umount {
+	struct list_head nsu_list;
+	unsigned long nsu_expire;
+	wait_queue_head_t nsu_waitq;
+};
 #endif
 
 /*
-- 
2.9.5


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v5 2/2] NFSv4.2: remove restriction of copy size for inter-server copy.
  2021-05-17 22:43 [PATCH v5 0/2] NFSD: delay unmount source's export after inter-server copy completed Dai Ngo
  2021-05-17 22:43 ` [PATCH v5 1/2] " Dai Ngo
@ 2021-05-17 22:43 ` Dai Ngo
  1 sibling, 0 replies; 15+ messages in thread
From: Dai Ngo @ 2021-05-17 22:43 UTC (permalink / raw)
  To: olga.kornievskaia, bfields; +Cc: linux-nfs, trondmy, chuck.lever

Currently inter-server copy is allowed only if the copy size is larger
than (rsize*14) which is the over-head of the mount operation of the
source export. This patch, relying on the delayed unmount feature,
removes this restriction since the mount and unmount overhead is now
not applicable for every inter-server copy.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfs/nfs4file.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 441a2fa073c8..b5821ed46994 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -158,13 +158,11 @@ static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
 		sync = true;
 retry:
 	if (!nfs42_files_from_same_server(file_in, file_out)) {
-		/* for inter copy, if copy size if smaller than 12 RPC
-		 * payloads, fallback to traditional copy. There are
-		 * 14 RPCs during an NFSv4.x mount between source/dest
-		 * servers.
+		/*
+		 * for inter copy, if copy size is too small
+		 * then fallback to generic copy.
 		 */
-		if (sync ||
-			count <= 14 * NFS_SERVER(file_inode(file_in))->rsize)
+		if (sync)
 			return -EOPNOTSUPP;
 		cn_resp = kzalloc(sizeof(struct nfs42_copy_notify_res),
 				GFP_NOFS);
-- 
2.9.5


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 1/2] NFSD: delay unmount source's export after inter-server copy completed.
  2021-05-17 22:43 ` [PATCH v5 1/2] " Dai Ngo
@ 2021-05-18 16:28   ` Olga Kornievskaia
  2021-05-18 16:30     ` dai.ngo
  2021-05-18 17:04   ` J. Bruce Fields
  2021-05-18 17:16   ` Olga Kornievskaia
  2 siblings, 1 reply; 15+ messages in thread
From: Olga Kornievskaia @ 2021-05-18 16:28 UTC (permalink / raw)
  To: Dai Ngo; +Cc: J. Bruce Fields, linux-nfs, Trond Myklebust, Chuck Lever

On Mon, May 17, 2021 at 6:43 PM Dai Ngo <dai.ngo@oracle.com> wrote:
>
> Currently the source's export is mounted and unmounted on every
> inter-server copy operation. This patch is an enhancement to delay
> the unmount of the source export for a certain period of time to
> eliminate the mount and unmount overhead on subsequent copy operations.
>
> After a copy operation completes, a delayed task is scheduled to
> unmount the export after a configurable idle time. Each time the
> export is being used again, its expire time is extended to allow
> the export to remain mounted.
>
> The unmount task and the mount operation of the copy request are
> synced to make sure the export is not unmounted while it's being
> used.

Can you tell me what this should apply on top of? It doesn't apply to
5.13-rc2. I know Chuck posted a lot of nfsd patches which I don't
have, is your patch on top of that?

>
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
>  fs/nfsd/netns.h         |   5 ++
>  fs/nfsd/nfs4proc.c      | 216 +++++++++++++++++++++++++++++++++++++++++++++++-
>  fs/nfsd/nfs4state.c     |   8 ++
>  fs/nfsd/nfsd.h          |   6 ++
>  fs/nfsd/nfssvc.c        |   3 +
>  include/linux/nfs_ssc.h |  16 ++++
>  6 files changed, 250 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index c330f5bd0cf3..6018e5050cb4 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -21,6 +21,7 @@
>
>  struct cld_net;
>  struct nfsd4_client_tracking_ops;
> +struct nfsd4_ssc_umount;
>
>  enum {
>         /* cache misses due only to checksum comparison failures */
> @@ -176,6 +177,10 @@ struct nfsd_net {
>         unsigned int             longest_chain_cachesize;
>
>         struct shrinker         nfsd_reply_cache_shrinker;
> +
> +       spinlock_t              nfsd_ssc_lock;
> +       struct nfsd4_ssc_umount *nfsd_ssc_umount;
> +
>         /* utsname taken from the process that starts the server */
>         char                    nfsd_name[UNX_MAXNODENAME+1];
>  };
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index dd9f38d072dd..892ad72d87ae 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -55,6 +55,99 @@ module_param(inter_copy_offload_enable, bool, 0644);
>  MODULE_PARM_DESC(inter_copy_offload_enable,
>                  "Enable inter server to server copy offload. Default: false");
>
> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
> +static int nfsd4_ssc_umount_timeout = 900000;          /* default to 15 mins */
> +module_param(nfsd4_ssc_umount_timeout, int, 0644);
> +MODULE_PARM_DESC(nfsd4_ssc_umount_timeout,
> +               "idle msecs before unmount export from source server");
> +
> +void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
> +{
> +       bool do_wakeup = false;
> +       struct nfsd4_ssc_umount_item *ni = 0;
> +       struct nfsd4_ssc_umount_item *tmp;
> +       struct nfsd4_ssc_umount *nu;
> +
> +       spin_lock(&nn->nfsd_ssc_lock);
> +       if (!nn->nfsd_ssc_umount) {
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               return;
> +       }
> +       nu = nn->nfsd_ssc_umount;
> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
> +               if (time_after(jiffies, ni->nsui_expire)) {
> +                       if (refcount_read(&ni->nsui_refcnt) > 0)
> +                               continue;
> +
> +                       /* mark being unmount */
> +                       ni->nsui_busy = true;
> +                       spin_unlock(&nn->nfsd_ssc_lock);
> +                       mntput(ni->nsui_vfsmount);
> +                       spin_lock(&nn->nfsd_ssc_lock);
> +
> +                       /* waiters need to start from begin of list */
> +                       list_del(&ni->nsui_list);
> +                       kfree(ni);
> +
> +                       /* wakeup ssc_connect waiters */
> +                       do_wakeup = true;
> +                       continue;
> +               }
> +               break;
> +       }
> +       if (!list_empty(&nu->nsu_list)) {
> +               ni = list_first_entry(&nu->nsu_list,
> +                       struct nfsd4_ssc_umount_item, nsui_list);
> +               nu->nsu_expire = ni->nsui_expire;
> +       } else
> +               nu->nsu_expire = 0;
> +
> +       if (do_wakeup)
> +               wake_up_all(&nu->nsu_waitq);
> +       spin_unlock(&nn->nfsd_ssc_lock);
> +}
> +
> +/*
> + * This is called when nfsd is being shutdown, after all inter_ssc
> + * cleanup were done, to destroy the ssc delayed unmount list.
> + */
> +void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn)
> +{
> +       struct nfsd4_ssc_umount_item *ni = 0;
> +       struct nfsd4_ssc_umount_item *tmp;
> +       struct nfsd4_ssc_umount *nu;
> +
> +       spin_lock(&nn->nfsd_ssc_lock);
> +       if (!nn->nfsd_ssc_umount) {
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               return;
> +       }
> +       nu = nn->nfsd_ssc_umount;
> +       nn->nfsd_ssc_umount = 0;
> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
> +               list_del(&ni->nsui_list);
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               mntput(ni->nsui_vfsmount);
> +               kfree(ni);
> +               spin_lock(&nn->nfsd_ssc_lock);
> +       }
> +       spin_unlock(&nn->nfsd_ssc_lock);
> +       kfree(nu);
> +}
> +
> +void nfsd4_ssc_init_umount_work(struct nfsd_net *nn)
> +{
> +       nn->nfsd_ssc_umount = kzalloc(sizeof(struct nfsd4_ssc_umount),
> +                                       GFP_KERNEL);
> +       if (!nn->nfsd_ssc_umount)
> +               return;
> +       spin_lock_init(&nn->nfsd_ssc_lock);
> +       INIT_LIST_HEAD(&nn->nfsd_ssc_umount->nsu_list);
> +       init_waitqueue_head(&nn->nfsd_ssc_umount->nsu_waitq);
> +}
> +EXPORT_SYMBOL_GPL(nfsd4_ssc_init_umount_work);
> +#endif
> +
>  #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
>  #include <linux/security.h>
>
> @@ -1181,6 +1274,12 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
>         char *ipaddr, *dev_name, *raw_data;
>         int len, raw_len;
>         __be32 status = nfserr_inval;
> +       struct nfsd4_ssc_umount_item *ni = 0;
> +       struct nfsd4_ssc_umount_item *work = NULL;
> +       struct nfsd4_ssc_umount_item *tmp;
> +       struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
> +       struct nfsd4_ssc_umount *nu;
> +       DEFINE_WAIT(wait);
>
>         naddr = &nss->u.nl4_addr;
>         tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr,
> @@ -1229,12 +1328,76 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
>                 goto out_free_rawdata;
>         snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
>
> +       work = kzalloc(sizeof(*work), GFP_KERNEL);
> +try_again:
> +       spin_lock(&nn->nfsd_ssc_lock);
> +       if (!nn->nfsd_ssc_umount) {
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               kfree(work);
> +               work = NULL;
> +               goto skip_dul;
> +       }
> +       nu = nn->nfsd_ssc_umount;
> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
> +               if (strncmp(ni->nsui_ipaddr, ipaddr, sizeof(ni->nsui_ipaddr)))
> +                       continue;
> +               /* found a match */
> +               if (ni->nsui_busy) {
> +                       /*  wait - and try again */
> +                       prepare_to_wait(&nu->nsu_waitq, &wait,
> +                               TASK_INTERRUPTIBLE);
> +                       spin_unlock(&nn->nfsd_ssc_lock);
> +
> +                       /* allow 20secs for mount/unmount for now - revisit */
> +                       if (signal_pending(current) ||
> +                                       (schedule_timeout(20*HZ) == 0)) {
> +                               status = nfserr_eagain;
> +                               kfree(work);
> +                               goto out_free_devname;
> +                       }
> +                       finish_wait(&nu->nsu_waitq, &wait);
> +                       goto try_again;
> +               }
> +               ss_mnt = ni->nsui_vfsmount;
> +               if (refcount_read(&ni->nsui_refcnt) == 0)
> +                       refcount_set(&ni->nsui_refcnt, 1);
> +               else
> +                       refcount_inc(&ni->nsui_refcnt);
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               kfree(work);
> +               goto out_done;
> +       }
> +       /* create new entry, set busy, insert list, clear busy after mount */
> +       if (work) {
> +               strncpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr));
> +               refcount_set(&work->nsui_refcnt, 1);
> +               work->nsui_busy = true;
> +               list_add_tail(&work->nsui_list, &nu->nsu_list);
> +       }
> +       spin_unlock(&nn->nfsd_ssc_lock);
> +skip_dul:
> +
>         /* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */
>         ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data);
>         module_put(type->owner);
> -       if (IS_ERR(ss_mnt))
> +       if (IS_ERR(ss_mnt)) {
> +               if (work) {
> +                       spin_lock(&nn->nfsd_ssc_lock);
> +                       list_del(&work->nsui_list);
> +                       wake_up_all(&nu->nsu_waitq);
> +                       spin_unlock(&nn->nfsd_ssc_lock);
> +                       kfree(work);
> +               }
>                 goto out_free_devname;
> -
> +       }
> +       if (work) {
> +               spin_lock(&nn->nfsd_ssc_lock);
> +               work->nsui_vfsmount = ss_mnt;
> +               work->nsui_busy = false;
> +               wake_up_all(&nu->nsu_waitq);
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +       }
> +out_done:
>         status = 0;
>         *mount = ss_mnt;
>
> @@ -1301,10 +1464,55 @@ static void
>  nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
>                         struct nfsd_file *dst)
>  {
> +       bool found = false;
> +       bool resched = false;
> +       long timeout;
> +       struct nfsd4_ssc_umount_item *tmp;
> +       struct nfsd4_ssc_umount_item *ni = 0;
> +       struct nfsd4_ssc_umount *nu;
> +       struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id);
> +
>         nfs42_ssc_close(src->nf_file);
> -       fput(src->nf_file);
>         nfsd_file_put(dst);
> -       mntput(ss_mnt);
> +       fput(src->nf_file);
> +
> +       if (!nn) {
> +               mntput(ss_mnt);
> +               return;
> +       }
> +       spin_lock(&nn->nfsd_ssc_lock);
> +       if (!nn->nfsd_ssc_umount) {
> +               /* delayed unmount list not setup */
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               mntput(ss_mnt);
> +               return;
> +       }
> +       nu = nn->nfsd_ssc_umount;
> +       timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout);
> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
> +               if (ni->nsui_vfsmount->mnt_sb == ss_mnt->mnt_sb) {
> +                       list_del(&ni->nsui_list);
> +                       /*
> +                        * vfsmount can be shared by multiple exports,
> +                        * decrement refcnt and schedule delayed task
> +                        * if it drops to 0.
> +                        */
> +                       if (refcount_dec_and_test(&ni->nsui_refcnt))
> +                               resched = true;
> +                       ni->nsui_expire = jiffies + timeout;
> +                       list_add_tail(&ni->nsui_list, &nu->nsu_list);
> +                       found = true;
> +                       break;
> +               }
> +       }
> +       if (!found) {
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               mntput(ss_mnt);
> +               return;
> +       }
> +       if (resched && !nu->nsu_expire)
> +               nu->nsu_expire = ni->nsui_expire;
> +       spin_unlock(&nn->nfsd_ssc_lock);
>  }
>
>  #else /* CONFIG_NFSD_V4_2_INTER_SSC */
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 97447a64bad0..0cdc898f06c9 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -5459,6 +5459,11 @@ nfs4_laundromat(struct nfsd_net *nn)
>                 list_del_init(&nbl->nbl_lru);
>                 free_blocked_lock(nbl);
>         }
> +
> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
> +       /* service the inter-copy delayed unmount list */
> +       nfsd4_ssc_expire_umount(nn);
> +#endif
>  out:
>         new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
>         return new_timeo;
> @@ -7398,6 +7403,9 @@ nfs4_state_shutdown_net(struct net *net)
>
>         nfsd4_client_tracking_exit(net);
>         nfs4_state_destroy_net(net);
> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
> +       nfsd4_ssc_shutdown_umount(nn);
> +#endif
>         mntput(nn->nfsd_mnt);
>  }
>
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index 8bdc37aa2c2e..cf86d9010974 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -483,6 +483,12 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
>  extern int nfsd4_is_junction(struct dentry *dentry);
>  extern int register_cld_notifier(void);
>  extern void unregister_cld_notifier(void);
> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
> +extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
> +extern void nfsd4_ssc_expire_umount(struct nfsd_net *nn);
> +extern void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn);
> +#endif
> +
>  #else /* CONFIG_NFSD_V4 */
>  static inline int nfsd4_is_junction(struct dentry *dentry)
>  {
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index 6de406322106..ce89a8fe07ff 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -403,6 +403,9 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre
>         if (ret)
>                 goto out_filecache;
>
> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
> +       nfsd4_ssc_init_umount_work(nn);
> +#endif
>         nn->nfsd_net_up = true;
>         return 0;
>
> diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h
> index f5ba0fbff72f..18afe62988b5 100644
> --- a/include/linux/nfs_ssc.h
> +++ b/include/linux/nfs_ssc.h
> @@ -8,6 +8,7 @@
>   */
>
>  #include <linux/nfs_fs.h>
> +#include <linux/sunrpc/svc.h>
>
>  extern struct nfs_ssc_client_ops_tbl nfs_ssc_client_tbl;
>
> @@ -52,6 +53,21 @@ static inline void nfs42_ssc_close(struct file *filep)
>         if (nfs_ssc_client_tbl.ssc_nfs4_ops)
>                 (*nfs_ssc_client_tbl.ssc_nfs4_ops->sco_close)(filep);
>  }
> +
> +struct nfsd4_ssc_umount_item {
> +       struct list_head nsui_list;
> +       bool nsui_busy;
> +       refcount_t nsui_refcnt;
> +       unsigned long nsui_expire;
> +       struct vfsmount *nsui_vfsmount;
> +       char nsui_ipaddr[RPC_MAX_ADDRBUFLEN];
> +};
> +
> +struct nfsd4_ssc_umount {
> +       struct list_head nsu_list;
> +       unsigned long nsu_expire;
> +       wait_queue_head_t nsu_waitq;
> +};
>  #endif
>
>  /*
> --
> 2.9.5
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 1/2] NFSD: delay unmount source's export after inter-server copy completed.
  2021-05-18 16:28   ` Olga Kornievskaia
@ 2021-05-18 16:30     ` dai.ngo
  2021-05-18 16:40       ` Olga Kornievskaia
  0 siblings, 1 reply; 15+ messages in thread
From: dai.ngo @ 2021-05-18 16:30 UTC (permalink / raw)
  To: Olga Kornievskaia
  Cc: J. Bruce Fields, linux-nfs, Trond Myklebust, Chuck Lever


On 5/18/21 9:28 AM, Olga Kornievskaia wrote:
> On Mon, May 17, 2021 at 6:43 PM Dai Ngo <dai.ngo@oracle.com> wrote:
>> Currently the source's export is mounted and unmounted on every
>> inter-server copy operation. This patch is an enhancement to delay
>> the unmount of the source export for a certain period of time to
>> eliminate the mount and unmount overhead on subsequent copy operations.
>>
>> After a copy operation completes, a delayed task is scheduled to
>> unmount the export after a configurable idle time. Each time the
>> export is being used again, its expire time is extended to allow
>> the export to remain mounted.
>>
>> The unmount task and the mount operation of the copy request are
>> synced to make sure the export is not unmounted while it's being
>> used.
> Can you tell me what this should apply on top of? It doesn't apply to
> 5.13-rc2. I know Chuck posted a lot of nfsd patches which I don't
> have, is your patch on top of that?

I built it on top of 5.12-rc8.

-Dai

>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>>   fs/nfsd/netns.h         |   5 ++
>>   fs/nfsd/nfs4proc.c      | 216 +++++++++++++++++++++++++++++++++++++++++++++++-
>>   fs/nfsd/nfs4state.c     |   8 ++
>>   fs/nfsd/nfsd.h          |   6 ++
>>   fs/nfsd/nfssvc.c        |   3 +
>>   include/linux/nfs_ssc.h |  16 ++++
>>   6 files changed, 250 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
>> index c330f5bd0cf3..6018e5050cb4 100644
>> --- a/fs/nfsd/netns.h
>> +++ b/fs/nfsd/netns.h
>> @@ -21,6 +21,7 @@
>>
>>   struct cld_net;
>>   struct nfsd4_client_tracking_ops;
>> +struct nfsd4_ssc_umount;
>>
>>   enum {
>>          /* cache misses due only to checksum comparison failures */
>> @@ -176,6 +177,10 @@ struct nfsd_net {
>>          unsigned int             longest_chain_cachesize;
>>
>>          struct shrinker         nfsd_reply_cache_shrinker;
>> +
>> +       spinlock_t              nfsd_ssc_lock;
>> +       struct nfsd4_ssc_umount *nfsd_ssc_umount;
>> +
>>          /* utsname taken from the process that starts the server */
>>          char                    nfsd_name[UNX_MAXNODENAME+1];
>>   };
>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>> index dd9f38d072dd..892ad72d87ae 100644
>> --- a/fs/nfsd/nfs4proc.c
>> +++ b/fs/nfsd/nfs4proc.c
>> @@ -55,6 +55,99 @@ module_param(inter_copy_offload_enable, bool, 0644);
>>   MODULE_PARM_DESC(inter_copy_offload_enable,
>>                   "Enable inter server to server copy offload. Default: false");
>>
>> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
>> +static int nfsd4_ssc_umount_timeout = 900000;          /* default to 15 mins */
>> +module_param(nfsd4_ssc_umount_timeout, int, 0644);
>> +MODULE_PARM_DESC(nfsd4_ssc_umount_timeout,
>> +               "idle msecs before unmount export from source server");
>> +
>> +void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
>> +{
>> +       bool do_wakeup = false;
>> +       struct nfsd4_ssc_umount_item *ni = 0;
>> +       struct nfsd4_ssc_umount_item *tmp;
>> +       struct nfsd4_ssc_umount *nu;
>> +
>> +       spin_lock(&nn->nfsd_ssc_lock);
>> +       if (!nn->nfsd_ssc_umount) {
>> +               spin_unlock(&nn->nfsd_ssc_lock);
>> +               return;
>> +       }
>> +       nu = nn->nfsd_ssc_umount;
>> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
>> +               if (time_after(jiffies, ni->nsui_expire)) {
>> +                       if (refcount_read(&ni->nsui_refcnt) > 0)
>> +                               continue;
>> +
>> +                       /* mark being unmount */
>> +                       ni->nsui_busy = true;
>> +                       spin_unlock(&nn->nfsd_ssc_lock);
>> +                       mntput(ni->nsui_vfsmount);
>> +                       spin_lock(&nn->nfsd_ssc_lock);
>> +
>> +                       /* waiters need to start from begin of list */
>> +                       list_del(&ni->nsui_list);
>> +                       kfree(ni);
>> +
>> +                       /* wakeup ssc_connect waiters */
>> +                       do_wakeup = true;
>> +                       continue;
>> +               }
>> +               break;
>> +       }
>> +       if (!list_empty(&nu->nsu_list)) {
>> +               ni = list_first_entry(&nu->nsu_list,
>> +                       struct nfsd4_ssc_umount_item, nsui_list);
>> +               nu->nsu_expire = ni->nsui_expire;
>> +       } else
>> +               nu->nsu_expire = 0;
>> +
>> +       if (do_wakeup)
>> +               wake_up_all(&nu->nsu_waitq);
>> +       spin_unlock(&nn->nfsd_ssc_lock);
>> +}
>> +
>> +/*
>> + * This is called when nfsd is being shutdown, after all inter_ssc
>> + * cleanup were done, to destroy the ssc delayed unmount list.
>> + */
>> +void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn)
>> +{
>> +       struct nfsd4_ssc_umount_item *ni = 0;
>> +       struct nfsd4_ssc_umount_item *tmp;
>> +       struct nfsd4_ssc_umount *nu;
>> +
>> +       spin_lock(&nn->nfsd_ssc_lock);
>> +       if (!nn->nfsd_ssc_umount) {
>> +               spin_unlock(&nn->nfsd_ssc_lock);
>> +               return;
>> +       }
>> +       nu = nn->nfsd_ssc_umount;
>> +       nn->nfsd_ssc_umount = 0;
>> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
>> +               list_del(&ni->nsui_list);
>> +               spin_unlock(&nn->nfsd_ssc_lock);
>> +               mntput(ni->nsui_vfsmount);
>> +               kfree(ni);
>> +               spin_lock(&nn->nfsd_ssc_lock);
>> +       }
>> +       spin_unlock(&nn->nfsd_ssc_lock);
>> +       kfree(nu);
>> +}
>> +
>> +void nfsd4_ssc_init_umount_work(struct nfsd_net *nn)
>> +{
>> +       nn->nfsd_ssc_umount = kzalloc(sizeof(struct nfsd4_ssc_umount),
>> +                                       GFP_KERNEL);
>> +       if (!nn->nfsd_ssc_umount)
>> +               return;
>> +       spin_lock_init(&nn->nfsd_ssc_lock);
>> +       INIT_LIST_HEAD(&nn->nfsd_ssc_umount->nsu_list);
>> +       init_waitqueue_head(&nn->nfsd_ssc_umount->nsu_waitq);
>> +}
>> +EXPORT_SYMBOL_GPL(nfsd4_ssc_init_umount_work);
>> +#endif
>> +
>>   #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
>>   #include <linux/security.h>
>>
>> @@ -1181,6 +1274,12 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
>>          char *ipaddr, *dev_name, *raw_data;
>>          int len, raw_len;
>>          __be32 status = nfserr_inval;
>> +       struct nfsd4_ssc_umount_item *ni = 0;
>> +       struct nfsd4_ssc_umount_item *work = NULL;
>> +       struct nfsd4_ssc_umount_item *tmp;
>> +       struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
>> +       struct nfsd4_ssc_umount *nu;
>> +       DEFINE_WAIT(wait);
>>
>>          naddr = &nss->u.nl4_addr;
>>          tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr,
>> @@ -1229,12 +1328,76 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
>>                  goto out_free_rawdata;
>>          snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
>>
>> +       work = kzalloc(sizeof(*work), GFP_KERNEL);
>> +try_again:
>> +       spin_lock(&nn->nfsd_ssc_lock);
>> +       if (!nn->nfsd_ssc_umount) {
>> +               spin_unlock(&nn->nfsd_ssc_lock);
>> +               kfree(work);
>> +               work = NULL;
>> +               goto skip_dul;
>> +       }
>> +       nu = nn->nfsd_ssc_umount;
>> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
>> +               if (strncmp(ni->nsui_ipaddr, ipaddr, sizeof(ni->nsui_ipaddr)))
>> +                       continue;
>> +               /* found a match */
>> +               if (ni->nsui_busy) {
>> +                       /*  wait - and try again */
>> +                       prepare_to_wait(&nu->nsu_waitq, &wait,
>> +                               TASK_INTERRUPTIBLE);
>> +                       spin_unlock(&nn->nfsd_ssc_lock);
>> +
>> +                       /* allow 20secs for mount/unmount for now - revisit */
>> +                       if (signal_pending(current) ||
>> +                                       (schedule_timeout(20*HZ) == 0)) {
>> +                               status = nfserr_eagain;
>> +                               kfree(work);
>> +                               goto out_free_devname;
>> +                       }
>> +                       finish_wait(&nu->nsu_waitq, &wait);
>> +                       goto try_again;
>> +               }
>> +               ss_mnt = ni->nsui_vfsmount;
>> +               if (refcount_read(&ni->nsui_refcnt) == 0)
>> +                       refcount_set(&ni->nsui_refcnt, 1);
>> +               else
>> +                       refcount_inc(&ni->nsui_refcnt);
>> +               spin_unlock(&nn->nfsd_ssc_lock);
>> +               kfree(work);
>> +               goto out_done;
>> +       }
>> +       /* create new entry, set busy, insert list, clear busy after mount */
>> +       if (work) {
>> +               strncpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr));
>> +               refcount_set(&work->nsui_refcnt, 1);
>> +               work->nsui_busy = true;
>> +               list_add_tail(&work->nsui_list, &nu->nsu_list);
>> +       }
>> +       spin_unlock(&nn->nfsd_ssc_lock);
>> +skip_dul:
>> +
>>          /* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */
>>          ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data);
>>          module_put(type->owner);
>> -       if (IS_ERR(ss_mnt))
>> +       if (IS_ERR(ss_mnt)) {
>> +               if (work) {
>> +                       spin_lock(&nn->nfsd_ssc_lock);
>> +                       list_del(&work->nsui_list);
>> +                       wake_up_all(&nu->nsu_waitq);
>> +                       spin_unlock(&nn->nfsd_ssc_lock);
>> +                       kfree(work);
>> +               }
>>                  goto out_free_devname;
>> -
>> +       }
>> +       if (work) {
>> +               spin_lock(&nn->nfsd_ssc_lock);
>> +               work->nsui_vfsmount = ss_mnt;
>> +               work->nsui_busy = false;
>> +               wake_up_all(&nu->nsu_waitq);
>> +               spin_unlock(&nn->nfsd_ssc_lock);
>> +       }
>> +out_done:
>>          status = 0;
>>          *mount = ss_mnt;
>>
>> @@ -1301,10 +1464,55 @@ static void
>>   nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
>>                          struct nfsd_file *dst)
>>   {
>> +       bool found = false;
>> +       bool resched = false;
>> +       long timeout;
>> +       struct nfsd4_ssc_umount_item *tmp;
>> +       struct nfsd4_ssc_umount_item *ni = 0;
>> +       struct nfsd4_ssc_umount *nu;
>> +       struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id);
>> +
>>          nfs42_ssc_close(src->nf_file);
>> -       fput(src->nf_file);
>>          nfsd_file_put(dst);
>> -       mntput(ss_mnt);
>> +       fput(src->nf_file);
>> +
>> +       if (!nn) {
>> +               mntput(ss_mnt);
>> +               return;
>> +       }
>> +       spin_lock(&nn->nfsd_ssc_lock);
>> +       if (!nn->nfsd_ssc_umount) {
>> +               /* delayed unmount list not setup */
>> +               spin_unlock(&nn->nfsd_ssc_lock);
>> +               mntput(ss_mnt);
>> +               return;
>> +       }
>> +       nu = nn->nfsd_ssc_umount;
>> +       timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout);
>> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
>> +               if (ni->nsui_vfsmount->mnt_sb == ss_mnt->mnt_sb) {
>> +                       list_del(&ni->nsui_list);
>> +                       /*
>> +                        * vfsmount can be shared by multiple exports,
>> +                        * decrement refcnt and schedule delayed task
>> +                        * if it drops to 0.
>> +                        */
>> +                       if (refcount_dec_and_test(&ni->nsui_refcnt))
>> +                               resched = true;
>> +                       ni->nsui_expire = jiffies + timeout;
>> +                       list_add_tail(&ni->nsui_list, &nu->nsu_list);
>> +                       found = true;
>> +                       break;
>> +               }
>> +       }
>> +       if (!found) {
>> +               spin_unlock(&nn->nfsd_ssc_lock);
>> +               mntput(ss_mnt);
>> +               return;
>> +       }
>> +       if (resched && !nu->nsu_expire)
>> +               nu->nsu_expire = ni->nsui_expire;
>> +       spin_unlock(&nn->nfsd_ssc_lock);
>>   }
>>
>>   #else /* CONFIG_NFSD_V4_2_INTER_SSC */
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 97447a64bad0..0cdc898f06c9 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -5459,6 +5459,11 @@ nfs4_laundromat(struct nfsd_net *nn)
>>                  list_del_init(&nbl->nbl_lru);
>>                  free_blocked_lock(nbl);
>>          }
>> +
>> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
>> +       /* service the inter-copy delayed unmount list */
>> +       nfsd4_ssc_expire_umount(nn);
>> +#endif
>>   out:
>>          new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
>>          return new_timeo;
>> @@ -7398,6 +7403,9 @@ nfs4_state_shutdown_net(struct net *net)
>>
>>          nfsd4_client_tracking_exit(net);
>>          nfs4_state_destroy_net(net);
>> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
>> +       nfsd4_ssc_shutdown_umount(nn);
>> +#endif
>>          mntput(nn->nfsd_mnt);
>>   }
>>
>> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
>> index 8bdc37aa2c2e..cf86d9010974 100644
>> --- a/fs/nfsd/nfsd.h
>> +++ b/fs/nfsd/nfsd.h
>> @@ -483,6 +483,12 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
>>   extern int nfsd4_is_junction(struct dentry *dentry);
>>   extern int register_cld_notifier(void);
>>   extern void unregister_cld_notifier(void);
>> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
>> +extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
>> +extern void nfsd4_ssc_expire_umount(struct nfsd_net *nn);
>> +extern void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn);
>> +#endif
>> +
>>   #else /* CONFIG_NFSD_V4 */
>>   static inline int nfsd4_is_junction(struct dentry *dentry)
>>   {
>> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
>> index 6de406322106..ce89a8fe07ff 100644
>> --- a/fs/nfsd/nfssvc.c
>> +++ b/fs/nfsd/nfssvc.c
>> @@ -403,6 +403,9 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre
>>          if (ret)
>>                  goto out_filecache;
>>
>> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
>> +       nfsd4_ssc_init_umount_work(nn);
>> +#endif
>>          nn->nfsd_net_up = true;
>>          return 0;
>>
>> diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h
>> index f5ba0fbff72f..18afe62988b5 100644
>> --- a/include/linux/nfs_ssc.h
>> +++ b/include/linux/nfs_ssc.h
>> @@ -8,6 +8,7 @@
>>    */
>>
>>   #include <linux/nfs_fs.h>
>> +#include <linux/sunrpc/svc.h>
>>
>>   extern struct nfs_ssc_client_ops_tbl nfs_ssc_client_tbl;
>>
>> @@ -52,6 +53,21 @@ static inline void nfs42_ssc_close(struct file *filep)
>>          if (nfs_ssc_client_tbl.ssc_nfs4_ops)
>>                  (*nfs_ssc_client_tbl.ssc_nfs4_ops->sco_close)(filep);
>>   }
>> +
>> +struct nfsd4_ssc_umount_item {
>> +       struct list_head nsui_list;
>> +       bool nsui_busy;
>> +       refcount_t nsui_refcnt;
>> +       unsigned long nsui_expire;
>> +       struct vfsmount *nsui_vfsmount;
>> +       char nsui_ipaddr[RPC_MAX_ADDRBUFLEN];
>> +};
>> +
>> +struct nfsd4_ssc_umount {
>> +       struct list_head nsu_list;
>> +       unsigned long nsu_expire;
>> +       wait_queue_head_t nsu_waitq;
>> +};
>>   #endif
>>
>>   /*
>> --
>> 2.9.5
>>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 1/2] NFSD: delay unmount source's export after inter-server copy completed.
  2021-05-18 16:30     ` dai.ngo
@ 2021-05-18 16:40       ` Olga Kornievskaia
  0 siblings, 0 replies; 15+ messages in thread
From: Olga Kornievskaia @ 2021-05-18 16:40 UTC (permalink / raw)
  To: Dai Ngo; +Cc: J. Bruce Fields, linux-nfs, Trond Myklebust, Chuck Lever

On Tue, May 18, 2021 at 12:30 PM <dai.ngo@oracle.com> wrote:
>
>
> On 5/18/21 9:28 AM, Olga Kornievskaia wrote:
> > On Mon, May 17, 2021 at 6:43 PM Dai Ngo <dai.ngo@oracle.com> wrote:
> >> Currently the source's export is mounted and unmounted on every
> >> inter-server copy operation. This patch is an enhancement to delay
> >> the unmount of the source export for a certain period of time to
> >> eliminate the mount and unmount overhead on subsequent copy operations.
> >>
> >> After a copy operation completes, a delayed task is scheduled to
> >> unmount the export after a configurable idle time. Each time the
> >> export is being used again, its expire time is extended to allow
> >> the export to remain mounted.
> >>
> >> The unmount task and the mount operation of the copy request are
> >> synced to make sure the export is not unmounted while it's being
> >> used.
> > Can you tell me what this should apply on top of? It doesn't apply to
> > 5.13-rc2. I know Chuck posted a lot of nfsd patches which I don't
> > have, is your patch on top of that?
>
> I built it on top of 5.12-rc8. I'm not sure how.

This chunk fails:
@@ -7398,6 +7403,9 @@ nfs4_state_shutdown_net(struct net *net)

        nfsd4_client_tracking_exit(net);
        nfs4_state_destroy_net(net);
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+       nfsd4_ssc_shutdown_umount(nn);
+#endif
        mntput(nn->nfsd_mnt);
 }

Looks like this patch "nfsd: Ensure knfsd shuts down when the "nfsd"
pseudofs is unmounted" removes it. Can you rebase on the latest?



>
> -Dai
>
> >
> >> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> >> ---
> >>   fs/nfsd/netns.h         |   5 ++
> >>   fs/nfsd/nfs4proc.c      | 216 +++++++++++++++++++++++++++++++++++++++++++++++-
> >>   fs/nfsd/nfs4state.c     |   8 ++
> >>   fs/nfsd/nfsd.h          |   6 ++
> >>   fs/nfsd/nfssvc.c        |   3 +
> >>   include/linux/nfs_ssc.h |  16 ++++
> >>   6 files changed, 250 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> >> index c330f5bd0cf3..6018e5050cb4 100644
> >> --- a/fs/nfsd/netns.h
> >> +++ b/fs/nfsd/netns.h
> >> @@ -21,6 +21,7 @@
> >>
> >>   struct cld_net;
> >>   struct nfsd4_client_tracking_ops;
> >> +struct nfsd4_ssc_umount;
> >>
> >>   enum {
> >>          /* cache misses due only to checksum comparison failures */
> >> @@ -176,6 +177,10 @@ struct nfsd_net {
> >>          unsigned int             longest_chain_cachesize;
> >>
> >>          struct shrinker         nfsd_reply_cache_shrinker;
> >> +
> >> +       spinlock_t              nfsd_ssc_lock;
> >> +       struct nfsd4_ssc_umount *nfsd_ssc_umount;
> >> +
> >>          /* utsname taken from the process that starts the server */
> >>          char                    nfsd_name[UNX_MAXNODENAME+1];
> >>   };
> >> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> >> index dd9f38d072dd..892ad72d87ae 100644
> >> --- a/fs/nfsd/nfs4proc.c
> >> +++ b/fs/nfsd/nfs4proc.c
> >> @@ -55,6 +55,99 @@ module_param(inter_copy_offload_enable, bool, 0644);
> >>   MODULE_PARM_DESC(inter_copy_offload_enable,
> >>                   "Enable inter server to server copy offload. Default: false");
> >>
> >> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
> >> +static int nfsd4_ssc_umount_timeout = 900000;          /* default to 15 mins */
> >> +module_param(nfsd4_ssc_umount_timeout, int, 0644);
> >> +MODULE_PARM_DESC(nfsd4_ssc_umount_timeout,
> >> +               "idle msecs before unmount export from source server");
> >> +
> >> +void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
> >> +{
> >> +       bool do_wakeup = false;
> >> +       struct nfsd4_ssc_umount_item *ni = 0;
> >> +       struct nfsd4_ssc_umount_item *tmp;
> >> +       struct nfsd4_ssc_umount *nu;
> >> +
> >> +       spin_lock(&nn->nfsd_ssc_lock);
> >> +       if (!nn->nfsd_ssc_umount) {
> >> +               spin_unlock(&nn->nfsd_ssc_lock);
> >> +               return;
> >> +       }
> >> +       nu = nn->nfsd_ssc_umount;
> >> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
> >> +               if (time_after(jiffies, ni->nsui_expire)) {
> >> +                       if (refcount_read(&ni->nsui_refcnt) > 0)
> >> +                               continue;
> >> +
> >> +                       /* mark being unmount */
> >> +                       ni->nsui_busy = true;
> >> +                       spin_unlock(&nn->nfsd_ssc_lock);
> >> +                       mntput(ni->nsui_vfsmount);
> >> +                       spin_lock(&nn->nfsd_ssc_lock);
> >> +
> >> +                       /* waiters need to start from begin of list */
> >> +                       list_del(&ni->nsui_list);
> >> +                       kfree(ni);
> >> +
> >> +                       /* wakeup ssc_connect waiters */
> >> +                       do_wakeup = true;
> >> +                       continue;
> >> +               }
> >> +               break;
> >> +       }
> >> +       if (!list_empty(&nu->nsu_list)) {
> >> +               ni = list_first_entry(&nu->nsu_list,
> >> +                       struct nfsd4_ssc_umount_item, nsui_list);
> >> +               nu->nsu_expire = ni->nsui_expire;
> >> +       } else
> >> +               nu->nsu_expire = 0;
> >> +
> >> +       if (do_wakeup)
> >> +               wake_up_all(&nu->nsu_waitq);
> >> +       spin_unlock(&nn->nfsd_ssc_lock);
> >> +}
> >> +
> >> +/*
> >> + * This is called when nfsd is being shutdown, after all inter_ssc
> >> + * cleanup were done, to destroy the ssc delayed unmount list.
> >> + */
> >> +void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn)
> >> +{
> >> +       struct nfsd4_ssc_umount_item *ni = 0;
> >> +       struct nfsd4_ssc_umount_item *tmp;
> >> +       struct nfsd4_ssc_umount *nu;
> >> +
> >> +       spin_lock(&nn->nfsd_ssc_lock);
> >> +       if (!nn->nfsd_ssc_umount) {
> >> +               spin_unlock(&nn->nfsd_ssc_lock);
> >> +               return;
> >> +       }
> >> +       nu = nn->nfsd_ssc_umount;
> >> +       nn->nfsd_ssc_umount = 0;
> >> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
> >> +               list_del(&ni->nsui_list);
> >> +               spin_unlock(&nn->nfsd_ssc_lock);
> >> +               mntput(ni->nsui_vfsmount);
> >> +               kfree(ni);
> >> +               spin_lock(&nn->nfsd_ssc_lock);
> >> +       }
> >> +       spin_unlock(&nn->nfsd_ssc_lock);
> >> +       kfree(nu);
> >> +}
> >> +
> >> +void nfsd4_ssc_init_umount_work(struct nfsd_net *nn)
> >> +{
> >> +       nn->nfsd_ssc_umount = kzalloc(sizeof(struct nfsd4_ssc_umount),
> >> +                                       GFP_KERNEL);
> >> +       if (!nn->nfsd_ssc_umount)
> >> +               return;
> >> +       spin_lock_init(&nn->nfsd_ssc_lock);
> >> +       INIT_LIST_HEAD(&nn->nfsd_ssc_umount->nsu_list);
> >> +       init_waitqueue_head(&nn->nfsd_ssc_umount->nsu_waitq);
> >> +}
> >> +EXPORT_SYMBOL_GPL(nfsd4_ssc_init_umount_work);
> >> +#endif
> >> +
> >>   #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
> >>   #include <linux/security.h>
> >>
> >> @@ -1181,6 +1274,12 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
> >>          char *ipaddr, *dev_name, *raw_data;
> >>          int len, raw_len;
> >>          __be32 status = nfserr_inval;
> >> +       struct nfsd4_ssc_umount_item *ni = 0;
> >> +       struct nfsd4_ssc_umount_item *work = NULL;
> >> +       struct nfsd4_ssc_umount_item *tmp;
> >> +       struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
> >> +       struct nfsd4_ssc_umount *nu;
> >> +       DEFINE_WAIT(wait);
> >>
> >>          naddr = &nss->u.nl4_addr;
> >>          tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr,
> >> @@ -1229,12 +1328,76 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
> >>                  goto out_free_rawdata;
> >>          snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
> >>
> >> +       work = kzalloc(sizeof(*work), GFP_KERNEL);
> >> +try_again:
> >> +       spin_lock(&nn->nfsd_ssc_lock);
> >> +       if (!nn->nfsd_ssc_umount) {
> >> +               spin_unlock(&nn->nfsd_ssc_lock);
> >> +               kfree(work);
> >> +               work = NULL;
> >> +               goto skip_dul;
> >> +       }
> >> +       nu = nn->nfsd_ssc_umount;
> >> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
> >> +               if (strncmp(ni->nsui_ipaddr, ipaddr, sizeof(ni->nsui_ipaddr)))
> >> +                       continue;
> >> +               /* found a match */
> >> +               if (ni->nsui_busy) {
> >> +                       /*  wait - and try again */
> >> +                       prepare_to_wait(&nu->nsu_waitq, &wait,
> >> +                               TASK_INTERRUPTIBLE);
> >> +                       spin_unlock(&nn->nfsd_ssc_lock);
> >> +
> >> +                       /* allow 20secs for mount/unmount for now - revisit */
> >> +                       if (signal_pending(current) ||
> >> +                                       (schedule_timeout(20*HZ) == 0)) {
> >> +                               status = nfserr_eagain;
> >> +                               kfree(work);
> >> +                               goto out_free_devname;
> >> +                       }
> >> +                       finish_wait(&nu->nsu_waitq, &wait);
> >> +                       goto try_again;
> >> +               }
> >> +               ss_mnt = ni->nsui_vfsmount;
> >> +               if (refcount_read(&ni->nsui_refcnt) == 0)
> >> +                       refcount_set(&ni->nsui_refcnt, 1);
> >> +               else
> >> +                       refcount_inc(&ni->nsui_refcnt);
> >> +               spin_unlock(&nn->nfsd_ssc_lock);
> >> +               kfree(work);
> >> +               goto out_done;
> >> +       }
> >> +       /* create new entry, set busy, insert list, clear busy after mount */
> >> +       if (work) {
> >> +               strncpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr));
> >> +               refcount_set(&work->nsui_refcnt, 1);
> >> +               work->nsui_busy = true;
> >> +               list_add_tail(&work->nsui_list, &nu->nsu_list);
> >> +       }
> >> +       spin_unlock(&nn->nfsd_ssc_lock);
> >> +skip_dul:
> >> +
> >>          /* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */
> >>          ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data);
> >>          module_put(type->owner);
> >> -       if (IS_ERR(ss_mnt))
> >> +       if (IS_ERR(ss_mnt)) {
> >> +               if (work) {
> >> +                       spin_lock(&nn->nfsd_ssc_lock);
> >> +                       list_del(&work->nsui_list);
> >> +                       wake_up_all(&nu->nsu_waitq);
> >> +                       spin_unlock(&nn->nfsd_ssc_lock);
> >> +                       kfree(work);
> >> +               }
> >>                  goto out_free_devname;
> >> -
> >> +       }
> >> +       if (work) {
> >> +               spin_lock(&nn->nfsd_ssc_lock);
> >> +               work->nsui_vfsmount = ss_mnt;
> >> +               work->nsui_busy = false;
> >> +               wake_up_all(&nu->nsu_waitq);
> >> +               spin_unlock(&nn->nfsd_ssc_lock);
> >> +       }
> >> +out_done:
> >>          status = 0;
> >>          *mount = ss_mnt;
> >>
> >> @@ -1301,10 +1464,55 @@ static void
> >>   nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
> >>                          struct nfsd_file *dst)
> >>   {
> >> +       bool found = false;
> >> +       bool resched = false;
> >> +       long timeout;
> >> +       struct nfsd4_ssc_umount_item *tmp;
> >> +       struct nfsd4_ssc_umount_item *ni = 0;
> >> +       struct nfsd4_ssc_umount *nu;
> >> +       struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id);
> >> +
> >>          nfs42_ssc_close(src->nf_file);
> >> -       fput(src->nf_file);
> >>          nfsd_file_put(dst);
> >> -       mntput(ss_mnt);
> >> +       fput(src->nf_file);
> >> +
> >> +       if (!nn) {
> >> +               mntput(ss_mnt);
> >> +               return;
> >> +       }
> >> +       spin_lock(&nn->nfsd_ssc_lock);
> >> +       if (!nn->nfsd_ssc_umount) {
> >> +               /* delayed unmount list not setup */
> >> +               spin_unlock(&nn->nfsd_ssc_lock);
> >> +               mntput(ss_mnt);
> >> +               return;
> >> +       }
> >> +       nu = nn->nfsd_ssc_umount;
> >> +       timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout);
> >> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
> >> +               if (ni->nsui_vfsmount->mnt_sb == ss_mnt->mnt_sb) {
> >> +                       list_del(&ni->nsui_list);
> >> +                       /*
> >> +                        * vfsmount can be shared by multiple exports,
> >> +                        * decrement refcnt and schedule delayed task
> >> +                        * if it drops to 0.
> >> +                        */
> >> +                       if (refcount_dec_and_test(&ni->nsui_refcnt))
> >> +                               resched = true;
> >> +                       ni->nsui_expire = jiffies + timeout;
> >> +                       list_add_tail(&ni->nsui_list, &nu->nsu_list);
> >> +                       found = true;
> >> +                       break;
> >> +               }
> >> +       }
> >> +       if (!found) {
> >> +               spin_unlock(&nn->nfsd_ssc_lock);
> >> +               mntput(ss_mnt);
> >> +               return;
> >> +       }
> >> +       if (resched && !nu->nsu_expire)
> >> +               nu->nsu_expire = ni->nsui_expire;
> >> +       spin_unlock(&nn->nfsd_ssc_lock);
> >>   }
> >>
> >>   #else /* CONFIG_NFSD_V4_2_INTER_SSC */
> >> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> >> index 97447a64bad0..0cdc898f06c9 100644
> >> --- a/fs/nfsd/nfs4state.c
> >> +++ b/fs/nfsd/nfs4state.c
> >> @@ -5459,6 +5459,11 @@ nfs4_laundromat(struct nfsd_net *nn)
> >>                  list_del_init(&nbl->nbl_lru);
> >>                  free_blocked_lock(nbl);
> >>          }
> >> +
> >> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
> >> +       /* service the inter-copy delayed unmount list */
> >> +       nfsd4_ssc_expire_umount(nn);
> >> +#endif
> >>   out:
> >>          new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
> >>          return new_timeo;
> >> @@ -7398,6 +7403,9 @@ nfs4_state_shutdown_net(struct net *net)
> >>
> >>          nfsd4_client_tracking_exit(net);
> >>          nfs4_state_destroy_net(net);
> >> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
> >> +       nfsd4_ssc_shutdown_umount(nn);
> >> +#endif
> >>          mntput(nn->nfsd_mnt);
> >>   }
> >>
> >> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> >> index 8bdc37aa2c2e..cf86d9010974 100644
> >> --- a/fs/nfsd/nfsd.h
> >> +++ b/fs/nfsd/nfsd.h
> >> @@ -483,6 +483,12 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
> >>   extern int nfsd4_is_junction(struct dentry *dentry);
> >>   extern int register_cld_notifier(void);
> >>   extern void unregister_cld_notifier(void);
> >> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
> >> +extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
> >> +extern void nfsd4_ssc_expire_umount(struct nfsd_net *nn);
> >> +extern void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn);
> >> +#endif
> >> +
> >>   #else /* CONFIG_NFSD_V4 */
> >>   static inline int nfsd4_is_junction(struct dentry *dentry)
> >>   {
> >> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> >> index 6de406322106..ce89a8fe07ff 100644
> >> --- a/fs/nfsd/nfssvc.c
> >> +++ b/fs/nfsd/nfssvc.c
> >> @@ -403,6 +403,9 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre
> >>          if (ret)
> >>                  goto out_filecache;
> >>
> >> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
> >> +       nfsd4_ssc_init_umount_work(nn);
> >> +#endif
> >>          nn->nfsd_net_up = true;
> >>          return 0;
> >>
> >> diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h
> >> index f5ba0fbff72f..18afe62988b5 100644
> >> --- a/include/linux/nfs_ssc.h
> >> +++ b/include/linux/nfs_ssc.h
> >> @@ -8,6 +8,7 @@
> >>    */
> >>
> >>   #include <linux/nfs_fs.h>
> >> +#include <linux/sunrpc/svc.h>
> >>
> >>   extern struct nfs_ssc_client_ops_tbl nfs_ssc_client_tbl;
> >>
> >> @@ -52,6 +53,21 @@ static inline void nfs42_ssc_close(struct file *filep)
> >>          if (nfs_ssc_client_tbl.ssc_nfs4_ops)
> >>                  (*nfs_ssc_client_tbl.ssc_nfs4_ops->sco_close)(filep);
> >>   }
> >> +
> >> +struct nfsd4_ssc_umount_item {
> >> +       struct list_head nsui_list;
> >> +       bool nsui_busy;
> >> +       refcount_t nsui_refcnt;
> >> +       unsigned long nsui_expire;
> >> +       struct vfsmount *nsui_vfsmount;
> >> +       char nsui_ipaddr[RPC_MAX_ADDRBUFLEN];
> >> +};
> >> +
> >> +struct nfsd4_ssc_umount {
> >> +       struct list_head nsu_list;
> >> +       unsigned long nsu_expire;
> >> +       wait_queue_head_t nsu_waitq;
> >> +};
> >>   #endif
> >>
> >>   /*
> >> --
> >> 2.9.5
> >>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 1/2] NFSD: delay unmount source's export after inter-server copy completed.
  2021-05-17 22:43 ` [PATCH v5 1/2] " Dai Ngo
  2021-05-18 16:28   ` Olga Kornievskaia
@ 2021-05-18 17:04   ` J. Bruce Fields
  2021-05-18 17:21     ` J. Bruce Fields
  2021-05-18 17:49     ` dai.ngo
  2021-05-18 17:16   ` Olga Kornievskaia
  2 siblings, 2 replies; 15+ messages in thread
From: J. Bruce Fields @ 2021-05-18 17:04 UTC (permalink / raw)
  To: Dai Ngo; +Cc: olga.kornievskaia, linux-nfs, trondmy, chuck.lever

On Mon, May 17, 2021 at 06:43:29PM -0400, Dai Ngo wrote:
> +struct nfsd4_ssc_umount;
>  
>  enum {
>  	/* cache misses due only to checksum comparison failures */
> @@ -176,6 +177,10 @@ struct nfsd_net {
>  	unsigned int             longest_chain_cachesize;
>  
>  	struct shrinker		nfsd_reply_cache_shrinker;
> +
> +	spinlock_t              nfsd_ssc_lock;
> +	struct nfsd4_ssc_umount	*nfsd_ssc_umount;
...

> +void nfsd4_ssc_init_umount_work(struct nfsd_net *nn)
> +{
> +	nn->nfsd_ssc_umount = kzalloc(sizeof(struct nfsd4_ssc_umount),
> +					GFP_KERNEL);
> +	if (!nn->nfsd_ssc_umount)
> +		return;

Is there any reason this needs to be allocated dynamically?  Let's just
embed it in nfsd_net.

Actually, I'm not convinced the separate structure definition's really
that helpful:

> +struct nfsd4_ssc_umount {
> +	struct list_head nsu_list;
> +	unsigned long nsu_expire;
> +	wait_queue_head_t nsu_waitq;
> +};

How about just:

	struct nfsd_net {
	...
	/* tracking server-to-server copy mounts: */
	spinlock_t		nfsd_ssc_lock;
	struct list_head	nfsd_ssc_mount_list;
	unsigned long		nfsd_ssc_mount_expire;
	wait_queeu_head_t	nfsd_ssc_mount_waitq;

or something along those lines?

--b.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 1/2] NFSD: delay unmount source's export after inter-server copy completed.
  2021-05-17 22:43 ` [PATCH v5 1/2] " Dai Ngo
  2021-05-18 16:28   ` Olga Kornievskaia
  2021-05-18 17:04   ` J. Bruce Fields
@ 2021-05-18 17:16   ` Olga Kornievskaia
  2021-05-18 17:23     ` J. Bruce Fields
  2021-05-18 17:48     ` dai.ngo
  2 siblings, 2 replies; 15+ messages in thread
From: Olga Kornievskaia @ 2021-05-18 17:16 UTC (permalink / raw)
  To: Dai Ngo; +Cc: J. Bruce Fields, linux-nfs, Trond Myklebust, Chuck Lever

Hi Dai,

General review comments inline

On Mon, May 17, 2021 at 6:43 PM Dai Ngo <dai.ngo@oracle.com> wrote:
>
> Currently the source's export is mounted and unmounted on every
> inter-server copy operation. This patch is an enhancement to delay
> the unmount of the source export for a certain period of time to
> eliminate the mount and unmount overhead on subsequent copy operations.
>
> After a copy operation completes, a delayed task is scheduled to
> unmount the export after a configurable idle time. Each time the
> export is being used again, its expire time is extended to allow
> the export to remain mounted.
>
> The unmount task and the mount operation of the copy request are
> synced to make sure the export is not unmounted while it's being
> used.
>
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
>  fs/nfsd/netns.h         |   5 ++
>  fs/nfsd/nfs4proc.c      | 216 +++++++++++++++++++++++++++++++++++++++++++++++-
>  fs/nfsd/nfs4state.c     |   8 ++
>  fs/nfsd/nfsd.h          |   6 ++
>  fs/nfsd/nfssvc.c        |   3 +
>  include/linux/nfs_ssc.h |  16 ++++
>  6 files changed, 250 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index c330f5bd0cf3..6018e5050cb4 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -21,6 +21,7 @@
>
>  struct cld_net;
>  struct nfsd4_client_tracking_ops;
> +struct nfsd4_ssc_umount;
>
>  enum {
>         /* cache misses due only to checksum comparison failures */
> @@ -176,6 +177,10 @@ struct nfsd_net {
>         unsigned int             longest_chain_cachesize;
>
>         struct shrinker         nfsd_reply_cache_shrinker;
> +
> +       spinlock_t              nfsd_ssc_lock;
> +       struct nfsd4_ssc_umount *nfsd_ssc_umount;
> +
>         /* utsname taken from the process that starts the server */
>         char                    nfsd_name[UNX_MAXNODENAME+1];
>  };
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index dd9f38d072dd..892ad72d87ae 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -55,6 +55,99 @@ module_param(inter_copy_offload_enable, bool, 0644);
>  MODULE_PARM_DESC(inter_copy_offload_enable,
>                  "Enable inter server to server copy offload. Default: false");
>
> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
> +static int nfsd4_ssc_umount_timeout = 900000;          /* default to 15 mins */
> +module_param(nfsd4_ssc_umount_timeout, int, 0644);
> +MODULE_PARM_DESC(nfsd4_ssc_umount_timeout,
> +               "idle msecs before unmount export from source server");
> +
> +void nfsd4_ssc_expire_umount(struct nfsd_net *nn)

Why should this be in nfs4proc.c when it's used in nfs4state.c only?

> +{
> +       bool do_wakeup = false;
> +       struct nfsd4_ssc_umount_item *ni = 0;
> +       struct nfsd4_ssc_umount_item *tmp;
> +       struct nfsd4_ssc_umount *nu;
> +
> +       spin_lock(&nn->nfsd_ssc_lock);
> +       if (!nn->nfsd_ssc_umount) {
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               return;
> +       }
> +       nu = nn->nfsd_ssc_umount;
> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
> +               if (time_after(jiffies, ni->nsui_expire)) {
> +                       if (refcount_read(&ni->nsui_refcnt) > 0)
> +                               continue;
> +
> +                       /* mark being unmount */
> +                       ni->nsui_busy = true;
> +                       spin_unlock(&nn->nfsd_ssc_lock);
> +                       mntput(ni->nsui_vfsmount);
> +                       spin_lock(&nn->nfsd_ssc_lock);
> +
> +                       /* waiters need to start from begin of list */
> +                       list_del(&ni->nsui_list);
> +                       kfree(ni);
> +
> +                       /* wakeup ssc_connect waiters */
> +                       do_wakeup = true;
> +                       continue;
> +               }
> +               break;

mnt1, mnt2, mnt3  added all at the same time. mnt1, mnt3 only used
once during the expiration time. mnt2 is being used continuously.
expiration time comes. delayed mnt1 is processed. mnt2 is next on the
list and it's not expired so code quits. mnt3 will not be umounted
until mnt2 expires. I think the break is wrong.

> +       }
> +       if (!list_empty(&nu->nsu_list)) {
> +               ni = list_first_entry(&nu->nsu_list,
> +                       struct nfsd4_ssc_umount_item, nsui_list);
> +               nu->nsu_expire = ni->nsui_expire;
> +       } else
> +               nu->nsu_expire = 0;
> +
> +       if (do_wakeup)
> +               wake_up_all(&nu->nsu_waitq);
> +       spin_unlock(&nn->nfsd_ssc_lock);
> +}
> +
> +/*
> + * This is called when nfsd is being shutdown, after all inter_ssc
> + * cleanup were done, to destroy the ssc delayed unmount list.
> + */
> +void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn)
> +{
> +       struct nfsd4_ssc_umount_item *ni = 0;
> +       struct nfsd4_ssc_umount_item *tmp;
> +       struct nfsd4_ssc_umount *nu;
> +
> +       spin_lock(&nn->nfsd_ssc_lock);
> +       if (!nn->nfsd_ssc_umount) {
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               return;
> +       }
> +       nu = nn->nfsd_ssc_umount;
> +       nn->nfsd_ssc_umount = 0;
> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
> +               list_del(&ni->nsui_list);
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               mntput(ni->nsui_vfsmount);
> +               kfree(ni);
> +               spin_lock(&nn->nfsd_ssc_lock);
> +       }
> +       spin_unlock(&nn->nfsd_ssc_lock);
> +       kfree(nu);

General question to maybe Bruce: can nfsd_net go away while a compound
is using it ? I hope not.  I can't quite think thru the scenario where
we started the async copy task but then somehow nfsd_net is going away
before async_copy is done and if there is anything we need to take
care of. I think the net shutdown would not find anything to free so
it would just return. And the copy's cleanup would not find a nfsd_net
and so it would right away unmount. This is more of thinking out loud
but I'd like somebody else to confirm I'm correct that we are OK here.

> +}
> +
> +void nfsd4_ssc_init_umount_work(struct nfsd_net *nn)
> +{
> +       nn->nfsd_ssc_umount = kzalloc(sizeof(struct nfsd4_ssc_umount),
> +                                       GFP_KERNEL);
> +       if (!nn->nfsd_ssc_umount)
> +               return;
> +       spin_lock_init(&nn->nfsd_ssc_lock);
> +       INIT_LIST_HEAD(&nn->nfsd_ssc_umount->nsu_list);
> +       init_waitqueue_head(&nn->nfsd_ssc_umount->nsu_waitq);
> +}
> +EXPORT_SYMBOL_GPL(nfsd4_ssc_init_umount_work);
> +#endif
> +
>  #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
>  #include <linux/security.h>
>
> @@ -1181,6 +1274,12 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
>         char *ipaddr, *dev_name, *raw_data;
>         int len, raw_len;
>         __be32 status = nfserr_inval;
> +       struct nfsd4_ssc_umount_item *ni = 0;
> +       struct nfsd4_ssc_umount_item *work = NULL;
> +       struct nfsd4_ssc_umount_item *tmp;
> +       struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
> +       struct nfsd4_ssc_umount *nu;
> +       DEFINE_WAIT(wait);
>
>         naddr = &nss->u.nl4_addr;
>         tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr,
> @@ -1229,12 +1328,76 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
>                 goto out_free_rawdata;
>         snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
>
> +       work = kzalloc(sizeof(*work), GFP_KERNEL);
> +try_again:
> +       spin_lock(&nn->nfsd_ssc_lock);
> +       if (!nn->nfsd_ssc_umount) {
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               kfree(work);
> +               work = NULL;
> +               goto skip_dul;
> +       }
> +       nu = nn->nfsd_ssc_umount;
> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
> +               if (strncmp(ni->nsui_ipaddr, ipaddr, sizeof(ni->nsui_ipaddr)))
> +                       continue;
> +               /* found a match */
> +               if (ni->nsui_busy) {
> +                       /*  wait - and try again */
> +                       prepare_to_wait(&nu->nsu_waitq, &wait,
> +                               TASK_INTERRUPTIBLE);
> +                       spin_unlock(&nn->nfsd_ssc_lock);
> +
> +                       /* allow 20secs for mount/unmount for now - revisit */
> +                       if (signal_pending(current) ||
> +                                       (schedule_timeout(20*HZ) == 0)) {
> +                               status = nfserr_eagain;
> +                               kfree(work);
> +                               goto out_free_devname;
> +                       }
> +                       finish_wait(&nu->nsu_waitq, &wait);
> +                       goto try_again;
> +               }
> +               ss_mnt = ni->nsui_vfsmount;
> +               if (refcount_read(&ni->nsui_refcnt) == 0)
> +                       refcount_set(&ni->nsui_refcnt, 1);
> +               else
> +                       refcount_inc(&ni->nsui_refcnt);
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               kfree(work);
> +               goto out_done;
> +       }
> +       /* create new entry, set busy, insert list, clear busy after mount */
> +       if (work) {
> +               strncpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr));
> +               refcount_set(&work->nsui_refcnt, 1);
> +               work->nsui_busy = true;
> +               list_add_tail(&work->nsui_list, &nu->nsu_list);
> +       }
> +       spin_unlock(&nn->nfsd_ssc_lock);
> +skip_dul:
> +
>         /* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */
>         ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data);
>         module_put(type->owner);
> -       if (IS_ERR(ss_mnt))
> +       if (IS_ERR(ss_mnt)) {
> +               if (work) {
> +                       spin_lock(&nn->nfsd_ssc_lock);
> +                       list_del(&work->nsui_list);
> +                       wake_up_all(&nu->nsu_waitq);
> +                       spin_unlock(&nn->nfsd_ssc_lock);
> +                       kfree(work);
> +               }
>                 goto out_free_devname;
> -
> +       }
> +       if (work) {
> +               spin_lock(&nn->nfsd_ssc_lock);
> +               work->nsui_vfsmount = ss_mnt;
> +               work->nsui_busy = false;
> +               wake_up_all(&nu->nsu_waitq);
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +       }
> +out_done:
>         status = 0;
>         *mount = ss_mnt;
>
> @@ -1301,10 +1464,55 @@ static void
>  nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
>                         struct nfsd_file *dst)
>  {
> +       bool found = false;
> +       bool resched = false;
> +       long timeout;
> +       struct nfsd4_ssc_umount_item *tmp;
> +       struct nfsd4_ssc_umount_item *ni = 0;
> +       struct nfsd4_ssc_umount *nu;
> +       struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id);
> +
>         nfs42_ssc_close(src->nf_file);
> -       fput(src->nf_file);
>         nfsd_file_put(dst);
> -       mntput(ss_mnt);
> +       fput(src->nf_file);
> +
> +       if (!nn) {
> +               mntput(ss_mnt);
> +               return;
> +       }
> +       spin_lock(&nn->nfsd_ssc_lock);
> +       if (!nn->nfsd_ssc_umount) {
> +               /* delayed unmount list not setup */
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               mntput(ss_mnt);
> +               return;
> +       }
> +       nu = nn->nfsd_ssc_umount;
> +       timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout);
> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
> +               if (ni->nsui_vfsmount->mnt_sb == ss_mnt->mnt_sb) {
> +                       list_del(&ni->nsui_list);
> +                       /*
> +                        * vfsmount can be shared by multiple exports,
> +                        * decrement refcnt and schedule delayed task
> +                        * if it drops to 0.
> +                        */
> +                       if (refcount_dec_and_test(&ni->nsui_refcnt))
> +                               resched = true;
> +                       ni->nsui_expire = jiffies + timeout;
> +                       list_add_tail(&ni->nsui_list, &nu->nsu_list);
> +                       found = true;
> +                       break;
> +               }
> +       }
> +       if (!found) {
> +               spin_unlock(&nn->nfsd_ssc_lock);
> +               mntput(ss_mnt);
> +               return;
> +       }
> +       if (resched && !nu->nsu_expire)
> +               nu->nsu_expire = ni->nsui_expire;
> +       spin_unlock(&nn->nfsd_ssc_lock);
>  }
>
>  #else /* CONFIG_NFSD_V4_2_INTER_SSC */
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 97447a64bad0..0cdc898f06c9 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -5459,6 +5459,11 @@ nfs4_laundromat(struct nfsd_net *nn)
>                 list_del_init(&nbl->nbl_lru);
>                 free_blocked_lock(nbl);
>         }
> +
> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
> +       /* service the inter-copy delayed unmount list */
> +       nfsd4_ssc_expire_umount(nn);
> +#endif
>  out:
>         new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
>         return new_timeo;
> @@ -7398,6 +7403,9 @@ nfs4_state_shutdown_net(struct net *net)
>
>         nfsd4_client_tracking_exit(net);
>         nfs4_state_destroy_net(net);
> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
> +       nfsd4_ssc_shutdown_umount(nn);
> +#endif
>         mntput(nn->nfsd_mnt);
>  }
>
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index 8bdc37aa2c2e..cf86d9010974 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -483,6 +483,12 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
>  extern int nfsd4_is_junction(struct dentry *dentry);
>  extern int register_cld_notifier(void);
>  extern void unregister_cld_notifier(void);
> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
> +extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
> +extern void nfsd4_ssc_expire_umount(struct nfsd_net *nn);
> +extern void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn);
> +#endif
> +
>  #else /* CONFIG_NFSD_V4 */
>  static inline int nfsd4_is_junction(struct dentry *dentry)
>  {
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index 6de406322106..ce89a8fe07ff 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -403,6 +403,9 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre
>         if (ret)
>                 goto out_filecache;
>
> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
> +       nfsd4_ssc_init_umount_work(nn);
> +#endif
>         nn->nfsd_net_up = true;
>         return 0;
>
> diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h
> index f5ba0fbff72f..18afe62988b5 100644
> --- a/include/linux/nfs_ssc.h
> +++ b/include/linux/nfs_ssc.h
> @@ -8,6 +8,7 @@
>   */
>
>  #include <linux/nfs_fs.h>
> +#include <linux/sunrpc/svc.h>
>
>  extern struct nfs_ssc_client_ops_tbl nfs_ssc_client_tbl;
>
> @@ -52,6 +53,21 @@ static inline void nfs42_ssc_close(struct file *filep)
>         if (nfs_ssc_client_tbl.ssc_nfs4_ops)
>                 (*nfs_ssc_client_tbl.ssc_nfs4_ops->sco_close)(filep);
>  }
> +
> +struct nfsd4_ssc_umount_item {
> +       struct list_head nsui_list;
> +       bool nsui_busy;
> +       refcount_t nsui_refcnt;
> +       unsigned long nsui_expire;
> +       struct vfsmount *nsui_vfsmount;
> +       char nsui_ipaddr[RPC_MAX_ADDRBUFLEN];
> +};
> +
> +struct nfsd4_ssc_umount {
> +       struct list_head nsu_list;
> +       unsigned long nsu_expire;
> +       wait_queue_head_t nsu_waitq;
> +};
>  #endif
>
>  /*
> --
> 2.9.5
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 1/2] NFSD: delay unmount source's export after inter-server copy completed.
  2021-05-18 17:04   ` J. Bruce Fields
@ 2021-05-18 17:21     ` J. Bruce Fields
  2021-05-18 17:50       ` dai.ngo
  2021-05-18 17:49     ` dai.ngo
  1 sibling, 1 reply; 15+ messages in thread
From: J. Bruce Fields @ 2021-05-18 17:21 UTC (permalink / raw)
  To: Dai Ngo; +Cc: olga.kornievskaia, linux-nfs, trondmy, chuck.lever

On Tue, May 18, 2021 at 01:04:56PM -0400, J. Bruce Fields wrote:
> On Mon, May 17, 2021 at 06:43:29PM -0400, Dai Ngo wrote:
> > +struct nfsd4_ssc_umount;
> >  
> >  enum {
> >  	/* cache misses due only to checksum comparison failures */
> > @@ -176,6 +177,10 @@ struct nfsd_net {
> >  	unsigned int             longest_chain_cachesize;
> >  
> >  	struct shrinker		nfsd_reply_cache_shrinker;
> > +
> > +	spinlock_t              nfsd_ssc_lock;
> > +	struct nfsd4_ssc_umount	*nfsd_ssc_umount;
> ...
> 
> > +void nfsd4_ssc_init_umount_work(struct nfsd_net *nn)
> > +{
> > +	nn->nfsd_ssc_umount = kzalloc(sizeof(struct nfsd4_ssc_umount),
> > +					GFP_KERNEL);
> > +	if (!nn->nfsd_ssc_umount)
> > +		return;
> 
> Is there any reason this needs to be allocated dynamically?  Let's just
> embed it in nfsd_net.
> 
> Actually, I'm not convinced the separate structure definition's really
> that helpful:
> 
> > +struct nfsd4_ssc_umount {
> > +	struct list_head nsu_list;
> > +	unsigned long nsu_expire;

Also: doesn't look like nsu_expire is actually used.  Am I missing
something, or is this a leftover from the conversion to the using the
laundromat thread?

--b.

> > +	wait_queue_head_t nsu_waitq;
> > +};
> 
> How about just:
> 
> 	struct nfsd_net {
> 	...
> 	/* tracking server-to-server copy mounts: */
> 	spinlock_t		nfsd_ssc_lock;
> 	struct list_head	nfsd_ssc_mount_list;
> 	unsigned long		nfsd_ssc_mount_expire;
> 	wait_queeu_head_t	nfsd_ssc_mount_waitq;
> 
> or something along those lines?
> 
> --b.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 1/2] NFSD: delay unmount source's export after inter-server copy completed.
  2021-05-18 17:16   ` Olga Kornievskaia
@ 2021-05-18 17:23     ` J. Bruce Fields
  2021-05-18 17:31       ` Olga Kornievskaia
  2021-05-18 17:48     ` dai.ngo
  1 sibling, 1 reply; 15+ messages in thread
From: J. Bruce Fields @ 2021-05-18 17:23 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: Dai Ngo, linux-nfs, Trond Myklebust, Chuck Lever

On Tue, May 18, 2021 at 01:16:56PM -0400, Olga Kornievskaia wrote:
> General question to maybe Bruce: can nfsd_net go away while a compound
> is using it ?

No.  Any server threads for that container should be shut down before
nfsd_net goes away.

--b.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 1/2] NFSD: delay unmount source's export after inter-server copy completed.
  2021-05-18 17:23     ` J. Bruce Fields
@ 2021-05-18 17:31       ` Olga Kornievskaia
  2021-05-18 17:35         ` J. Bruce Fields
  0 siblings, 1 reply; 15+ messages in thread
From: Olga Kornievskaia @ 2021-05-18 17:31 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Dai Ngo, linux-nfs, Trond Myklebust, Chuck Lever

On Tue, May 18, 2021 at 1:23 PM J. Bruce Fields <bfields@fieldses.org> wrote:
>
> On Tue, May 18, 2021 at 01:16:56PM -0400, Olga Kornievskaia wrote:
> > General question to maybe Bruce: can nfsd_net go away while a compound
> > is using it ?
>
> No.  Any server threads for that container should be shut down before
> nfsd_net goes away.

Right, sorry but the async copy is a task that works after the
compound of copy.  You say "threads .. should be shut down", who makes
sure of that (meaning should async copy code make sure of it)?

>
> --b.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 1/2] NFSD: delay unmount source's export after inter-server copy completed.
  2021-05-18 17:31       ` Olga Kornievskaia
@ 2021-05-18 17:35         ` J. Bruce Fields
  0 siblings, 0 replies; 15+ messages in thread
From: J. Bruce Fields @ 2021-05-18 17:35 UTC (permalink / raw)
  To: Olga Kornievskaia; +Cc: Dai Ngo, linux-nfs, Trond Myklebust, Chuck Lever

On Tue, May 18, 2021 at 01:31:11PM -0400, Olga Kornievskaia wrote:
> On Tue, May 18, 2021 at 1:23 PM J. Bruce Fields <bfields@fieldses.org> wrote:
> >
> > On Tue, May 18, 2021 at 01:16:56PM -0400, Olga Kornievskaia wrote:
> > > General question to maybe Bruce: can nfsd_net go away while a compound
> > > is using it ?
> >
> > No.  Any server threads for that container should be shut down before
> > nfsd_net goes away.
> 
> Right, sorry but the async copy is a task that works after the
> compound of copy.

OK, I thought you were talking about normal compound processing from
server threads.

> You say "threads .. should be shut down", who makes
> sure of that (meaning should async copy code make sure of it)?

Just looking at the code... nfsd4_shutdown_copy() is called when a
client is destroyed to stop any in-progress copies.

--b.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 1/2] NFSD: delay unmount source's export after inter-server copy completed.
  2021-05-18 17:16   ` Olga Kornievskaia
  2021-05-18 17:23     ` J. Bruce Fields
@ 2021-05-18 17:48     ` dai.ngo
  1 sibling, 0 replies; 15+ messages in thread
From: dai.ngo @ 2021-05-18 17:48 UTC (permalink / raw)
  To: Olga Kornievskaia
  Cc: J. Bruce Fields, linux-nfs, Trond Myklebust, Chuck Lever


On 5/18/21 10:16 AM, Olga Kornievskaia wrote:
> Hi Dai,
>
> General review comments inline
>
> On Mon, May 17, 2021 at 6:43 PM Dai Ngo <dai.ngo@oracle.com> wrote:
>> Currently the source's export is mounted and unmounted on every
>> inter-server copy operation. This patch is an enhancement to delay
>> the unmount of the source export for a certain period of time to
>> eliminate the mount and unmount overhead on subsequent copy operations.
>>
>> After a copy operation completes, a delayed task is scheduled to
>> unmount the export after a configurable idle time. Each time the
>> export is being used again, its expire time is extended to allow
>> the export to remain mounted.
>>
>> The unmount task and the mount operation of the copy request are
>> synced to make sure the export is not unmounted while it's being
>> used.
>>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>>   fs/nfsd/netns.h         |   5 ++
>>   fs/nfsd/nfs4proc.c      | 216 +++++++++++++++++++++++++++++++++++++++++++++++-
>>   fs/nfsd/nfs4state.c     |   8 ++
>>   fs/nfsd/nfsd.h          |   6 ++
>>   fs/nfsd/nfssvc.c        |   3 +
>>   include/linux/nfs_ssc.h |  16 ++++
>>   6 files changed, 250 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
>> index c330f5bd0cf3..6018e5050cb4 100644
>> --- a/fs/nfsd/netns.h
>> +++ b/fs/nfsd/netns.h
>> @@ -21,6 +21,7 @@
>>
>>   struct cld_net;
>>   struct nfsd4_client_tracking_ops;
>> +struct nfsd4_ssc_umount;
>>
>>   enum {
>>          /* cache misses due only to checksum comparison failures */
>> @@ -176,6 +177,10 @@ struct nfsd_net {
>>          unsigned int             longest_chain_cachesize;
>>
>>          struct shrinker         nfsd_reply_cache_shrinker;
>> +
>> +       spinlock_t              nfsd_ssc_lock;
>> +       struct nfsd4_ssc_umount *nfsd_ssc_umount;
>> +
>>          /* utsname taken from the process that starts the server */
>>          char                    nfsd_name[UNX_MAXNODENAME+1];
>>   };
>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>> index dd9f38d072dd..892ad72d87ae 100644
>> --- a/fs/nfsd/nfs4proc.c
>> +++ b/fs/nfsd/nfs4proc.c
>> @@ -55,6 +55,99 @@ module_param(inter_copy_offload_enable, bool, 0644);
>>   MODULE_PARM_DESC(inter_copy_offload_enable,
>>                   "Enable inter server to server copy offload. Default: false");
>>
>> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
>> +static int nfsd4_ssc_umount_timeout = 900000;          /* default to 15 mins */
>> +module_param(nfsd4_ssc_umount_timeout, int, 0644);
>> +MODULE_PARM_DESC(nfsd4_ssc_umount_timeout,
>> +               "idle msecs before unmount export from source server");
>> +
>> +void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
> Why should this be in nfs4proc.c when it's used in nfs4state.c only?

I will move nfsd4_ssc_expire_umount to nfs4state.c

>
>> +{
>> +       bool do_wakeup = false;
>> +       struct nfsd4_ssc_umount_item *ni = 0;
>> +       struct nfsd4_ssc_umount_item *tmp;
>> +       struct nfsd4_ssc_umount *nu;
>> +
>> +       spin_lock(&nn->nfsd_ssc_lock);
>> +       if (!nn->nfsd_ssc_umount) {
>> +               spin_unlock(&nn->nfsd_ssc_lock);
>> +               return;
>> +       }
>> +       nu = nn->nfsd_ssc_umount;
>> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
>> +               if (time_after(jiffies, ni->nsui_expire)) {
>> +                       if (refcount_read(&ni->nsui_refcnt) > 0)
>> +                               continue;
>> +
>> +                       /* mark being unmount */
>> +                       ni->nsui_busy = true;
>> +                       spin_unlock(&nn->nfsd_ssc_lock);
>> +                       mntput(ni->nsui_vfsmount);
>> +                       spin_lock(&nn->nfsd_ssc_lock);
>> +
>> +                       /* waiters need to start from begin of list */
>> +                       list_del(&ni->nsui_list);
>> +                       kfree(ni);
>> +
>> +                       /* wakeup ssc_connect waiters */
>> +                       do_wakeup = true;
>> +                       continue;
>> +               }
>> +               break;
> mnt1, mnt2, mnt3  added all at the same time. mnt1, mnt3 only used
> once during the expiration time. mnt2 is being used continuously.

m2's nsu_expire is updated and re-inserted to the tail of the list
every time it's used.

> expiration time comes. delayed mnt1 is processed. mnt2 is next on the
> list and it's not expired so code quits. mnt3 will not be umounted
> until mnt2 expires. I think the break is wrong.

m1 and m3 will be processed first and unmounted if their nsu_expire
expires, m2 is last on the list.

-Dai

>
>> +       }
>> +       if (!list_empty(&nu->nsu_list)) {
>> +               ni = list_first_entry(&nu->nsu_list,
>> +                       struct nfsd4_ssc_umount_item, nsui_list);
>> +               nu->nsu_expire = ni->nsui_expire;
>> +       } else
>> +               nu->nsu_expire = 0;
>> +
>> +       if (do_wakeup)
>> +               wake_up_all(&nu->nsu_waitq);
>> +       spin_unlock(&nn->nfsd_ssc_lock);
>> +}
>> +
>> +/*
>> + * This is called when nfsd is being shutdown, after all inter_ssc
>> + * cleanup were done, to destroy the ssc delayed unmount list.
>> + */
>> +void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn)
>> +{
>> +       struct nfsd4_ssc_umount_item *ni = 0;
>> +       struct nfsd4_ssc_umount_item *tmp;
>> +       struct nfsd4_ssc_umount *nu;
>> +
>> +       spin_lock(&nn->nfsd_ssc_lock);
>> +       if (!nn->nfsd_ssc_umount) {
>> +               spin_unlock(&nn->nfsd_ssc_lock);
>> +               return;
>> +       }
>> +       nu = nn->nfsd_ssc_umount;
>> +       nn->nfsd_ssc_umount = 0;
>> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
>> +               list_del(&ni->nsui_list);
>> +               spin_unlock(&nn->nfsd_ssc_lock);
>> +               mntput(ni->nsui_vfsmount);
>> +               kfree(ni);
>> +               spin_lock(&nn->nfsd_ssc_lock);
>> +       }
>> +       spin_unlock(&nn->nfsd_ssc_lock);
>> +       kfree(nu);
> General question to maybe Bruce: can nfsd_net go away while a compound
> is using it ? I hope not.  I can't quite think thru the scenario where
> we started the async copy task but then somehow nfsd_net is going away
> before async_copy is done and if there is anything we need to take
> care of. I think the net shutdown would not find anything to free so
> it would just return. And the copy's cleanup would not find a nfsd_net
> and so it would right away unmount. This is more of thinking out loud
> but I'd like somebody else to confirm I'm correct that we are OK here.
>
>> +}
>> +
>> +void nfsd4_ssc_init_umount_work(struct nfsd_net *nn)
>> +{
>> +       nn->nfsd_ssc_umount = kzalloc(sizeof(struct nfsd4_ssc_umount),
>> +                                       GFP_KERNEL);
>> +       if (!nn->nfsd_ssc_umount)
>> +               return;
>> +       spin_lock_init(&nn->nfsd_ssc_lock);
>> +       INIT_LIST_HEAD(&nn->nfsd_ssc_umount->nsu_list);
>> +       init_waitqueue_head(&nn->nfsd_ssc_umount->nsu_waitq);
>> +}
>> +EXPORT_SYMBOL_GPL(nfsd4_ssc_init_umount_work);
>> +#endif
>> +
>>   #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
>>   #include <linux/security.h>
>>
>> @@ -1181,6 +1274,12 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
>>          char *ipaddr, *dev_name, *raw_data;
>>          int len, raw_len;
>>          __be32 status = nfserr_inval;
>> +       struct nfsd4_ssc_umount_item *ni = 0;
>> +       struct nfsd4_ssc_umount_item *work = NULL;
>> +       struct nfsd4_ssc_umount_item *tmp;
>> +       struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
>> +       struct nfsd4_ssc_umount *nu;
>> +       DEFINE_WAIT(wait);
>>
>>          naddr = &nss->u.nl4_addr;
>>          tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr,
>> @@ -1229,12 +1328,76 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
>>                  goto out_free_rawdata;
>>          snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
>>
>> +       work = kzalloc(sizeof(*work), GFP_KERNEL);
>> +try_again:
>> +       spin_lock(&nn->nfsd_ssc_lock);
>> +       if (!nn->nfsd_ssc_umount) {
>> +               spin_unlock(&nn->nfsd_ssc_lock);
>> +               kfree(work);
>> +               work = NULL;
>> +               goto skip_dul;
>> +       }
>> +       nu = nn->nfsd_ssc_umount;
>> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
>> +               if (strncmp(ni->nsui_ipaddr, ipaddr, sizeof(ni->nsui_ipaddr)))
>> +                       continue;
>> +               /* found a match */
>> +               if (ni->nsui_busy) {
>> +                       /*  wait - and try again */
>> +                       prepare_to_wait(&nu->nsu_waitq, &wait,
>> +                               TASK_INTERRUPTIBLE);
>> +                       spin_unlock(&nn->nfsd_ssc_lock);
>> +
>> +                       /* allow 20secs for mount/unmount for now - revisit */
>> +                       if (signal_pending(current) ||
>> +                                       (schedule_timeout(20*HZ) == 0)) {
>> +                               status = nfserr_eagain;
>> +                               kfree(work);
>> +                               goto out_free_devname;
>> +                       }
>> +                       finish_wait(&nu->nsu_waitq, &wait);
>> +                       goto try_again;
>> +               }
>> +               ss_mnt = ni->nsui_vfsmount;
>> +               if (refcount_read(&ni->nsui_refcnt) == 0)
>> +                       refcount_set(&ni->nsui_refcnt, 1);
>> +               else
>> +                       refcount_inc(&ni->nsui_refcnt);
>> +               spin_unlock(&nn->nfsd_ssc_lock);
>> +               kfree(work);
>> +               goto out_done;
>> +       }
>> +       /* create new entry, set busy, insert list, clear busy after mount */
>> +       if (work) {
>> +               strncpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr));
>> +               refcount_set(&work->nsui_refcnt, 1);
>> +               work->nsui_busy = true;
>> +               list_add_tail(&work->nsui_list, &nu->nsu_list);
>> +       }
>> +       spin_unlock(&nn->nfsd_ssc_lock);
>> +skip_dul:
>> +
>>          /* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */
>>          ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data);
>>          module_put(type->owner);
>> -       if (IS_ERR(ss_mnt))
>> +       if (IS_ERR(ss_mnt)) {
>> +               if (work) {
>> +                       spin_lock(&nn->nfsd_ssc_lock);
>> +                       list_del(&work->nsui_list);
>> +                       wake_up_all(&nu->nsu_waitq);
>> +                       spin_unlock(&nn->nfsd_ssc_lock);
>> +                       kfree(work);
>> +               }
>>                  goto out_free_devname;
>> -
>> +       }
>> +       if (work) {
>> +               spin_lock(&nn->nfsd_ssc_lock);
>> +               work->nsui_vfsmount = ss_mnt;
>> +               work->nsui_busy = false;
>> +               wake_up_all(&nu->nsu_waitq);
>> +               spin_unlock(&nn->nfsd_ssc_lock);
>> +       }
>> +out_done:
>>          status = 0;
>>          *mount = ss_mnt;
>>
>> @@ -1301,10 +1464,55 @@ static void
>>   nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
>>                          struct nfsd_file *dst)
>>   {
>> +       bool found = false;
>> +       bool resched = false;
>> +       long timeout;
>> +       struct nfsd4_ssc_umount_item *tmp;
>> +       struct nfsd4_ssc_umount_item *ni = 0;
>> +       struct nfsd4_ssc_umount *nu;
>> +       struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id);
>> +
>>          nfs42_ssc_close(src->nf_file);
>> -       fput(src->nf_file);
>>          nfsd_file_put(dst);
>> -       mntput(ss_mnt);
>> +       fput(src->nf_file);
>> +
>> +       if (!nn) {
>> +               mntput(ss_mnt);
>> +               return;
>> +       }
>> +       spin_lock(&nn->nfsd_ssc_lock);
>> +       if (!nn->nfsd_ssc_umount) {
>> +               /* delayed unmount list not setup */
>> +               spin_unlock(&nn->nfsd_ssc_lock);
>> +               mntput(ss_mnt);
>> +               return;
>> +       }
>> +       nu = nn->nfsd_ssc_umount;
>> +       timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout);
>> +       list_for_each_entry_safe(ni, tmp, &nu->nsu_list, nsui_list) {
>> +               if (ni->nsui_vfsmount->mnt_sb == ss_mnt->mnt_sb) {
>> +                       list_del(&ni->nsui_list);
>> +                       /*
>> +                        * vfsmount can be shared by multiple exports,
>> +                        * decrement refcnt and schedule delayed task
>> +                        * if it drops to 0.
>> +                        */
>> +                       if (refcount_dec_and_test(&ni->nsui_refcnt))
>> +                               resched = true;
>> +                       ni->nsui_expire = jiffies + timeout;
>> +                       list_add_tail(&ni->nsui_list, &nu->nsu_list);
>> +                       found = true;
>> +                       break;
>> +               }
>> +       }
>> +       if (!found) {
>> +               spin_unlock(&nn->nfsd_ssc_lock);
>> +               mntput(ss_mnt);
>> +               return;
>> +       }
>> +       if (resched && !nu->nsu_expire)
>> +               nu->nsu_expire = ni->nsui_expire;
>> +       spin_unlock(&nn->nfsd_ssc_lock);
>>   }
>>
>>   #else /* CONFIG_NFSD_V4_2_INTER_SSC */
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 97447a64bad0..0cdc898f06c9 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -5459,6 +5459,11 @@ nfs4_laundromat(struct nfsd_net *nn)
>>                  list_del_init(&nbl->nbl_lru);
>>                  free_blocked_lock(nbl);
>>          }
>> +
>> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
>> +       /* service the inter-copy delayed unmount list */
>> +       nfsd4_ssc_expire_umount(nn);
>> +#endif
>>   out:
>>          new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
>>          return new_timeo;
>> @@ -7398,6 +7403,9 @@ nfs4_state_shutdown_net(struct net *net)
>>
>>          nfsd4_client_tracking_exit(net);
>>          nfs4_state_destroy_net(net);
>> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
>> +       nfsd4_ssc_shutdown_umount(nn);
>> +#endif
>>          mntput(nn->nfsd_mnt);
>>   }
>>
>> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
>> index 8bdc37aa2c2e..cf86d9010974 100644
>> --- a/fs/nfsd/nfsd.h
>> +++ b/fs/nfsd/nfsd.h
>> @@ -483,6 +483,12 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
>>   extern int nfsd4_is_junction(struct dentry *dentry);
>>   extern int register_cld_notifier(void);
>>   extern void unregister_cld_notifier(void);
>> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
>> +extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
>> +extern void nfsd4_ssc_expire_umount(struct nfsd_net *nn);
>> +extern void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn);
>> +#endif
>> +
>>   #else /* CONFIG_NFSD_V4 */
>>   static inline int nfsd4_is_junction(struct dentry *dentry)
>>   {
>> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
>> index 6de406322106..ce89a8fe07ff 100644
>> --- a/fs/nfsd/nfssvc.c
>> +++ b/fs/nfsd/nfssvc.c
>> @@ -403,6 +403,9 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre
>>          if (ret)
>>                  goto out_filecache;
>>
>> +#ifdef CONFIG_NFSD_V4_2_INTER_SSC
>> +       nfsd4_ssc_init_umount_work(nn);
>> +#endif
>>          nn->nfsd_net_up = true;
>>          return 0;
>>
>> diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h
>> index f5ba0fbff72f..18afe62988b5 100644
>> --- a/include/linux/nfs_ssc.h
>> +++ b/include/linux/nfs_ssc.h
>> @@ -8,6 +8,7 @@
>>    */
>>
>>   #include <linux/nfs_fs.h>
>> +#include <linux/sunrpc/svc.h>
>>
>>   extern struct nfs_ssc_client_ops_tbl nfs_ssc_client_tbl;
>>
>> @@ -52,6 +53,21 @@ static inline void nfs42_ssc_close(struct file *filep)
>>          if (nfs_ssc_client_tbl.ssc_nfs4_ops)
>>                  (*nfs_ssc_client_tbl.ssc_nfs4_ops->sco_close)(filep);
>>   }
>> +
>> +struct nfsd4_ssc_umount_item {
>> +       struct list_head nsui_list;
>> +       bool nsui_busy;
>> +       refcount_t nsui_refcnt;
>> +       unsigned long nsui_expire;
>> +       struct vfsmount *nsui_vfsmount;
>> +       char nsui_ipaddr[RPC_MAX_ADDRBUFLEN];
>> +};
>> +
>> +struct nfsd4_ssc_umount {
>> +       struct list_head nsu_list;
>> +       unsigned long nsu_expire;
>> +       wait_queue_head_t nsu_waitq;
>> +};
>>   #endif
>>
>>   /*
>> --
>> 2.9.5
>>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 1/2] NFSD: delay unmount source's export after inter-server copy completed.
  2021-05-18 17:04   ` J. Bruce Fields
  2021-05-18 17:21     ` J. Bruce Fields
@ 2021-05-18 17:49     ` dai.ngo
  1 sibling, 0 replies; 15+ messages in thread
From: dai.ngo @ 2021-05-18 17:49 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: olga.kornievskaia, linux-nfs, trondmy, chuck.lever


On 5/18/21 10:04 AM, J. Bruce Fields wrote:
> On Mon, May 17, 2021 at 06:43:29PM -0400, Dai Ngo wrote:
>> +struct nfsd4_ssc_umount;
>>   
>>   enum {
>>   	/* cache misses due only to checksum comparison failures */
>> @@ -176,6 +177,10 @@ struct nfsd_net {
>>   	unsigned int             longest_chain_cachesize;
>>   
>>   	struct shrinker		nfsd_reply_cache_shrinker;
>> +
>> +	spinlock_t              nfsd_ssc_lock;
>> +	struct nfsd4_ssc_umount	*nfsd_ssc_umount;
> ...
>
>> +void nfsd4_ssc_init_umount_work(struct nfsd_net *nn)
>> +{
>> +	nn->nfsd_ssc_umount = kzalloc(sizeof(struct nfsd4_ssc_umount),
>> +					GFP_KERNEL);
>> +	if (!nn->nfsd_ssc_umount)
>> +		return;
> Is there any reason this needs to be allocated dynamically?  Let's just
> embed it in nfsd_net.
>
> Actually, I'm not convinced the separate structure definition's really
> that helpful:
>
>> +struct nfsd4_ssc_umount {
>> +	struct list_head nsu_list;
>> +	unsigned long nsu_expire;
>> +	wait_queue_head_t nsu_waitq;
>> +};
> How about just:
>
> 	struct nfsd_net {
> 	...
> 	/* tracking server-to-server copy mounts: */
> 	spinlock_t		nfsd_ssc_lock;
> 	struct list_head	nfsd_ssc_mount_list;
> 	unsigned long		nfsd_ssc_mount_expire;
> 	wait_queeu_head_t	nfsd_ssc_mount_waitq;
>
> or something along those lines?

I will move nfsd4_ssc_umount into nfsd_net.

-Dai

>
> --b.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v5 1/2] NFSD: delay unmount source's export after inter-server copy completed.
  2021-05-18 17:21     ` J. Bruce Fields
@ 2021-05-18 17:50       ` dai.ngo
  0 siblings, 0 replies; 15+ messages in thread
From: dai.ngo @ 2021-05-18 17:50 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: olga.kornievskaia, linux-nfs, trondmy, chuck.lever


On 5/18/21 10:21 AM, J. Bruce Fields wrote:
> On Tue, May 18, 2021 at 01:04:56PM -0400, J. Bruce Fields wrote:
>> On Mon, May 17, 2021 at 06:43:29PM -0400, Dai Ngo wrote:
>>> +struct nfsd4_ssc_umount;
>>>   
>>>   enum {
>>>   	/* cache misses due only to checksum comparison failures */
>>> @@ -176,6 +177,10 @@ struct nfsd_net {
>>>   	unsigned int             longest_chain_cachesize;
>>>   
>>>   	struct shrinker		nfsd_reply_cache_shrinker;
>>> +
>>> +	spinlock_t              nfsd_ssc_lock;
>>> +	struct nfsd4_ssc_umount	*nfsd_ssc_umount;
>> ...
>>
>>> +void nfsd4_ssc_init_umount_work(struct nfsd_net *nn)
>>> +{
>>> +	nn->nfsd_ssc_umount = kzalloc(sizeof(struct nfsd4_ssc_umount),
>>> +					GFP_KERNEL);
>>> +	if (!nn->nfsd_ssc_umount)
>>> +		return;
>> Is there any reason this needs to be allocated dynamically?  Let's just
>> embed it in nfsd_net.
>>
>> Actually, I'm not convinced the separate structure definition's really
>> that helpful:
>>
>>> +struct nfsd4_ssc_umount {
>>> +	struct list_head nsu_list;
>>> +	unsigned long nsu_expire;
> Also: doesn't look like nsu_expire is actually used.  Am I missing
> something, or is this a leftover from the conversion to the using the
> laundromat thread?

Yes, will be cleaned up in v6.

-Dai

>
> --b.
>
>>> +	wait_queue_head_t nsu_waitq;
>>> +};
>> How about just:
>>
>> 	struct nfsd_net {
>> 	...
>> 	/* tracking server-to-server copy mounts: */
>> 	spinlock_t		nfsd_ssc_lock;
>> 	struct list_head	nfsd_ssc_mount_list;
>> 	unsigned long		nfsd_ssc_mount_expire;
>> 	wait_queeu_head_t	nfsd_ssc_mount_waitq;
>>
>> or something along those lines?
>>
>> --b.

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2021-05-18 17:51 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-17 22:43 [PATCH v5 0/2] NFSD: delay unmount source's export after inter-server copy completed Dai Ngo
2021-05-17 22:43 ` [PATCH v5 1/2] " Dai Ngo
2021-05-18 16:28   ` Olga Kornievskaia
2021-05-18 16:30     ` dai.ngo
2021-05-18 16:40       ` Olga Kornievskaia
2021-05-18 17:04   ` J. Bruce Fields
2021-05-18 17:21     ` J. Bruce Fields
2021-05-18 17:50       ` dai.ngo
2021-05-18 17:49     ` dai.ngo
2021-05-18 17:16   ` Olga Kornievskaia
2021-05-18 17:23     ` J. Bruce Fields
2021-05-18 17:31       ` Olga Kornievskaia
2021-05-18 17:35         ` J. Bruce Fields
2021-05-18 17:48     ` dai.ngo
2021-05-17 22:43 ` [PATCH v5 2/2] NFSv4.2: remove restriction of copy size for inter-server copy Dai Ngo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.