linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] enhance NFSv4.2 SSC to delay unmount source's export.
@ 2021-04-01 23:12 Dai Ngo
  2021-04-01 23:12 ` [PATCH 1/2] NFSD: delay unmount source's export after inter-server copy completed Dai Ngo
  2021-04-01 23:12 ` [PATCH 2/2] NFSv4.2: mount overhead should not be used as threshold for inter-server copy Dai Ngo
  0 siblings, 2 replies; 4+ messages in thread
From: Dai Ngo @ 2021-04-01 23:12 UTC (permalink / raw)
  To: olga.kornievskaia; +Cc: linux-nfs, trondmy, bfields, chuck.lever

Hi,

Currently the source's export is mounted and unmounted on every
inter-server copy operation. This causes unnecessary overhead
for each copy.

This patch series is an enhancement to allow the export to remain
mounted for a configurable period (15 minutes by default). If the
export is not used for the configured time, it is unmounted
by a delayed task. If it is used again, its expiration time is
extended for another period.

Since mount and unmount are no longer done on each copy request,
this overhead is no longer used to decide whether the copy should
be done with inter-server copy or generic copy. The threshold used
to determine sync or async copy is now used for this decision.

-Dai



^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/2] NFSD: delay unmount source's export after inter-server copy completed.
  2021-04-01 23:12 [PATCH 0/2] enhance NFSv4.2 SSC to delay unmount source's export Dai Ngo
@ 2021-04-01 23:12 ` Dai Ngo
  2021-04-02  1:21   ` kernel test robot
  2021-04-01 23:12 ` [PATCH 2/2] NFSv4.2: mount overhead should not be used as threshold for inter-server copy Dai Ngo
  1 sibling, 1 reply; 4+ messages in thread
From: Dai Ngo @ 2021-04-01 23:12 UTC (permalink / raw)
  To: olga.kornievskaia; +Cc: linux-nfs, trondmy, bfields, chuck.lever

Currently the source's export is mounted and unmounted on every
inter-server copy operation. This patch is an enhancement to delay
the unmount of the source export for a certain period of time to
eliminate the mount and unmount overhead on subsequent copy operations.

After a copy operation completes, a delayed task is scheduled to
unmount the export after a configurable idle time. Each time the
export is used again, its expiration time is extended so that
the export remains mounted.

The unmount task and the mount operation of the copy request are
synchronized to make sure the export is not unmounted while it is
in use.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfsd/nfs4proc.c      | 136 ++++++++++++++++++++++++++++++++++++++++++++++--
 fs/nfsd/nfsd.h          |   4 ++
 fs/nfsd/nfssvc.c        |   3 ++
 include/linux/nfs_ssc.h |  17 ++++++
 4 files changed, 157 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index dd9f38d072dd..4e9a53d477a0 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -55,6 +55,74 @@ module_param(inter_copy_offload_enable, bool, 0644);
 MODULE_PARM_DESC(inter_copy_offload_enable,
 		 "Enable inter server to server copy offload. Default: false");
 
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+static int nfsd4_ssc_umount_timeout = 900000;		/* default to 15 mins */
+module_param(nfsd4_ssc_umount_timeout, int, 0644);
+MODULE_PARM_DESC(nfsd4_ssc_umount_timeout,
+		"idle msecs before unmount export from source server");
+
+static struct nfsd4_ssc_umount nfsd4_ssc_umount;
+
+/* nfsd4_ssc_umount.nsu_lock must be held */
+static void nfsd4_scc_update_umnt_timo(void)
+{
+	struct nfsd4_ssc_umount_item *ni = 0;
+
+	if (!list_empty(&nfsd4_ssc_umount.nsu_list)) {
+		ni = list_first_entry(&nfsd4_ssc_umount.nsu_list,
+			struct nfsd4_ssc_umount_item, nsui_list);
+		nfsd4_ssc_umount.nsu_expire = ni->nsui_expire;
+		schedule_delayed_work(&nfsd4_ssc_umount.nsu_umount_work,
+			ni->nsui_expire - jiffies);
+	} else
+		nfsd4_ssc_umount.nsu_expire = 0;
+}
+
+void nfsd4_ssc_expire_umount(struct work_struct *work)
+{
+	struct nfsd4_ssc_umount_item *ni = 0;
+	struct nfsd4_ssc_umount_item *tmp;
+
+	down_write(&nfsd4_ssc_umount.nsu_sem);
+	spin_lock(&nfsd4_ssc_umount.nsu_lock);
+	list_for_each_entry_safe(ni, tmp, &nfsd4_ssc_umount.nsu_list, nsui_list) {
+		if (time_after(jiffies, ni->nsui_expire)) {
+			list_del(&ni->nsui_list);
+			cancel_delayed_work(&nfsd4_ssc_umount.nsu_umount_work);
+			spin_unlock(&nfsd4_ssc_umount.nsu_lock);
+			up_write(&nfsd4_ssc_umount.nsu_sem);
+
+			mntput(ni->nsui_vfsmount);
+			kfree(ni);
+
+			down_write(&nfsd4_ssc_umount.nsu_sem);
+			spin_lock(&nfsd4_ssc_umount.nsu_lock);
+			continue;
+		}
+		break;
+	}
+	nfsd4_scc_update_umnt_timo();
+	spin_unlock(&nfsd4_ssc_umount.nsu_lock);
+	up_write(&nfsd4_ssc_umount.nsu_sem);
+}
+EXPORT_SYMBOL_GPL(nfsd4_ssc_expire_umount);
+
+static DECLARE_DELAYED_WORK(nfsd4, nfsd4_ssc_expire_umount);
+
+void nfsd4_ssc_init_umount_work(void)
+{
+	if (nfsd4_ssc_umount.nsu_inited)
+		return;
+	INIT_DELAYED_WORK(&nfsd4_ssc_umount.nsu_umount_work,
+		nfsd4_ssc_expire_umount);
+	INIT_LIST_HEAD(&nfsd4_ssc_umount.nsu_list);
+	spin_lock_init(&nfsd4_ssc_umount.nsu_lock);
+	init_rwsem(&nfsd4_ssc_umount.nsu_sem);
+	nfsd4_ssc_umount.nsu_inited = true;
+}
+EXPORT_SYMBOL_GPL(nfsd4_ssc_init_umount_work);
+#endif
+
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
 #include <linux/security.h>
 
@@ -1181,6 +1249,8 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
 	char *ipaddr, *dev_name, *raw_data;
 	int len, raw_len;
 	__be32 status = nfserr_inval;
+	struct nfsd4_ssc_umount_item *ni = 0;
+	struct nfsd4_ssc_umount_item *tmp;
 
 	naddr = &nss->u.nl4_addr;
 	tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr,
@@ -1229,11 +1299,33 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
 		goto out_free_rawdata;
 	snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
 
+	/* wait for ssc unmount task */
+	down_read(&nfsd4_ssc_umount.nsu_sem);
+
 	/* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */
 	ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data);
 	module_put(type->owner);
-	if (IS_ERR(ss_mnt))
+	if (IS_ERR(ss_mnt)) {
+		up_read(&nfsd4_ssc_umount.nsu_sem);
 		goto out_free_devname;
+	}
+
+	 /* delete work entry if it exists */
+	spin_lock(&nfsd4_ssc_umount.nsu_lock);
+	list_for_each_entry_safe(ni, tmp, &nfsd4_ssc_umount.nsu_list, nsui_list) {
+		if (ni->nsui_vfsmount->mnt_sb != ss_mnt->mnt_sb)
+			continue;
+		list_del(&ni->nsui_list);
+		cancel_delayed_work(&nfsd4_ssc_umount.nsu_umount_work);
+		nfsd4_scc_update_umnt_timo();
+		spin_unlock(&nfsd4_ssc_umount.nsu_lock);
+		mntput(ni->nsui_vfsmount);
+		kfree(ni);
+		goto out_done;
+	}
+	spin_unlock(&nfsd4_ssc_umount.nsu_lock);
+out_done:
+	up_read(&nfsd4_ssc_umount.nsu_sem);
 
 	status = 0;
 	*mount = ss_mnt;
@@ -1301,10 +1393,48 @@ static void
 nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
 			struct nfsd_file *dst)
 {
+	long timeout;
+	struct nfsd4_ssc_umount_item *work, *tmp;
+	struct nfsd4_ssc_umount_item *ni = 0;
+
 	nfs42_ssc_close(src->nf_file);
-	fput(src->nf_file);
 	nfsd_file_put(dst);
-	mntput(ss_mnt);
+	fput(src->nf_file);
+
+	work = kzalloc(sizeof(*work), GFP_KERNEL);
+	if (!work) {
+		mntput(ss_mnt);
+		return;
+	}
+	timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout);
+	work->nsui_vfsmount = ss_mnt;
+	work->nsui_expire = jiffies + timeout;
+
+	spin_lock(&nfsd4_ssc_umount.nsu_lock);
+	/*
+	 * check if entry for vfsmount->mnt_sb exists, if it does
+	 * then remove it, update expire time and re-insert at tail,
+	 * do the mntput for this call and return. Otherwise create
+	 * new work entry.
+	 */
+	list_for_each_entry_safe(ni, tmp, &nfsd4_ssc_umount.nsu_list,
+		nsui_list) {
+		if (ni->nsui_vfsmount->mnt_sb == ss_mnt->mnt_sb) {
+			list_del(&ni->nsui_list);
+			mntput(ss_mnt);
+			kfree(work);
+			ni->nsui_expire = jiffies + timeout;
+			work = ni;
+			break;
+		}
+	}
+	list_add_tail(&work->nsui_list, &nfsd4_ssc_umount.nsu_list);
+	if (!nfsd4_ssc_umount.nsu_expire) {
+		nfsd4_ssc_umount.nsu_expire = work->nsui_expire;
+		schedule_delayed_work(&nfsd4_ssc_umount.nsu_umount_work,
+			timeout);
+	}
+	spin_unlock(&nfsd4_ssc_umount.nsu_lock);
 }
 
 #else /* CONFIG_NFSD_V4_2_INTER_SSC */
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 8bdc37aa2c2e..b3bf8a5f4472 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -483,6 +483,10 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
 extern int nfsd4_is_junction(struct dentry *dentry);
 extern int register_cld_notifier(void);
 extern void unregister_cld_notifier(void);
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+extern void nfsd4_ssc_init_umount_work(void);
+#endif
+
 #else /* CONFIG_NFSD_V4 */
 static inline int nfsd4_is_junction(struct dentry *dentry)
 {
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 6de406322106..2558db55b88b 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -322,6 +322,9 @@ static int nfsd_startup_generic(int nrservs)
 	ret = nfs4_state_start();
 	if (ret)
 		goto out_file_cache;
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+	nfsd4_ssc_init_umount_work();
+#endif
 	return 0;
 
 out_file_cache:
diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h
index f5ba0fbff72f..337d740dad17 100644
--- a/include/linux/nfs_ssc.h
+++ b/include/linux/nfs_ssc.h
@@ -8,6 +8,7 @@
  */
 
 #include <linux/nfs_fs.h>
+#include <linux/sunrpc/svc.h>
 
 extern struct nfs_ssc_client_ops_tbl nfs_ssc_client_tbl;
 
@@ -52,6 +53,22 @@ static inline void nfs42_ssc_close(struct file *filep)
 	if (nfs_ssc_client_tbl.ssc_nfs4_ops)
 		(*nfs_ssc_client_tbl.ssc_nfs4_ops->sco_close)(filep);
 }
+
+struct nfsd4_ssc_umount_item {
+	struct list_head nsui_list;
+	unsigned long nsui_expire;
+	struct vfsmount *nsui_vfsmount;
+};
+
+struct nfsd4_ssc_umount {
+	struct list_head nsu_list;
+	struct delayed_work nsu_umount_work;
+	spinlock_t nsu_lock;
+	struct rw_semaphore nsu_sem;
+	unsigned long nsu_expire;
+	bool nsu_inited;
+};
+
 #endif
 
 /*
-- 
2.9.5


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2] NFSv4.2: mount overhead should not be used as threshold for inter-server copy
  2021-04-01 23:12 [PATCH 0/2] enhance NFSv4.2 SSC to delay unmount source's export Dai Ngo
  2021-04-01 23:12 ` [PATCH 1/2] NFSD: delay unmount source's export after inter-server copy completed Dai Ngo
@ 2021-04-01 23:12 ` Dai Ngo
  1 sibling, 0 replies; 4+ messages in thread
From: Dai Ngo @ 2021-04-01 23:12 UTC (permalink / raw)
  To: olga.kornievskaia; +Cc: linux-nfs, trondmy, bfields, chuck.lever

Since mount and unmount are no longer done on each copy request, their
overhead should not be used as the threshold for doing inter-server copy.
Instead, the threshold used to choose between sync and async copy also
decides whether the copy is done with inter-server copy or generic copy.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfs/nfs4file.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 441a2fa073c8..67ca798a1a79 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -158,13 +158,11 @@ static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
 		sync = true;
 retry:
 	if (!nfs42_files_from_same_server(file_in, file_out)) {
-		/* for inter copy, if copy size if smaller than 12 RPC
-		 * payloads, fallback to traditional copy. There are
-		 * 14 RPCs during an NFSv4.x mount between source/dest
-		 * servers.
+		/*
+		 * for inter copy, if copy size is small enough
+		 * for sync copy then fallback to traditional copy.
 		 */
-		if (sync ||
-			count <= 14 * NFS_SERVER(file_inode(file_in))->rsize)
+		if (sync)
 			return -EOPNOTSUPP;
 		cn_resp = kzalloc(sizeof(struct nfs42_copy_notify_res),
 				GFP_NOFS);
-- 
2.9.5


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2] NFSD: delay unmount source's export after inter-server copy completed.
  2021-04-01 23:12 ` [PATCH 1/2] NFSD: delay unmount source's export after inter-server copy completed Dai Ngo
@ 2021-04-02  1:21   ` kernel test robot
  0 siblings, 0 replies; 4+ messages in thread
From: kernel test robot @ 2021-04-02  1:21 UTC (permalink / raw)
  To: Dai Ngo, olga.kornievskaia
  Cc: kbuild-all, linux-nfs, trondmy, bfields, chuck.lever

[-- Attachment #1: Type: text/plain, Size: 2860 bytes --]

Hi Dai,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on nfs/linux-next]
[also build test WARNING on v5.12-rc5 next-20210401]
[If your patch is applied to the wrong git tree, kindly drop us a note.
When submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Dai-Ngo/enhance-NFSv4-2-SSC-to-delay-unmount-source-s-export/20210402-071534
base:   git://git.linux-nfs.org/projects/trondmy/linux-nfs.git linux-next
config: mips-allyesconfig (attached as .config)
compiler: mips-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/84c442c7d2275ebcf694b23503725a5a5d7895e5
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Dai-Ngo/enhance-NFSv4-2-SSC-to-delay-unmount-source-s-export/20210402-071534
        git checkout 84c442c7d2275ebcf694b23503725a5a5d7895e5
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=mips 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> fs/nfsd/nfs4proc.c:81:6: warning: no previous prototype for 'nfsd4_ssc_expire_umount' [-Wmissing-prototypes]
      81 | void nfsd4_ssc_expire_umount(struct work_struct *work)
         |      ^~~~~~~~~~~~~~~~~~~~~~~


vim +/nfsd4_ssc_expire_umount +81 fs/nfsd/nfs4proc.c

    80	
  > 81	void nfsd4_ssc_expire_umount(struct work_struct *work)
    82	{
    83		struct nfsd4_ssc_umount_item *ni = 0;
    84		struct nfsd4_ssc_umount_item *tmp;
    85	
    86		down_write(&nfsd4_ssc_umount.nsu_sem);
    87		spin_lock(&nfsd4_ssc_umount.nsu_lock);
    88		list_for_each_entry_safe(ni, tmp, &nfsd4_ssc_umount.nsu_list, nsui_list) {
    89			if (time_after(jiffies, ni->nsui_expire)) {
    90				list_del(&ni->nsui_list);
    91				cancel_delayed_work(&nfsd4_ssc_umount.nsu_umount_work);
    92				spin_unlock(&nfsd4_ssc_umount.nsu_lock);
    93				up_write(&nfsd4_ssc_umount.nsu_sem);
    94	
    95				mntput(ni->nsui_vfsmount);
    96				kfree(ni);
    97	
    98				down_write(&nfsd4_ssc_umount.nsu_sem);
    99				spin_lock(&nfsd4_ssc_umount.nsu_lock);
   100				continue;
   101			}
   102			break;
   103		}
   104		nfsd4_scc_update_umnt_timo();
   105		spin_unlock(&nfsd4_ssc_umount.nsu_lock);
   106		up_write(&nfsd4_ssc_umount.nsu_sem);
   107	}
   108	EXPORT_SYMBOL_GPL(nfsd4_ssc_expire_umount);
   109	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 70235 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-04-02  1:22 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-01 23:12 [PATCH 0/2] enhance NFSv4.2 SSC to delay unmount source's export Dai Ngo
2021-04-01 23:12 ` [PATCH 1/2] NFSD: delay unmount source's export after inter-server copy completed Dai Ngo
2021-04-02  1:21   ` kernel test robot
2021-04-01 23:12 ` [PATCH 2/2] NFSv4.2: mount overhead should not be used as threshold for inter-server copy Dai Ngo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).