All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/5] NLM failover - per fs grace period
@ 2006-08-14  6:00 ` Wendy Cheng
  0 siblings, 0 replies; 12+ messages in thread
From: Wendy Cheng @ 2006-08-14  6:00 UTC (permalink / raw)
  To: Linux NFS Mailing List; +Cc: cluster-devel, lhh

[-- Attachment #1: Type: text/plain, Size: 1028 bytes --]

This change enables per NFS-export entry lockd grace period. The
implementation is based on a global single linked list nlm_servs that
contains entries of fsid info. It is expected this would not be a
frequent event. The nlm_servs list should be short and the entries
expire within a maximum of 50 seconds.  The grace period setting follows
the existing NLM grace period handling logic and is triggered via
echoing the NFS export filesystem id into /proc/fs/nfsd/nlm_set_igrace
file as:

shell> echo 1234 > /proc/fs/nfsd/nlm_set_igrace

Signed-off-by: S. Wendy Cheng <wcheng@redhat.com>
Signed-off-by: Lon Hohberger  <lhh@redhat.com>

 fs/lockd/svc.c              |    8 +-
 fs/lockd/svc4proc.c         |   31 +++++++---
 fs/lockd/svcproc.c          |   29 +++++++--
 fs/lockd/svcsubs.c          |  133 ++++++++++++++++++++++++++++++++++++
++++++++ 
 fs/nfsd/nfsctl.c            |   32 ++++++++++
 include/linux/lockd/bind.h  |    3
 include/linux/lockd/lockd.h |   10 +++
 7 files changed, 230 insertions(+), 16 deletions(-)



[-- Attachment #2: gfs_nlm_igrace.patch --]
[-- Type: text/x-patch, Size: 13774 bytes --]

--- linux-1/include/linux/lockd/lockd.h	2006-08-11 10:12:29.000000000 -0400
+++ linux-2/include/linux/lockd/lockd.h	2006-08-12 02:02:42.000000000 -0400
@@ -107,6 +107,13 @@ struct nlm_file {
 	int		       	f_hash;		/* hash of f_handle */
 };
 
+/* Server fsid linked list for NLM lock failover */
+struct nlm_serv {
+	struct nlm_serv*	s_next;		/* linked list */
+	unsigned long		s_grace_period;	/* per fsid grace period */
+	int			s_fsid;		/* export fsid */
+};
+
 /*
  * This is a server block (i.e. a lock requested by some client which
  * couldn't be granted because of a conflicting lock).
@@ -188,6 +195,8 @@ void		  nlmsvc_traverse_blocks(struct nl
 					int action);
 void	  nlmsvc_grant_reply(struct svc_rqst *, struct nlm_cookie *, u32);
 
+unsigned long set_grace_period(void); /*required by svcsubs.c and svc.c 
+					to support nlm failover */
 /*
  * File handling for the server personality
  */
@@ -198,6 +207,7 @@ void		  nlmsvc_mark_resources(void);
 void		  nlmsvc_free_host_resources(struct nlm_host *);
 void		  nlmsvc_invalidate_all(void);
 int 		  nlmsvc_fo_unlock(int *fsid);
+int 		  nlmsvc_fo_check(struct nfs_fh *fh);
 
 static __inline__ struct inode *
 nlmsvc_file_inode(struct nlm_file *file)
--- linux-1/fs/lockd/svcsubs.c	2006-08-11 10:12:29.000000000 -0400
+++ linux-2/fs/lockd/svcsubs.c	2006-08-11 12:09:03.000000000 -0400
@@ -62,6 +62,10 @@ static inline void nlm_debug_print_file(
 }
 #endif
 
+/* Global control structure for lock failover */
+static spinlock_t nlm_fo_lock=SPIN_LOCK_UNLOCKED;
+struct nlm_serv *nlm_servs=NULL;
+
 static inline unsigned int file_hash(struct nfs_fh *f)
 {
 	unsigned int tmp=0;
@@ -400,3 +404,132 @@ nlmsvc_fo_unlock(int *fsid)
 	return (nlm_traverse_files(NULL, fsid, NLM_ACT_FO_UNLOCK)); 
 }
 
+EXPORT_SYMBOL(nlmsvc_fo_setgrace);
+
+/*
+ * Add fsid into global nlm_servs list.
+ */
+int
+nlmsvc_fo_setgrace(int fsid)
+{
+	struct nlm_serv *per_fsid, *entry;
+
+	/* allocate the entry */
+	per_fsid = kmalloc(sizeof(struct nlm_serv), GFP_KERNEL);
+	if (per_fsid == NULL) {
+		printk("lockd: nlmsvc_fo_setgrace kmalloc fails\n");
+		return(-ENOMEM);
+	}
+
+	dprintk("lockd: nlmsvc_fo_setgrace fsid=%d jiffies=%lu\n", 
+		fsid, jiffies);
+
+	/* fill in info */
+	per_fsid->s_grace_period = set_grace_period();
+	per_fsid->s_fsid = fsid;
+
+	/* link into the global list */
+	spin_lock(&nlm_fo_lock);
+	
+	entry = nlm_servs;
+	per_fsid->s_next = entry;
+	nlm_servs = per_fsid;
+
+	/* done */
+	spin_unlock(&nlm_fo_lock);
+	return 0;
+}
+
+/* nlm_servs gargabe collection 
+ *  - caller should hold nlm_ip_mutex
+ */
+static inline void
+__nlm_servs_gc(struct nlm_serv *e_purge)
+{
+	struct nlm_serv *e_next;
+
+	while (e_purge) {
+		e_next = e_purge->s_next;
+		dprintk("lockd: purge fsid=%d grace period at jiffies=%lu\n",
+			e_purge->s_fsid, jiffies);
+		kfree(e_purge);
+		e_purge = e_next;
+	}
+}
+
+/* 
+ * Reset global nlm_servs list 
+ */
+void 
+nlmsvc_fo_reset_servs()
+{
+	struct nlm_serv *e_purge;
+
+	spin_lock(&nlm_fo_lock);
+
+	/* nothing to do */
+	if (!nlm_servs) {
+		spin_unlock(&nlm_fo_lock);
+		return;
+	}
+
+	dprintk("lockd: nlmsvc_fo_reset nlm_servs\n");
+
+	/* purge the entries */
+	e_purge   = nlm_servs;
+	nlm_servs = NULL;
+	__nlm_servs_gc(e_purge);
+
+	spin_unlock(&nlm_fo_lock);
+	return;
+}
+
+/*
+ * Check whether the fsid is in the failover list: nlm_servs.
+ *	return TRUE (1) if fsid in nlm_serv.
+ */
+int
+nlmsvc_fo_check(struct nfs_fh *fh)
+{
+	struct nlm_serv **e_top, *e_this, *e_purge=NULL;
+	int rc=0, this_fsid, not_found;
+
+	spin_lock(&nlm_fo_lock);
+
+	/* no failover entry */
+	if (!(e_this = nlm_servs))  
+		goto nlmsvc_fo_check_out;
+
+	/* see if this fh has fsid */
+	not_found = nlm_fo_get_fsid(fh, &this_fsid);
+	if (not_found) 
+		goto nlmsvc_fo_check_out;
+
+	/* check to see whether this_fsid is in nlm_servs list */
+	e_top = &nlm_servs;
+	while (e_this) {
+		if (time_before(e_this->s_grace_period, jiffies)) {
+			dprintk("lockd: fsid=%d grace period expires\n",
+				e_this->s_fsid);
+			e_purge = e_this;
+			break;
+		} else if (e_this->s_fsid == this_fsid) {
+			dprintk("lockd: fsid=%d in grace period\n",
+				e_this->s_fsid);
+			rc = 1;
+		}
+		e_top = &(e_this->s_next);
+		e_this = e_this->s_next;
+	}
+
+	/* piggy back nlm_servs garbage collection */
+	if (e_purge) {
+		*e_top = NULL;
+		__nlm_servs_gc(e_purge);
+	}
+
+nlmsvc_fo_check_out:
+	spin_unlock(&nlm_fo_lock);
+	return rc;
+}
+
--- linux-1/include/linux/lockd/bind.h	2006-08-11 10:12:29.000000000 -0400
+++ linux-2/include/linux/lockd/bind.h	2006-08-11 10:17:04.000000000 -0400
@@ -37,5 +37,8 @@ extern void	lockd_down(void);
  * NLM failover
  */
 extern int     nlmsvc_fo_unlock(int *fsid);
+extern int     nlmsvc_fo_setgrace(int fsid);
+extern void    nlmsvc_fo_reset_servs(void);
+
 
 #endif /* LINUX_LOCKD_BIND_H */
--- linux-1/fs/nfsd/nfsctl.c	2006-08-11 10:12:29.000000000 -0400
+++ linux-2/fs/nfsd/nfsctl.c	2006-08-11 10:17:04.000000000 -0400
@@ -56,6 +56,7 @@ enum {
 	NFSD_List,
 	NFSD_Fh,
 	NFSD_Nlm_unlock,
+	NFSD_Nlm_igrace,
 	NFSD_Threads,
 	NFSD_Versions,
 	/*
@@ -93,6 +94,7 @@ static ssize_t write_recoverydir(struct 
 #define NFSDDBG_FACILITY	NFSDDBG_CLUSTER
 
 static ssize_t do_nlm_fo_unlock(struct file *file, char *buf, size_t size);
+static ssize_t do_nlm_fs_grace(struct file *file, char *buf, size_t size);
 
 static ssize_t (*write_op[])(struct file *, char *, size_t) = {
 	[NFSD_Svc] = write_svc,
@@ -104,6 +106,7 @@ static ssize_t (*write_op[])(struct file
 	[NFSD_Getfs] = write_getfs,
 	[NFSD_Fh] = write_filehandle,
 	[NFSD_Nlm_unlock] = do_nlm_fo_unlock,
+	[NFSD_Nlm_igrace] = do_nlm_fs_grace,
 	[NFSD_Threads] = write_threads,
 	[NFSD_Versions] = write_versions,
 #ifdef CONFIG_NFSD_V4
@@ -348,6 +351,34 @@ static ssize_t write_filehandle(struct f
 	return mesg - buf;	
 }
 
+static ssize_t do_nlm_fs_grace(struct file *file, char *buf, size_t size)
+{
+	char *mesg = buf;
+	int fsid, rc;
+
+	if (size <= 0) return -EINVAL;
+
+	/* convert string into a valid fsid */
+	rc = get_int(&mesg, &fsid);
+	if (rc) {
+		dprintk("do_nlm_fsid_grace: invalid fsid (%s)\n", buf);
+		return rc;
+	}
+
+	/* call nlm to set the grace period */
+	rc = nlmsvc_fo_setgrace(fsid);
+	if (rc) {
+		dprintk("nlmsvc_fo_setgrace return rc=%d\n", rc);
+		return rc;
+	}
+
+	dprintk("nlm set fsid=%d grace period\n", fsid);
+
+	/* done */
+	sprintf(buf, "nlm set per fsid=%d grace period\n", fsid);
+	return strlen(buf);
+}
+
 static ssize_t do_nlm_fo_unlock(struct file *file, char *buf, size_t size)
 {
 	char *mesg = buf;
@@ -523,6 +554,7 @@ static int nfsd_fill_super(struct super_
 		[NFSD_List] = {"exports", &exports_operations, S_IRUGO},
 		[NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_Nlm_unlock] = {"nlm_unlock", &transaction_ops, S_IWUSR|S_IRUSR},
+		[NFSD_Nlm_igrace] = {"nlm_set_igrace", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
 #ifdef CONFIG_NFSD_V4
--- linux-1/fs/lockd/svc4proc.c	2006-08-11 10:11:30.000000000 -0400
+++ linux-2/fs/lockd/svc4proc.c	2006-08-12 02:03:55.000000000 -0400
@@ -21,6 +21,20 @@
 
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 
+extern struct nlm_serv *nlm_servs;
+
+/* 
+ * Check for per filesystem failover grace period 
+ */
+static inline int
+nlm4svc_fo_grace_period(struct nlm_args *argp) 
+{
+	if (unlikely(nlm_servs))
+		return(nlmsvc_fo_check(&argp->lock.fh));
+
+	return 0;
+}
+
 /*
  * Obtain client and file from arguments
  */
@@ -89,13 +103,13 @@ nlm4svc_proc_test(struct svc_rqst *rqstp
 	resp->cookie = argp->cookie;
 
 	/* Don't accept test requests during grace period */
-	if (nlmsvc_grace_period) {
+	if ((nlmsvc_grace_period) || (nlm4svc_fo_grace_period(argp))) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
 
 	/* Obtain client and file */
-	if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
+	if (resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))
 		return rpc_success;
 
 	/* Now check for conflicting locks */
@@ -119,13 +133,14 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp
 	resp->cookie = argp->cookie;
 
 	/* Don't accept new lock requests during grace period */
-	if (nlmsvc_grace_period && !argp->reclaim) {
+	if ((nlmsvc_grace_period || (nlm4svc_fo_grace_period(argp))) 
+			&& !argp->reclaim) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
 
 	/* Obtain client and file */
-	if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
+	if (resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))
 		return rpc_success;
 
 #if 0
@@ -162,7 +177,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqs
 	resp->cookie = argp->cookie;
 
 	/* Don't accept requests during grace period */
-	if (nlmsvc_grace_period) {
+	if ((nlmsvc_grace_period || (nlm4svc_fo_grace_period(argp)))) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
@@ -195,7 +210,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqs
 	resp->cookie = argp->cookie;
 
 	/* Don't accept new lock requests during grace period */
-	if (nlmsvc_grace_period) {
+	if (nlmsvc_grace_period || (nlm4svc_fo_grace_period(argp))) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
@@ -330,7 +345,7 @@ nlm4svc_proc_share(struct svc_rqst *rqst
 	resp->cookie = argp->cookie;
 
 	/* Don't accept new lock requests during grace period */
-	if (nlmsvc_grace_period && !argp->reclaim) {
+	if ((nlmsvc_grace_period ||(nlm4svc_fo_grace_period(argp))) && !argp->reclaim) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
@@ -363,7 +378,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rq
 	resp->cookie = argp->cookie;
 
 	/* Don't accept requests during grace period */
-	if (nlmsvc_grace_period) {
+	if (nlmsvc_grace_period || (nlm4svc_fo_grace_period(argp))) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
--- linux-1/fs/lockd/svcproc.c	2006-08-11 10:11:30.000000000 -0400
+++ linux-2/fs/lockd/svcproc.c	2006-08-12 01:57:38.000000000 -0400
@@ -50,6 +50,21 @@ cast_to_nlm(u32 status, u32 vers)
 #endif
 
 /*
+ * Check for per filesystem failover grace period 
+ */
+
+extern struct nlm_serv *nlm_servs;
+
+static inline int
+nlmsvc_fo_grace_period(struct nlm_args *argp)
+{
+	if (unlikely(nlm_servs))
+		return(nlmsvc_fo_check(&argp->lock.fh));
+
+	return 0;
+}
+
+/*
  * Obtain client and file from arguments
  */
 static u32
@@ -115,7 +130,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp,
 	resp->cookie = argp->cookie;
 
 	/* Don't accept test requests during grace period */
-	if (nlmsvc_grace_period) {
+	if (nlmsvc_grace_period || (nlmsvc_fo_grace_period(argp))) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
@@ -146,7 +161,8 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp,
 	resp->cookie = argp->cookie;
 
 	/* Don't accept new lock requests during grace period */
-	if (nlmsvc_grace_period && !argp->reclaim) {
+	if ((nlmsvc_grace_period || (nlmsvc_fo_grace_period(argp))) 
+			&& !argp->reclaim) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
@@ -189,7 +205,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqst
 	resp->cookie = argp->cookie;
 
 	/* Don't accept requests during grace period */
-	if (nlmsvc_grace_period) {
+	if (nlmsvc_grace_period || nlmsvc_fo_grace_period(argp)) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
@@ -222,7 +238,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqst
 	resp->cookie = argp->cookie;
 
 	/* Don't accept new lock requests during grace period */
-	if (nlmsvc_grace_period) {
+	if (nlmsvc_grace_period || nlmsvc_fo_grace_period(argp)) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
@@ -359,7 +375,8 @@ nlmsvc_proc_share(struct svc_rqst *rqstp
 	resp->cookie = argp->cookie;
 
 	/* Don't accept new lock requests during grace period */
-	if (nlmsvc_grace_period && !argp->reclaim) {
+	if ((nlmsvc_grace_period || (nlmsvc_fo_grace_period(argp))) 
+			&& !argp->reclaim) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
@@ -392,7 +409,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqs
 	resp->cookie = argp->cookie;
 
 	/* Don't accept requests during grace period */
-	if (nlmsvc_grace_period) {
+	if (nlmsvc_grace_period || nlmsvc_fo_grace_period(argp)) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
--- linux-1/fs/lockd/svc.c	2006-08-11 10:11:30.000000000 -0400
+++ linux-2/fs/lockd/svc.c	2006-08-11 10:17:04.000000000 -0400
@@ -71,7 +71,7 @@ static const int		nlm_port_min = 0, nlm_
 
 static struct ctl_table_header * nlm_sysctl_table;
 
-static unsigned long set_grace_period(void)
+unsigned long set_grace_period(void)
 {
 	unsigned long grace_period;
 
@@ -81,7 +81,6 @@ static unsigned long set_grace_period(vo
 				/ nlm_timeout) * nlm_timeout * HZ;
 	else
 		grace_period = nlm_timeout * 5 * HZ;
-	nlmsvc_grace_period = 1;
 	return grace_period + jiffies;
 }
 
@@ -129,6 +128,8 @@ lockd(struct svc_rqst *rqstp)
 	nlmsvc_timeout = nlm_timeout * HZ;
 
 	grace_period_expire = set_grace_period();
+	nlmsvc_grace_period = 1;
+	(void) nlmsvc_fo_reset_servs();
 
 	/*
 	 * The main request loop. We don't terminate until the last
@@ -143,6 +144,8 @@ lockd(struct svc_rqst *rqstp)
 			if (nlmsvc_ops) {
 				nlmsvc_invalidate_all();
 				grace_period_expire = set_grace_period();
+				nlmsvc_grace_period = 1;
+				(void) nlmsvc_fo_reset_servs();
 			}
 		}
 
@@ -189,6 +192,7 @@ lockd(struct svc_rqst *rqstp)
 			nlmsvc_invalidate_all();
 		nlm_shutdown_hosts();
 		nlmsvc_pid = 0;
+		(void) nlmsvc_fo_reset_servs();
 	} else
 		printk(KERN_DEBUG
 			"lockd: new process, skipping host shutdown\n");

[-- Attachment #3: Type: text/plain, Size: 373 bytes --]

-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642

[-- Attachment #4: Type: text/plain, Size: 140 bytes --]

_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Cluster-devel] [PATCH 2/5] NLM failover - per fs grace period
@ 2006-08-14  6:00 ` Wendy Cheng
  0 siblings, 0 replies; 12+ messages in thread
From: Wendy Cheng @ 2006-08-14  6:00 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This change enables per NFS-export entry lockd grace period. The
implementation is based on a global single linked list nlm_servs that
contains entries of fsid info. It is expected this would not be a
frequent event. The nlm_servs list should be short and the entries
expire within a maximum of 50 seconds.  The grace period setting follows
the existing NLM grace period handling logic and is triggered via
echoing the NFS export filesystem id into /proc/fs/nfsd/nlm_set_igrace
file as:

shell> echo 1234 > /proc/fs/nfsd/nlm_set_igrace

Signed-off-by: S. Wendy Cheng <wcheng@redhat.com>
Signed-off-by: Lon Hohberger  <lhh@redhat.com>

 fs/lockd/svc.c              |    8 +-
 fs/lockd/svc4proc.c         |   31 +++++++---
 fs/lockd/svcproc.c          |   29 +++++++--
 fs/lockd/svcsubs.c          |  133 ++++++++++++++++++++++++++++++++++++
++++++++ 
 fs/nfsd/nfsctl.c            |   32 ++++++++++
 include/linux/lockd/bind.h  |    3
 include/linux/lockd/lockd.h |   10 +++
 7 files changed, 230 insertions(+), 16 deletions(-)


-------------- next part --------------
A non-text attachment was scrubbed...
Name: gfs_nlm_igrace.patch
Type: text/x-patch
Size: 13774 bytes
Desc: not available
URL: <http://listman.redhat.com/archives/cluster-devel/attachments/20060814/e2dca800/attachment.bin>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/5] NLM failover - per fs grace period
  2006-08-14  6:00 ` [Cluster-devel] " Wendy Cheng
@ 2006-08-14 15:44   ` Trond Myklebust
  -1 siblings, 0 replies; 12+ messages in thread
From: Trond Myklebust @ 2006-08-14 15:44 UTC (permalink / raw)
  To: Wendy Cheng; +Cc: cluster-devel, lhh, Linux NFS Mailing List

On Mon, 2006-08-14 at 02:00 -0400, Wendy Cheng wrote:
> This change enables per NFS-export entry lockd grace period. The
> implementation is based on a global single linked list nlm_servs that
> contains entries of fsid info. It is expected this would not be a
> frequent event. The nlm_servs list should be short and the entries
> expire within a maximum of 50 seconds.  The grace period setting follows
> the existing NLM grace period handling logic and is triggered via
> echoing the NFS export filesystem id into /proc/fs/nfsd/nlm_set_igrace
> file as:
> 
> shell> echo 1234 > /proc/fs/nfsd/nlm_set_igrace

I still don't find the above interface convincing.

Firstly, as I already told you, the NSM protocol does not allow you to
set only a single filesystem in grace. Clients get notified of server
reboots, not filesystem reboots: if they try to reclaim locks and find
out that some of filesystems they have mounted will not allow them to do
so, then they _will_ get confused and start dropping locks that would
otherwise be perfectly valid.

Secondly, with the above interface, you have to export the filesystem
first, and then set the grace period. Since that is not an atomic
operation, it is perfectly possible for someone to mount the filesystem,
after you exported it, then set a new lock before you have managed to
declare it in grace. This makes reclaiming locks impossible.

Cheers,
  Trond


-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Cluster-devel] Re: [NFS] [PATCH 2/5] NLM failover - per fs grace period
@ 2006-08-14 15:44   ` Trond Myklebust
  0 siblings, 0 replies; 12+ messages in thread
From: Trond Myklebust @ 2006-08-14 15:44 UTC (permalink / raw)
  To: cluster-devel.redhat.com

On Mon, 2006-08-14 at 02:00 -0400, Wendy Cheng wrote:
> This change enables per NFS-export entry lockd grace period. The
> implementation is based on a global single linked list nlm_servs that
> contains entries of fsid info. It is expected this would not be a
> frequent event. The nlm_servs list should be short and the entries
> expire within a maximum of 50 seconds.  The grace period setting follows
> the existing NLM grace period handling logic and is triggered via
> echoing the NFS export filesystem id into /proc/fs/nfsd/nlm_set_igrace
> file as:
> 
> shell> echo 1234 > /proc/fs/nfsd/nlm_set_igrace

I still don't find the above interface convincing.

Firstly, as I already told you, the NSM protocol does not allow you to
set only a single filesystem in grace. Clients get notified of server
reboots, not filesystem reboots: if they try to reclaim locks and find
out that some of filesystems they have mounted will not allow them to do
so, then they _will_ get confused and start dropping locks that would
otherwise be perfectly valid.

Secondly, with the above interface, you have to export the filesystem
first, and then set the grace period. Since that is not an atomic
operation, it is perfectly possible for someone to mount the filesystem,
after you exported it, then set a new lock before you have managed to
declare it in grace. This makes reclaiming locks impossible.

Cheers,
  Trond



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/5] NLM failover - per fs grace period
  2006-08-14 15:44   ` [Cluster-devel] Re: [NFS] " Trond Myklebust
@ 2006-08-14 15:59     ` Wendy Cheng
  -1 siblings, 0 replies; 12+ messages in thread
From: Wendy Cheng @ 2006-08-14 15:59 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: cluster-devel, lhh, Linux NFS Mailing List

Trond Myklebust wrote:

>On Mon, 2006-08-14 at 02:00 -0400, Wendy Cheng wrote:
>  
>
>>This change enables per NFS-export entry lockd grace period. The
>>implementation is based on a global single linked list nlm_servs that
>>contains entries of fsid info. It is expected this would not be a
>>frequent event. The nlm_servs list should be short and the entries
>>expire within a maximum of 50 seconds.  The grace period setting follows
>>the existing NLM grace period handling logic and is triggered via
>>echoing the NFS export filesystem id into /proc/fs/nfsd/nlm_set_igrace
>>file as:
>>
>>shell> echo 1234 > /proc/fs/nfsd/nlm_set_igrace
>>    
>>
>
>I still don't find the above interface convincing.
>
>Firstly, as I already told you, the NSM protocol does not allow you to
>set only a single filesystem in grace. Clients get notified of server
>reboots, not filesystem reboots: if they try to reclaim locks and find
>out that some of filesystems they have mounted will not allow them to do
>so, then they _will_ get confused and start dropping locks that would
>otherwise be perfectly valid.
>  
>
I'll check into Linux client code to see what's going on.  But please be 
aware that the individual filesystem grace period goes with floating ip. 
You notify (nfs) client by floating ip address, NOT by filesystem id 
(but set the grace period in server via fsid).

Say you expect nfs requests going into floating ip 10.10.1.1 that will 
handle exported fsid 1234 and 1235. On server, you do /proc entries 
based on 1234 and 1235 and you notify client about 10.10.1.1.

>Secondly, with the above interface, you have to export the filesystem
>first, and then set the grace period. 
>
No, you don't. The changes and code have nothing to do with exports. It 
just adds the numerical fsid into a global array (nlm_servs). When lock 
requests finally arrive (later), it extracts the filesystem id from the 
filehandle to compare. It can be invoked before the filesystem is exported.

-- Wendy


-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Cluster-devel] Re: [NFS] [PATCH 2/5] NLM failover - per fs grace period
@ 2006-08-14 15:59     ` Wendy Cheng
  0 siblings, 0 replies; 12+ messages in thread
From: Wendy Cheng @ 2006-08-14 15:59 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Trond Myklebust wrote:

>On Mon, 2006-08-14 at 02:00 -0400, Wendy Cheng wrote:
>  
>
>>This change enables per NFS-export entry lockd grace period. The
>>implementation is based on a global single linked list nlm_servs that
>>contains entries of fsid info. It is expected this would not be a
>>frequent event. The nlm_servs list should be short and the entries
>>expire within a maximum of 50 seconds.  The grace period setting follows
>>the existing NLM grace period handling logic and is triggered via
>>echoing the NFS export filesystem id into /proc/fs/nfsd/nlm_set_igrace
>>file as:
>>
>>shell> echo 1234 > /proc/fs/nfsd/nlm_set_igrace
>>    
>>
>
>I still don't find the above interface convincing.
>
>Firstly, as I already told you, the NSM protocol does not allow you to
>set only a single filesystem in grace. Clients get notified of server
>reboots, not filesystem reboots: if they try to reclaim locks and find
>out that some of filesystems they have mounted will not allow them to do
>so, then they _will_ get confused and start dropping locks that would
>otherwise be perfectly valid.
>  
>
I'll check into Linux client code to see what's going on.  But please be 
aware that the individual filesystem grace period goes with floating ip. 
You notify (nfs) client by floating ip address, NOT by filesystem id 
(but set the grace period in server via fsid).

Say you expect nfs requests going into floating ip 10.10.1.1 that will 
handle exported fsid 1234 and 1235. On server, you do /proc entries 
based on 1234 and 1235 and you notify client about 10.10.1.1.

>Secondly, with the above interface, you have to export the filesystem
>first, and then set the grace period. 
>
No, you don't. The changes and code have nothing to do with exports. It 
just adds the numerical fsid into a global array (nlm_servs). When lock 
requests finally arrive (later), it extracts the filesystem id from the 
filehandle to compare. It can be invoked before the filesystem is exported.

-- Wendy



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/5] NLM failover - per fs grace period
  2006-08-14 15:59     ` [Cluster-devel] Re: [NFS] " Wendy Cheng
@ 2006-08-15 18:32       ` Wendy Cheng
  -1 siblings, 0 replies; 12+ messages in thread
From: Wendy Cheng @ 2006-08-15 18:32 UTC (permalink / raw)
  To: Neil Brown; +Cc: cluster-devel, lhh, Linux NFS Mailing List, Trond Myklebust

There were a few reply emails that didn't arrive at the nfs list last week 
but got archived in:

* why switching to fsid
https://www.redhat.com/archives/cluster-devel/2006-August/msg00090.html

* cluster script failover sequence
https://www.redhat.com/archives/cluster-devel/2006-August/msg00087.html

To help people understand the patch logic, the following is a sample 
failover flow:

Assume we have server_A and server_B. Server_A is a multi-home NFS 
server with two IP addresses, 10.10.10.1 and 10.10.1.1, where:

shell> cat /etc/exports
/mnt/ext3/exports    *(fsid=1234,sync,rw)
/mnt/gfs1/exports    *(fsid=1236,sync,rw)

It is expected that the ext3 filesystem is served by 10.10.1.1 and gfs by 
10.10.10.1. If we ever want to move ext3 over to server_B, the sequence 
would be:

On server_A:
1. Tear down 10.10.1.1
2. Un-export ext3 exports
3. echo 1234 > /proc/fs/nfsd/nlm_unlock
4. Umount ext3

On server_B:
1. Mount ext3 fs
2. echo 1234 > /proc/fs/nfsd/nlm_set_igrace
3. Export ext3 exports
4. Bring up 10.10.1.1
5. Sending restricted reclaim notifications via 10.10.1.1

Step 5 is implemented based on the ha-callout program as described in 
"man rpc.statd".  What our cluster suite (user mode script) will do (if 
this patch set gets accepted) is to bring up rpc.statd on both servers 
with ha-callout program.
On server_A, the ha-callout program constructs two sm directories 
(sm-10.10.10.1 and sm-10.10.1.1) that can be accessed by both servers. 
This is normally done by placing the directories on the filesystem that 
will get moved over. The original /var/lib/nfs/statd/sm stays in its 
default place in case server_A ends up with a real reboot (or crash). 
When server_B takes over, it sends out a one-time notice to all the 
clients that have entries in the sm-10.10.1.1 directory.

Note that the code of ha-callout program (will be done by our cluster 
suite) actually can be integrated into statd within nfs-utils package. 
However, I would like to keep the changes made to mainline nfs code as 
minimal as possible at this moment.

-- Wendy





-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Cluster-devel] Re: [NFS] [PATCH 2/5] NLM failover - per fs grace period
@ 2006-08-15 18:32       ` Wendy Cheng
  0 siblings, 0 replies; 12+ messages in thread
From: Wendy Cheng @ 2006-08-15 18:32 UTC (permalink / raw)
  To: cluster-devel.redhat.com

There were a few reply emails that didn't arrive at the nfs list last week 
but got archived in:

* why switching to fsid
https://www.redhat.com/archives/cluster-devel/2006-August/msg00090.html

* cluster script failover sequence
https://www.redhat.com/archives/cluster-devel/2006-August/msg00087.html

To help people understand the patch logic, the following is a sample 
failover flow:

Assume we have server_A and server_B. Server_A is a multi-home NFS 
server with two IP addresses, 10.10.10.1 and 10.10.1.1, where:

shell> cat /etc/exports
/mnt/ext3/exports    *(fsid=1234,sync,rw)
/mnt/gfs1/exports    *(fsid=1236,sync,rw)

It is expected that the ext3 filesystem is served by 10.10.1.1 and gfs by 
10.10.10.1. If we ever want to move ext3 over to server_B, the sequence 
would be:

On server_A:
1. Tear down 10.10.1.1
2. Un-export ext3 exports
3. echo 1234 > /proc/fs/nfsd/nlm_unlock
4. Umount ext3

On server_B:
1. Mount ext3 fs
2. echo 1234 > /proc/fs/nfsd/nlm_set_igrace
3. Export ext3 exports
4. Bring up 10.10.1.1
5. Sending restricted reclaim notifications via 10.10.1.1

Step 5 is implemented based on the ha-callout program as described in 
"man rpc.statd".  What our cluster suite (user mode script) will do (if 
this patch set gets accepted) is to bring up rpc.statd on both servers 
with ha-callout program.
On server_A, the ha-callout program constructs two sm directories 
(sm-10.10.10.1 and sm-10.10.1.1) that can be accessed by both servers. 
This is normally done by placing the directories on the filesystem that 
will get moved over. The original /var/lib/nfs/statd/sm stays in its 
default place in case server_A ends up with a real reboot (or crash). 
When server_B takes over, it sends out a one-time notice to all the 
clients that have entries in the sm-10.10.1.1 directory.

Note that the code of ha-callout program (will be done by our cluster 
suite) actually can be integrated into statd within nfs-utils package. 
However, I would like to keep the changes made to mainline nfs code as 
minimal as possible at this moment.

-- Wendy






^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/5] NLM failover - per fs grace period
  2006-08-14  6:00 ` [Cluster-devel] " Wendy Cheng
@ 2006-08-18  9:49   ` Greg Banks
  -1 siblings, 0 replies; 12+ messages in thread
From: Greg Banks @ 2006-08-18  9:49 UTC (permalink / raw)
  To: Wendy Cheng; +Cc: cluster-devel, lhh, Linux NFS Mailing List

On Mon, 2006-08-14 at 16:00, Wendy Cheng wrote:
> This change enables per NFS-export entry lockd grace period.[...]

>  
> +/* Server fsid linked list for NLM lock failover */
> +struct nlm_serv {
> +	struct nlm_serv*	s_next;		/* linked list */
> +	unsigned long		s_grace_period;	/* per fsid grace period */
> +	int			s_fsid;		/* export fsid */
> +};
> +

The name of this structure appears to be left over from your
previous approach; it doesn't really represent a server anymore.
Giving the structure, and list, and the lock that protects it
similar and appropriate names might be nice.

Also, the s_grace_period field isn't actually a period, it's
the future expiry time expressed in jiffies.  The field name
and comment are both confusing.

> +int
> +nlmsvc_fo_setgrace(int fsid)
> +{
> +	struct nlm_serv *per_fsid, *entry;
> +
> +	/* allocate the entry */
> +	per_fsid = kmalloc(sizeof(struct nlm_serv), GFP_KERNEL);
> +	if (per_fsid == NULL) {
> +		printk("lockd: nlmsvc_fo_setgrace kmalloc fails\n");
> +		return(-ENOMEM);
> +	}
> +

These actions:

# echo 1234 > /proc/fs/nfsd/nlm_set_igrace
# echo 1234 > /proc/fs/nfsd/nlm_set_igrace

result in two separate nlm_serv structures being allocated and
stored in the global list.  That doesn't make sense, a filesystem
should have at most one grace period.

> +	/* link into the global list */
> +	spin_lock(&nlm_fo_lock);
> +	
> +	entry = nlm_servs;
> +	per_fsid->s_next = entry;
> +	nlm_servs = per_fsid;

Why use open-coded list manipulation when Linux has an adequate
list package in <linux/list.h> ?

> +
> +	/* done */
> +	spin_unlock(&nlm_fo_lock);
> +	return 0;
> +}
> +
> +/* nlm_servs gargabe collection 

s/gargabe/garbage/

> + *  - caller should hold nlm_ip_mutex

This comment is stale, you don't have an nlm_ip_mutex anymore.

> +void 
> +nlmsvc_fo_reset_servs()
> +{
> [...]
> +	return;

This "return" is superfluous.

> +int
> +nlmsvc_fo_check(struct nfs_fh *fh)
> +{
> +	struct nlm_serv **e_top, *e_this, *e_purge=NULL;
> +	int rc=0, this_fsid, not_found;
> +
> +	spin_lock(&nlm_fo_lock);
> +
> +	/* no failover entry */
> +	if (!(e_this = nlm_servs))  
> +		goto nlmsvc_fo_check_out;
> +
> +	/* see if this fh has fsid */
> +	not_found = nlm_fo_get_fsid(fh, &this_fsid);
> +	if (not_found) 
> +		goto nlmsvc_fo_check_out;

You could do this outside the critical section.

> +
> +	/* check to see whether this_fsid is in nlm_servs list */
> +	e_top = &nlm_servs;
> +	while (e_this) {
> +		if (time_before(e_this->s_grace_period, jiffies)) {
> +			dprintk("lockd: fsid=%d grace period expires\n",
> +				e_this->s_fsid);
> +			e_purge = e_this;
> +			break;
> +		} else if (e_this->s_fsid == this_fsid) {
> +			dprintk("lockd: fsid=%d in grace period\n",
> +				e_this->s_fsid);
> +			rc = 1;
> +		}
> +		e_top = &(e_this->s_next);
> +		e_this = e_this->s_next;
> +	}
> +
> +	/* piggy back nlm_servs garbage collection */
> +	if (e_purge) {
> +		*e_top = NULL;
> +		__nlm_servs_gc(e_purge);
> +	}
> +

So...if you find an expired entry, you delete entries from the global
list starting at that entry and continuing to the end.  Presumably the
assumption here is that the list is sorted in decreasing order of expiry
time, because you only prepend to the list in nlmsvc_fo_setgrace().

However that's wrong: the expiry times returned from set_grace_period()
don't have to monotonically increase.  Both nlm_grace_period and
nlm_timeout may be changed by sysctls, so the length of the grace
period can vary.  If it varies downwards between two writes to the
set_igrace file, the list may not be in the order you expect.

Also, if your userspace program went berserk and started writing
random fsids into the set_igrace file, they wouldn't be purged
until lockd is shut down or the first NLM call arrives *after*
their grace expires.  This has the potential for a kernel memory
Denial of Service attack.  Perhaps, when you add a new entry you
could also purge expired ones, and/or put an arbitrary limit on
how large the list can grow?


>  static ssize_t (*write_op[])(struct file *, char *, size_t) = {
>  	[NFSD_Svc] = write_svc,
> @@ -104,6 +106,7 @@ static ssize_t (*write_op[])(struct file
>  	[NFSD_Getfs] = write_getfs,
>  	[NFSD_Fh] = write_filehandle,
>  	[NFSD_Nlm_unlock] = do_nlm_fo_unlock,
> +	[NFSD_Nlm_igrace] = do_nlm_fs_grace,
>  	[NFSD_Threads] = write_threads,
>  	[NFSD_Versions] = write_versions,
>  #ifdef CONFIG_NFSD_V4

Same comment as before.

> +extern struct nlm_serv *nlm_servs;
> +
> +static inline int
> +nlmsvc_fo_grace_period(struct nlm_args *argp)
> +{
> +	if (unlikely(nlm_servs))
> +		return(nlmsvc_fo_check(&argp->lock.fh));
> +
> +	return 0;
> +}
> +

You have two nearly identical functions to do this, which seems
superfluous.  This is why we have header files.

> @@ -81,7 +81,6 @@ static unsigned long set_grace_period(vo
>  				/ nlm_timeout) * nlm_timeout * HZ;
>  	else
>  		grace_period = nlm_timeout * 5 * HZ;
> -	nlmsvc_grace_period = 1;
>  	return grace_period + jiffies;
>  }

As set_grace_period() no longer sets anything, and returns
an expiry time rather than a period, its name isn't very
appropriate anymore.

Greg.
-- 
Greg Banks, R&D Software Engineer, SGI Australian Software Group.
I don't speak for SGI.



-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Cluster-devel] Re: [NFS] [PATCH 2/5] NLM failover - per fs grace period
@ 2006-08-18  9:49   ` Greg Banks
  0 siblings, 0 replies; 12+ messages in thread
From: Greg Banks @ 2006-08-18  9:49 UTC (permalink / raw)
  To: cluster-devel.redhat.com

On Mon, 2006-08-14 at 16:00, Wendy Cheng wrote:
> This change enables per NFS-export entry lockd grace period.[...]

>  
> +/* Server fsid linked list for NLM lock failover */
> +struct nlm_serv {
> +	struct nlm_serv*	s_next;		/* linked list */
> +	unsigned long		s_grace_period;	/* per fsid grace period */
> +	int			s_fsid;		/* export fsid */
> +};
> +

The name of this structure appears to be left over from your
previous approach; it doesn't really represent a server anymore.
Giving the structure, and list, and the lock that protects it
similar and appropriate names might be nice.

Also, the s_grace_period field isn't actually a period, it's
the future expiry time expressed in jiffies.  The field name
and comment are both confusing.

> +int
> +nlmsvc_fo_setgrace(int fsid)
> +{
> +	struct nlm_serv *per_fsid, *entry;
> +
> +	/* allocate the entry */
> +	per_fsid = kmalloc(sizeof(struct nlm_serv), GFP_KERNEL);
> +	if (per_fsid == NULL) {
> +		printk("lockd: nlmsvc_fo_setgrace kmalloc fails\n");
> +		return(-ENOMEM);
> +	}
> +

These actions:

# echo 1234 > /proc/fs/nfsd/nlm_set_igrace
# echo 1234 > /proc/fs/nfsd/nlm_set_igrace

result in two separate nlm_serv structures being allocated and
stored in the global list.  That doesn't make sense, a filesystem
should have at most one grace period.

> +	/* link into the global list */
> +	spin_lock(&nlm_fo_lock);
> +	
> +	entry = nlm_servs;
> +	per_fsid->s_next = entry;
> +	nlm_servs = per_fsid;

Why use open-coded list manipulation when Linux has an adequate
list package in <linux/list.h> ?

> +
> +	/* done */
> +	spin_unlock(&nlm_fo_lock);
> +	return 0;
> +}
> +
> +/* nlm_servs gargabe collection 

s/gargabe/garbage/

> + *  - caller should hold nlm_ip_mutex

This comment is stale, you don't have an nlm_ip_mutex anymore.

> +void 
> +nlmsvc_fo_reset_servs()
> +{
> [...]
> +	return;

This "return" is superfluous.

> +int
> +nlmsvc_fo_check(struct nfs_fh *fh)
> +{
> +	struct nlm_serv **e_top, *e_this, *e_purge=NULL;
> +	int rc=0, this_fsid, not_found;
> +
> +	spin_lock(&nlm_fo_lock);
> +
> +	/* no failover entry */
> +	if (!(e_this = nlm_servs))  
> +		goto nlmsvc_fo_check_out;
> +
> +	/* see if this fh has fsid */
> +	not_found = nlm_fo_get_fsid(fh, &this_fsid);
> +	if (not_found) 
> +		goto nlmsvc_fo_check_out;

You could do this outside the critical section.

> +
> +	/* check to see whether this_fsid is in nlm_servs list */
> +	e_top = &nlm_servs;
> +	while (e_this) {
> +		if (time_before(e_this->s_grace_period, jiffies)) {
> +			dprintk("lockd: fsid=%d grace period expires\n",
> +				e_this->s_fsid);
> +			e_purge = e_this;
> +			break;
> +		} else if (e_this->s_fsid == this_fsid) {
> +			dprintk("lockd: fsid=%d in grace period\n",
> +				e_this->s_fsid);
> +			rc = 1;
> +		}
> +		e_top = &(e_this->s_next);
> +		e_this = e_this->s_next;
> +	}
> +
> +	/* piggy back nlm_servs garbage collection */
> +	if (e_purge) {
> +		*e_top = NULL;
> +		__nlm_servs_gc(e_purge);
> +	}
> +

So...if you find an expired entry, you delete entries from the global
list starting at that entry and continuing to the end.  Presumably the
assumption here is that the list is sorted in decreasing order of expiry
time, because you only prepend to the list in nlmsvc_fo_setgrace().

However that's wrong: the expiry times returned from set_grace_period()
don't have to monotonically increase.  Both nlm_grace_period and
nlm_timeout may be changed by sysctls, so the length of the grace
period can vary.  If it varies downwards between two writes to the
set_igrace file, the list may not be in the order you expect.

Also, if your userspace program went berserk and started writing
random fsids into the set_igrace file, they wouldn't be purged
until lockd is shut down or the first NLM call arrives *after*
their grace expires.  This has the potential for a kernel memory
Denial of Service attack.  Perhaps, when you add a new entry you
could also purge expired ones, and/or put an arbitrary limit on
how large the list can grow?


>  static ssize_t (*write_op[])(struct file *, char *, size_t) = {
>  	[NFSD_Svc] = write_svc,
> @@ -104,6 +106,7 @@ static ssize_t (*write_op[])(struct file
>  	[NFSD_Getfs] = write_getfs,
>  	[NFSD_Fh] = write_filehandle,
>  	[NFSD_Nlm_unlock] = do_nlm_fo_unlock,
> +	[NFSD_Nlm_igrace] = do_nlm_fs_grace,
>  	[NFSD_Threads] = write_threads,
>  	[NFSD_Versions] = write_versions,
>  #ifdef CONFIG_NFSD_V4

Same comment as before.

> +extern struct nlm_serv *nlm_servs;
> +
> +static inline int
> +nlmsvc_fo_grace_period(struct nlm_args *argp)
> +{
> +	if (unlikely(nlm_servs))
> +		return(nlmsvc_fo_check(&argp->lock.fh));
> +
> +	return 0;
> +}
> +

You have two nearly identical functions to do this, which seems
superfluous.  This is why we have header files.

> @@ -81,7 +81,6 @@ static unsigned long set_grace_period(vo
>  				/ nlm_timeout) * nlm_timeout * HZ;
>  	else
>  		grace_period = nlm_timeout * 5 * HZ;
> -	nlmsvc_grace_period = 1;
>  	return grace_period + jiffies;
>  }

As set_grace_period() no longer sets anything, and returns
an expiry time rather than a period, its name isn't very
appropriate anymore.

Greg.
-- 
Greg Banks, R&D Software Engineer, SGI Australian Software Group.
I don't speak for SGI.




^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/5] NLM failover - per fs grace period
  2006-08-18  9:49   ` [Cluster-devel] Re: [NFS] " Greg Banks
@ 2006-08-18 20:11     ` James Yarbrough
  -1 siblings, 0 replies; 12+ messages in thread
From: James Yarbrough @ 2006-08-18 20:11 UTC (permalink / raw)
  To: Greg Banks; +Cc: cluster-devel, lhh, Linux NFS Mailing List, Wendy Cheng

Greg Banks wrote:
> 
> On Mon, 2006-08-14 at 16:00, Wendy Cheng wrote:
> > This change enables per NFS-export entry lockd grace period.[...]
> 
> >
> > +/* Server fsid linked list for NLM lock failover */
> > +struct nlm_serv {
> > +     struct nlm_serv*        s_next;         /* linked list */
> > +     unsigned long           s_grace_period; /* per fsid grace period */
> > +     int                     s_fsid;         /* export fsid */
> > +};
> > +
> 
> The name of this structure appears to be left over from your
> previous approach; it doesn't really represent a server anymore.
> Giving the structure, and list, and the lock that protects it
> similar and appropriate names might be nice.
> 
> Also, the s_grace_period field isn't actually a period, it's
> the future expiry time expressed in jiffies.  The field name
> and comment are both confusing.

It might be a good idea to change s_grace_period to something like
s_grace_end since it actually marks the ending time of the grace period.
If you do change the name, it would be a good idea to enhance the
commentary to indicate the relationship of the field to the grace period.
That should leave enough breadcrumbs for anyone familiar with the NLM
terminology to follow.

-- 
jmy@sgi.com
650 933 3124

I need more snakes.

-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Cluster-devel] Re: [NFS] [PATCH 2/5] NLM failover - per fs grace period
@ 2006-08-18 20:11     ` James Yarbrough
  0 siblings, 0 replies; 12+ messages in thread
From: James Yarbrough @ 2006-08-18 20:11 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Greg Banks wrote:
> 
> On Mon, 2006-08-14 at 16:00, Wendy Cheng wrote:
> > This change enables per NFS-export entry lockd grace period.[...]
> 
> >
> > +/* Server fsid linked list for NLM lock failover */
> > +struct nlm_serv {
> > +     struct nlm_serv*        s_next;         /* linked list */
> > +     unsigned long           s_grace_period; /* per fsid grace period */
> > +     int                     s_fsid;         /* export fsid */
> > +};
> > +
> 
> The name of this structure appears to be left over from your
> previous approach; it doesn't really represent a server anymore.
> Giving the structure, and list, and the lock that protects it
> similar and appropriate names might be nice.
> 
> Also, the s_grace_period field isn't actually a period, it's
> the future expiry time expressed in jiffies.  The field name
> and comment are both confusing.

It might be a good idea to change s_grace_period to something like
s_grace_end since it actually marks the ending time of the grace period.
If you do change the name, it would be a good idea to enhance the
commentary to indicate the relationship of the field to the grace period.
That should leave enough breadcrumbs for anyone familiar with the NLM
terminology to follow.

-- 
jmy at sgi.com
650 933 3124

I need more snakes.



^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2006-08-18 20:11 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-08-14  6:00 [PATCH 2/5] NLM failover - per fs grace period Wendy Cheng
2006-08-14  6:00 ` [Cluster-devel] " Wendy Cheng
2006-08-14 15:44 ` Trond Myklebust
2006-08-14 15:44   ` [Cluster-devel] Re: [NFS] " Trond Myklebust
2006-08-14 15:59   ` Wendy Cheng
2006-08-14 15:59     ` [Cluster-devel] Re: [NFS] " Wendy Cheng
2006-08-15 18:32     ` Wendy Cheng
2006-08-15 18:32       ` [Cluster-devel] Re: [NFS] " Wendy Cheng
2006-08-18  9:49 ` Greg Banks
2006-08-18  9:49   ` [Cluster-devel] Re: [NFS] " Greg Banks
2006-08-18 20:11   ` James Yarbrough
2006-08-18 20:11     ` [Cluster-devel] Re: [NFS] " James Yarbrough

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.