All of lore.kernel.org
 help / color / mirror / Atom feed
* [Cluster-devel] cluster/gfs-kernel/src/gfs inode.c inode.h ops ...
@ 2007-02-14 23:15 wcheng
  0 siblings, 0 replies; only message in thread
From: wcheng @ 2007-02-14 23:15 UTC (permalink / raw)
  To: cluster-devel.redhat.com

CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL4
Changes by:	wcheng at sourceware.org	2007-02-14 23:15:44

Modified files:
	gfs-kernel/src/gfs: inode.c inode.h ops_inode.c 

Log message:
	Final patch for bugzilla 1904745 - there is a deadlock documented in the
	bugzilla entry comment #26 that is messy to get around under current
	GFS lock order rule. To compromise the issue, we will fail the rename
	with the original error return code (-ENOENT), if the new ino has any
	possibility to cause deadlock. This implies, if GFS ino number is
	allocated via true random manner, we only reduce this error return
	code by 25%.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/inode.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.20.2.3&r2=1.20.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/inode.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.3.2.1&r2=1.3.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/ops_inode.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.6.2.5&r2=1.6.2.6

--- cluster/gfs-kernel/src/gfs/inode.c	2006/10/10 18:35:27	1.20.2.3
+++ cluster/gfs-kernel/src/gfs/inode.c	2007/02/14 23:15:44	1.20.2.4
@@ -335,7 +335,7 @@
 
 	/* Read dinode from disk */
 	error = gfs_copyin_dinode(ip);
-	if (error)
+	if (error) 
 		goto fail_iopen;
 
 	gfs_glock_hold(i_gl);
@@ -1566,7 +1566,8 @@
  */
 
 int
-gfs_unlink_ok(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip)
+gfs_unlink_ok(struct gfs_inode *dip, struct qstr *name, 
+	struct gfs_inode *ip, struct gfs_inum *inump)
 {
 	struct gfs_inum inum;
 	unsigned int type;
@@ -1589,11 +1590,22 @@
 		return error;
 
 	error = gfs_dir_search(dip, name, &inum, &type);
-	if (error)
+	if (error) 
 		return error;
 
-	if (inum.no_formal_ino != ip->i_num.no_formal_ino)
+	if (inum.no_formal_ino != ip->i_num.no_formal_ino) {
+		/*
+		 * Add for rename - bugzilla 190475
+		 *   return new inum such that gfs_rename can
+		 *   do its silly rename.
+		 */
+		if (unlikely(inump)) { 
+			inump->no_formal_ino = inum.no_formal_ino;
+			inump->no_addr = inum.no_addr;
+		}
+			
 		return -ENOENT;
+	}
 
 	if (ip->i_di.di_type != type) {
 		gfs_consist_inode(dip);
--- cluster/gfs-kernel/src/gfs/inode.h	2007/01/16 20:39:03	1.3.2.1
+++ cluster/gfs-kernel/src/gfs/inode.h	2007/02/14 23:15:44	1.3.2.2
@@ -37,7 +37,7 @@
 int gfs_unlinki(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip);
 int gfs_rmdiri(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip);
 int gfs_unlink_ok(struct gfs_inode *dip, struct qstr *name,
-		  struct gfs_inode *ip);
+		  struct gfs_inode *ip, struct gfs_inum *inum);
 int gfs_ok_to_move(struct gfs_inode *this, struct gfs_inode *to);
 int gfs_readlinki(struct gfs_inode *ip, char **buf, unsigned int *len);
 
--- cluster/gfs-kernel/src/gfs/ops_inode.c	2007/01/18 20:40:12	1.6.2.5
+++ cluster/gfs-kernel/src/gfs/ops_inode.c	2007/02/14 23:15:44	1.6.2.6
@@ -537,7 +537,7 @@
 	if (error)
 		goto fail;
 
-	error = gfs_unlink_ok(dip, &dentry->d_name, ip);
+	error = gfs_unlink_ok(dip, &dentry->d_name, ip, NULL);
 	if (error)
 		goto fail_gunlock;
 
@@ -772,7 +772,7 @@
 	if (error)
 		goto fail;
 
-	error = gfs_unlink_ok(dip, &dentry->d_name, ip);
+	error = gfs_unlink_ok(dip, &dentry->d_name, ip, NULL);
 	if (error)
 		goto fail_gunlock;
 
@@ -914,6 +914,75 @@
 	return 0;
 }
 
+/*
+ * Obtain a clone dentry with correct inode:
+ *
+ * This is to work around the window between lock_rename() 
+ * and gfs_rename() where another node may have renamed the 
+ * target file (bugzilla 190475). The bug most likely occurs
+ * in a mail server environment.
+ *
+ * Return true if the dentry has been successfully updated.
+ */
+static int
+gfs_refresh_dentry(struct gfs_sbd *sdp, struct dentry **f_dentry, 
+	struct gfs_inum *inump) 
+{
+	struct inode *inode;
+	struct dentry *dentry, *sdentry=*f_dentry;
+
+	dentry = d_alloc(sdentry->d_parent, &sdentry->d_name);
+	if (!dentry) 
+		return 0;
+
+	/* Get the fully updated inode */
+	inode = NULL;
+	if (inump->no_formal_ino) {
+		inode = gfs_refresh_iobj(sdp, inump, NULL);
+		if (IS_ERR(inode)) { 
+			dput(dentry);
+			return 0;
+		}
+	} 
+
+	/* 
+	 * Remove the old dentry from cache to avoid 
+	 * another lookup.
+	 */
+	if (!d_unhashed(sdentry)) {
+		d_drop(sdentry);
+	}
+
+	/* Instantiate the new dentry */
+	dentry->d_op = &gfs_dops;
+
+	d_splice_alias(inode, dentry);
+
+	/* Return with success */
+	*f_dentry = dentry;
+	return 1;
+}
+
+/* 
+ * Check lock order for the new ino: 
+ * return 1: if it could lead to deadlock
+ * return 0: if ordering is ok.
+ */ 
+
+static int
+gfs_check_lock_order(struct gfs_holder *ghs, int n_gh, uint64_t ino)
+{
+	int x;
+
+	/* no fancy algorithm here since max n_gh is 4 */
+	for (x = 0; x < n_gh; x++) {
+		if (ino < ghs[x].gh_gl->gl_name.ln_number) 
+			return 1;
+	}
+
+	return 0;
+}
+
 /**
  * gfs_rename - Rename a file
  * @odir: Parent directory of old file name
@@ -935,17 +1004,21 @@
 	struct gfs_sbd *sdp = odip->i_sbd;
 	struct qstr name;
 	struct gfs_alloc *al;
-	struct gfs_holder ghs[4], r_gh;
+	struct gfs_holder ghs[5], r_gh;
 	unsigned int num_gh;
 	int dir_rename = FALSE;
 	int alloc_required;
 	unsigned int x;
 	int error;
+	int once_cnt=0, has_ndentry=0;
+	struct gfs_inum new_inum;
 
 	atomic_inc(&sdp->sd_ops_inode);
 
 	gfs_unlinked_limit(sdp);
 
+	odir->i_sb->s_type->fs_flags &= ~FS_ODD_RENAME;
+
 	if (ndentry->d_inode) {
 		nip = vn2ip(ndentry->d_inode);
 		if (ip == nip)
@@ -968,36 +1041,78 @@
 			goto fail;
 	}
 
-	num_gh = 1;
+	num_gh  = 1;
 	gfs_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
 	if (odip != ndip) {
 		gfs_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs+num_gh);
 		num_gh++;
 	}
+
 	gfs_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs+num_gh);
 	num_gh++;
-
 	if (nip) {
 		gfs_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
 		num_gh++;
 	}
 
 	error = gfs_glock_nq_m(num_gh, ghs);
-	if (error)
-		goto fail_uninit;
+        if (error)
+                goto fail_uninit;
 
 	/*  Check out the old directory  */
 
-	error = gfs_unlink_ok(odip, &odentry->d_name, ip);
+	error = gfs_unlink_ok(odip, &odentry->d_name, ip, NULL);
 	if (error)
 		goto fail_gunlock;
 
+gfs_rename_retry:
+
 	/*  Check out the new directory  */
 
 	if (nip) {
-		error = gfs_unlink_ok(ndip, &ndentry->d_name, nip);
-		if (error)
+		/* Zero out this field so we can know whether gfs_unlink_ok 
+		 * could fill in the correct inode numbers.
+		 */
+		new_inum.no_formal_ino = 0;
+		error = gfs_unlink_ok(ndip, &ndentry->d_name, nip, &new_inum);
+
+		if (unlikely(error)) {
+			/* 
+			 * Handle a rare race condition that the new file
+			 * could have been renamed by another node - the
+			 * correct inode number is passed into new_inum. 
+			 * We only allow this to happen once.
+			 */
+			if ((error == -ENOENT) && (!once_cnt) 
+					&& (new_inum.no_formal_ino)) 
+			{
+				if (gfs_check_lock_order(ghs, num_gh, 
+						new_inum.no_formal_ino)) 
+					/* fail rename with -ENOENT */
+					goto fail_gunlock;
+
+				if (gfs_refresh_dentry(sdp,&ndentry,&new_inum))
+				{
+					odir->i_sb->s_type->fs_flags 
+							|= FS_ODD_RENAME;
+					has_ndentry = 1;
+					nip = vn2ip(ndentry->d_inode);
+					if (nip) {
+					   once_cnt++;
+					   error = gfs_glock_nq_init(nip->i_gl,
+							LM_ST_EXCLUSIVE, 0,
+							&ghs[num_gh++]);
+					   if (error) { 
+						num_gh--;
+						error = -ENOENT;
+					   }
+					   else
+					   	goto gfs_rename_retry;
+					}
+				}
+			}
 			goto fail_gunlock;
+		}
 
 		if (nip->i_di.di_type == GFS_FILE_DIR) {
 			if (nip->i_di.di_entries < 2) {
@@ -1150,6 +1265,14 @@
 	if (dir_rename)
 		gfs_glock_dq_uninit(&r_gh);
 
+
+	/* dput new dentry as vfs layer still uses old entry */
+	if (unlikely(has_ndentry)) {
+		d_move(odentry, ndentry);
+		dput(ndentry);
+		dput(ndentry);
+	}
+
 	return 0;
 
  fail_end_trans:
@@ -1178,6 +1301,12 @@
 	if (dir_rename)
 		gfs_glock_dq_uninit(&r_gh);
 
+	if (unlikely(has_ndentry)) {
+		d_move(odentry, ndentry);
+		dput(ndentry);
+		dput(ndentry);
+	}
+
 	return error;
 }
 



^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2007-02-14 23:15 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-02-14 23:15 [Cluster-devel] cluster/gfs-kernel/src/gfs inode.c inode.h ops wcheng

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.