All of lore.kernel.org
 help / color / mirror / Atom feed
* [Cluster-devel] [GFS2 PATCH 0/3] Fix inode transition from unlinked to free
@ 2016-04-27 15:35 Bob Peterson
  2016-04-27 15:35 ` [Cluster-devel] [GFS2 PATCH 1/3] Revert "GFS2: Eliminate parameter non_block on gfs2_inode_lookup" Bob Peterson
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Bob Peterson @ 2016-04-27 15:35 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This is a set of three patches designed to fix the severely broken
transition from unlinked to free dinodes. Previously, I had posted
patches to ditch the code that filtered out I_FREEing inodes, but
now I'm adding it back in and leveraging it for the new patch. The
new patch basically adds retry loops to the glock delete_work_func,
so that when an inode is not found in memory, the operation is retried,
but only if it's in an I_FREEing state. So the delete_work_func now
becomes: (1) Try to find the inode in inode cache (gfs2_ilookup).
If the inode is in I_FREEING, we sleep and try again later, for a
certain retry period. After that, if it's still not found in cache
(whether not in cache or any other error) it goes on to call the
gfs2_lookup_by_inum for a certain number of retries. Again, if the
inode in still I_FREEing, we sleep and retry the operation.
Since there's so much common logic, I decided to combine the two
lookup functions (gfs2_ilookup and gfs2_iget) into a common function,
inode_lookup_common. I also had to add the logic for the lookup to
return -EAGAIN in the cases we need.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
Bob Peterson (3):
  Revert "GFS2: Eliminate parameter non_block on gfs2_inode_lookup"
  Revert "GFS2: Don't filter out I_FREEING inodes anymore"
  GFS2: Add retry loop to delete_work_func

 fs/gfs2/dir.c        |  2 +-
 fs/gfs2/export.c     |  2 +-
 fs/gfs2/glock.c      | 29 ++++++++++++++------
 fs/gfs2/inode.c      | 77 ++++++++++++++++++++++++++++++++++++++++++++++++----
 fs/gfs2/inode.h      |  5 ++--
 fs/gfs2/ops_fstype.c |  2 +-
 6 files changed, 99 insertions(+), 18 deletions(-)

-- 
2.5.5



^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Cluster-devel] [GFS2 PATCH 1/3] Revert "GFS2: Eliminate parameter non_block on gfs2_inode_lookup"
  2016-04-27 15:35 [Cluster-devel] [GFS2 PATCH 0/3] Fix inode transition from unlinked to free Bob Peterson
@ 2016-04-27 15:35 ` Bob Peterson
  2016-04-27 15:35 ` [Cluster-devel] [GFS2 PATCH 2/3] Revert "GFS2: Don't filter out I_FREEING inodes anymore" Bob Peterson
  2016-04-27 15:35 ` [Cluster-devel] [GFS2 PATCH 3/3] GFS2: Add retry loop to delete_work_func Bob Peterson
  2 siblings, 0 replies; 7+ messages in thread
From: Bob Peterson @ 2016-04-27 15:35 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This reverts commit 73b462d2808d7cbca4d7886cf6aaed850640e6cd.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/dir.c        | 2 +-
 fs/gfs2/inode.c      | 5 +++--
 fs/gfs2/inode.h      | 3 ++-
 fs/gfs2/ops_fstype.c | 2 +-
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 4a01f30..d4014af 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1660,7 +1660,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name,
 		brelse(bh);
 		if (fail_on_exist)
 			return ERR_PTR(-EEXIST);
-		inode = gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino);
+		inode = gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino, 0);
 		if (!IS_ERR(inode))
 			GFS2_I(inode)->i_rahead = rahead;
 		return inode;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index aea002e..be6036a 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -80,12 +80,13 @@ static void gfs2_set_iop(struct inode *inode)
  * @sb: The super block
  * @no_addr: The inode number
  * @type: The type of the inode
+ * non_block: Can we block on inodes that are being freed?
  *
  * Returns: A VFS inode, or an error
  */
 
 struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
-				u64 no_addr, u64 no_formal_ino)
+				u64 no_addr, u64 no_formal_ino, int non_block)
 {
 	struct inode *inode;
 	struct gfs2_inode *ip;
@@ -169,7 +170,7 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
 	if (error)
 		goto fail;
 
-	inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0);
+	inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, 1);
 	if (IS_ERR(inode))
 		goto fail;
 
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index e1af0d4..22c27a8 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -94,7 +94,8 @@ err:
 }
 
 extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
-				       u64 no_addr, u64 no_formal_ino);
+				       u64 no_addr, u64 no_formal_ino,
+				       int non_block);
 extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
 					 u64 *no_formal_ino,
 					 unsigned int blktype);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 49b0bff..dbed9e2 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -454,7 +454,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
 	struct dentry *dentry;
 	struct inode *inode;
 
-	inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
+	inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
 	if (IS_ERR(inode)) {
 		fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
 		return PTR_ERR(inode);
-- 
2.5.5



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [Cluster-devel] [GFS2 PATCH 2/3] Revert "GFS2: Don't filter out I_FREEING inodes anymore"
  2016-04-27 15:35 [Cluster-devel] [GFS2 PATCH 0/3] Fix inode transition from unlinked to free Bob Peterson
  2016-04-27 15:35 ` [Cluster-devel] [GFS2 PATCH 1/3] Revert "GFS2: Eliminate parameter non_block on gfs2_inode_lookup" Bob Peterson
@ 2016-04-27 15:35 ` Bob Peterson
  2016-04-27 15:35 ` [Cluster-devel] [GFS2 PATCH 3/3] GFS2: Add retry loop to delete_work_func Bob Peterson
  2 siblings, 0 replies; 7+ messages in thread
From: Bob Peterson @ 2016-04-27 15:35 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This reverts commit ff34245d524a898eee6e013eb1ec165095277148.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/export.c |  2 +-
 fs/gfs2/glock.c  |  2 +-
 fs/gfs2/inode.c  | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
 fs/gfs2/inode.h  |  2 +-
 4 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index d5bda85..5d15e94 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -137,7 +137,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 	struct inode *inode;
 
-	inode = gfs2_ilookup(sb, inum->no_addr);
+	inode = gfs2_ilookup(sb, inum->no_addr, 0);
 	if (inode) {
 		if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
 			iput(inode);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 3910cea..672de35 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -589,7 +589,7 @@ static void delete_work_func(struct work_struct *work)
 	/* Note: Unsafe to dereference ip as we don't hold right refs/locks */
 
 	if (ip)
-		inode = gfs2_ilookup(sdp->sd_vfs, no_addr);
+		inode = gfs2_ilookup(sdp->sd_vfs, no_addr, 1);
 	else
 		inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
 	if (inode && !IS_ERR(inode)) {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index be6036a..48c1418 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -37,9 +37,61 @@
 #include "super.h"
 #include "glops.h"
 
-struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr)
+struct gfs2_skip_data {
+	u64 no_addr;
+	int skipped;
+	int non_block;
+};
+
+static int iget_test(struct inode *inode, void *opaque)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_skip_data *data = opaque;
+
+	if (ip->i_no_addr == data->no_addr) {
+		if (data->non_block &&
+		    inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
+			data->skipped = 1;
+			return 0;
+		}
+		return 1;
+	}
+	return 0;
+}
+
+static int iget_set(struct inode *inode, void *opaque)
 {
-	return ilookup(sb, (unsigned long)no_addr);
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_skip_data *data = opaque;
+
+	if (data->skipped)
+		return -ENOENT;
+	inode->i_ino = (unsigned long)(data->no_addr);
+	ip->i_no_addr = data->no_addr;
+	return 0;
+}
+
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
+{
+	unsigned long hash = (unsigned long)no_addr;
+	struct gfs2_skip_data data;
+
+	data.no_addr = no_addr;
+	data.skipped = 0;
+	data.non_block = non_block;
+	return ilookup5(sb, hash, iget_test, &data);
+}
+
+static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr,
+			       int non_block)
+{
+	struct gfs2_skip_data data;
+	unsigned long hash = (unsigned long)no_addr;
+
+	data.no_addr = no_addr;
+	data.skipped = 0;
+	data.non_block = non_block;
+	return iget5_locked(sb, hash, iget_test, iget_set, &data);
 }
 
 /**
@@ -93,7 +145,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
 	struct gfs2_glock *io_gl = NULL;
 	int error;
 
-	inode = iget_locked(sb, (unsigned long)no_addr);
+	inode = gfs2_iget(sb, no_addr, non_block);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 22c27a8..ba4d949 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -99,7 +99,7 @@ extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
 extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
 					 u64 *no_formal_ino,
 					 unsigned int blktype);
-extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
+extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int nonblock);
 
 extern int gfs2_inode_refresh(struct gfs2_inode *ip);
 
-- 
2.5.5



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [Cluster-devel] [GFS2 PATCH 3/3] GFS2: Add retry loop to delete_work_func
  2016-04-27 15:35 [Cluster-devel] [GFS2 PATCH 0/3] Fix inode transition from unlinked to free Bob Peterson
  2016-04-27 15:35 ` [Cluster-devel] [GFS2 PATCH 1/3] Revert "GFS2: Eliminate parameter non_block on gfs2_inode_lookup" Bob Peterson
  2016-04-27 15:35 ` [Cluster-devel] [GFS2 PATCH 2/3] Revert "GFS2: Don't filter out I_FREEING inodes anymore" Bob Peterson
@ 2016-04-27 15:35 ` Bob Peterson
  2016-04-27 17:10   ` [Cluster-devel] [GFS2 PATCH 3/3 v2] " Bob Peterson
  2 siblings, 1 reply; 7+ messages in thread
From: Bob Peterson @ 2016-04-27 15:35 UTC (permalink / raw)
  To: cluster-devel.redhat.com

The delete work function, delete_work_func, often doesn't find
the inode needed to free an inode that's been marked unlinked.
That's because it only tries gfs2_ilookup once, and it's often
not found because of two things: The fact that gfs2_lookup_by_inum
is only called in an else condition, and the fact that the
non-blocking lookup often encounters inodes that are being
I_FREEd by the vfs. This patch allows it to retry the lookup when
under that circumstance, otherwise call the gfs2_lookup_by_inum.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/glock.c | 29 +++++++++++++++++++++--------
 fs/gfs2/inode.c | 40 +++++++++++++++++++++++++++-------------
 2 files changed, 48 insertions(+), 21 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 672de35..06b77cc 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -571,13 +571,15 @@ out_unlock:
 	return;
 }
 
+#define LOOKUP_RETRIES 30
+
 static void delete_work_func(struct work_struct *work)
 {
 	struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete);
 	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
-	struct gfs2_inode *ip;
-	struct inode *inode;
+	struct inode *inode = NULL;
 	u64 no_addr = gl->gl_name.ln_number;
+	int retries;
 
 	/* If someone's using this glock to create a new dinode, the block must
 	   have been freed by another node, then re-used, in which case our
@@ -585,13 +587,24 @@ static void delete_work_func(struct work_struct *work)
 	if (test_bit(GLF_INODE_CREATING, &gl->gl_flags))
 		goto out;
 
-	ip = gl->gl_object;
-	/* Note: Unsafe to dereference ip as we don't hold right refs/locks */
-
-	if (ip)
+	for (retries = 0; retries < LOOKUP_RETRIES; retries++) {
 		inode = gfs2_ilookup(sdp->sd_vfs, no_addr, 1);
-	else
-		inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
+
+		if (inode == ERR_PTR(-EAGAIN))
+			msleep(10);
+		else
+			break;
+	}
+	if (inode == NULL) {
+		for (retries = 0; retries < LOOKUP_RETRIES; retries++) {
+			inode = gfs2_lookup_by_inum(sdp, no_addr, NULL,
+						    GFS2_BLKST_UNLINKED);
+			if (inode == ERR_PTR(-EAGAIN))
+				msleep(10);
+			else
+				break;
+		}
+	}
 	if (inode && !IS_ERR(inode)) {
 		d_prune_aliases(inode);
 		iput(inode);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 48c1418..3cd06c7 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -37,6 +37,11 @@
 #include "super.h"
 #include "glops.h"
 
+enum {
+	LOOKUP_UNLOCKED = 0,
+	LOOKUP_LOCKED = 1,
+};
+
 struct gfs2_skip_data {
 	u64 no_addr;
 	int skipped;
@@ -71,27 +76,34 @@ static int iget_set(struct inode *inode, void *opaque)
 	return 0;
 }
 
-struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
+static struct inode *inode_lookup_common(struct super_block *sb, u64 no_addr,
+					 int non_block, int locked)
 {
 	unsigned long hash = (unsigned long)no_addr;
-	struct gfs2_skip_data data;
+	struct gfs2_skip_data data = {.no_addr = no_addr, .skipped = 0,
+				      .non_block = non_block};
+	struct inode *inode;
 
-	data.no_addr = no_addr;
-	data.skipped = 0;
-	data.non_block = non_block;
-	return ilookup5(sb, hash, iget_test, &data);
+	if (locked == LOOKUP_LOCKED)
+		inode = iget5_locked(sb, hash, iget_test, iget_set, &data);
+	else
+		inode = ilookup5(sb, hash, iget_test, &data);
+
+	if (non_block && data.skipped)
+		return ERR_PTR(-EAGAIN);
+
+	return inode;
+}
+
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
+{
+	return inode_lookup_common(sb, no_addr, non_block, LOOKUP_UNLOCKED);
 }
 
 static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr,
 			       int non_block)
 {
-	struct gfs2_skip_data data;
-	unsigned long hash = (unsigned long)no_addr;
-
-	data.no_addr = no_addr;
-	data.skipped = 0;
-	data.non_block = non_block;
-	return iget5_locked(sb, hash, iget_test, iget_set, &data);
+	return inode_lookup_common(sb, no_addr, non_block, LOOKUP_LOCKED);
 }
 
 /**
@@ -148,6 +160,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
 	inode = gfs2_iget(sb, no_addr, non_block);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
+	if (inode == ERR_PTR(-EAGAIN))
+		return inode;
 
 	ip = GFS2_I(inode);
 	ip->i_no_addr = no_addr;
-- 
2.5.5



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [Cluster-devel] [GFS2 PATCH 3/3 v2] GFS2: Add retry loop to delete_work_func
  2016-04-27 15:35 ` [Cluster-devel] [GFS2 PATCH 3/3] GFS2: Add retry loop to delete_work_func Bob Peterson
@ 2016-04-27 17:10   ` Bob Peterson
  2016-04-28  9:44     ` Steven Whitehouse
  0 siblings, 1 reply; 7+ messages in thread
From: Bob Peterson @ 2016-04-27 17:10 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

I made a slight correction to the third patch I posted previously.
I added the condition " || IS_ERR(inode)" to the delete_work_func,
which needs to be there. This is the corrected version.

The delete work function, delete_work_func, often doesn't find
the inode needed to free an inode that's been marked unlinked.
That's because it only tries gfs2_ilookup once, and it's often
not found because of two things: The fact that gfs2_lookup_by_inum
is only called in an else condition, and the fact that the
non-blocking lookup often encounters inodes that are being
I_FREEd by the vfs. This patch allows it to retry the lookup when
under that circumstance, otherwise call the gfs2_lookup_by_inum.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 672de35..0266b7c 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -571,13 +571,15 @@ out_unlock:
 	return;
 }
 
+#define LOOKUP_RETRIES 30
+
 static void delete_work_func(struct work_struct *work)
 {
 	struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete);
 	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
-	struct gfs2_inode *ip;
-	struct inode *inode;
+	struct inode *inode = NULL;
 	u64 no_addr = gl->gl_name.ln_number;
+	int retries;
 
 	/* If someone's using this glock to create a new dinode, the block must
 	   have been freed by another node, then re-used, in which case our
@@ -585,13 +587,24 @@ static void delete_work_func(struct work_struct *work)
 	if (test_bit(GLF_INODE_CREATING, &gl->gl_flags))
 		goto out;
 
-	ip = gl->gl_object;
-	/* Note: Unsafe to dereference ip as we don't hold right refs/locks */
-
-	if (ip)
+	for (retries = 0; retries < LOOKUP_RETRIES; retries++) {
 		inode = gfs2_ilookup(sdp->sd_vfs, no_addr, 1);
-	else
-		inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
+
+		if (inode == ERR_PTR(-EAGAIN))
+			msleep(10);
+		else
+			break;
+	}
+	if (inode == NULL || IS_ERR(inode)) {
+		for (retries = 0; retries < LOOKUP_RETRIES; retries++) {
+			inode = gfs2_lookup_by_inum(sdp, no_addr, NULL,
+						    GFS2_BLKST_UNLINKED);
+			if (inode == ERR_PTR(-EAGAIN))
+				msleep(10);
+			else
+				break;
+		}
+	}
 	if (inode && !IS_ERR(inode)) {
 		d_prune_aliases(inode);
 		iput(inode);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 48c1418..3cd06c7 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -37,6 +37,11 @@
 #include "super.h"
 #include "glops.h"
 
+enum {
+	LOOKUP_UNLOCKED = 0,
+	LOOKUP_LOCKED = 1,
+};
+
 struct gfs2_skip_data {
 	u64 no_addr;
 	int skipped;
@@ -71,27 +76,34 @@ static int iget_set(struct inode *inode, void *opaque)
 	return 0;
 }
 
-struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
+static struct inode *inode_lookup_common(struct super_block *sb, u64 no_addr,
+					 int non_block, int locked)
 {
 	unsigned long hash = (unsigned long)no_addr;
-	struct gfs2_skip_data data;
+	struct gfs2_skip_data data = {.no_addr = no_addr, .skipped = 0,
+				      .non_block = non_block};
+	struct inode *inode;
 
-	data.no_addr = no_addr;
-	data.skipped = 0;
-	data.non_block = non_block;
-	return ilookup5(sb, hash, iget_test, &data);
+	if (locked == LOOKUP_LOCKED)
+		inode = iget5_locked(sb, hash, iget_test, iget_set, &data);
+	else
+		inode = ilookup5(sb, hash, iget_test, &data);
+
+	if (non_block && data.skipped)
+		return ERR_PTR(-EAGAIN);
+
+	return inode;
+}
+
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
+{
+	return inode_lookup_common(sb, no_addr, non_block, LOOKUP_UNLOCKED);
 }
 
 static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr,
 			       int non_block)
 {
-	struct gfs2_skip_data data;
-	unsigned long hash = (unsigned long)no_addr;
-
-	data.no_addr = no_addr;
-	data.skipped = 0;
-	data.non_block = non_block;
-	return iget5_locked(sb, hash, iget_test, iget_set, &data);
+	return inode_lookup_common(sb, no_addr, non_block, LOOKUP_LOCKED);
 }
 
 /**
@@ -148,6 +160,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
 	inode = gfs2_iget(sb, no_addr, non_block);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
+	if (inode == ERR_PTR(-EAGAIN))
+		return inode;
 
 	ip = GFS2_I(inode);
 	ip->i_no_addr = no_addr;



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [Cluster-devel] [GFS2 PATCH 3/3 v2] GFS2: Add retry loop to delete_work_func
  2016-04-27 17:10   ` [Cluster-devel] [GFS2 PATCH 3/3 v2] " Bob Peterson
@ 2016-04-28  9:44     ` Steven Whitehouse
  2016-04-28 14:16       ` [Cluster-devel] [GFS2 PATCH 3/3 v3] " Bob Peterson
  0 siblings, 1 reply; 7+ messages in thread
From: Steven Whitehouse @ 2016-04-28  9:44 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On 27/04/16 18:10, Bob Peterson wrote:
> Hi,
>
> I made a slight correction to the third patch I posted previously.
> I added the condition " || IS_ERR(inode)" to the delete_work_func,
> which needs to be there. This is the corrected version.
>
> The delete work function, delete_work_func, often doesn't find
> the inode needed to free an inode that's been marked unlinked.
> That's because it only tries gfs2_ilookup once, and it's often
> not found because of two things: The fact that gfs2_lookup_by_inum
> is only called in an else condition, and the fact that the
> non-blocking lookup often encounters inodes that are being
> I_FREEd by the vfs. This patch allows it to retry the lookup when
> under that circumstance, otherwise call the gfs2_lookup_by_inum.
>
> Signed-off-by: Bob Peterson <rpeterso@redhat.com>
> ---
> diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> index 672de35..0266b7c 100644
> --- a/fs/gfs2/glock.c
> +++ b/fs/gfs2/glock.c
> @@ -571,13 +571,15 @@ out_unlock:
>   	return;
>   }
>   
> +#define LOOKUP_RETRIES 30
> +
>   static void delete_work_func(struct work_struct *work)
>   {
>   	struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete);
>   	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
> -	struct gfs2_inode *ip;
> -	struct inode *inode;
> +	struct inode *inode = NULL;
>   	u64 no_addr = gl->gl_name.ln_number;
> +	int retries;
>   
>   	/* If someone's using this glock to create a new dinode, the block must
>   	   have been freed by another node, then re-used, in which case our
> @@ -585,13 +587,24 @@ static void delete_work_func(struct work_struct *work)
>   	if (test_bit(GLF_INODE_CREATING, &gl->gl_flags))
>   		goto out;
>   
> -	ip = gl->gl_object;
> -	/* Note: Unsafe to dereference ip as we don't hold right refs/locks */
> -
> -	if (ip)
> +	for (retries = 0; retries < LOOKUP_RETRIES; retries++) {
>   		inode = gfs2_ilookup(sdp->sd_vfs, no_addr, 1);
> -	else
> -		inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
> +
> +		if (inode == ERR_PTR(-EAGAIN))
> +			msleep(10);
> +		else
> +			break;
> +	}
This generally looks like a much better solution, however I'm always 
worried about arbitrary delays being added to the code. Is this just a 
wait for an inode in I_FREEING to go away, along with a timeout? It 
would be good to at least document why 30 loops here is the right 
amount. In other words will this still be sure to work on different 
machines with different timing characteristics?

I wonder whether it might not be better to reschedule the work function 
for later, rather than loop in the work function itself?

Steve.

> +	if (inode == NULL || IS_ERR(inode)) {
> +		for (retries = 0; retries < LOOKUP_RETRIES; retries++) {
> +			inode = gfs2_lookup_by_inum(sdp, no_addr, NULL,
> +						    GFS2_BLKST_UNLINKED);
> +			if (inode == ERR_PTR(-EAGAIN))
> +				msleep(10);
> +			else
> +				break;
> +		}
> +	}
>   	if (inode && !IS_ERR(inode)) {
>   		d_prune_aliases(inode);
>   		iput(inode);
> diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
> index 48c1418..3cd06c7 100644
> --- a/fs/gfs2/inode.c
> +++ b/fs/gfs2/inode.c
> @@ -37,6 +37,11 @@
>   #include "super.h"
>   #include "glops.h"
>   
> +enum {
> +	LOOKUP_UNLOCKED = 0,
> +	LOOKUP_LOCKED = 1,
> +};
> +
>   struct gfs2_skip_data {
>   	u64 no_addr;
>   	int skipped;
> @@ -71,27 +76,34 @@ static int iget_set(struct inode *inode, void *opaque)
>   	return 0;
>   }
>   
> -struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
> +static struct inode *inode_lookup_common(struct super_block *sb, u64 no_addr,
> +					 int non_block, int locked)
>   {
>   	unsigned long hash = (unsigned long)no_addr;
> -	struct gfs2_skip_data data;
> +	struct gfs2_skip_data data = {.no_addr = no_addr, .skipped = 0,
> +				      .non_block = non_block};
> +	struct inode *inode;
>   
> -	data.no_addr = no_addr;
> -	data.skipped = 0;
> -	data.non_block = non_block;
> -	return ilookup5(sb, hash, iget_test, &data);
> +	if (locked == LOOKUP_LOCKED)
> +		inode = iget5_locked(sb, hash, iget_test, iget_set, &data);
> +	else
> +		inode = ilookup5(sb, hash, iget_test, &data);
> +
> +	if (non_block && data.skipped)
> +		return ERR_PTR(-EAGAIN);
> +
> +	return inode;
> +}
> +
> +struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
> +{
> +	return inode_lookup_common(sb, no_addr, non_block, LOOKUP_UNLOCKED);
>   }
>   
>   static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr,
>   			       int non_block)
>   {
> -	struct gfs2_skip_data data;
> -	unsigned long hash = (unsigned long)no_addr;
> -
> -	data.no_addr = no_addr;
> -	data.skipped = 0;
> -	data.non_block = non_block;
> -	return iget5_locked(sb, hash, iget_test, iget_set, &data);
> +	return inode_lookup_common(sb, no_addr, non_block, LOOKUP_LOCKED);
>   }
>   
>   /**
> @@ -148,6 +160,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
>   	inode = gfs2_iget(sb, no_addr, non_block);
>   	if (!inode)
>   		return ERR_PTR(-ENOMEM);
> +	if (inode == ERR_PTR(-EAGAIN))
> +		return inode;
>   
>   	ip = GFS2_I(inode);
>   	ip->i_no_addr = no_addr;
>



^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Cluster-devel] [GFS2 PATCH 3/3 v3] GFS2: Add retry loop to delete_work_func
  2016-04-28  9:44     ` Steven Whitehouse
@ 2016-04-28 14:16       ` Bob Peterson
  0 siblings, 0 replies; 7+ messages in thread
From: Bob Peterson @ 2016-04-28 14:16 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi Steve,

Comments below.

----- Original Message -----
> This generally looks like a much better solution, however I'm always
> worried about arbitrary delays being added to the code. Is this just a
> wait for an inode in I_FREEING to go away, along with a timeout? It
> would be good to at least document why 30 loops here is the right
> amount. In other words will this still be sure to work on different
> machines with different timing characteristics?
> 
> I wonder whether it might not be better to reschedule the work function
> for later, rather than loop in the work function itself?
> 
> Steve.

1. I chose 30 iterations arbitrarily based on instrumentation on the
   virt cluster I was using. I never saw it go over 29 retries.
   In my experience, virt clusters are faster and have more critical
   timing than bare metal, so I considered this worst case. But it is
   still arbitrary.
2. Your idea of re-queuing the delete work is an excellent one, and
   I've reworked the patch to do this. I'm testing the implementation now
   and it seems to be working well so far. This time my retry value
   is 30 seconds, with a 10ms delay between tries, but this is still
   arbitrary, so I'm open to suggestions.

Here is the replacement patch:

Patch description:

The delete work function, delete_work_func, often doesn't find
the inode needed to free an inode that's been marked unlinked.
That's because it only tries gfs2_ilookup once, and it's often
not found because of two things: The fact that gfs2_lookup_by_inum
is only called in an else condition, and the fact that the
non-blocking lookup often encounters inodes that are being
I_FREEd by the vfs. This patch allows it to retry the lookup when
under that circumstance, otherwise call the gfs2_lookup_by_inum.
If the inode is in I_FREEING, -EAGAIN is returned to the caller,
who then re-queues the delete work for later. After a certain
timeout value, the delete_work_func stops using ilookup and
uses lookup_by_inum instead. If that fails for a certain number
of retries, it gives up.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 672de35..59efb8b 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -571,12 +571,14 @@ out_unlock:
 	return;
 }
 
+#define LOOKUP_TIMEOUT (HZ >> 1)
+
 static void delete_work_func(struct work_struct *work)
 {
-	struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete);
+	struct gfs2_glock *gl = container_of(work, struct gfs2_glock,
+					     gl_delete.work);
 	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
-	struct gfs2_inode *ip;
-	struct inode *inode;
+	struct inode *inode = NULL;
 	u64 no_addr = gl->gl_name.ln_number;
 
 	/* If someone's using this glock to create a new dinode, the block must
@@ -585,13 +587,43 @@ static void delete_work_func(struct work_struct *work)
 	if (test_bit(GLF_INODE_CREATING, &gl->gl_flags))
 		goto out;
 
-	ip = gl->gl_object;
-	/* Note: Unsafe to dereference ip as we don't hold right refs/locks */
-
-	if (ip)
+	if (test_bit(GLF_TRY_ILOOKUP, &gl->gl_flags)) {
 		inode = gfs2_ilookup(sdp->sd_vfs, no_addr, 1);
-	else
-		inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
+
+		if (inode == ERR_PTR(-EAGAIN)) {
+			if (time_before(jiffies, gl->gl_tchange +
+					LOOKUP_TIMEOUT)) {
+				gfs2_glock_hold(gl);
+				if (queue_delayed_work(gfs2_delete_workqueue,
+						       &gl->gl_delete, 10) == 0)
+					gfs2_glock_put(gl);
+				goto out;
+			} else {
+				clear_bit(GLF_TRY_ILOOKUP, &gl->gl_flags);
+				gl->gl_tchange = jiffies;
+			}
+		}
+	}
+
+	if (inode == NULL || IS_ERR(inode)) {
+		/* Note: This function uses the iopen glock only. It relies on
+		   the fact that gfs2_inode_lookup (called by lookup_by_inum)
+		   will return -EAGAIN before it does any manipulation of the
+		   iopen glock that might change gl_tchange. */
+		inode = gfs2_lookup_by_inum(sdp, no_addr, NULL,
+					    GFS2_BLKST_UNLINKED);
+		if (inode == ERR_PTR(-EAGAIN)) {
+			if (time_before(jiffies, gl->gl_tchange +
+					LOOKUP_TIMEOUT)) {
+				gfs2_glock_hold(gl);
+				if (queue_delayed_work(gfs2_delete_workqueue,
+						       &gl->gl_delete, 10) == 0) {
+					gfs2_glock_put(gl);
+				}
+			}
+			goto out;
+		}
+	}
 	if (inode && !IS_ERR(inode)) {
 		d_prune_aliases(inode);
 		iput(inode);
@@ -713,7 +745,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 	gl->gl_object = NULL;
 	gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
 	INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
-	INIT_WORK(&gl->gl_delete, delete_work_func);
+	INIT_DELAYED_WORK(&gl->gl_delete, delete_work_func);
 
 	mapping = gfs2_glock2aspace(gl);
 	if (mapping) {
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 5db59d4..c0a45ed 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -550,7 +550,10 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
 	if (gl->gl_demote_state == LM_ST_UNLOCKED &&
 	    gl->gl_state == LM_ST_SHARED && ip) {
 		gl->gl_lockref.count++;
-		if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
+		gl->gl_tchange = jiffies;
+		set_bit(GLF_TRY_ILOOKUP, &gl->gl_flags);
+		if (queue_delayed_work(gfs2_delete_workqueue, &gl->gl_delete,
+			    0) == 0)
 			gl->gl_lockref.count--;
 	}
 }
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index a6a3389..8c79fe4 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -329,6 +329,7 @@ enum {
 	GLF_OBJECT			= 14, /* Used only for tracing */
 	GLF_BLOCKING			= 15,
 	GLF_INODE_CREATING		= 16, /* Inode creation occurring */
+	GLF_TRY_ILOOKUP			= 17, /* Try gfs2_ilookup for del */
 };
 
 struct gfs2_glock {
@@ -363,7 +364,7 @@ struct gfs2_glock {
 	struct delayed_work gl_work;
 	union {
 		/* For inode and iopen glocks only */
-		struct work_struct gl_delete;
+		struct delayed_work gl_delete;
 		/* For rgrp glocks only */
 		struct {
 			loff_t start;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 48c1418..3cd06c7 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -37,6 +37,11 @@
 #include "super.h"
 #include "glops.h"
 
+enum {
+	LOOKUP_UNLOCKED = 0,
+	LOOKUP_LOCKED = 1,
+};
+
 struct gfs2_skip_data {
 	u64 no_addr;
 	int skipped;
@@ -71,27 +76,34 @@ static int iget_set(struct inode *inode, void *opaque)
 	return 0;
 }
 
-struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
+static struct inode *inode_lookup_common(struct super_block *sb, u64 no_addr,
+					 int non_block, int locked)
 {
 	unsigned long hash = (unsigned long)no_addr;
-	struct gfs2_skip_data data;
+	struct gfs2_skip_data data = {.no_addr = no_addr, .skipped = 0,
+				      .non_block = non_block};
+	struct inode *inode;
 
-	data.no_addr = no_addr;
-	data.skipped = 0;
-	data.non_block = non_block;
-	return ilookup5(sb, hash, iget_test, &data);
+	if (locked == LOOKUP_LOCKED)
+		inode = iget5_locked(sb, hash, iget_test, iget_set, &data);
+	else
+		inode = ilookup5(sb, hash, iget_test, &data);
+
+	if (non_block && data.skipped)
+		return ERR_PTR(-EAGAIN);
+
+	return inode;
+}
+
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
+{
+	return inode_lookup_common(sb, no_addr, non_block, LOOKUP_UNLOCKED);
 }
 
 static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr,
 			       int non_block)
 {
-	struct gfs2_skip_data data;
-	unsigned long hash = (unsigned long)no_addr;
-
-	data.no_addr = no_addr;
-	data.skipped = 0;
-	data.non_block = non_block;
-	return iget5_locked(sb, hash, iget_test, iget_set, &data);
+	return inode_lookup_common(sb, no_addr, non_block, LOOKUP_LOCKED);
 }
 
 /**
@@ -148,6 +160,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
 	inode = gfs2_iget(sb, no_addr, non_block);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
+	if (inode == ERR_PTR(-EAGAIN))
+		return inode;
 
 	ip = GFS2_I(inode);
 	ip->i_no_addr = no_addr;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 07c0265..58c74ca 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1801,8 +1801,10 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
 		 * answer to whether it is NULL or not.
 		 */
 		ip = gl->gl_object;
-
-		if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
+		gl->gl_tchange = jiffies;
+		set_bit(GLF_TRY_ILOOKUP, &gl->gl_flags);
+		if (ip || queue_delayed_work(gfs2_delete_workqueue,
+					     &gl->gl_delete, 0) == 0)
 			gfs2_glock_put(gl);
 		else
 			found++;



^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2016-04-28 14:16 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-27 15:35 [Cluster-devel] [GFS2 PATCH 0/3] Fix inode transition from unlinked to free Bob Peterson
2016-04-27 15:35 ` [Cluster-devel] [GFS2 PATCH 1/3] Revert "GFS2: Eliminate parameter non_block on gfs2_inode_lookup" Bob Peterson
2016-04-27 15:35 ` [Cluster-devel] [GFS2 PATCH 2/3] Revert "GFS2: Don't filter out I_FREEING inodes anymore" Bob Peterson
2016-04-27 15:35 ` [Cluster-devel] [GFS2 PATCH 3/3] GFS2: Add retry loop to delete_work_func Bob Peterson
2016-04-27 17:10   ` [Cluster-devel] [GFS2 PATCH 3/3 v2] " Bob Peterson
2016-04-28  9:44     ` Steven Whitehouse
2016-04-28 14:16       ` [Cluster-devel] [GFS2 PATCH 3/3 v3] " Bob Peterson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.