All of lore.kernel.org
 help / color / mirror / Atom feed
From: Steven Whitehouse <swhiteho@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [GFS2 PATCH 3/3 v2] GFS2: Add retry loop to delete_work_func
Date: Thu, 28 Apr 2016 10:44:41 +0100	[thread overview]
Message-ID: <5721DB89.7040906@redhat.com> (raw)
In-Reply-To: <853479097.58344937.1461777017183.JavaMail.zimbra@redhat.com>

Hi,

On 27/04/16 18:10, Bob Peterson wrote:
> Hi,
>
> I made a slight correction to the third patch I posted previously.
> I added the condition " || IS_ERR(inode)" to the delete_work_func,
> which needs to be there. This is the corrected version.
>
> The delete work function, delete_work_func, often doesn't find
> the inode needed to free an inode that's been marked unlinked.
> That's because it only tries gfs2_ilookup once, and it's often
> not found because of two things: The fact that gfs2_lookup_by_inum
> is only called in an else condition, and the fact that the
> non-blocking lookup often encounters inodes that are being
> I_FREEd by the vfs. This patch allows it to retry the lookup when
> under that circumstance, otherwise call the gfs2_lookup_by_inum.
>
> Signed-off-by: Bob Peterson <rpeterso@redhat.com>
> ---
> diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> index 672de35..0266b7c 100644
> --- a/fs/gfs2/glock.c
> +++ b/fs/gfs2/glock.c
> @@ -571,13 +571,15 @@ out_unlock:
>   	return;
>   }
>   
> +#define LOOKUP_RETRIES 30
> +
>   static void delete_work_func(struct work_struct *work)
>   {
>   	struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete);
>   	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
> -	struct gfs2_inode *ip;
> -	struct inode *inode;
> +	struct inode *inode = NULL;
>   	u64 no_addr = gl->gl_name.ln_number;
> +	int retries;
>   
>   	/* If someone's using this glock to create a new dinode, the block must
>   	   have been freed by another node, then re-used, in which case our
> @@ -585,13 +587,24 @@ static void delete_work_func(struct work_struct *work)
>   	if (test_bit(GLF_INODE_CREATING, &gl->gl_flags))
>   		goto out;
>   
> -	ip = gl->gl_object;
> -	/* Note: Unsafe to dereference ip as we don't hold right refs/locks */
> -
> -	if (ip)
> +	for (retries = 0; retries < LOOKUP_RETRIES; retries++) {
>   		inode = gfs2_ilookup(sdp->sd_vfs, no_addr, 1);
> -	else
> -		inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
> +
> +		if (inode == ERR_PTR(-EAGAIN))
> +			msleep(10);
> +		else
> +			break;
> +	}
This generally looks like a much better solution, however I'm always 
worried about arbitrary delays being added to the code. Is this just a 
wait for an inode in I_FREEING to go away, along with a timeout? It 
would be good to at least document why 30 loops here is the right 
amount. In other words will this still be sure to work on different 
machines with different timing characteristics?

I wonder whether it might not be better to reschedule the work function 
for later, rather than loop in the work function itself?

Steve.

> +	if (inode == NULL || IS_ERR(inode)) {
> +		for (retries = 0; retries < LOOKUP_RETRIES; retries++) {
> +			inode = gfs2_lookup_by_inum(sdp, no_addr, NULL,
> +						    GFS2_BLKST_UNLINKED);
> +			if (inode == ERR_PTR(-EAGAIN))
> +				msleep(10);
> +			else
> +				break;
> +		}
> +	}
>   	if (inode && !IS_ERR(inode)) {
>   		d_prune_aliases(inode);
>   		iput(inode);
> diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
> index 48c1418..3cd06c7 100644
> --- a/fs/gfs2/inode.c
> +++ b/fs/gfs2/inode.c
> @@ -37,6 +37,11 @@
>   #include "super.h"
>   #include "glops.h"
>   
> +enum {
> +	LOOKUP_UNLOCKED = 0,
> +	LOOKUP_LOCKED = 1,
> +};
> +
>   struct gfs2_skip_data {
>   	u64 no_addr;
>   	int skipped;
> @@ -71,27 +76,34 @@ static int iget_set(struct inode *inode, void *opaque)
>   	return 0;
>   }
>   
> -struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
> +static struct inode *inode_lookup_common(struct super_block *sb, u64 no_addr,
> +					 int non_block, int locked)
>   {
>   	unsigned long hash = (unsigned long)no_addr;
> -	struct gfs2_skip_data data;
> +	struct gfs2_skip_data data = {.no_addr = no_addr, .skipped = 0,
> +				      .non_block = non_block};
> +	struct inode *inode;
>   
> -	data.no_addr = no_addr;
> -	data.skipped = 0;
> -	data.non_block = non_block;
> -	return ilookup5(sb, hash, iget_test, &data);
> +	if (locked == LOOKUP_LOCKED)
> +		inode = iget5_locked(sb, hash, iget_test, iget_set, &data);
> +	else
> +		inode = ilookup5(sb, hash, iget_test, &data);
> +
> +	if (non_block && data.skipped)
> +		return ERR_PTR(-EAGAIN);
> +
> +	return inode;
> +}
> +
> +struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
> +{
> +	return inode_lookup_common(sb, no_addr, non_block, LOOKUP_UNLOCKED);
>   }
>   
>   static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr,
>   			       int non_block)
>   {
> -	struct gfs2_skip_data data;
> -	unsigned long hash = (unsigned long)no_addr;
> -
> -	data.no_addr = no_addr;
> -	data.skipped = 0;
> -	data.non_block = non_block;
> -	return iget5_locked(sb, hash, iget_test, iget_set, &data);
> +	return inode_lookup_common(sb, no_addr, non_block, LOOKUP_LOCKED);
>   }
>   
>   /**
> @@ -148,6 +160,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
>   	inode = gfs2_iget(sb, no_addr, non_block);
>   	if (!inode)
>   		return ERR_PTR(-ENOMEM);
> +	if (inode == ERR_PTR(-EAGAIN))
> +		return inode;
>   
>   	ip = GFS2_I(inode);
>   	ip->i_no_addr = no_addr;
>



  reply	other threads:[~2016-04-28  9:44 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-27 15:35 [Cluster-devel] [GFS2 PATCH 0/3] Fix inode transition from unlinked to free Bob Peterson
2016-04-27 15:35 ` [Cluster-devel] [GFS2 PATCH 1/3] Revert "GFS2: Eliminate parameter non_block on gfs2_inode_lookup" Bob Peterson
2016-04-27 15:35 ` [Cluster-devel] [GFS2 PATCH 2/3] Revert "GFS2: Don't filter out I_FREEING inodes anymore" Bob Peterson
2016-04-27 15:35 ` [Cluster-devel] [GFS2 PATCH 3/3] GFS2: Add retry loop to delete_work_func Bob Peterson
2016-04-27 17:10   ` [Cluster-devel] [GFS2 PATCH 3/3 v2] " Bob Peterson
2016-04-28  9:44     ` Steven Whitehouse [this message]
2016-04-28 14:16       ` [Cluster-devel] [GFS2 PATCH 3/3 v3] " Bob Peterson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5721DB89.7040906@redhat.com \
    --to=swhiteho@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.