All of lore.kernel.org
 help / color / mirror / Atom feed
* [Cluster-devel] [PATCH] GFS2: don't overrun reserved revokes
@ 2013-07-26 22:09 Benjamin Marzinski
  2013-07-29 10:49 ` Steven Whitehouse
  0 siblings, 1 reply; 4+ messages in thread
From: Benjamin Marzinski @ 2013-07-26 22:09 UTC (permalink / raw)
  To: cluster-devel.redhat.com

When run during fsync, a gfs2_log_flush could happen between the
time when gfs2_ail_flush checked the number of blocks to revoke,
and when it actually started the transaction to do those revokes.
This occasionally caused it to need more revokes than it reserved,
causing gfs2 to crash.

Instead of just reserving enough revokes to handle the blocks that
currently need them, this patch makes gfs2_ail_flush reserve the
maximum number of revokes it can, without increasing the total number
of reserved log blocks. This patch also passes the number of reserved
revokes to __gfs2_ail_flush() so that it doesn't go over its limit
and cause a crash like we're seeing. Non-fsync calls to __gfs2_ail_flush
will still cause a BUG() if necessary revokes are skipped.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
 fs/gfs2/glops.c |   18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

Index: gfs2-3.0-nmw-130722/fs/gfs2/glops.c
===================================================================
--- gfs2-3.0-nmw-130722.orig/fs/gfs2/glops.c
+++ gfs2-3.0-nmw-130722/fs/gfs2/glops.c
@@ -47,7 +47,8 @@ static void gfs2_ail_error(struct gfs2_g
  * None of the buffers should be dirty, locked, or pinned.
  */
 
-static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
+static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync,
+			     unsigned int nr_revokes)
 {
 	struct gfs2_sbd *sdp = gl->gl_sbd;
 	struct list_head *head = &gl->gl_ail_list;
@@ -57,7 +58,9 @@ static void __gfs2_ail_flush(struct gfs2
 
 	gfs2_log_lock(sdp);
 	spin_lock(&sdp->sd_ail_lock);
-	list_for_each_entry_safe(bd, tmp, head, bd_ail_gl_list) {
+	list_for_each_entry_safe_reverse(bd, tmp, head, bd_ail_gl_list) {
+		if (nr_revokes == 0)
+			break;
 		bh = bd->bd_bh;
 		if (bh->b_state & b_state) {
 			if (fsync)
@@ -65,6 +68,7 @@ static void __gfs2_ail_flush(struct gfs2
 			gfs2_ail_error(gl, bh);
 		}
 		gfs2_trans_add_revoke(sdp, bd);
+		nr_revokes--;
 	}
 	GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count));
 	spin_unlock(&sdp->sd_ail_lock);
@@ -91,7 +95,7 @@ static void gfs2_ail_empty_gl(struct gfs
 	WARN_ON_ONCE(current->journal_info);
 	current->journal_info = &tr;
 
-	__gfs2_ail_flush(gl, 0);
+	__gfs2_ail_flush(gl, 0, tr.tr_revokes);
 
 	gfs2_trans_end(sdp);
 	gfs2_log_flush(sdp, NULL);
@@ -101,15 +105,19 @@ void gfs2_ail_flush(struct gfs2_glock *g
 {
 	struct gfs2_sbd *sdp = gl->gl_sbd;
 	unsigned int revokes = atomic_read(&gl->gl_ail_count);
+	unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
 	int ret;
 
 	if (!revokes)
 		return;
 
-	ret = gfs2_trans_begin(sdp, 0, revokes);
+	while (revokes > max_revokes)
+		max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
+
+	ret = gfs2_trans_begin(sdp, 0, max_revokes);
 	if (ret)
 		return;
-	__gfs2_ail_flush(gl, fsync);
+	__gfs2_ail_flush(gl, fsync, max_revokes);
 	gfs2_trans_end(sdp);
 	gfs2_log_flush(sdp, NULL);
 }



^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Cluster-devel] [PATCH] GFS2: don't overrun reserved revokes
  2013-07-26 22:09 [Cluster-devel] [PATCH] GFS2: don't overrun reserved revokes Benjamin Marzinski
@ 2013-07-29 10:49 ` Steven Whitehouse
  2013-08-05 21:40   ` Benjamin Marzinski
  0 siblings, 1 reply; 4+ messages in thread
From: Steven Whitehouse @ 2013-07-29 10:49 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Fri, 2013-07-26 at 17:09 -0500, Benjamin Marzinski wrote:
> When run during fsync, a gfs2_log_flush could happen between the
> time when gfs2_ail_flush checked the number of blocks to revoke,
> and when it actually started the transaction to do those revokes.
> This occasionally caused it to need more revokes than it reserved,
> causing gfs2 to crash.
> 
> Instead of just reserving enough revokes to handle the blocks that
> currently need them, this patch makes gfs2_ail_flush reserve the
> maximum number of revokes it can, without increasing the total number
> of reserved log blocks. This patch also passes the number of reserved
> revokes to __gfs2_ail_flush() so that it doesn't go over its limit
> and cause a crash like we're seeing. Non-fsync calls to __gfs2_ail_flush
> will still cause a BUG() if necessary revokes are skipped.
> 
> Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
> ---
>  fs/gfs2/glops.c |   18 +++++++++++++-----
>  1 file changed, 13 insertions(+), 5 deletions(-)
> 
> Index: gfs2-3.0-nmw-130722/fs/gfs2/glops.c
> ===================================================================
> --- gfs2-3.0-nmw-130722.orig/fs/gfs2/glops.c
> +++ gfs2-3.0-nmw-130722/fs/gfs2/glops.c
> @@ -47,7 +47,8 @@ static void gfs2_ail_error(struct gfs2_g
>   * None of the buffers should be dirty, locked, or pinned.
>   */
>  
> -static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
> +static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync,
> +			     unsigned int nr_revokes)
>  {
>  	struct gfs2_sbd *sdp = gl->gl_sbd;
>  	struct list_head *head = &gl->gl_ail_list;
> @@ -57,7 +58,9 @@ static void __gfs2_ail_flush(struct gfs2
>  
>  	gfs2_log_lock(sdp);
>  	spin_lock(&sdp->sd_ail_lock);
> -	list_for_each_entry_safe(bd, tmp, head, bd_ail_gl_list) {
> +	list_for_each_entry_safe_reverse(bd, tmp, head, bd_ail_gl_list) {
> +		if (nr_revokes == 0)
> +			break;
>  		bh = bd->bd_bh;
>  		if (bh->b_state & b_state) {
>  			if (fsync)
> @@ -65,6 +68,7 @@ static void __gfs2_ail_flush(struct gfs2
>  			gfs2_ail_error(gl, bh);
>  		}
>  		gfs2_trans_add_revoke(sdp, bd);
> +		nr_revokes--;
>  	}
>  	GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count));
>  	spin_unlock(&sdp->sd_ail_lock);
> @@ -91,7 +95,7 @@ static void gfs2_ail_empty_gl(struct gfs
>  	WARN_ON_ONCE(current->journal_info);
>  	current->journal_info = &tr;
>  
> -	__gfs2_ail_flush(gl, 0);
> +	__gfs2_ail_flush(gl, 0, tr.tr_revokes);
>  
>  	gfs2_trans_end(sdp);
>  	gfs2_log_flush(sdp, NULL);
> @@ -101,15 +105,19 @@ void gfs2_ail_flush(struct gfs2_glock *g
>  {
>  	struct gfs2_sbd *sdp = gl->gl_sbd;
>  	unsigned int revokes = atomic_read(&gl->gl_ail_count);
> +	unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
>  	int ret;
>  
>  	if (!revokes)
>  		return;
>  
> -	ret = gfs2_trans_begin(sdp, 0, revokes);
> +	while (revokes > max_revokes)
> +		max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
> +
> +	ret = gfs2_trans_begin(sdp, 0, max_revokes);
>  	if (ret)
>  		return;
> -	__gfs2_ail_flush(gl, fsync);
> +	__gfs2_ail_flush(gl, fsync, max_revokes);
>  	gfs2_trans_end(sdp);
>  	gfs2_log_flush(sdp, NULL);
>  }
> 

Will this always write back all the revokes that are queued up? We
really must do that here as fsync requires them to be on-disk. I wonder
whether one solution is just to add a loop to use multiple transactions
in the case that we cannot fit them all into a single transaction?

Steve.




^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Cluster-devel] [PATCH] GFS2: don't overrun reserved revokes
  2013-07-29 10:49 ` Steven Whitehouse
@ 2013-08-05 21:40   ` Benjamin Marzinski
  2013-08-06  8:55     ` Steven Whitehouse
  0 siblings, 1 reply; 4+ messages in thread
From: Benjamin Marzinski @ 2013-08-05 21:40 UTC (permalink / raw)
  To: cluster-devel.redhat.com

On Mon, Jul 29, 2013 at 11:49:10AM +0100, Steven Whitehouse wrote:
> Hi,
> 
> On Fri, 2013-07-26 at 17:09 -0500, Benjamin Marzinski wrote:
> > When run during fsync, a gfs2_log_flush could happen between the
> > time when gfs2_ail_flush checked the number of blocks to revoke,
> > and when it actually started the transaction to do those revokes.
> > This occasionally caused it to need more revokes than it reserved,
> > causing gfs2 to crash.
> > 
> > Instead of just reserving enough revokes to handle the blocks that
> > currently need them, this patch makes gfs2_ail_flush reserve the
> > maximum number of revokes it can, without increasing the total number
> > of reserved log blocks. This patch also passes the number of reserved
> > revokes to __gfs2_ail_flush() so that it doesn't go over its limit
> > and cause a crash like we're seeing. Non-fsync calls to __gfs2_ail_flush
> > will still cause a BUG() if necessary revokes are skipped.
> > 
> > Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
> > ---
> >  fs/gfs2/glops.c |   18 +++++++++++++-----
> >  1 file changed, 13 insertions(+), 5 deletions(-)
> > 
> > Index: gfs2-3.0-nmw-130722/fs/gfs2/glops.c
> > ===================================================================
> > --- gfs2-3.0-nmw-130722.orig/fs/gfs2/glops.c
> > +++ gfs2-3.0-nmw-130722/fs/gfs2/glops.c
> > @@ -47,7 +47,8 @@ static void gfs2_ail_error(struct gfs2_g
> >   * None of the buffers should be dirty, locked, or pinned.
> >   */
> >  
> > -static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
> > +static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync,
> > +			     unsigned int nr_revokes)
> >  {
> >  	struct gfs2_sbd *sdp = gl->gl_sbd;
> >  	struct list_head *head = &gl->gl_ail_list;
> > @@ -57,7 +58,9 @@ static void __gfs2_ail_flush(struct gfs2
> >  
> >  	gfs2_log_lock(sdp);
> >  	spin_lock(&sdp->sd_ail_lock);
> > -	list_for_each_entry_safe(bd, tmp, head, bd_ail_gl_list) {
> > +	list_for_each_entry_safe_reverse(bd, tmp, head, bd_ail_gl_list) {
> > +		if (nr_revokes == 0)
> > +			break;
> >  		bh = bd->bd_bh;
> >  		if (bh->b_state & b_state) {
> >  			if (fsync)
> > @@ -65,6 +68,7 @@ static void __gfs2_ail_flush(struct gfs2
> >  			gfs2_ail_error(gl, bh);
> >  		}
> >  		gfs2_trans_add_revoke(sdp, bd);
> > +		nr_revokes--;
> >  	}
> >  	GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count));
> >  	spin_unlock(&sdp->sd_ail_lock);
> > @@ -91,7 +95,7 @@ static void gfs2_ail_empty_gl(struct gfs
> >  	WARN_ON_ONCE(current->journal_info);
> >  	current->journal_info = &tr;
> >  
> > -	__gfs2_ail_flush(gl, 0);
> > +	__gfs2_ail_flush(gl, 0, tr.tr_revokes);
> >  
> >  	gfs2_trans_end(sdp);
> >  	gfs2_log_flush(sdp, NULL);
> > @@ -101,15 +105,19 @@ void gfs2_ail_flush(struct gfs2_glock *g
> >  {
> >  	struct gfs2_sbd *sdp = gl->gl_sbd;
> >  	unsigned int revokes = atomic_read(&gl->gl_ail_count);
> > +	unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
> >  	int ret;
> >  
> >  	if (!revokes)
> >  		return;
> >  
> > -	ret = gfs2_trans_begin(sdp, 0, revokes);
> > +	while (revokes > max_revokes)
> > +		max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
> > +
> > +	ret = gfs2_trans_begin(sdp, 0, max_revokes);
> >  	if (ret)
> >  		return;
> > -	__gfs2_ail_flush(gl, fsync);
> > +	__gfs2_ail_flush(gl, fsync, max_revokes);
> >  	gfs2_trans_end(sdp);
> >  	gfs2_log_flush(sdp, NULL);
> >  }
> > 
> 
> Will this always write back all the revokes that are queued up? We
> really must do that here as fsync requires them to be on-disk. I wonder
> whether one solution is just to add a loop to use multiple transactions
> in the case that we cannot fit them all into a single transaction?

Assuming the old method was correct (aside from the crash), we are
certainly doing better here.  Where before we only wrote out the revokes
that were possible when we started the gfs2_ail_flush, in the new code,
we write out all those revokes, plus we allocate more space to handle
any others that become possible before we start the transaction.

I can certainly do a loop, to make sure that there is no possibility of
not being able to do all the revokes, but it didn't seem necessary to
me. Here's my reasoning.  The only place where this could happen is
gfs2_fsync. The others will still trigger a GLOCK_BUG_ON if we don't
write out everything. In gfs2_fsync, sync_inode_metadata() should do a
gfs2_log_flush and make sure that the inode is written to disk. After
that filemap_write_and_wait should write all the journaled data to disk.
At this point, every bit of data that was dirtied before gfs2_fsync was
called should be stable on disk.  That means that when gfs2_ail_flush
is called next, it should get the correct number of revokes to cover all
of these items, because they have all been flushed from the incore log
and written to their in-place locations.  Other processes could also be
writing to this file, and make it need more revokes by the time its
transaction starts, but as long as gfs2_fsync is doing its job of
getting the data stable on disk before calling gfs2_ail_flush (and it
appears to be), then gfs2_ail_flush doesn't need to worry if it can't
cover these new revokes because they happened after (or at least in
parallel with) the fsync.

Let me know if you think my reasoning is wrong.

-Ben

> 
> Steve.
> 



^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Cluster-devel] [PATCH] GFS2: don't overrun reserved revokes
  2013-08-05 21:40   ` Benjamin Marzinski
@ 2013-08-06  8:55     ` Steven Whitehouse
  0 siblings, 0 replies; 4+ messages in thread
From: Steven Whitehouse @ 2013-08-06  8:55 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Mon, 2013-08-05 at 16:40 -0500, Benjamin Marzinski wrote:
> On Mon, Jul 29, 2013 at 11:49:10AM +0100, Steven Whitehouse wrote:
> > Hi,
> > 
> > On Fri, 2013-07-26 at 17:09 -0500, Benjamin Marzinski wrote:
> > > When run during fsync, a gfs2_log_flush could happen between the
> > > time when gfs2_ail_flush checked the number of blocks to revoke,
> > > and when it actually started the transaction to do those revokes.
> > > This occasionally caused it to need more revokes than it reserved,
> > > causing gfs2 to crash.
> > > 
> > > Instead of just reserving enough revokes to handle the blocks that
> > > currently need them, this patch makes gfs2_ail_flush reserve the
> > > maximum number of revokes it can, without increasing the total number
> > > of reserved log blocks. This patch also passes the number of reserved
> > > revokes to __gfs2_ail_flush() so that it doesn't go over its limit
> > > and cause a crash like we're seeing. Non-fsync calls to __gfs2_ail_flush
> > > will still cause a BUG() if necessary revokes are skipped.
> > > 
> > > Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
> > > ---
> > >  fs/gfs2/glops.c |   18 +++++++++++++-----
> > >  1 file changed, 13 insertions(+), 5 deletions(-)
> > > 
> > > Index: gfs2-3.0-nmw-130722/fs/gfs2/glops.c
> > > ===================================================================
> > > --- gfs2-3.0-nmw-130722.orig/fs/gfs2/glops.c
> > > +++ gfs2-3.0-nmw-130722/fs/gfs2/glops.c
> > > @@ -47,7 +47,8 @@ static void gfs2_ail_error(struct gfs2_g
> > >   * None of the buffers should be dirty, locked, or pinned.
> > >   */
> > >  
> > > -static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
> > > +static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync,
> > > +			     unsigned int nr_revokes)
> > >  {
> > >  	struct gfs2_sbd *sdp = gl->gl_sbd;
> > >  	struct list_head *head = &gl->gl_ail_list;
> > > @@ -57,7 +58,9 @@ static void __gfs2_ail_flush(struct gfs2
> > >  
> > >  	gfs2_log_lock(sdp);
> > >  	spin_lock(&sdp->sd_ail_lock);
> > > -	list_for_each_entry_safe(bd, tmp, head, bd_ail_gl_list) {
> > > +	list_for_each_entry_safe_reverse(bd, tmp, head, bd_ail_gl_list) {
> > > +		if (nr_revokes == 0)
> > > +			break;
> > >  		bh = bd->bd_bh;
> > >  		if (bh->b_state & b_state) {
> > >  			if (fsync)
> > > @@ -65,6 +68,7 @@ static void __gfs2_ail_flush(struct gfs2
> > >  			gfs2_ail_error(gl, bh);
> > >  		}
> > >  		gfs2_trans_add_revoke(sdp, bd);
> > > +		nr_revokes--;
> > >  	}
> > >  	GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count));
> > >  	spin_unlock(&sdp->sd_ail_lock);
> > > @@ -91,7 +95,7 @@ static void gfs2_ail_empty_gl(struct gfs
> > >  	WARN_ON_ONCE(current->journal_info);
> > >  	current->journal_info = &tr;
> > >  
> > > -	__gfs2_ail_flush(gl, 0);
> > > +	__gfs2_ail_flush(gl, 0, tr.tr_revokes);
> > >  
> > >  	gfs2_trans_end(sdp);
> > >  	gfs2_log_flush(sdp, NULL);
> > > @@ -101,15 +105,19 @@ void gfs2_ail_flush(struct gfs2_glock *g
> > >  {
> > >  	struct gfs2_sbd *sdp = gl->gl_sbd;
> > >  	unsigned int revokes = atomic_read(&gl->gl_ail_count);
> > > +	unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
> > >  	int ret;
> > >  
> > >  	if (!revokes)
> > >  		return;
> > >  
> > > -	ret = gfs2_trans_begin(sdp, 0, revokes);
> > > +	while (revokes > max_revokes)
> > > +		max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
> > > +
> > > +	ret = gfs2_trans_begin(sdp, 0, max_revokes);
> > >  	if (ret)
> > >  		return;
> > > -	__gfs2_ail_flush(gl, fsync);
> > > +	__gfs2_ail_flush(gl, fsync, max_revokes);
> > >  	gfs2_trans_end(sdp);
> > >  	gfs2_log_flush(sdp, NULL);
> > >  }
> > > 
> > 
> > Will this always write back all the revokes that are queued up? We
> > really must do that here as fsync requires them to be on-disk. I wonder
> > whether one solution is just to add a loop to use multiple transactions
> > in the case that we cannot fit them all into a single transaction?
> 
> Assuming the old method was correct (aside from the crash), we are
> certainly doing better here.  Where before we only wrote out the revokes
> that were possible when we started the gfs2_ail_flush, in the new code,
> we write out all those revokes, plus we allocate more space to handle
> any others that become possible before we start the transaction.
> 
> I can certainly do a loop, to make sure that there is no possibility of
> not being able to do all the revokes, but it didn't seem necessary to
> me. Here's my reasoning.  The only place where this could happen is
> gfs2_fsync. The others will still trigger a GLOCK_BUG_ON if we don't
> write out everything. In gfs2_fsync, sync_inode_metadata() should do a
> gfs2_log_flush and make sure that the inode is written to disk. After
> that filemap_write_and_wait should write all the journaled data to disk.
> At this point, every bit of data that was dirtied before gfs2_fsync was
> called should be stable on disk.  That means that when gfs2_ail_flush
> is called next, it should get the correct number of revokes to cover all
> of these items, because they have all been flushed from the incore log
> and written to their in-place locations.  Other processes could also be
> writing to this file, and make it need more revokes by the time its
> transaction starts, but as long as gfs2_fsync is doing its job of
> getting the data stable on disk before calling gfs2_ail_flush (and it
> appears to be), then gfs2_ail_flush doesn't need to worry if it can't
> cover these new revokes because they happened after (or at least in
> parallel with) the fsync.
> 
> Let me know if you think my reasoning is wrong.
> 
> -Ben
> 
> > 
> > Steve.
> > 

Hmm, ok. I see what you mean now. So in other words everything that was
written before the log flush will be revoked, and only things written
after that may be unstable. That sounds ok to me,

Steve.




^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2013-08-06  8:55 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-26 22:09 [Cluster-devel] [PATCH] GFS2: don't overrun reserved revokes Benjamin Marzinski
2013-07-29 10:49 ` Steven Whitehouse
2013-08-05 21:40   ` Benjamin Marzinski
2013-08-06  8:55     ` Steven Whitehouse

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.