* [Cluster-devel] gfs2: skip dlm_unlock calls in unmount
@ 2012-11-07 19:14 David Teigland
  2012-11-08 10:26 ` Steven Whitehouse
  0 siblings, 1 reply; 13+ messages in thread
From: David Teigland @ 2012-11-07 19:14 UTC (permalink / raw)
  To: cluster-devel.redhat.com

When unmounting, gfs2 does a full dlm_unlock operation on every
cached lock.  This can create a very large amount of work and can
take a long time to complete.  However, the vast majority of these
dlm unlock operations are unnecessary because after all the unlocks
are done, gfs2 leaves the dlm lockspace, which automatically clears
the locks of the leaving node, without unlocking each one individually.
So, gfs2 can skip explicit dlm unlocks, and use dlm_release_lockspace to
remove the locks implicitly.  The one exception is when the lock's lvb is
being used.  In this case, dlm_unlock is called because it may update the
lvb of the resource.

Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/gfs2/glock.c    |    1 +
 fs/gfs2/incore.h   |    1 +
 fs/gfs2/lock_dlm.c |    6 ++++++
 3 files changed, 8 insertions(+)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index e6c2fd5..f3a5edb 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1528,6 +1528,7 @@ static void dump_glock_func(struct gfs2_glock *gl)
 
 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
 {
+	set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
 	glock_hash_walk(clear_glock, sdp);
 	flush_workqueue(glock_workqueue);
 	wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 3d469d3..67a39cf 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -539,6 +539,7 @@ enum {
 	SDF_DEMOTE		= 5,
 	SDF_NOJOURNALID		= 6,
 	SDF_RORECOVERY		= 7, /* read only recovery */
+	SDF_SKIP_DLM_UNLOCK	= 8,
 };
 
 #define GFS2_FSNAME_LEN		256
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 0fb6539..efd0fb6 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -289,6 +289,12 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
 	gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
 	gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
 	gfs2_update_request_times(gl);
+
+	if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && !gl->gl_lvb[0]) {
+		gfs2_glock_free(gl);
+		return;
+	}
+
 	error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
 			   NULL, gl);
 	if (error) {
-- 
1.7.10.1.362.g242cab3




* [Cluster-devel] gfs2: skip dlm_unlock calls in unmount
  2012-11-07 19:14 [Cluster-devel] gfs2: skip dlm_unlock calls in unmount David Teigland
@ 2012-11-08 10:26 ` Steven Whitehouse
  2012-11-08 15:41   ` David Teigland
  0 siblings, 1 reply; 13+ messages in thread
From: Steven Whitehouse @ 2012-11-08 10:26 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Wed, 2012-11-07 at 14:14 -0500, David Teigland wrote:
> When unmounting, gfs2 does a full dlm_unlock operation on every
> cached lock.  This can create a very large amount of work and can
> take a long time to complete.  However, the vast majority of these
> dlm unlock operations are unnecessary because after all the unlocks
> are done, gfs2 leaves the dlm lockspace, which automatically clears
> the locks of the leaving node, without unlocking each one individually.
> So, gfs2 can skip explicit dlm unlocks, and use dlm_release_lockspace to
> remove the locks implicitly.  The one exception is when the lock's lvb is
> being used.  In this case, dlm_unlock is called because it may update the
> lvb of the resource.
> 

I'm wondering just how much we are likely to gain from this... we
currently use LVBs for both quota and (more recently) rgrp locks too. If
we were to start using the LVBs for inodes and/or iopen locks eventually,
then that would seem to rather reduce the benefits of this.

The other question is what the cost of conversion to NL vs unlock of an
NL lock is. Even with the patch we are still iterating over each lock to
do a conversion to NL in any case where the lock is not already in NL.
So all we are saving is the final NL -> unlocked change.

One thought is whether it would not be better to do a direct "whatever"
-> unlocked change in the first place, rather than splitting the
operation into two parts.

> Signed-off-by: David Teigland <teigland@redhat.com>
> ---
>  fs/gfs2/glock.c    |    1 +
>  fs/gfs2/incore.h   |    1 +
>  fs/gfs2/lock_dlm.c |    6 ++++++
>  3 files changed, 8 insertions(+)
> 
> diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> index e6c2fd5..f3a5edb 100644
> --- a/fs/gfs2/glock.c
> +++ b/fs/gfs2/glock.c
> @@ -1528,6 +1528,7 @@ static void dump_glock_func(struct gfs2_glock *gl)
>  
>  void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
>  {
> +	set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
>  	glock_hash_walk(clear_glock, sdp);
>  	flush_workqueue(glock_workqueue);
>  	wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index 3d469d3..67a39cf 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -539,6 +539,7 @@ enum {
>  	SDF_DEMOTE		= 5,
>  	SDF_NOJOURNALID		= 6,
>  	SDF_RORECOVERY		= 7, /* read only recovery */
> +	SDF_SKIP_DLM_UNLOCK	= 8,
>  };
>  
>  #define GFS2_FSNAME_LEN		256
> diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
> index 0fb6539..efd0fb6 100644
> --- a/fs/gfs2/lock_dlm.c
> +++ b/fs/gfs2/lock_dlm.c
> @@ -289,6 +289,12 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
>  	gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
>  	gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
>  	gfs2_update_request_times(gl);
> +
> +	if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && !gl->gl_lvb[0]) {
There is no guarantee that the lvb is not in use just because its first
byte is 0. So I'd suggest adding a flag to each struct gfs2_glops to say
whether the lvb is used by that type of glock or not, and then using
that instead of this test.
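
Something along these lines is what I have in mind; this is only a rough
sketch, and GLOF_LVB (its name, value and placement) is an illustrative
assumption rather than an existing flag:

	/* hypothetical per-type flag: "glocks of this type use the LVB" */
	#define GLOF_LVB	0x02

	const struct gfs2_glock_operations gfs2_rgrp_glops = {
		/* ... existing callbacks ... */
		.go_type  = LM_TYPE_RGRP,
		.go_flags = GLOF_LVB,	/* rgrp glocks carry LVB data */
	};

	/* gdlm_put_lock() would then test the glock type, not the LVB bytes */
	if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
	    !(gl->gl_ops->go_flags & GLOF_LVB)) {
		gfs2_glock_free(gl);
		return;
	}

That keeps the check cheap and stops it depending on whatever happens to
be sitting in the LVB at the time.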

We assume, though, that it is only ok to write the LVB when the lock is
held in one of the higher lock modes, so it is not going to have changed
since the last lock state change (to NL in this case). So maybe we don't
even need to care about this? Or at least not unless we merge the two
state changes as mentioned above.

> +		gfs2_glock_free(gl);
> +		return;
> +	}
> +
>  	error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
>  			   NULL, gl);
>  	if (error) {

Steve.





* [Cluster-devel] gfs2: skip dlm_unlock calls in unmount
  2012-11-08 10:26 ` Steven Whitehouse
@ 2012-11-08 15:41   ` David Teigland
  2012-11-08 18:48     ` Steven Whitehouse
  0 siblings, 1 reply; 13+ messages in thread
From: David Teigland @ 2012-11-08 15:41 UTC (permalink / raw)
  To: cluster-devel.redhat.com

On Thu, Nov 08, 2012 at 10:26:53AM +0000, Steven Whitehouse wrote:
> Hi,
> 
> On Wed, 2012-11-07 at 14:14 -0500, David Teigland wrote:
> > When unmounting, gfs2 does a full dlm_unlock operation on every
> > cached lock.  This can create a very large amount of work and can
> > take a long time to complete.  However, the vast majority of these
> > dlm unlock operations are unnecessary because after all the unlocks
> > are done, gfs2 leaves the dlm lockspace, which automatically clears
> > the locks of the leaving node, without unlocking each one individually.
> > So, gfs2 can skip explicit dlm unlocks, and use dlm_release_lockspace to
> > remove the locks implicitly.  The one exception is when the lock's lvb is
> > being used.  In this case, dlm_unlock is called because it may update the
> > lvb of the resource.
> > 
> 
> I'm wondering just how much we are likely to gain from this.... we
> currently use LVBs for both quota (and more recently) rgrp too. If we
> were to start using the LVBs for inodes and/or iopen locks eventually
> then that would seem to rather reduce the benefits of this.

Considering what you say below, after you've converted to NL, there's no
more lvb to consider, so the lvb is not an issue in that case. The lvb is
only written if you're unlocking from PW or EX, so there are bound to
always be many unlocks that could be skipped.  I'll adjust the patch to
skip the unlock unless there's an lvb and the mode is PW or EX.

> The other question is what the cost of conversion to NL vs unlock of an
> NL lock is. Even with the patch we are still iterating over each lock to
> do a conversion to NL in any case where the lock is not already in NL.
> So all we are saving is the final NL -> unlocked change.

yeah, I'd forgotten about that.

> One thought is whether it would not be better to do a direct "whatever"
> -> unlocked change in the first place, rather than splitting the
> operation into two parts.

Converting to NL would actually be less expensive than unlock because the
NL convert does not involve a reply message, but unlock does.

So skipping the unlocks is a first step that gives us a big benefit very
simply.  To benefit even further, we could later look into skipping the
"convert to NL" step also, and just abandoning the dlm locks in whatever
mode they're in; but that's probably not as simple a change.




* [Cluster-devel] gfs2: skip dlm_unlock calls in unmount
  2012-11-08 15:41   ` David Teigland
@ 2012-11-08 18:48     ` Steven Whitehouse
  2012-11-08 19:59       ` David Teigland
  0 siblings, 1 reply; 13+ messages in thread
From: Steven Whitehouse @ 2012-11-08 18:48 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Thu, 2012-11-08 at 10:41 -0500, David Teigland wrote:
> On Thu, Nov 08, 2012 at 10:26:53AM +0000, Steven Whitehouse wrote:
> > Hi,
> > 
> > On Wed, 2012-11-07 at 14:14 -0500, David Teigland wrote:
> > > When unmounting, gfs2 does a full dlm_unlock operation on every
> > > cached lock.  This can create a very large amount of work and can
> > > take a long time to complete.  However, the vast majority of these
> > > dlm unlock operations are unnecessary because after all the unlocks
> > > are done, gfs2 leaves the dlm lockspace, which automatically clears
> > > the locks of the leaving node, without unlocking each one individually.
> > > So, gfs2 can skip explicit dlm unlocks, and use dlm_release_lockspace to
> > > remove the locks implicitly.  The one exception is when the lock's lvb is
> > > being used.  In this case, dlm_unlock is called because it may update the
> > > lvb of the resource.
> > > 
> > 
> > I'm wondering just how much we are likely to gain from this.... we
> > currently use LVBs for both quota (and more recently) rgrp too. If we
> > were to start using the LVBs for inodes and/or iopen locks eventually
> > then that would seem to rather reduce the benefits of this.
> 
> Considering what you say below, after you've converted to NL, there's no
> more lvb to consider, so the lvb is not an issue in that case. The lvb is
> only written if you're unlocking from PW or EX, so there's bound to always
> be many unlocks that could be skipped.  I'll adjust the patch to skip
> unlock unless there's an lvb and the mode is PW or EX.
> 
Ok

> > The other question is what the cost of conversion to NL vs unlock of an
> > NL lock is. Even with the patch we are still iterating over each lock to
> > do a conversion to NL in any case where the lock is not already in NL.
> > So all we are saving is the final NL -> unlocked change.
> 
> yeah, I'd forgotten about that.
> 
> > One thought is whether it would not be better to do a direct "whatever"
> > -> unlocked change in the first place, rather than splitting the
> > operation into two parts.
> 
> Converting to NL would actually be less expensive than unlock because the
> NL convert does not involve a reply message, but unlock does.
> 
I'm not entirely sure I follow... at least from the filesystem point of
view (and without your proposed change) both conversions and unlocks
result in a reply. Is this a dlm internal reply perhaps?

> So skipping the unlocks is a first step that gives us a big benefit very
> simply.  To benefit even further, we could later look into skipping the
> "convert to NL" step also, and just abandoning the dlm locks in whatever
> mode they're in; but that's probably not as simple a change.
> 

Yes, that's true... the issue is that the glock state machine treats all
glocks on an individual basis, and the demotion to NL also deals with
any writing back and invalidating of the cache that's required at the
same time. So that makes it tricky to separate from the requests to the
dlm.

That said, I'd like to be able to move towards dealing with batches of
glocks in the future, since that means we can provide a more favourable
ordering of i/o requests. That is not an easy thing to do though.

In addition to the benefit for umount, I'm also wondering whether, if
these unlocks are relatively slow, we should look at what happens during
normal operation, where we do, from time to time, send unlock requests.
Those are mostly (though indirectly) in response to memory pressure. Is
there anything we can do there to speed things up, I wonder?

Steve.






* [Cluster-devel] gfs2: skip dlm_unlock calls in unmount
  2012-11-08 18:48     ` Steven Whitehouse
@ 2012-11-08 19:59       ` David Teigland
  2012-11-08 21:34         ` [Cluster-devel] [PATCH] " David Teigland
  2012-11-09 14:55         ` [Cluster-devel] " Steven Whitehouse
  0 siblings, 2 replies; 13+ messages in thread
From: David Teigland @ 2012-11-08 19:59 UTC (permalink / raw)
  To: cluster-devel.redhat.com

On Thu, Nov 08, 2012 at 06:48:19PM +0000, Steven Whitehouse wrote:
> > Converting to NL would actually be less expensive than unlock because the
> > NL convert does not involve a reply message, but unlock does.
> > 
> I'm not entirely sure I follow... at least from the filesystem point of
> view (and without your proposed change) both conversions and unlocks
> result in a reply. Is this a dlm internal reply perhaps?

Right, I was referring to the internal dlm reply over the network.

> > So skipping the unlocks is a first step that gives us a big benefit very
> > simply.  To benefit even further, we could later look into skipping the
> > "convert to NL" step also, and just abandoning the dlm locks in whatever
> > mode they're in; but that's probably not as simple a change.
> > 
> 
> Yes, thats true... the issue is that the glock state machine treats all
> glocks on an individual basis, and the demotion to NL also deals with
> any writing back and invalidating of the cache thats required at the
> same time. So that makes it tricky to separate from the requests to the
> dlm.
> 
> That said, I'd like to be able to move towards dealing with batches of
> glocks in the future, since that means we can provide a more favourable
> ordering of i/o requests. That is not an easy thing to do though.
> 
> In addition to the benefit for umount, I'm also wondering whether, if
> these unlocks are relatively slow, we should look at what happens during
> normal operation, where we do from time to time, send unlock requests.
> Those are mostly (though indirectly) in response to memory pressure. Is
> there anything we can do there to speed things up I wonder?

The main thing would be to not use a completion callback for dlm_unlock
(either make dlm not send one, or ignore it in gfs2).  This would let you
free the glock memory right away.

But, removing unlock completions can create new problems, because you'd
need to handle new errors from dlm_lock() when it ran up against an
incomplete unlock.  Dealing with that complication may negate any benefit
from ignoring unlock completions.  Unless, of course, you knew you
wouldn't be making any more dlm_lock calls on that lock, e.g. during
unmount.




* [Cluster-devel] [PATCH] gfs2: skip dlm_unlock calls in unmount
  2012-11-08 19:59       ` David Teigland
@ 2012-11-08 21:34         ` David Teigland
  2012-11-09  9:45           ` Steven Whitehouse
  2012-11-13 15:58           ` David Teigland
  2012-11-09 14:55         ` [Cluster-devel] " Steven Whitehouse
  1 sibling, 2 replies; 13+ messages in thread
From: David Teigland @ 2012-11-08 21:34 UTC (permalink / raw)
  To: cluster-devel.redhat.com

When unmounting, gfs2 does a full dlm_unlock operation on every
cached lock.  This can create a very large amount of work and can
take a long time to complete.  However, the vast majority of these
dlm unlock operations are unnecessary because after all the unlocks
are done, gfs2 leaves the dlm lockspace, which automatically clears
the locks of the leaving node, without unlocking each one individually.
So, gfs2 can skip explicit dlm unlocks, and use dlm_release_lockspace to
remove the locks implicitly.  The one exception is when the lock's lvb is
being used.  In this case, dlm_unlock is called because it may update the
lvb of the resource.

Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/gfs2/glock.c    |    1 +
 fs/gfs2/incore.h   |    1 +
 fs/gfs2/lock_dlm.c |    8 ++++++++
 3 files changed, 10 insertions(+)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index e6c2fd5..f3a5edb 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1528,6 +1528,7 @@ static void dump_glock_func(struct gfs2_glock *gl)
 
 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
 {
+	set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
 	glock_hash_walk(clear_glock, sdp);
 	flush_workqueue(glock_workqueue);
 	wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 3d469d3..67a39cf 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -539,6 +539,7 @@ enum {
 	SDF_DEMOTE		= 5,
 	SDF_NOJOURNALID		= 6,
 	SDF_RORECOVERY		= 7, /* read only recovery */
+	SDF_SKIP_DLM_UNLOCK	= 8,
 };
 
 #define GFS2_FSNAME_LEN		256
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 0fb6539..806a639 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -289,6 +289,14 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
 	gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
 	gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
 	gfs2_update_request_times(gl);
+
+	/* don't want to skip dlm_unlock writing the lvb when lock is ex */
+	if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
+	    (!gl->gl_lvb[0] || gl->gl_state != LM_ST_EXCLUSIVE)) {
+		gfs2_glock_free(gl);
+		return;
+	}
+
 	error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
 			   NULL, gl);
 	if (error) {
-- 
1.7.10.1.362.g242cab3




* [Cluster-devel] [PATCH] gfs2: skip dlm_unlock calls in unmount
  2012-11-08 21:34         ` [Cluster-devel] [PATCH] " David Teigland
@ 2012-11-09  9:45           ` Steven Whitehouse
  2012-11-09 15:30             ` David Teigland
  2012-11-13 15:58           ` David Teigland
  1 sibling, 1 reply; 13+ messages in thread
From: Steven Whitehouse @ 2012-11-09  9:45 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Thu, 2012-11-08 at 16:34 -0500, David Teigland wrote:
> When unmounting, gfs2 does a full dlm_unlock operation on every
> cached lock.  This can create a very large amount of work and can
> take a long time to complete.  However, the vast majority of these
> dlm unlock operations are unnecessary because after all the unlocks
> are done, gfs2 leaves the dlm lockspace, which automatically clears
> the locks of the leaving node, without unlocking each one individually.
> So, gfs2 can skip explicit dlm unlocks, and use dlm_release_lockspace to
> remove the locks implicitly.  The one exception is when the lock's lvb is
> being used.  In this case, dlm_unlock is called because it may update the
> lvb of the resource.
> 
> Signed-off-by: David Teigland <teigland@redhat.com>
> ---
>  fs/gfs2/glock.c    |    1 +
>  fs/gfs2/incore.h   |    1 +
>  fs/gfs2/lock_dlm.c |    8 ++++++++
>  3 files changed, 10 insertions(+)
> 
> diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> index e6c2fd5..f3a5edb 100644
> --- a/fs/gfs2/glock.c
> +++ b/fs/gfs2/glock.c
> @@ -1528,6 +1528,7 @@ static void dump_glock_func(struct gfs2_glock *gl)
>  
>  void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
>  {
> +	set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
>  	glock_hash_walk(clear_glock, sdp);
>  	flush_workqueue(glock_workqueue);
>  	wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index 3d469d3..67a39cf 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -539,6 +539,7 @@ enum {
>  	SDF_DEMOTE		= 5,
>  	SDF_NOJOURNALID		= 6,
>  	SDF_RORECOVERY		= 7, /* read only recovery */
> +	SDF_SKIP_DLM_UNLOCK	= 8,
>  };
>  
>  #define GFS2_FSNAME_LEN		256
> diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
> index 0fb6539..806a639 100644
> --- a/fs/gfs2/lock_dlm.c
> +++ b/fs/gfs2/lock_dlm.c
> @@ -289,6 +289,14 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
>  	gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
>  	gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
>  	gfs2_update_request_times(gl);
> +
> +	/* don't want to skip dlm_unlock writing the lvb when lock is ex */
> +	if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
> +	    (!gl->gl_lvb[0] || gl->gl_state != LM_ST_EXCLUSIVE)) {
I'm still not happy with using !gl->gl_lvb[0] to determine whether the
LVB is in use or not. I think we need a better test, or alternatively
just test the lock state, since most locks will be NL anyway before they
get to this point in time,

Steve.

> +		gfs2_glock_free(gl);
> +		return;
> +	}
> +
>  	error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
>  			   NULL, gl);
>  	if (error) {





* [Cluster-devel] gfs2: skip dlm_unlock calls in unmount
  2012-11-08 19:59       ` David Teigland
  2012-11-08 21:34         ` [Cluster-devel] [PATCH] " David Teigland
@ 2012-11-09 14:55         ` Steven Whitehouse
  1 sibling, 0 replies; 13+ messages in thread
From: Steven Whitehouse @ 2012-11-09 14:55 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Thu, 2012-11-08 at 14:59 -0500, David Teigland wrote:
> On Thu, Nov 08, 2012 at 06:48:19PM +0000, Steven Whitehouse wrote:
> > > Converting to NL would actually be less expensive than unlock because the
> > > NL convert does not involve a reply message, but unlock does.
> > > 
> > I'm not entirely sure I follow... at least from the filesystem point of
> > view (and without your proposed change) both conversions and unlocks
> > result in a reply. Is this a dlm internal reply perhaps?
> 
> Right, I was refering to the internal dlm reply over the network.
> 
> > > So skipping the unlocks is a first step that gives us a big benefit very
> > > simply.  To benefit even further, we could later look into skipping the
> > > "convert to NL" step also, and just abandoning the dlm locks in whatever
> > > mode they're in; but that's probably not as simple a change.
> > > 
> > 
> > Yes, thats true... the issue is that the glock state machine treats all
> > glocks on an individual basis, and the demotion to NL also deals with
> > any writing back and invalidating of the cache thats required at the
> > same time. So that makes it tricky to separate from the requests to the
> > dlm.
> > 
> > That said, I'd like to be able to move towards dealing with batches of
> > glocks in the future, since that means we can provide a more favourable
> > ordering of i/o requests. That is not an easy thing to do though.
> > 
> > In addition to the benefit for umount, I'm also wondering whether, if
> > these unlocks are relatively slow, we should look at what happens during
> > normal operation, where we do from time to time, send unlock requests.
> > Those are mostly (though indirectly) in response to memory pressure. Is
> > there anything we can do there to speed things up I wonder?
> 
> The main thing would be to not use a completion callback for dlm_unlock
> (either make dlm not send one, or ignore it in gfs2).  This would let you
> free the glock memory right away.
> 
> But, removing unlock completions can create new problems, because you'd
> need to handle new errors from dlm_lock() when it ran up against an
> incomplete unlock.  Dealing with that complication may negate any benefit
> from ignoring unlock completions.  Unless, of course, you knew you
> wouldn't be making any more dlm_lock calls on that lock, e.g. during
> unmount.
> 

Yes, I'm all for keeping things simple if we can. I thought it might be
an interesting exercise to do a few measurements in this area. So I
wrote a script to process the output of the tracepoints in order to get
some figures to see where things stand at the moment.

To match the timings of the DLM lock times with state changes, I used
three tracepoints: gfs2_glock_lock_time, gfs2_glock_state_change and
gfs2_glock_put. I found that I had to up the value
of /sys/kernel/debug/tracing/buffer_size_kb quite a lot in order to
avoid having dropped tracepoints during the test.

My test was really simple: mount a single-node gfs2 using dlm, run
postmark with 100000 transactions and files, and unmount.

I processed the results through the script and got the graphs which I've
attached. The two graphs are actually very similar to each other. The
first is a histogram (with log sized buckets, and points plotted at the
mid-point of each bucket) and the second is basically the same, but with
each bucket's count multiplied by the mid-point value of that bucket, so
that it represents the total time in seconds taken by the dlm requests
falling into that bucket.

I've included every state change which occurred during the test.
Although I can separate out final lock puts (i.e. NL to UN), the stats
can't separate out initial lock requests from conversions, so NL:EX
(i.e. NL to EX state change) may also include UN:EX, for example.

So at the end of all this the two state changes which stick out as
taking longer than the others are NL:UN and EX:NL. Unfortunately it
doesn't tell us why that is... it may just be that these conversions
were requested when the DLM was especially busy, or when something else
was going on which slowed down the granting.

I suspect, though, that most (all?) of the NL:UN conversions were during
umount, since this workload doesn't generate any noticeable memory
pressure, so it is unlikely that cached glocks are being ejected during
the run. That may well also be the case for the EX locks too, since they
will have been cached from the point that the objects in question
requested an EX lock, as there are no other nodes requesting demotions.

It would be interesting to see what the results look like from other
(more realistic) workloads,

Steve.

-------------- next part --------------
A non-text attachment was scrubbed...
Name: lock-frequency.pdf
Type: application/pdf
Size: 41111 bytes
Desc: not available
URL: <http://listman.redhat.com/archives/cluster-devel/attachments/20121109/75ca1f20/attachment.pdf>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: lock-times.pdf
Type: application/pdf
Size: 41496 bytes
Desc: not available
URL: <http://listman.redhat.com/archives/cluster-devel/attachments/20121109/75ca1f20/attachment-0001.pdf>


* [Cluster-devel] [PATCH] gfs2: skip dlm_unlock calls in unmount
  2012-11-09  9:45           ` Steven Whitehouse
@ 2012-11-09 15:30             ` David Teigland
  2012-11-12 10:44               ` Steven Whitehouse
  0 siblings, 1 reply; 13+ messages in thread
From: David Teigland @ 2012-11-09 15:30 UTC (permalink / raw)
  To: cluster-devel.redhat.com

On Fri, Nov 09, 2012 at 09:45:17AM +0000, Steven Whitehouse wrote:
> > +	if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
> > +	    (!gl->gl_lvb[0] || gl->gl_state != LM_ST_EXCLUSIVE)) {
> I'm still not happy with using !gl->gl_lvb[0] to determine whether the
> LVB is in use or not. I think we need a better test, or alternatively
> just test the lock state, since most locks will be NL anyway before they
> get to this point in time,

Yeah, a glock flag indicating the lvb is used would be best; I'll just
test the lock state.

This actually brings up another improvement you could make.  Right now
gfs2 enables the lvb on all locks, even though it only uses it on a small
minority.  Limiting the lvb to locks that need it would:

- save 64 bytes of memory for every local lock
  (32 in gfs2_glock, 32 in dlm_rsb)

- save 96 bytes of memory for every remote lock
  (32 in gfs2_glock, 32 in local dlm_rsb, 32 in remote dlm_lkb)

- save 32 bytes of network message size in many dlm messages

- save a lot of memcpying of zeros

- save some recovery time
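
To make that a bit more concrete, here's a rough sketch of what the gfs2
side of opt-in lvbs might look like.  The helper names and the GLOF_LVB
test (reusing the per-type flag idea from earlier in the thread) are
illustrative assumptions, not the actual implementation:

	/* allocate an lvb only for glock types that actually use one */
	static int gdlm_lvb_alloc(struct gfs2_glock *gl)
	{
		if (!(gl->gl_ops->go_flags & GLOF_LVB))
			return 0;	/* this glock type never uses an lvb */

		gl->gl_lksb.sb_lvbptr = kzalloc(GDLM_LVB_SIZE, GFP_NOFS);
		if (!gl->gl_lksb.sb_lvbptr)
			return -ENOMEM;
		return 0;
	}

	/* only ask the dlm to carry a value block when one is attached */
	static u32 gdlm_lkf_base(const struct gfs2_glock *gl)
	{
		return gl->gl_lksb.sb_lvbptr ? DLM_LKF_VALBLK : 0;
	}

With something like that, locks whose type never uses an lvb wouldn't pay
for the 32 bytes in the glock, the dlm structures, or the network
messages.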





* [Cluster-devel] [PATCH] gfs2: skip dlm_unlock calls in unmount
  2012-11-09 15:30             ` David Teigland
@ 2012-11-12 10:44               ` Steven Whitehouse
  2012-11-12 15:46                 ` David Teigland
  0 siblings, 1 reply; 13+ messages in thread
From: Steven Whitehouse @ 2012-11-12 10:44 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Fri, 2012-11-09 at 10:30 -0500, David Teigland wrote:
> On Fri, Nov 09, 2012 at 09:45:17AM +0000, Steven Whitehouse wrote:
> > > +	if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
> > > +	    (!gl->gl_lvb[0] || gl->gl_state != LM_ST_EXCLUSIVE)) {
> > I'm still not happy with using !gl->gl_lvb[0] to determine whether the
> > LVB is in use or not. I think we need a better test, or alternatively
> > just test the lock state, since most locks will be NL anyway before they
> > get to this point in time,
> 
> Yeah, a glock flag indicating the lvb is used would be best, I'll just
> test the lock state.
> 
> This actually brings up another improvement you could make.  Right now
> gfs2 enables the lvb on all locks, even though it only uses it on a small
> minority.  Limiting the lvb to locks that need it would:
> 
> - save 64 bytes of memory for every local lock
>   (32 in gfs2_glock, 32 in dlm_rsb)
> 
> - save 96 bytes of memory for every remote lock
>   (32 in gfs2_glock, 32 in local dlm_rsb, 32 in remote dlm_lkb)
> 
> - save 32 bytes of network message size in many dlm messages
> 
> - save a lot of memcpying of zeros
> 
> - save some recovery time
> 
> 

Yes, although we did consider what the best thing to do was back at the
start of GFS2 development wrt LVBs. The actual overhead didn't seem too
much really. The previous implementation had the LVB hanging off the
glock from a pointer, so on 64 bit, that pointer alone was 8 bytes. We
also saved another 4 bytes (plus a further 4 for alignment) by not
requiring the atomic counter. So it seemed not unreasonable to just
inline the LVB into the glock.

Another reason for having it on all glocks was that if we did want to
start making use of it on different glock types in the future, we could
do so without having to worry about whether its value would be preserved
or not. Also, it removed some tests from the fast path of acquiring and
dropping locks.

Trying to reduce the size of the lock requests makes sense if that is
becoming a limiting factor in performance (is it? I'm not sure) so maybe
we should revisit this.

Steve.





* [Cluster-devel] [PATCH] gfs2: skip dlm_unlock calls in unmount
  2012-11-12 10:44               ` Steven Whitehouse
@ 2012-11-12 15:46                 ` David Teigland
  0 siblings, 0 replies; 13+ messages in thread
From: David Teigland @ 2012-11-12 15:46 UTC (permalink / raw)
  To: cluster-devel.redhat.com

On Mon, Nov 12, 2012 at 10:44:36AM +0000, Steven Whitehouse wrote:
> > - save 64 bytes of memory for every local lock
> >   (32 in gfs2_glock, 32 in dlm_rsb)
> > 
> > - save 96 bytes of memory for every remote lock
> >   (32 in gfs2_glock, 32 in local dlm_rsb, 32 in remote dlm_lkb)
> > 
> > - save 32 bytes of network message size in many dlm messages
> > 
> > - save a lot of memcpying of zeros
> > 
> > - save some recovery time
> > 
> > 
> 
> Yes, although we did consider what the best thing to do was back at the
> start of GFS2 development wrt LVBs. The actual overhead didn't seem too
> much really. The previous implementation had the LVB hanging off the
> glock from a pointer, so on 64 bit, that pointer alone was 8 bytes. We
> also saved another 4 bytes (plus a further 4 for alignment) by not
> requiring the atomic counter. So it seemed not unreasonable to just
> inline the LVB into the glock.

I still think you'll save around 64-80 bytes per lock on average.

> Another for having it on all glocks was that if we did want to start
> making use of it on different glock types in the future, we could do so
> without having to worry about whether its value would be preserved or
> not. Also, it removed some tests from the fast path of acquiring and
> dropping locks.

Keep in mind that the dlm does not inline them, so using an lvb when it's
not needed creates extra work in the dlm.  This extra work probably
exceeds the extra work gfs2 would have to do with non-inlined lvbs.

> Trying to reduce the size of the lock requests makes sense if that is
> becoming a limiting factor in performance (is it? I'm not sure) so maybe
> we should revisit this.

I think it's worth a try; it's probably no less helpful than a lot of the
other optimizations we've added, which do add up together.





* [Cluster-devel] [PATCH] gfs2: skip dlm_unlock calls in unmount
  2012-11-08 21:34         ` [Cluster-devel] [PATCH] " David Teigland
  2012-11-09  9:45           ` Steven Whitehouse
@ 2012-11-13 15:58           ` David Teigland
  2012-11-14 10:38             ` Steven Whitehouse
  1 sibling, 1 reply; 13+ messages in thread
From: David Teigland @ 2012-11-13 15:58 UTC (permalink / raw)
  To: cluster-devel.redhat.com

When unmounting, gfs2 does a full dlm_unlock operation on every
cached lock.  This can create a very large amount of work and can
take a long time to complete.  However, the vast majority of these
dlm unlock operations are unnecessary because after all the unlocks
are done, gfs2 leaves the dlm lockspace, which automatically clears
the locks of the leaving node, without unlocking each one individually.
So, gfs2 can skip explicit dlm unlocks, and use dlm_release_lockspace to
remove the locks implicitly.  The one exception is when the lock's lvb is
being used.  In this case, dlm_unlock is called because it may update the
lvb of the resource.

Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/gfs2/glock.c    |    1 +
 fs/gfs2/incore.h   |    1 +
 fs/gfs2/lock_dlm.c |    8 ++++++++
 3 files changed, 10 insertions(+)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index e6c2fd5..f3a5edb 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1528,6 +1528,7 @@ static void dump_glock_func(struct gfs2_glock *gl)
 
 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
 {
+	set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
 	glock_hash_walk(clear_glock, sdp);
 	flush_workqueue(glock_workqueue);
 	wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 3d469d3..67a39cf 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -539,6 +539,7 @@ enum {
 	SDF_DEMOTE		= 5,
 	SDF_NOJOURNALID		= 6,
 	SDF_RORECOVERY		= 7, /* read only recovery */
+	SDF_SKIP_DLM_UNLOCK	= 8,
 };
 
 #define GFS2_FSNAME_LEN		256
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 0fb6539..f6504d3 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -289,6 +289,14 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
 	gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
 	gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
 	gfs2_update_request_times(gl);
+
+	/* don't want to skip dlm_unlock writing the lvb when lock is ex */
+	if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
+	    gl->gl_state != LM_ST_EXCLUSIVE) {
+		gfs2_glock_free(gl);
+		return;
+	}
+
 	error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
 			   NULL, gl);
 	if (error) {
-- 
1.7.10.1.362.g242cab3




* [Cluster-devel] [PATCH] gfs2: skip dlm_unlock calls in unmount
  2012-11-13 15:58           ` David Teigland
@ 2012-11-14 10:38             ` Steven Whitehouse
  0 siblings, 0 replies; 13+ messages in thread
From: Steven Whitehouse @ 2012-11-14 10:38 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

Now pushed to the -nmw git tree. Thanks,

Steve.

On Tue, 2012-11-13 at 10:58 -0500, David Teigland wrote:
> When unmounting, gfs2 does a full dlm_unlock operation on every
> cached lock.  This can create a very large amount of work and can
> take a long time to complete.  However, the vast majority of these
> dlm unlock operations are unnecessary because after all the unlocks
> are done, gfs2 leaves the dlm lockspace, which automatically clears
> the locks of the leaving node, without unlocking each one individually.
> So, gfs2 can skip explicit dlm unlocks, and use dlm_release_lockspace to
> remove the locks implicitly.  The one exception is when the lock's lvb is
> being used.  In this case, dlm_unlock is called because it may update the
> lvb of the resource.
> 
> Signed-off-by: David Teigland <teigland@redhat.com>
> ---
>  fs/gfs2/glock.c    |    1 +
>  fs/gfs2/incore.h   |    1 +
>  fs/gfs2/lock_dlm.c |    8 ++++++++
>  3 files changed, 10 insertions(+)
> 
> diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> index e6c2fd5..f3a5edb 100644
> --- a/fs/gfs2/glock.c
> +++ b/fs/gfs2/glock.c
> @@ -1528,6 +1528,7 @@ static void dump_glock_func(struct gfs2_glock *gl)
>  
>  void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
>  {
> +	set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
>  	glock_hash_walk(clear_glock, sdp);
>  	flush_workqueue(glock_workqueue);
>  	wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index 3d469d3..67a39cf 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -539,6 +539,7 @@ enum {
>  	SDF_DEMOTE		= 5,
>  	SDF_NOJOURNALID		= 6,
>  	SDF_RORECOVERY		= 7, /* read only recovery */
> +	SDF_SKIP_DLM_UNLOCK	= 8,
>  };
>  
>  #define GFS2_FSNAME_LEN		256
> diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
> index 0fb6539..f6504d3 100644
> --- a/fs/gfs2/lock_dlm.c
> +++ b/fs/gfs2/lock_dlm.c
> @@ -289,6 +289,14 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
>  	gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
>  	gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
>  	gfs2_update_request_times(gl);
> +
> +	/* don't want to skip dlm_unlock writing the lvb when lock is ex */
> +	if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
> +	    gl->gl_state != LM_ST_EXCLUSIVE) {
> +		gfs2_glock_free(gl);
> +		return;
> +	}
> +
>  	error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
>  			   NULL, gl);
>  	if (error) {





