All of lore.kernel.org
 help / color / mirror / Atom feed
* [Ocfs2-devel] [PATCH] Wakeup down-convert thread just after clearing OCFS2_LOCK_UPCONVERT_FINISHING -v3
@ 2011-09-23  0:49 Wengang Wang
  2011-09-23  0:53 ` Sunil Mushran
  0 siblings, 1 reply; 9+ messages in thread
From: Wengang Wang @ 2011-09-23  0:49 UTC (permalink / raw)
  To: ocfs2-devel

When the lockres state UPCONVERT_FINISHING is cleared,
we should wake up the downconvert thread incase that lockres
is in the blocked queue. Currently we are not doing so and thus
are at the mercy of another event waking up the dc thread.

Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
---
 fs/ocfs2/dlmglue.c |    9 ++++++++-
 1 files changed, 8 insertions(+), 1 deletions(-)

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 7642d7c..524bd88 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1195,6 +1195,7 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
 						int convert)
 {
 	unsigned long flags;
+	int kick_dc;
 
 	spin_lock_irqsave(&lockres->l_lock, flags);
 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
@@ -1203,9 +1204,12 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
 		lockres->l_action = OCFS2_AST_INVALID;
 	else
 		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
+	kick_dc = (lockres->l_flags & OCFS2_LOCK_QUEUED);
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 
 	wake_up(&lockres->l_event);
+	if (kick_dc)
+		ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
 }
 
 /* Note: If we detect another process working on the lock (i.e.,
@@ -1373,6 +1377,7 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
 	unsigned long flags;
 	unsigned int gen;
 	int noqueue_attempted = 0;
+	int kick_dc;
 
 	ocfs2_init_mask_waiter(&mw);
 
@@ -1500,8 +1505,10 @@ update_holders:
 	ret = 0;
 unlock:
 	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
-
+	kick_dc = (lockres->l_flags & OCFS2_LOCK_QUEUED);
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
+	if (kick_dc)
+		ocfs2_wake_downconvert_thread(osb);
 out:
 	/*
 	 * This is helping work around a lock inversion between the page lock
-- 
1.7.5.2

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH] Wakeup down-convert thread just after clearing OCFS2_LOCK_UPCONVERT_FINISHING -v3
  2011-09-23  0:49 [Ocfs2-devel] [PATCH] Wakeup down-convert thread just after clearing OCFS2_LOCK_UPCONVERT_FINISHING -v3 Wengang Wang
@ 2011-09-23  0:53 ` Sunil Mushran
  0 siblings, 0 replies; 9+ messages in thread
From: Sunil Mushran @ 2011-09-23  0:53 UTC (permalink / raw)
  To: ocfs2-devel

Acked-by: Sunil Mushran <sunil.mushran@oracle.com>

On 09/22/2011 05:49 PM, Wengang Wang wrote:
> When the lockres state UPCONVERT_FINISHING is cleared,
> we should wake up the downconvert thread incase that lockres
> is in the blocked queue. Currently we are not doing so and thus
> are at the mercy of another event waking up the dc thread.
>
> Signed-off-by: Wengang Wang<wen.gang.wang@oracle.com>
> ---
>   fs/ocfs2/dlmglue.c |    9 ++++++++-
>   1 files changed, 8 insertions(+), 1 deletions(-)
>
> diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
> index 7642d7c..524bd88 100644
> --- a/fs/ocfs2/dlmglue.c
> +++ b/fs/ocfs2/dlmglue.c
> @@ -1195,6 +1195,7 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
>   						int convert)
>   {
>   	unsigned long flags;
> +	int kick_dc;
>
>   	spin_lock_irqsave(&lockres->l_lock, flags);
>   	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
> @@ -1203,9 +1204,12 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
>   		lockres->l_action = OCFS2_AST_INVALID;
>   	else
>   		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
> +	kick_dc = (lockres->l_flags&  OCFS2_LOCK_QUEUED);
>   	spin_unlock_irqrestore(&lockres->l_lock, flags);
>
>   	wake_up(&lockres->l_event);
> +	if (kick_dc)
> +		ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
>   }
>
>   /* Note: If we detect another process working on the lock (i.e.,
> @@ -1373,6 +1377,7 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
>   	unsigned long flags;
>   	unsigned int gen;
>   	int noqueue_attempted = 0;
> +	int kick_dc;
>
>   	ocfs2_init_mask_waiter(&mw);
>
> @@ -1500,8 +1505,10 @@ update_holders:
>   	ret = 0;
>   unlock:
>   	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
> -
> +	kick_dc = (lockres->l_flags&  OCFS2_LOCK_QUEUED);
>   	spin_unlock_irqrestore(&lockres->l_lock, flags);
> +	if (kick_dc)
> +		ocfs2_wake_downconvert_thread(osb);
>   out:
>   	/*
>   	 * This is helping work around a lock inversion between the page lock

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH] Wakeup down-convert thread just after clearing OCFS2_LOCK_UPCONVERT_FINISHING -v3
  2011-09-15  3:27 Wengang Wang
  2011-09-15 17:15 ` Sunil Mushran
@ 2011-11-17  9:33 ` Joel Becker
  1 sibling, 0 replies; 9+ messages in thread
From: Joel Becker @ 2011-11-17  9:33 UTC (permalink / raw)
  To: ocfs2-devel

On Thu, Sep 15, 2011 at 11:27:21AM +0800, Wengang Wang wrote:
> When the lockres state UPCONVERT_FINISHING is cleared,
> we should wake up the downconvert thread incase that lockres
> is in the blocked queue. Currently we are not doing so and thus
> are at the mercy of another event waking up the dc thread.
> 
> Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
> ---
>  fs/ocfs2/dlmglue.c |    9 ++++++++-
>  1 files changed, 8 insertions(+), 1 deletions(-)
> 
> diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
> index 7642d7c..524bd88 100644
> --- a/fs/ocfs2/dlmglue.c
> +++ b/fs/ocfs2/dlmglue.c
> @@ -1195,6 +1195,7 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
>  						int convert)
>  {
>  	unsigned long flags;
> +	int kick_dc;
>  
>  	spin_lock_irqsave(&lockres->l_lock, flags);
>  	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
> @@ -1203,9 +1204,12 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
>  		lockres->l_action = OCFS2_AST_INVALID;
>  	else
>  		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
> +	kick_dc = (lockres->l_flags & OCFS2_LOCK_QUEUED);

	Point 1, you should be checking BLOCKED, which is how we
determine a lock that wants to downconvert.

>  	spin_unlock_irqrestore(&lockres->l_lock, flags);
>  
>  	wake_up(&lockres->l_event);
> +	if (kick_dc)
> +		ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));

	You're waking up the downconvert thread, but you don't know if
it needs it.  You think it *might* need it.  That's not OK in this code.

>  }
>  
>  /* Note: If we detect another process working on the lock (i.e.,
> @@ -1373,6 +1377,7 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
>  	unsigned long flags;
>  	unsigned int gen;
>  	int noqueue_attempted = 0;
> +	int kick_dc;
>  
>  	ocfs2_init_mask_waiter(&mw);
>  
> @@ -1500,8 +1505,10 @@ update_holders:
>  	ret = 0;
>  unlock:
>  	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
> -
> +	kick_dc = (lockres->l_flags & OCFS2_LOCK_QUEUED);
>  	spin_unlock_irqrestore(&lockres->l_lock, flags);
> +	if (kick_dc)
> +		ocfs2_wake_downconvert_thread(osb);

	Same here.  Frankly, if we were upconverting, the holder is
going to eventually unlock, which will wake the downconvert thread.
That's how the machine works.  See the comment where we set
UPCONVERT_FINISHING.  We want the upconverter to have enough quantum to
get work done.
	You're going to break that, because this thread might want to
get to EX, and it runs before the older thread updates its hold on the
PR.

	Thread 1		Thread 2		Remote
        Asks for PR
	Master grants PR
							Asks for EX,
							setting BLOCKING
							on our node.
	AST sets UPCONVERT
				Asks for EX, sees
				blocking on Remote
				[kicks dc thread]
				clears UPCONVERT
-->
        _cluster_lock wakes,
        clears UPCONVERT,
        increments holders.

The part in [] is what you are changing.  The --> is where the dc thread
can steal the lock before Thread 1 increments the holder count.  We're
right back at the livelock that UPCONVERT_FINISHING was supposed to
prevent.
	I note that Thread 2 can still clear UPCONVERT and be caught by
the dc thread.  But because we're not waking the dc thread, it's a very
unlikely race.  If you wake the dc thread, it's a guaranteed race.
Also, in the current code, while it might race this time around, it
doesn't the next time.  No livelock.
	This is why Sunil said this is a very sensitive state machine.
	Can I get the info on the problem you actually encountered?  I
find it hard to believe the upconverter didn't eventually release his
hold.  That should wake the dc thread.

Joel

-- 

"But all my words come back to me
 In shades of mediocrity.
 Like emptiness in harmony
 I need someone to comfort me."

			http://www.jlbec.org/
			jlbec at evilplan.org

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH] Wakeup down-convert thread just after clearing OCFS2_LOCK_UPCONVERT_FINISHING -v3
  2011-09-16  0:53       ` Sunil Mushran
@ 2011-09-16  0:57         ` Wengang Wang
  0 siblings, 0 replies; 9+ messages in thread
From: Wengang Wang @ 2011-09-16  0:57 UTC (permalink / raw)
  To: ocfs2-devel

On 11-09-15 17:53, Sunil Mushran wrote:
> On 09/15/2011 05:42 PM, Wengang Wang wrote:
> >Hi Sunil,
> >
> >On 11-09-15 10:21, Sunil Mushran wrote:
> >>http://people.redhat.com/~teigland/make_panic
> >>
> >>This test has been useful in exposing dlmglue issues.
> >>
> >>On 09/15/2011 10:15 AM, Sunil Mushran wrote:
> >>>I am fine with the kick in recover from dlm error. Not so in cluster lock.
> >>>We have to be very very sure before meddling with that function. It is
> >>>a state machine with many hidden gotchas.
> >>>
> >>>So is this patch for a bug encountered or just code audit. Also, what kind
> >>>testing has been done.
> >Yes, a bug(hang) encountered. After testing and analysis, I found the root cause.
> >And it's NOT the dlm error case. The hang was fixed by the original patch.
> >I will email you about the orabug and testcase(pretty easy to reproduce).
> 
> 
> Which original patch?

The first drop of this patch.

thanks,
wengang.
> 
> 
> >I will try http://people.redhat.com/~teigland/make_panic to see if it
> >can find something.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH] Wakeup down-convert thread just after clearing OCFS2_LOCK_UPCONVERT_FINISHING -v3
  2011-09-16  0:42     ` Wengang Wang
@ 2011-09-16  0:53       ` Sunil Mushran
  2011-09-16  0:57         ` Wengang Wang
  0 siblings, 1 reply; 9+ messages in thread
From: Sunil Mushran @ 2011-09-16  0:53 UTC (permalink / raw)
  To: ocfs2-devel

On 09/15/2011 05:42 PM, Wengang Wang wrote:
> Hi Sunil,
>
> On 11-09-15 10:21, Sunil Mushran wrote:
>> http://people.redhat.com/~teigland/make_panic
>>
>> This test has been useful in exposing dlmglue issues.
>>
>> On 09/15/2011 10:15 AM, Sunil Mushran wrote:
>>> I am fine with the kick in recover from dlm error. Not so in cluster lock.
>>> We have to be very very sure before meddling with that function. It is
>>> a state machine with many hidden gotchas.
>>>
>>> So is this patch for a bug encountered or just code audit. Also, what kind
>>> testing has been done.
> Yes, a bug(hang) encountered. After testing and analysis, I found the root cause.
> And it's NOT the dlm error case. The hang was fixed by the original patch.
> I will email you about the orabug and testcase(pretty easy to reproduce).


Which original patch?


> I will try http://people.redhat.com/~teigland/make_panic to see if it
> can find something.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH] Wakeup down-convert thread just after clearing OCFS2_LOCK_UPCONVERT_FINISHING -v3
  2011-09-15 17:21   ` Sunil Mushran
@ 2011-09-16  0:42     ` Wengang Wang
  2011-09-16  0:53       ` Sunil Mushran
  0 siblings, 1 reply; 9+ messages in thread
From: Wengang Wang @ 2011-09-16  0:42 UTC (permalink / raw)
  To: ocfs2-devel

Hi Sunil,

On 11-09-15 10:21, Sunil Mushran wrote:
> http://people.redhat.com/~teigland/make_panic
> 
> This test has been useful in exposing dlmglue issues.
> 
> On 09/15/2011 10:15 AM, Sunil Mushran wrote:
> >I am fine with the kick in recover from dlm error. Not so in cluster lock.
> >We have to be very very sure before meddling with that function. It is
> >a state machine with many hidden gotchas.
> >
> >So is this patch for a bug encountered or just code audit. Also, what kind
> >testing has been done.

Yes, a bug(hang) encountered. After testing and analysis, I found the root cause.
And it's NOT the dlm error case. The hang was fixed by the original patch.
I will email you about the orabug and testcase(pretty easy to reproduce).

I will try http://people.redhat.com/~teigland/make_panic to see if it
can find something.

thanks,
wengang.

> >
> >On 09/14/2011 08:27 PM, Wengang Wang wrote:
> >>When the lockres state UPCONVERT_FINISHING is cleared,
> >>we should wake up the downconvert thread incase that lockres
> >>is in the blocked queue. Currently we are not doing so and thus
> >>are at the mercy of another event waking up the dc thread.
> >>
> >>Signed-off-by: Wengang Wang<wen.gang.wang@oracle.com>
> >>---
> >>   fs/ocfs2/dlmglue.c |    9 ++++++++-
> >>   1 files changed, 8 insertions(+), 1 deletions(-)
> >>
> >>diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
> >>index 7642d7c..524bd88 100644
> >>--- a/fs/ocfs2/dlmglue.c
> >>+++ b/fs/ocfs2/dlmglue.c
> >>@@ -1195,6 +1195,7 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
> >>   						int convert)
> >>   {
> >>   	unsigned long flags;
> >>+	int kick_dc;
> >>
> >>   	spin_lock_irqsave(&lockres->l_lock, flags);
> >>   	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
> >>@@ -1203,9 +1204,12 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
> >>   		lockres->l_action = OCFS2_AST_INVALID;
> >>   	else
> >>   		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
> >>+	kick_dc = (lockres->l_flags&   OCFS2_LOCK_QUEUED);
> >>   	spin_unlock_irqrestore(&lockres->l_lock, flags);
> >>
> >>   	wake_up(&lockres->l_event);
> >>+	if (kick_dc)
> >>+		ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
> >>   }
> >>
> >>   /* Note: If we detect another process working on the lock (i.e.,
> >>@@ -1373,6 +1377,7 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
> >>   	unsigned long flags;
> >>   	unsigned int gen;
> >>   	int noqueue_attempted = 0;
> >>+	int kick_dc;
> >>
> >>   	ocfs2_init_mask_waiter(&mw);
> >>
> >>@@ -1500,8 +1505,10 @@ update_holders:
> >>   	ret = 0;
> >>   unlock:
> >>   	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
> >>-
> >>+	kick_dc = (lockres->l_flags&   OCFS2_LOCK_QUEUED);
> >>   	spin_unlock_irqrestore(&lockres->l_lock, flags);
> >>+	if (kick_dc)
> >>+		ocfs2_wake_downconvert_thread(osb);
> >>   out:
> >>   	/*
> >>   	 * This is helping work around a lock inversion between the page lock
> >
> >_______________________________________________
> >Ocfs2-devel mailing list
> >Ocfs2-devel at oss.oracle.com
> >http://oss.oracle.com/mailman/listinfo/ocfs2-devel
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH] Wakeup down-convert thread just after clearing OCFS2_LOCK_UPCONVERT_FINISHING -v3
  2011-09-15 17:15 ` Sunil Mushran
@ 2011-09-15 17:21   ` Sunil Mushran
  2011-09-16  0:42     ` Wengang Wang
  0 siblings, 1 reply; 9+ messages in thread
From: Sunil Mushran @ 2011-09-15 17:21 UTC (permalink / raw)
  To: ocfs2-devel

http://people.redhat.com/~teigland/make_panic

This test has been useful in exposing dlmglue issues.

On 09/15/2011 10:15 AM, Sunil Mushran wrote:
> I am fine with the kick in recover from dlm error. Not so in cluster lock.
> We have to be very very sure before meddling with that function. It is
> a state machine with many hidden gotchas.
>
> So is this patch for a bug encountered or just code audit. Also, what kind
> testing has been done.
>
> On 09/14/2011 08:27 PM, Wengang Wang wrote:
>> When the lockres state UPCONVERT_FINISHING is cleared,
>> we should wake up the downconvert thread incase that lockres
>> is in the blocked queue. Currently we are not doing so and thus
>> are at the mercy of another event waking up the dc thread.
>>
>> Signed-off-by: Wengang Wang<wen.gang.wang@oracle.com>
>> ---
>>    fs/ocfs2/dlmglue.c |    9 ++++++++-
>>    1 files changed, 8 insertions(+), 1 deletions(-)
>>
>> diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
>> index 7642d7c..524bd88 100644
>> --- a/fs/ocfs2/dlmglue.c
>> +++ b/fs/ocfs2/dlmglue.c
>> @@ -1195,6 +1195,7 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
>>    						int convert)
>>    {
>>    	unsigned long flags;
>> +	int kick_dc;
>>
>>    	spin_lock_irqsave(&lockres->l_lock, flags);
>>    	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
>> @@ -1203,9 +1204,12 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
>>    		lockres->l_action = OCFS2_AST_INVALID;
>>    	else
>>    		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
>> +	kick_dc = (lockres->l_flags&   OCFS2_LOCK_QUEUED);
>>    	spin_unlock_irqrestore(&lockres->l_lock, flags);
>>
>>    	wake_up(&lockres->l_event);
>> +	if (kick_dc)
>> +		ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
>>    }
>>
>>    /* Note: If we detect another process working on the lock (i.e.,
>> @@ -1373,6 +1377,7 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
>>    	unsigned long flags;
>>    	unsigned int gen;
>>    	int noqueue_attempted = 0;
>> +	int kick_dc;
>>
>>    	ocfs2_init_mask_waiter(&mw);
>>
>> @@ -1500,8 +1505,10 @@ update_holders:
>>    	ret = 0;
>>    unlock:
>>    	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
>> -
>> +	kick_dc = (lockres->l_flags&   OCFS2_LOCK_QUEUED);
>>    	spin_unlock_irqrestore(&lockres->l_lock, flags);
>> +	if (kick_dc)
>> +		ocfs2_wake_downconvert_thread(osb);
>>    out:
>>    	/*
>>    	 * This is helping work around a lock inversion between the page lock
>
> _______________________________________________
> Ocfs2-devel mailing list
> Ocfs2-devel at oss.oracle.com
> http://oss.oracle.com/mailman/listinfo/ocfs2-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH] Wakeup down-convert thread just after clearing OCFS2_LOCK_UPCONVERT_FINISHING -v3
  2011-09-15  3:27 Wengang Wang
@ 2011-09-15 17:15 ` Sunil Mushran
  2011-09-15 17:21   ` Sunil Mushran
  2011-11-17  9:33 ` Joel Becker
  1 sibling, 1 reply; 9+ messages in thread
From: Sunil Mushran @ 2011-09-15 17:15 UTC (permalink / raw)
  To: ocfs2-devel

I am fine with the kick in recover from dlm error. Not so in cluster lock.
We have to be very very sure before meddling with that function. It is
a state machine with many hidden gotchas.

So is this patch for a bug encountered or just code audit. Also, what kind
testing has been done.

On 09/14/2011 08:27 PM, Wengang Wang wrote:
> When the lockres state UPCONVERT_FINISHING is cleared,
> we should wake up the downconvert thread incase that lockres
> is in the blocked queue. Currently we are not doing so and thus
> are at the mercy of another event waking up the dc thread.
>
> Signed-off-by: Wengang Wang<wen.gang.wang@oracle.com>
> ---
>   fs/ocfs2/dlmglue.c |    9 ++++++++-
>   1 files changed, 8 insertions(+), 1 deletions(-)
>
> diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
> index 7642d7c..524bd88 100644
> --- a/fs/ocfs2/dlmglue.c
> +++ b/fs/ocfs2/dlmglue.c
> @@ -1195,6 +1195,7 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
>   						int convert)
>   {
>   	unsigned long flags;
> +	int kick_dc;
>
>   	spin_lock_irqsave(&lockres->l_lock, flags);
>   	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
> @@ -1203,9 +1204,12 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
>   		lockres->l_action = OCFS2_AST_INVALID;
>   	else
>   		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
> +	kick_dc = (lockres->l_flags&  OCFS2_LOCK_QUEUED);
>   	spin_unlock_irqrestore(&lockres->l_lock, flags);
>
>   	wake_up(&lockres->l_event);
> +	if (kick_dc)
> +		ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
>   }
>
>   /* Note: If we detect another process working on the lock (i.e.,
> @@ -1373,6 +1377,7 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
>   	unsigned long flags;
>   	unsigned int gen;
>   	int noqueue_attempted = 0;
> +	int kick_dc;
>
>   	ocfs2_init_mask_waiter(&mw);
>
> @@ -1500,8 +1505,10 @@ update_holders:
>   	ret = 0;
>   unlock:
>   	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
> -
> +	kick_dc = (lockres->l_flags&  OCFS2_LOCK_QUEUED);
>   	spin_unlock_irqrestore(&lockres->l_lock, flags);
> +	if (kick_dc)
> +		ocfs2_wake_downconvert_thread(osb);
>   out:
>   	/*
>   	 * This is helping work around a lock inversion between the page lock

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH] Wakeup down-convert thread just after clearing OCFS2_LOCK_UPCONVERT_FINISHING -v3
@ 2011-09-15  3:27 Wengang Wang
  2011-09-15 17:15 ` Sunil Mushran
  2011-11-17  9:33 ` Joel Becker
  0 siblings, 2 replies; 9+ messages in thread
From: Wengang Wang @ 2011-09-15  3:27 UTC (permalink / raw)
  To: ocfs2-devel

When the lockres state UPCONVERT_FINISHING is cleared,
we should wake up the downconvert thread incase that lockres
is in the blocked queue. Currently we are not doing so and thus
are at the mercy of another event waking up the dc thread.

Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
---
 fs/ocfs2/dlmglue.c |    9 ++++++++-
 1 files changed, 8 insertions(+), 1 deletions(-)

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 7642d7c..524bd88 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1195,6 +1195,7 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
 						int convert)
 {
 	unsigned long flags;
+	int kick_dc;
 
 	spin_lock_irqsave(&lockres->l_lock, flags);
 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
@@ -1203,9 +1204,12 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
 		lockres->l_action = OCFS2_AST_INVALID;
 	else
 		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
+	kick_dc = (lockres->l_flags & OCFS2_LOCK_QUEUED);
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 
 	wake_up(&lockres->l_event);
+	if (kick_dc)
+		ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
 }
 
 /* Note: If we detect another process working on the lock (i.e.,
@@ -1373,6 +1377,7 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
 	unsigned long flags;
 	unsigned int gen;
 	int noqueue_attempted = 0;
+	int kick_dc;
 
 	ocfs2_init_mask_waiter(&mw);
 
@@ -1500,8 +1505,10 @@ update_holders:
 	ret = 0;
 unlock:
 	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
-
+	kick_dc = (lockres->l_flags & OCFS2_LOCK_QUEUED);
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
+	if (kick_dc)
+		ocfs2_wake_downconvert_thread(osb);
 out:
 	/*
 	 * This is helping work around a lock inversion between the page lock
-- 
1.7.5.2

^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2011-11-17  9:33 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-23  0:49 [Ocfs2-devel] [PATCH] Wakeup down-convert thread just after clearing OCFS2_LOCK_UPCONVERT_FINISHING -v3 Wengang Wang
2011-09-23  0:53 ` Sunil Mushran
  -- strict thread matches above, loose matches on Subject: below --
2011-09-15  3:27 Wengang Wang
2011-09-15 17:15 ` Sunil Mushran
2011-09-15 17:21   ` Sunil Mushran
2011-09-16  0:42     ` Wengang Wang
2011-09-16  0:53       ` Sunil Mushran
2011-09-16  0:57         ` Wengang Wang
2011-11-17  9:33 ` Joel Becker

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.