* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
@ 2010-06-16  4:43 Srinivas Eeda
  2010-06-16  4:43 ` [Ocfs2-devel] [PATCH 2/2] ocfs2: o2dlm fix race in purge lockres and newlock (orabug 9094491) Srinivas Eeda
                   ` (3 more replies)
  0 siblings, 4 replies; 24+ messages in thread
From: Srinivas Eeda @ 2010-06-16  4:43 UTC (permalink / raw)
  To: ocfs2-devel

There are two problems in dlm_run_purgelist:

1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
the same lockres instead of moving on to the next one.

2. When a lockres is found unused, dlm_run_purgelist releases the lockres
spinlock before setting DLM_LOCK_RES_DROPPING_REF and calling
dlm_purge_lockres. The spinlock is reacquired there, but in this window the
lockres can get reused. This leads to a BUG.

This patch modifies dlm_run_purgelist to skip a lockres if it is in use and
purge the next one instead. It also sets DLM_LOCK_RES_DROPPING_REF before
releasing the lockres spinlock, protecting the lockres from getting reused.
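
To illustrate race (2), here is a rough sketch of the pre-patch flow
(paraphrased from the code being removed below, not an exact excerpt):

	/* dlm_run_purge_list(), before this patch: */
	spin_lock(&lockres->spinlock);
	unused = __dlm_lockres_unused(lockres);
	spin_unlock(&lockres->spinlock);	/* <-- window: a new lock can be
						 * attached here, making the
						 * lockres "in use" again */
	if (!unused)
		continue;
	/* dlm_purge_lockres() re-takes res->spinlock and only then sets
	 * DLM_LOCK_RES_DROPPING_REF; if the lockres was reused in the window
	 * above, its !__dlm_lockres_unused() check hits BUG() */
	dlm_purge_lockres(dlm, lockres);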

Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
---
 fs/ocfs2/dlm/dlmthread.c |  125 +++++++++++++++++++++++-----------------------
 1 files changed, 63 insertions(+), 62 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 11a6d1f..fb0be6c 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -158,39 +158,17 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
 	int master;
 	int ret = 0;
 
-	spin_lock(&res->spinlock);
-	if (!__dlm_lockres_unused(res)) {
-		mlog(0, "%s:%.*s: tried to purge but not unused\n",
-		     dlm->name, res->lockname.len, res->lockname.name);
-		__dlm_print_one_lock_resource(res);
-		spin_unlock(&res->spinlock);
-		BUG();
-	}
-
-	if (res->state & DLM_LOCK_RES_MIGRATING) {
-		mlog(0, "%s:%.*s: Delay dropref as this lockres is "
-		     "being remastered\n", dlm->name, res->lockname.len,
-		     res->lockname.name);
-		/* Re-add the lockres to the end of the purge list */
-		if (!list_empty(&res->purge)) {
-			list_del_init(&res->purge);
-			list_add_tail(&res->purge, &dlm->purge_list);
-		}
-		spin_unlock(&res->spinlock);
-		return 0;
-	}
-
 	master = (res->owner == dlm->node_num);
 
 	if (!master)
 		res->state |= DLM_LOCK_RES_DROPPING_REF;
-	spin_unlock(&res->spinlock);
 
 	mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
 	     res->lockname.name, master);
 
 	if (!master) {
 		/* drop spinlock...  retake below */
+		spin_unlock(&res->spinlock);
 		spin_unlock(&dlm->spinlock);
 
 		spin_lock(&res->spinlock);
@@ -208,48 +186,37 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
 		mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
 		     dlm->name, res->lockname.len, res->lockname.name, ret);
 		spin_lock(&dlm->spinlock);
+		spin_lock(&res->spinlock);
 	}
 
-	spin_lock(&res->spinlock);
-	if (!list_empty(&res->purge)) {
-		mlog(0, "removing lockres %.*s:%p from purgelist, "
-		     "master = %d\n", res->lockname.len, res->lockname.name,
-		     res, master);
-		list_del_init(&res->purge);
-		spin_unlock(&res->spinlock);
-		dlm_lockres_put(res);
-		dlm->purge_count--;
-	} else
-		spin_unlock(&res->spinlock);
-
-	__dlm_unhash_lockres(res);
-
 	/* lockres is not in the hash now.  drop the flag and wake up
 	 * any processes waiting in dlm_get_lock_resource. */
-	if (!master) {
-		spin_lock(&res->spinlock);
+	if (!master)
 		res->state &= ~DLM_LOCK_RES_DROPPING_REF;
-		spin_unlock(&res->spinlock);
-		wake_up(&res->wq);
-	}
 	return 0;
 }
 
 static void dlm_run_purge_list(struct dlm_ctxt *dlm,
 			       int purge_now)
 {
-	unsigned int run_max, unused;
+	unsigned int run_max;
 	unsigned long purge_jiffies;
 	struct dlm_lock_resource *lockres;
+	struct dlm_lock_resource *nextres;
 
 	spin_lock(&dlm->spinlock);
 	run_max = dlm->purge_count;
 
-	while(run_max && !list_empty(&dlm->purge_list)) {
-		run_max--;
+	if (list_empty(&dlm->purge_list)) {
+		spin_unlock(&dlm->spinlock);
+		return;
+	}
+
+	lockres = list_entry(dlm->purge_list.next,
+			     struct dlm_lock_resource, purge);
 
-		lockres = list_entry(dlm->purge_list.next,
-				     struct dlm_lock_resource, purge);
+	while(run_max && lockres && !list_empty(&dlm->purge_list)) {
+		run_max--;
 
 		/* Status of the lockres *might* change so double
 		 * check. If the lockres is unused, holding the dlm
@@ -257,15 +224,12 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
 		 * refs on it -- there's no need to keep the lockres
 		 * spinlock. */
 		spin_lock(&lockres->spinlock);
-		unused = __dlm_lockres_unused(lockres);
-		spin_unlock(&lockres->spinlock);
-
-		if (!unused)
-			continue;
 
 		purge_jiffies = lockres->last_used +
 			msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
 
+		mlog(0, "purging lockres %.*s\n", lockres->lockname.len,
+		     lockres->lockname.name);
 		/* Make sure that we want to be processing this guy at
 		 * this time. */
 		if (!purge_now && time_after(purge_jiffies, jiffies)) {
@@ -273,20 +237,57 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
 			 * in tail order, we can stop at the first
 			 * unpurgable resource -- anyone added after
 			 * him will have a greater last_used value */
+			spin_unlock(&lockres->spinlock);
 			break;
 		}
 
-		dlm_lockres_get(lockres);
-
+		/* If lockres is being used, or migrating purge next lockres */
+		if (!__dlm_lockres_unused(lockres) ||
+		    (lockres->state & DLM_LOCK_RES_MIGRATING)) {
+			if (!list_is_last(&lockres->purge, &dlm->purge_list))
+				nextres = list_entry(lockres->purge.next,
+					     struct dlm_lock_resource, purge);
+			else
+				nextres = NULL;
+			spin_unlock(&lockres->spinlock);
+			lockres = nextres;
+			continue;
+		}
+		
 		/* This may drop and reacquire the dlm spinlock if it
 		 * has to do migration. */
-		if (dlm_purge_lockres(dlm, lockres))
-			BUG();
-
-		dlm_lockres_put(lockres);
-
-		/* Avoid adding any scheduling latencies */
-		cond_resched_lock(&dlm->spinlock);
+		dlm_purge_lockres(dlm, lockres);
+		
+		/* before we free the lockres we get the next lockres */
+		if (list_empty(&lockres->purge))
+			/* Shouldn't be in this state. Start from beginning */
+			nextres = list_entry(dlm->purge_list.next,
+					     struct dlm_lock_resource, purge);
+		else if (!list_is_last(&lockres->purge, &dlm->purge_list))
+			nextres = list_entry(lockres->purge.next,
+					     struct dlm_lock_resource, purge);
+		else
+			nextres = NULL;
+
+		if (__dlm_lockres_unused(lockres)) {
+			if (!list_empty(&lockres->purge)) {
+				list_del_init(&lockres->purge);
+				dlm->purge_count--;
+			}
+			__dlm_unhash_lockres(lockres);
+			spin_unlock(&lockres->spinlock);
+			wake_up(&lockres->wq);
+			dlm_lockres_put(lockres);
+		} else
+			spin_unlock(&lockres->spinlock);
+		lockres = nextres;
+
+		/* Avoid adding any scheduling latencies. If dlm spinlock is
+		 * dropped, retry again from the beginning as purgelist could
+		 * have been modified */
+		if (cond_resched_lock(&dlm->spinlock))
+			lockres = list_entry(dlm->purge_list.next,
+					     struct dlm_lock_resource, purge);
 	}
 
 	spin_unlock(&dlm->spinlock);
@@ -733,7 +734,7 @@ in_progress:
 			/* unlikely, but we may need to give time to
 			 * other tasks */
 			if (!--n) {
-				mlog(0, "throttling dlm_thread\n");
+				mlog(0, "throttling dlm_thread n=%d\n", n);
 				break;
 			}
 		}
-- 
1.5.6.5


* [Ocfs2-devel] [PATCH 2/2] ocfs2: o2dlm fix race in purge lockres and newlock (orabug 9094491)
  2010-06-16  4:43 [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist Srinivas Eeda
@ 2010-06-16  4:43 ` Srinivas Eeda
  2010-06-18  2:11   ` Sunil Mushran
  2010-06-16  6:06 ` [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist Wengang Wang
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 24+ messages in thread
From: Srinivas Eeda @ 2010-06-16  4:43 UTC (permalink / raw)
  To: ocfs2-devel

This patch fixes the following hole:
dlmlock tries to create a new lock on a lockres that is on the purge list. It
calls dlm_get_lock_resource and later adds a lock to the blocked list. But in
this window, dlm_thread can purge the lockres and unhash it. This causes a
BUG, as when the AST comes back from the master the lockres is not found.

This patch marks the lockres with a new state, DLM_LOCK_RES_IN_USE, which
protects the lockres from being purged by dlm_thread.
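
The window, schematically (an illustrative interleaving, not a trace):

	/* dlmlock path                       dlm_thread
	 *
	 * dlm_get_lock_resource()
	 *   finds the lockres in the hash
	 *                                    dlm_run_purge_list()
	 *                                      lockres still looks unused
	 *                                      deref + __dlm_unhash_lockres()
	 * adds the new lock to the
	 * blocked list
	 * AST arrives from the master,
	 * lockres lookup fails => BUG
	 *
	 * With DLM_LOCK_RES_IN_USE set by dlm_get_lock_resource(),
	 * __dlm_lockres_unused() returns 0 and dlm_thread skips the lockres
	 * until the flag is cleared in the dlmlock paths.
	 */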

Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
---
 fs/ocfs2/dlm/dlmcommon.h |    1 +
 fs/ocfs2/dlm/dlmlock.c   |    4 ++++
 fs/ocfs2/dlm/dlmmaster.c |    5 ++++-
 fs/ocfs2/dlm/dlmthread.c |    1 +
 4 files changed, 10 insertions(+), 1 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 0102be3..6cf3c08 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -279,6 +279,7 @@ static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm,
 #define DLM_LOCK_RES_DIRTY                0x00000008
 #define DLM_LOCK_RES_IN_PROGRESS          0x00000010
 #define DLM_LOCK_RES_MIGRATING            0x00000020
+#define DLM_LOCK_RES_IN_USE               0x00000100
 #define DLM_LOCK_RES_DROPPING_REF         0x00000040
 #define DLM_LOCK_RES_BLOCK_DIRTY          0x00001000
 #define DLM_LOCK_RES_SETREF_INPROG        0x00002000
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 7333377..501ac40 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -134,6 +134,8 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
 	if (status != DLM_NORMAL &&
 	    lock->ml.node != dlm->node_num) {
 		/* erf.  state changed after lock was dropped. */
+		/* DLM_LOCK_RES_IN_USE is set in dlm_get_lock_resource */
+		res->state &= ~DLM_LOCK_RES_IN_USE;
 		spin_unlock(&res->spinlock);
 		dlm_error(status);
 		return status;
@@ -180,6 +182,7 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
 			kick_thread = 1;
 		}
 	}
+	res->state &= ~DLM_LOCK_RES_IN_USE;
 	/* reduce the inflight count, this may result in the lockres
 	 * being purged below during calc_usage */
 	if (lock->ml.node == dlm->node_num)
@@ -246,6 +249,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
 
 	spin_lock(&res->spinlock);
 	res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
+	res->state &= ~DLM_LOCK_RES_IN_USE;
 	lock->lock_pending = 0;
 	if (status != DLM_NORMAL) {
 		if (status == DLM_RECOVERING &&
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 9289b43..f0f2d97 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -719,6 +719,7 @@ lookup:
 	if (tmpres) {
 		int dropping_ref = 0;
 
+		tmpres->state |= DLM_LOCK_RES_IN_USE;
 		spin_unlock(&dlm->spinlock);
 
 		spin_lock(&tmpres->spinlock);
@@ -731,8 +732,10 @@ lookup:
 		if (tmpres->owner == dlm->node_num) {
 			BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF);
 			dlm_lockres_grab_inflight_ref(dlm, tmpres);
-		} else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF)
+		} else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) {
+			tmpres->state &= ~DLM_LOCK_RES_IN_USE;
 			dropping_ref = 1;
+		}
 		spin_unlock(&tmpres->spinlock);
 
 		/* wait until done messaging the master, drop our ref to allow
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index fb0be6c..2b82e0b 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -93,6 +93,7 @@ int __dlm_lockres_has_locks(struct dlm_lock_resource *res)
 int __dlm_lockres_unused(struct dlm_lock_resource *res)
 {
 	if (!__dlm_lockres_has_locks(res) &&
+	    !(res->state & DLM_LOCK_RES_IN_USE) &&
 	    (list_empty(&res->dirty) && !(res->state & DLM_LOCK_RES_DIRTY))) {
 		/* try not to scan the bitmap unless the first two
 		 * conditions are already true */
-- 
1.5.6.5


* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
  2010-06-16  4:43 [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist Srinivas Eeda
  2010-06-16  4:43 ` [Ocfs2-devel] [PATCH 2/2] ocfs2: o2dlm fix race in purge lockres and newlock (orabug 9094491) Srinivas Eeda
@ 2010-06-16  6:06 ` Wengang Wang
  2010-06-17  8:53   ` Srinivas Eeda
  2010-06-17 15:06   ` Sunil Mushran
  2010-06-17  1:39 ` Joel Becker
  2010-06-17  1:44 ` Sunil Mushran
  3 siblings, 2 replies; 24+ messages in thread
From: Wengang Wang @ 2010-06-16  6:06 UTC (permalink / raw)
  To: ocfs2-devel

Still the question:
if you have sent a DEREF request to the master and the lockres became in use
again, then the lockres remains in the hash table and also in the purge list.
So:
1) If this node holds the last ref, there is a possibility that the master
purged the lockres after receiving the DEREF request from this node. In this
case, when this node does dlmlock_remote(), the lockres won't be found on the
master. How do we deal with that?

2) The lockres on this node is going to be purged again, which means it will
send secondary DEREFs to the master. This is not good, I think.

A thought is to set lockres->owner to DLM_LOCK_RES_OWNER_UNKNOWN after
sending a DEREF request against this lockres, and also to redo the master
request before locking on it.

Regards,
wengang.


* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
  2010-06-16  4:43 [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist Srinivas Eeda
  2010-06-16  4:43 ` [Ocfs2-devel] [PATCH 2/2] ocfs2: o2dlm fix race in purge lockres and newlock (orabug 9094491) Srinivas Eeda
  2010-06-16  6:06 ` [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist Wengang Wang
@ 2010-06-17  1:39 ` Joel Becker
  2010-06-17  8:32   ` Srinivas Eeda
  2010-06-17  1:44 ` Sunil Mushran
  3 siblings, 1 reply; 24+ messages in thread
From: Joel Becker @ 2010-06-17  1:39 UTC (permalink / raw)
  To: ocfs2-devel

On Tue, Jun 15, 2010 at 09:43:02PM -0700, Srinivas Eeda wrote:
> There are two problems in dlm_run_purgelist
> 
> 1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
> the same lockres instead of trying the next lockres.
> 
> 2. When a lockres is found unused, dlm_run_purgelist releases lockres spinlock
> before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres.
> spinlock is reacquired but in this window lockres can get reused. This leads
> to BUG.
> 
> This patch modifies dlm_run_purgelist to skip lockres if it's in use and purge
>  next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before releasing the
> lockres spinlock protecting it from getting reused.
> 
> Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>

	I don't really like the way you did this.  You're absolutely
right that we need to hold the spinlock while setting DROPPING REF.  But
there's no need to lift the lockres check logic into run_purge_list.

> @@ -257,15 +224,12 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
>  		 * refs on it -- there's no need to keep the lockres
>  		 * spinlock. */
>  		spin_lock(&lockres->spinlock);
> -		unused = __dlm_lockres_unused(lockres);
> -		spin_unlock(&lockres->spinlock);
> -
> -		if (!unused)
> -			continue;
>  
>  		purge_jiffies = lockres->last_used +
>  			msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
>  
> +		mlog(0, "purging lockres %.*s\n", lockres->lockname.len,
> +		     lockres->lockname.name);
>  		/* Make sure that we want to be processing this guy at
>  		 * this time. */
>  		if (!purge_now && time_after(purge_jiffies, jiffies)) {

	In fact, I'd move the __dlm_lockres_unused() and
purge_now||time_after() checks into dlm_purge_lockres().  It can return
-EBUSY if the lockres is in use.  It can return -ETIME if purge_now==0
and time_after hits.  Then inside run_purge_list() you just do:

		spin_lock(&lockres->spinlock);
		ret = dlm_purge_lockres(dlm, lockres, purge_now);
		spin_unlock(&lockres->spinlock);
		if (ret == -EAGAIN)
			break;
		else if (ret == -EBUSY) {
			lockres = list_entry(lockres->purge.next,
					     struct dlm_lock_resource, purge);
			continue;
		} else if (ret)
			BUG();

	What about the dlm_lockres_get()?  That's only held while we
drop the dlm spinlock in dlm_purge_lockres(), so you can move it there.
You take the kref only after the _unused() and time_after() checks.
	This actually would make run_purge_list() more readable, not
less.
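
A slightly fuller sketch of that shape (hypothetical code filling in the
suggestion above, using -EAGAIN for the time-ordered stop to match the loop):

	/* Called with dlm->spinlock and res->spinlock held. */
	static int dlm_purge_lockres(struct dlm_ctxt *dlm,
				     struct dlm_lock_resource *res,
				     int purge_now)
	{
		unsigned long purge_jiffies;

		if (!__dlm_lockres_unused(res) ||
		    (res->state & DLM_LOCK_RES_MIGRATING))
			return -EBUSY;	/* caller skips to the next lockres */

		purge_jiffies = res->last_used +
			msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
		if (!purge_now && time_after(purge_jiffies, jiffies))
			return -EAGAIN;	/* list is time ordered: caller stops */

		/* take a ref, set DLM_LOCK_RES_DROPPING_REF, send the deref
		 * to the master (dropping/retaking the spinlocks), then remove
		 * from the purge list and unhash -- as in the patch above */
		return 0;
	}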

Joel

-- 

"There are only two ways to live your life. One is as though nothing
 is a miracle. The other is as though everything is a miracle."
        - Albert Einstein

Joel Becker
Principal Software Developer
Oracle
E-mail: joel.becker at oracle.com
Phone: (650) 506-8127


* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
  2010-06-16  4:43 [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist Srinivas Eeda
                   ` (2 preceding siblings ...)
  2010-06-17  1:39 ` Joel Becker
@ 2010-06-17  1:44 ` Sunil Mushran
  2010-06-17  6:05   ` Wengang Wang
  2010-06-17  8:32   ` Joel Becker
  3 siblings, 2 replies; 24+ messages in thread
From: Sunil Mushran @ 2010-06-17  1:44 UTC (permalink / raw)
  To: ocfs2-devel

One way to skip a lockres in the purgelist is to list_del_init() and
list_add_tail(). That simplifies the patch a lot.

I have attached a quick & dirty patch. See if that satisfies all the
requirements.

Sunil
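
A minimal sketch of the requeue-and-skip idea (not the attached patch, just
an illustration of the pattern):

	/* in dlm_run_purge_list(), with dlm->spinlock and
	 * lockres->spinlock held */
	if (!__dlm_lockres_unused(lockres) ||
	    (lockres->state & DLM_LOCK_RES_MIGRATING)) {
		/* busy: push it to the back of the purge list and look
		 * at the new head on the next iteration */
		list_del_init(&lockres->purge);
		list_add_tail(&lockres->purge, &dlm->purge_list);
		spin_unlock(&lockres->spinlock);
		continue;
	}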


-------------- next part --------------
A non-text attachment was scrubbed...
Name: srini.patch
Type: text/x-patch
Size: 1315 bytes
Desc: not available
Url : http://oss.oracle.com/pipermail/ocfs2-devel/attachments/20100616/6601081c/attachment.bin 


* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
  2010-06-17  1:44 ` Sunil Mushran
@ 2010-06-17  6:05   ` Wengang Wang
  2010-06-17  8:32   ` Joel Becker
  1 sibling, 0 replies; 24+ messages in thread
From: Wengang Wang @ 2010-06-17  6:05 UTC (permalink / raw)
  To: ocfs2-devel

On 10-06-16 18:44, Sunil Mushran wrote:
> One way to skip a lockres in the purgelist is to list_del_init() and
> list_add_tail(). That simplifies the patch a lot.

For my own knowledge, why not just list_move_tail()?

regards,
wengang. 
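
For reference, the two forms being compared (same end state for an entry
already on the list):

	/* two-step form used in the existing code */
	list_del_init(&res->purge);
	list_add_tail(&res->purge, &dlm->purge_list);

	/* single helper Wengang suggests */
	list_move_tail(&res->purge, &dlm->purge_list);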


* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
  2010-06-17  1:44 ` Sunil Mushran
  2010-06-17  6:05   ` Wengang Wang
@ 2010-06-17  8:32   ` Joel Becker
  2010-06-17  8:35     ` Srinivas Eeda
  1 sibling, 1 reply; 24+ messages in thread
From: Joel Becker @ 2010-06-17  8:32 UTC (permalink / raw)
  To: ocfs2-devel

On Wed, Jun 16, 2010 at 06:44:43PM -0700, Sunil Mushran wrote:
> One way to skip a lockres in the purgelist is to list_del_init() and
> list_add_tail(). That simplifies the patch a lot.
> 
> I have attached a quick & dirty patch. See if that satisfies all the
> requirements.

	As far as I can tell from reading the code, the time_after()
check is because they are time ordered.  Wouldn't moving it to the end
violate that?

Joel

-- 

"All alone at the end of the evening
 When the bright lights have faded to blue.
 I was thinking about a woman who had loved me
 And I never knew"

Joel Becker
Principal Software Developer
Oracle
E-mail: joel.becker at oracle.com
Phone: (650) 506-8127


* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
  2010-06-17  1:39 ` Joel Becker
@ 2010-06-17  8:32   ` Srinivas Eeda
  2010-06-17  9:08     ` Joel Becker
  0 siblings, 1 reply; 24+ messages in thread
From: Srinivas Eeda @ 2010-06-17  8:32 UTC (permalink / raw)
  To: ocfs2-devel

On 6/16/2010 6:39 PM, Joel Becker wrote:
> On Tue, Jun 15, 2010 at 09:43:02PM -0700, Srinivas Eeda wrote:
>   
>> There are two problems in dlm_run_purgelist
>>
>> 1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
>> the same lockres instead of trying the next lockres.
>>
>> 2. When a lockres is found unused, dlm_run_purgelist releases lockres spinlock
>> before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres.
>> spinlock is reacquired but in this window lockres can get reused. This leads
>> to BUG.
>>
>> This patch modifies dlm_run_purgelist to skip lockres if it's in use and purge
>>  next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before releasing the
>> lockres spinlock protecting it from getting reused.
>>
>> Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
>>     
>
> 	I don't really like the way you did this.  You're absolutely
> right that we need to hold the spinlock while setting DROPPING REF.  But
> there's no need to lift the lockres check logic into run_purge_list.
>   
As of today, we always get the lockres from the head of the
dlm->purge_list. If it is in use, we keep trying it. Once it gets purged,
we move to the next res, which again would be at the head of the
dlm->purge_list. But with my change that won't be the case. If the first
(few) lockres are in use, we will try the first one we can purge. So we
should get the next lockres before list_del_init(&res->purge) happens,
and hence I moved the code. If you don't like the delete code in
dlm_run_purge_list, then we have to make dlm_purge_lockres return the
next lockres that should get purged.

If you are suggesting that we move the lockres to the tail if we find
it in use, then the code will be a lot more readable. The current code
doesn't move such a lockres to the tail, so I wanted to preserve that
logic, as I am not sure what the original intent was. If we move in-use
lockres to the tail, would you suggest we also update lockres->last_used?
>> @@ -257,15 +224,12 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
>>  		 * refs on it -- there's no need to keep the lockres
>>  		 * spinlock. */
>>  		spin_lock(&lockres->spinlock);
>> -		unused = __dlm_lockres_unused(lockres);
>> -		spin_unlock(&lockres->spinlock);
>> -
>> -		if (!unused)
>> -			continue;
>>  
>>  		purge_jiffies = lockres->last_used +
>>  			msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
>>  
>> +		mlog(0, "purging lockres %.*s\n", lockres->lockname.len,
>> +		     lockres->lockname.name);
>>  		/* Make sure that we want to be processing this guy at
>>  		 * this time. */
>>  		if (!purge_now && time_after(purge_jiffies, jiffies)) {
>>     
>
> 	In fact, I'd move the __dlm_lockres_unused() and
> purge_now||time_after() checks into dlm_purge_lockres().  It can return
> -EBUSY if the lockres is in use.  It can return -ETIME if purge_now==0
> and time_after hits.  Then inside run_purge_list() you just do:
>
> 		spin_lock(&lockres->spinlock);
> 		ret = dlm_purge_lockres(dlm, res, purge_now);
> 		spin_unlock(&lockres->spinlock);
> 		if (ret == -EAGAIN)
> 			break;
> 		else if (ret == -EBUSY) {
> 			lockres = list_entry(lockres->next);
> 			continue;
> 		else if (ret)
> 			BUG();
>   
What would list_entry return if the lockres were the last one on the list?
I was thinking it would return something random.
> 	What about the dlm_lockres_get()?  That's only held while we
> drop the dlm spinlock in dlm_purge_lockres(), so you can move it there.
> You take the kref only after the _unused() and time_after() checks.
> 	This actually would make run_purge_list() more readable, not
> less.
>   
I agree that it is less readable now, but I didn't find a better way, as I
am not sure how to get to the next lockres if the current lockres that got
dropped is in the middle of the list. :)
> Joel
>
>   


* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
  2010-06-17  8:32   ` Joel Becker
@ 2010-06-17  8:35     ` Srinivas Eeda
  2010-06-17 14:48       ` Sunil Mushran
  0 siblings, 1 reply; 24+ messages in thread
From: Srinivas Eeda @ 2010-06-17  8:35 UTC (permalink / raw)
  To: ocfs2-devel

On 6/17/2010 1:32 AM, Joel Becker wrote:
> On Wed, Jun 16, 2010 at 06:44:43PM -0700, Sunil Mushran wrote:
>   
>> One way to skip a lockres in the purgelist is to list_del_init() and
>> list_add_tail(). That simplifies the patch a lot.
>>
>> I have attached a quick & dirty patch. See if that satisfies all the
>> requirements.
>>     
>
> 	As far as I can tell from reading the code, the time_after()
> check is because they are time ordered.  Wouldn't moving it to the end
> violate that?
>   
Right, that's why I didn't want to move an in-use lockres to the tail :)
> Joel
>
>   


* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
  2010-06-16  6:06 ` [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist Wengang Wang
@ 2010-06-17  8:53   ` Srinivas Eeda
  2010-06-17 11:05     ` Wengang Wang
  2010-06-17 15:06   ` Sunil Mushran
  1 sibling, 1 reply; 24+ messages in thread
From: Srinivas Eeda @ 2010-06-17  8:53 UTC (permalink / raw)
  To: ocfs2-devel

On 6/15/2010 11:06 PM, Wengang Wang wrote:
> still the question.
> If you have sent DEREF request to the master, and the lockres became in-use
> again, then the lockres remains in the hash table and also in the purge list.
> So
>   
Yes, that's a possibility. But there is not much we can do to cover
that window other than making the non-master nodes avoid such races.
Patch 2/2 fixes one such race.
> 1) If this node is the last ref, there is a possibility that the master
> purged the lockres after receiving DEREF request from this node. In this
> case, when this node does dlmlock_remote(), the lockres won't be found on the
> master. How to deal with it?
>   
Patch 2/2 fixes this race: dlm_get_lock_resource will either wait for
the lockres to get purged and start everything fresh, or mark the
lockres in use so dlm_thread won't purge it.
> 2) The lockres on this node is going to be purged again, it means it will send
> secondary DEREFs to the master. This is not good I think.
>   
Right, it's not a good idea to send a deref again. We have to fix those cases.
> A thought is setting lockres->owner to DLM_LOCK_RES_OWNER_UNKNOWN after
> sending a DEREF request against this lockres. Also redo master request
> before locking on it.
>   
If you are referring to the hole in dlmlock_remote, patch 2/2 fixes it.
Please review that patch and let me know. :)
> Regards,
> wengang.


* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
  2010-06-17  8:32   ` Srinivas Eeda
@ 2010-06-17  9:08     ` Joel Becker
  0 siblings, 0 replies; 24+ messages in thread
From: Joel Becker @ 2010-06-17  9:08 UTC (permalink / raw)
  To: ocfs2-devel

On Thu, Jun 17, 2010 at 01:32:59AM -0700, Srinivas Eeda wrote:
> On 6/16/2010 6:39 PM, Joel Becker wrote:
> As of today, we always get the lockres from the head of the
> dlm->purge_list. If it is in use, we keep trying. If that gets
> purged, we move to next res which again would be on the head of the
> dlm->purge_list. But with my change that won't be the case. If first
> (few) lockres are in use, we will try the one we could purge. So we
> should get the next lockres before list_del_init(&res->purge)
> happens and hence I moved the code. If you didn't like the delete
> code in dlm_run_purge_list, then we have to make dlm_purge_lockres
> to return the next lockres that should get purged.

	Or it does everything but the delete, and then you have
run_purge_list do the delete right after returning when the return code
is zero.  My problem wasn't that you moved the delete, it was that you
moved all the other checks and things.

> If you are suggesting that we move the lockres to the tail if we
> found it in use, then the code will be lot more readable. The
> current code doesn't move the unused lockres to tail, so wanted to
> preserve that logic as I am not sure what was the original intent.
> If move used lockres to tail, would you suggest we also update
> lockres->last_used?

	Like I responded to Sunil, it sure looks like the code wants the
same order.  If moving to the end is safe, that's obviously the best
answer.

> what would list_entry return, if the lockres was the last on the
> list. I was thinking it would return something random ..

	It returns the head of the list.  These are doubly-linked lists.
list_empty() actually checks whether head->next == head.  You can just
have the while loop say "while (lockres->list != head)" to exit at the last
item.  Then when you say "lockres = list_entry(lockres->next);
continue;", the while loop exits properly at the end of the list.

Joel

-- 

Life's Little Instruction Book #198

	"Feed a stranger's expired parking meter."

Joel Becker
Principal Software Developer
Oracle
E-mail: joel.becker at oracle.com
Phone: (650) 506-8127


* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
  2010-06-17  8:53   ` Srinivas Eeda
@ 2010-06-17 11:05     ` Wengang Wang
  0 siblings, 0 replies; 24+ messages in thread
From: Wengang Wang @ 2010-06-17 11:05 UTC (permalink / raw)
  To: ocfs2-devel

On 10-06-17 01:53, Srinivas Eeda wrote:
> On 6/15/2010 11:06 PM, Wengang Wang wrote:
> >still the question.
> >If you have sent DEREF request to the master, and the lockres became in-use
> >again, then the lockres remains in the hash table and also in the purge list.
> >So
> Yes, that's a possibility. But there is not much we could do to
> cover that window other than making the non master nodes to avoid
> such races. Patch 2/2 fixes one such race.

Yes, we should make non-master nodes avoid such races. But you only
fixed one such race with patch 2/2 :).
And we probably can't be sure how many such races there are. A known case is
dlm_mig_lockres_handler(). We drop dlm->spinlock and res->spinlock after
dlm_lookup_lockres(). Here the DROPPING_REF state can be set in dlm_thread().
dlm_thread() then drops the spinlocks and sends the deref message. Before
dlm_thread() takes the spinlocks back, dlm_mig_lockres_handler() continues and
a lock (or locks) is added to the lockres. The lockres has then become in use
again. dlm_thread() takes the spinlocks back, finds the lockres in use, and
keeps it in the hash table and in the purge list.

Your patch 2/2 fixes that problem well.
So far I have no good idea how to fix all such potential races.
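
That interleaving, schematically (illustrative ordering only):

	/* dlm_mig_lockres_handler()         dlm_thread / dlm_run_purge_list()
	 *
	 * dlm_lookup_lockres()
	 * drops dlm->spinlock and
	 * res->spinlock
	 *                                   sets DLM_LOCK_RES_DROPPING_REF,
	 *                                   drops the spinlocks, sends the
	 *                                   deref message to the master
	 * adds a lock to the lockres
	 * (lockres is in use again)
	 *                                   retakes the spinlocks, sees the
	 *                                   lockres in use, leaves it hashed
	 *                                   and on the purge list
	 */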

regards,
wengang.

> >1) If this node is the last ref, there is a possibility that the master
> >purged the lockres after receiving DEREF request from this node. In this
> >case, when this node does dlmlock_remote(), the lockres won't be found on the
> >master. How to deal with it?
> patch 2/2 fixes this race. dlm_get_lock_resource will either wait
> for the lockres to get purged and starts everything fresh or marks
> the lockres in use so dlm_thread won't purge it.
> >2) The lockres on this node is going to be purged again, it means it will send
> >secondary DEREFs to the master. This is not good I think.
> right, not a good idea to send deref again. We have to fix those cases.
> >A thought is setting lockres->owner to DLM_LOCK_RES_OWNER_UNKNOWN after
> >sending a DEREF request againt this lockres. Also redo master reqeust
> >before locking on it.
> if you are referring to the hole in dlmlock_remote, patch 2/2 fixes
> it. Please review that patch and let me know :)
> >Regards,
> >wengang.


* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
  2010-06-17  8:35     ` Srinivas Eeda
@ 2010-06-17 14:48       ` Sunil Mushran
  2010-06-17 16:55         ` Srinivas Eeda
  2010-06-17 19:28         ` Joel Becker
  0 siblings, 2 replies; 24+ messages in thread
From: Sunil Mushran @ 2010-06-17 14:48 UTC (permalink / raw)
  To: ocfs2-devel

On 06/17/2010 01:35 AM, Srinivas Eeda wrote:
> On 6/17/2010 1:32 AM, Joel Becker wrote:
>> On Wed, Jun 16, 2010 at 06:44:43PM -0700, Sunil Mushran wrote:
>>    
>>> One way to skip a lockres in the purgelist is to list_del_init() and
>>> list_add_tail(). That simplifies the patch a lot.
>>>
>>> I have attached a quick&  dirty patch. See if that satisfies all the
>>> requirements.
>>>      
>>
>> 	As far as I can tell from reading the code, the time_after()
>> check is because they are time ordered.  Wouldn't moving it to the end
>> violate that?
>>    
> right. that's why I didn't want to move used lockres to tail :)


No. There is no need for time ordering. Or, am I missing something?

We delay the purge in case the file system changes its mind and wants
to reuse it. By delaying, we hold onto the mastery information. That's it.



* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
  2010-06-16  6:06 ` [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist Wengang Wang
  2010-06-17  8:53   ` Srinivas Eeda
@ 2010-06-17 15:06   ` Sunil Mushran
  2010-06-17 16:56     ` Srinivas Eeda
  2010-06-18  2:37     ` Wengang Wang
  1 sibling, 2 replies; 24+ messages in thread
From: Sunil Mushran @ 2010-06-17 15:06 UTC (permalink / raw)
  To: ocfs2-devel

On 06/15/2010 11:06 PM, Wengang Wang wrote:
> still the question.
> If you have sent DEREF request to the master, and the lockres became in-use
> again, then the lockres remains in the hash table and also in the purge list.
> So
> 1) If this node is the last ref, there is a possibility that the master
> purged the lockres after receiving DEREF request from this node. In this
> case, when this node does dlmlock_remote(), the lockres won't be found on the
> master. How to deal with it?
>
> 2) The lockres on this node is going to be purged again, it means it will send
> secondary DEREFs to the master. This is not good I think.
>
> A thought is setting lockres->owner to DLM_LOCK_RES_OWNER_UNKNOWN after
> sending a DEREF request against this lockres. Also redo master request
> before locking on it.
>    
>    

The fix we are working towards is to ensure that we set
DLM_LOCK_RES_DROPPING_REF once we are determined
to purge the lockres. As in, we should not let go of the spinlock
before we have either set the flag or decided against purging
that resource.

Once the flag is set, new users looking up the resource via
dlm_get_lock_resource() will notice the flag and will then wait
for that flag to be cleared before looking up the lockres hash
again. If all goes well, the lockres will not be found (because it
has since been unhashed) and it will be forced to go thru the
full mastery process.
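
Roughly, the ordering being described is the following. This is only an
illustrative sketch (locking condensed, error handling and the actual
deref message left out), not the actual patch:

static void purge_one_sketch(struct dlm_ctxt *dlm,
			     struct dlm_lock_resource *res)
{
	assert_spin_locked(&dlm->spinlock);

	spin_lock(&res->spinlock);
	if (!__dlm_lockres_unused(res)) {
		/* busy again: skip it, do not purge */
		spin_unlock(&res->spinlock);
		return;
	}
	/* commit to purging before any spinlock is dropped */
	res->state |= DLM_LOCK_RES_DROPPING_REF;
	spin_unlock(&res->spinlock);

	/* ... send the deref to the master, unhash the lockres ... */

	spin_lock(&res->spinlock);
	res->state &= ~DLM_LOCK_RES_DROPPING_REF;
	spin_unlock(&res->spinlock);
	wake_up(&res->wq);
}

And on the lookup side, inside dlm_get_lock_resource(), something like:

	spin_lock(&tmpres->spinlock);
	if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) {
		spin_unlock(&tmpres->spinlock);
		/* wait for the purge to finish, then retry the hash;
		 * the condition is re-checked, so the unlocked read is ok */
		wait_event(tmpres->wq,
			   !(tmpres->state & DLM_LOCK_RES_DROPPING_REF));
		dlm_lockres_put(tmpres);
		goto lookup;	/* normally not found -> full mastery */
	}
	spin_unlock(&tmpres->spinlock);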

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
  2010-06-17 14:48       ` Sunil Mushran
@ 2010-06-17 16:55         ` Srinivas Eeda
  2010-06-17 19:31           ` Sunil Mushran
  2010-06-17 19:28         ` Joel Becker
  1 sibling, 1 reply; 24+ messages in thread
From: Srinivas Eeda @ 2010-06-17 16:55 UTC (permalink / raw)
  To: ocfs2-devel

On 6/17/2010 7:48 AM, Sunil Mushran wrote:
> On 06/17/2010 01:35 AM, Srinivas Eeda wrote:
>> On 6/17/2010 1:32 AM, Joel Becker wrote:
>>> On Wed, Jun 16, 2010 at 06:44:43PM -0700, Sunil Mushran wrote:
>>>   
>>>> One way to skip a lockres in the purgelist is to list_del_init() and
>>>> list_add_tail(). That simplifies the patch a lot.
>>>>
>>>> I have attached a quick & dirty patch. See if that satisfies all the
>>>> requirements.
>>>>     
>>>
>>> 	As far as I can tell from reading the code, the time_after()
>>> check is because they are time ordered.  Wouldn't moving it to the end
>>> violate that?
>>>   
>> right. that's why I didn't want to move used lockres to tail :)
>
>
> No. There is no need for time ordering. Or, am I missing something?
>
> We delay the purge in case the file system changes its mind and wants
> to reuse it. By delaying, we hold onto the mastery information. That's it.
>
So, should we preserve this, or purge the lockres when it's on the purge
list? Or we could just update last_used and move it to the end of the
list if it can be purged.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
  2010-06-17 15:06   ` Sunil Mushran
@ 2010-06-17 16:56     ` Srinivas Eeda
  2010-06-18  2:37     ` Wengang Wang
  1 sibling, 0 replies; 24+ messages in thread
From: Srinivas Eeda @ 2010-06-17 16:56 UTC (permalink / raw)
  To: ocfs2-devel

Sunil,

as of now, there is still a window in dlm_get_lock_resource where it
finds the lockres but doesn't protect it from getting purged. The second
patch fixes this by marking it in use; can you please review that one as
well?

Thanks,
--Srini

On 6/17/2010 8:06 AM, Sunil Mushran wrote:
> On 06/15/2010 11:06 PM, Wengang Wang wrote:
>> still the question.
>> If you have sent DEREF request to the master, and the lockres became 
>> in-use
>> again, then the lockres remains in the hash table and also in the 
>> purge list.
>> So
>> 1) If this node is the last ref, there is a possibility that the master
>> purged the lockres after receiving DEREF request from this node. In this
>> case, when this node does dlmlock_remote(), the lockres won't be 
>> found on the
>> master. How to deal with it?
>>
>> 2) The lockres on this node is going to be purged again, it means it 
>> will send
>> secondary DEREFs to the master. This is not good I think.
>>
>> A thought is setting lockres->owner to DLM_LOCK_RES_OWNER_UNKNOWN after
>> sending a DEREF request against this lockres. Also redo the master request
>> before locking on it.
>>    
>
> The fix we are working towards is to ensure that we set
> DLM_LOCK_RES_DROPPING_REF once we are determined
> to purge the lockres. As in, we should not let go of the spinlock
> before we have either set the flag or decided against purging
> that resource.
>
> Once the flag is set, new users looking up the resource via
> dlm_get_lock_resource() will notice the flag and will then wait
> for that flag to be cleared before looking up the lockres hash
> again. If all goes well, the lockres will not be found (because it
> has since been unhashed) and it will be forced to go thru the
> full mastery process.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
  2010-06-17 14:48       ` Sunil Mushran
  2010-06-17 16:55         ` Srinivas Eeda
@ 2010-06-17 19:28         ` Joel Becker
  2010-06-17 23:34           ` Sunil Mushran
  1 sibling, 1 reply; 24+ messages in thread
From: Joel Becker @ 2010-06-17 19:28 UTC (permalink / raw)
  To: ocfs2-devel

On Thu, Jun 17, 2010 at 07:48:38AM -0700, Sunil Mushran wrote:
> On 06/17/2010 01:35 AM, Srinivas Eeda wrote:
> >On 6/17/2010 1:32 AM, Joel Becker wrote:
> >>	As far as I can tell from reading the code, the time_after()
> >>check is because they are time ordered.  Wouldn't moving it to the end
> >>violate that?
> >right. that's why I didn't want to move used lockres to tail :)
> 
> 
> No. There is no need for time ordering. Or, am I missing something?
> 
> We delay the purge in case the file system changes its mind and wants
> to reuse it. By delaying, we hold onto the mastery information. That's it.

	The comment is right there:

                    /* Since resources are added to the purge list
                     * in tail order, we can stop at the first
                     * unpurgable resource -- anyone added after
                     * him will have a greater last_used value */
                    break;

So we're going to break out of this loop as soon as we see a later time.
Anything you've moved to the back will be ignored until enough time
passes that the things in front of it are candidates for purge.
	You could, of course, just change the 'break' to a 'continue'
and find those, but I kind of like the short-circuit behavior.  I don't
know how long this list is, but walking a whole bunch of "not yet"
lockreses just to hopefully find one at the end seems silly.
	I think it is better to leave the busy ones at the front of the
list and just walk past them if they can't be dealt with now.
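
Concretely, that walk would look something like this. Sketch only: the
deref/unhash step is elided, and the purge-interval constant and field
names are assumed to be the usual ones from dlmthread.c:

static void dlm_run_purge_list_sketch(struct dlm_ctxt *dlm)
{
	struct dlm_lock_resource *res, *tmp;
	unsigned long purge_jiffies;

	spin_lock(&dlm->spinlock);

	list_for_each_entry_safe(res, tmp, &dlm->purge_list, purge) {
		purge_jiffies = res->last_used +
				msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);

		/* list is ordered by last_used: everything behind this
		 * entry is younger still, so keep the short-circuit */
		if (time_after(purge_jiffies, jiffies))
			break;

		spin_lock(&res->spinlock);
		if (!__dlm_lockres_unused(res)) {
			/* busy: leave it where it is and walk past it */
			spin_unlock(&res->spinlock);
			continue;
		}
		res->state |= DLM_LOCK_RES_DROPPING_REF;
		spin_unlock(&res->spinlock);

		/* ... deref, unhash and removal from the purge list go
		 * here; the real code drops dlm->spinlock for the deref
		 * message and retakes it before continuing ... */
	}

	spin_unlock(&dlm->spinlock);
}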

Joel

-- 

Life's Little Instruction Book #157 

	"Take time to smell the roses."

Joel Becker
Principal Software Developer
Oracle
E-mail: joel.becker at oracle.com
Phone: (650) 506-8127

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
  2010-06-17 16:55         ` Srinivas Eeda
@ 2010-06-17 19:31           ` Sunil Mushran
  0 siblings, 0 replies; 24+ messages in thread
From: Sunil Mushran @ 2010-06-17 19:31 UTC (permalink / raw)
  To: ocfs2-devel

We don't have to update last_used to move it to the tail. If it is
now in use, that means it is about to be taken out of the purgelist. We
just need to move it out of our way for the time being.

On Jun 17, 2010, at 9:55 AM, Srinivas Eeda <srinivas.eeda@oracle.com>  
wrote:

> On 6/17/2010 7:48 AM, Sunil Mushran wrote:
>>
>> On 06/17/2010 01:35 AM, Srinivas Eeda wrote:
>>>
>>> On 6/17/2010 1:32 AM, Joel Becker wrote:
>>>>
>>>> On Wed, Jun 16, 2010 at 06:44:43PM -0700, Sunil Mushran wrote:
>>>>
>>>>> One way to skip a lockres in the purgelist is to list_del_init()  
>>>>> and
>>>>> list_add_tail(). That simplifies the patch a lot.
>>>>>
>>>>> I have attached a quick & dirty patch. See if that satisfies all  
>>>>> the
>>>>> requirements.
>>>>>
>>>>
>>>> 	As far as I can tell from reading the code, the time_after()
>>>> check is because they are time ordered.  Wouldn't moving it to  
>>>> the end
>>>> violate that?
>>>>
>>> right. that's why I didn't want to move used lockres to tail :)
>>
>>
>> No. There is no need for time ordering. Or, am I missing something?
>>
>> We delay the purge in case the file system changes its mind and wants
>> to reuse it. By delaying, we hold onto the mastery information.  
>> That's it.
>>
> So, should we preserve this? or purge the lockres  when it's on  
> purge list? OR we could just update the last_used and move it to end  
> of the list if it can be purged.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
  2010-06-17 19:28         ` Joel Becker
@ 2010-06-17 23:34           ` Sunil Mushran
  0 siblings, 0 replies; 24+ messages in thread
From: Sunil Mushran @ 2010-06-17 23:34 UTC (permalink / raw)
  To: ocfs2-devel

On 06/17/2010 12:28 PM, Joel Becker wrote:
>> No. There is no need for time ordering. Or, am I missing something?
>>
>> We delay the purge in case the file system changes its mind and wants
>> to reuse it. By delaying, we hold onto the mastery information. That's it.
>>      
> 	The comment is right there:
>
>                      /* Since resources are added to the purge list
>                       * in tail order, we can stop at the first
>                       * unpurgable resource -- anyone added after
>                       * him will have a greater last_used value */
>                      break;
>
> So we're going to break out of this loop as soon as we see a later time.
> Anything you've moved to the back will be ignored until enough time
> passes that the things in front of it are candidates for purge.
> 	You could, of course, just change the 'break' to a 'continue'
> and find those, but I kind of like the short-circuit behavior.  I don't
> know how long this list is, but walking a whole bunch of "not yet"
> lockreses just to hopefully find one at the end seems silly.
> 	I think it is better to leave the busy ones at the front of the
> list and just walk past them if they can't be dealt with now.
>    

That check can remain. The fact that a lockres in the purgelist was
seen to be in use means that another thread got to it before the
dlm_thread, and that that thread is about to remove that lockres from
the purgelist shortly. As in, as soon as the two spinlocks are relinquished.
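
That is, the other user ends up doing roughly this once it holds both
locks (a condensed sketch of the removal side of __dlm_lockres_calc_usage(),
not new code in the patch):

	assert_spin_locked(&dlm->spinlock);
	assert_spin_locked(&res->spinlock);

	if (!__dlm_lockres_unused(res) && !list_empty(&res->purge)) {
		/* lockres is busy again: take it back off the purge list */
		list_del_init(&res->purge);
		dlm_lockres_put(res);
		dlm->purge_count--;
	}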

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [Ocfs2-devel] [PATCH 2/2] ocfs2: o2dlm fix race in purge lockres and newlock (orabug 9094491)
  2010-06-16  4:43 ` [Ocfs2-devel] [PATCH 2/2] ocfs2: o2dlm fix race in purge lockres and newlock (orabug 9094491) Srinivas Eeda
@ 2010-06-18  2:11   ` Sunil Mushran
  2010-06-18 16:32     ` Srinivas Eeda
  0 siblings, 1 reply; 24+ messages in thread
From: Sunil Mushran @ 2010-06-18  2:11 UTC (permalink / raw)
  To: ocfs2-devel

This patch looks ok on the surface. It should be usable. But checking it into
the repo is another matter.

My problem is that this flag is very much like inflight_locks but
is not the same. inflight_locks are taken only on lockreses that are
mastered by the node. This new flag does the same but for non-mastered
locks too. A better solution would be to merge the two.
And that means cleaning up inflight_locks.

The reason for the NAK for check-in is that the code is adding
more messiness to an area that is already very messy.
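
One possible shape of that merge, purely as a sketch (these helper names
are made up and none of this exists today), would be to take an
inflight-style reference on every successful lookup, master or not:

/* hypothetical helpers -- not in the tree */
static void dlm_lockres_grab_usage_ref(struct dlm_ctxt *dlm,
				       struct dlm_lock_resource *res)
{
	assert_spin_locked(&res->spinlock);
	res->inflight_locks++;		/* counts any active user */
}

static void dlm_lockres_drop_usage_ref(struct dlm_ctxt *dlm,
				       struct dlm_lock_resource *res)
{
	assert_spin_locked(&res->spinlock);
	BUG_ON(!res->inflight_locks);
	res->inflight_locks--;
	/* caller recalculates usage afterwards so the lockres can be
	 * requeued on the purge list once it is truly idle */
}

Then __dlm_lockres_unused() would only have to look at inflight_locks,
and the lock paths would drop the reference in the same places the patch
currently clears DLM_LOCK_RES_IN_USE.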

Sunil

On 06/15/2010 09:43 PM, Srinivas Eeda wrote:
> This patch fixes the following hole.
> dlmlock tries to create a new lock on a lockres that is on the purge list. It calls
> dlm_get_lock_resource and later adds a lock to the blocked list. But in this window,
> dlm_thread can purge the lockres and unhash it. This will cause a BUG, because when
> the AST comes back from the master the lockres is not found.
>
> This patch marks the lockres with a new state, DLM_LOCK_RES_IN_USE, which
> protects the lockres from dlm_thread purging it.
>
> Signed-off-by: Srinivas Eeda<srinivas.eeda@oracle.com>
> ---
>   fs/ocfs2/dlm/dlmcommon.h |    1 +
>   fs/ocfs2/dlm/dlmlock.c   |    4 ++++
>   fs/ocfs2/dlm/dlmmaster.c |    5 ++++-
>   fs/ocfs2/dlm/dlmthread.c |    1 +
>   4 files changed, 10 insertions(+), 1 deletions(-)
>
> diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
> index 0102be3..6cf3c08 100644
> --- a/fs/ocfs2/dlm/dlmcommon.h
> +++ b/fs/ocfs2/dlm/dlmcommon.h
> @@ -279,6 +279,7 @@ static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm,
>   #define DLM_LOCK_RES_DIRTY                0x00000008
>   #define DLM_LOCK_RES_IN_PROGRESS          0x00000010
>   #define DLM_LOCK_RES_MIGRATING            0x00000020
> +#define DLM_LOCK_RES_IN_USE               0x00000100
>   #define DLM_LOCK_RES_DROPPING_REF         0x00000040
>   #define DLM_LOCK_RES_BLOCK_DIRTY          0x00001000
>   #define DLM_LOCK_RES_SETREF_INPROG        0x00002000
> diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
> index 7333377..501ac40 100644
> --- a/fs/ocfs2/dlm/dlmlock.c
> +++ b/fs/ocfs2/dlm/dlmlock.c
> @@ -134,6 +134,8 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
>   	if (status != DLM_NORMAL &&
>   	lock->ml.node != dlm->node_num) {
>   		/* erf.  state changed after lock was dropped. */
> +		/* DLM_LOCK_RES_IN_USE is set in dlm_get_lock_resource */
> +		res->state &= ~DLM_LOCK_RES_IN_USE;
>   		spin_unlock(&res->spinlock);
>   		dlm_error(status);
>   		return status;
> @@ -180,6 +182,7 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
>   			kick_thread = 1;
>   		}
>   	}
> +	res->state &= ~DLM_LOCK_RES_IN_USE;
>   	/* reduce the inflight count, this may result in the lockres
>   	 * being purged below during calc_usage */
>   	if (lock->ml.node == dlm->node_num)
> @@ -246,6 +249,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
>
>   	spin_lock(&res->spinlock);
>   	res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
> +	res->state &= ~DLM_LOCK_RES_IN_USE;
>   	lock->lock_pending = 0;
>   	if (status != DLM_NORMAL) {
>   		if (status == DLM_RECOVERING &&
> diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
> index 9289b43..f0f2d97 100644
> --- a/fs/ocfs2/dlm/dlmmaster.c
> +++ b/fs/ocfs2/dlm/dlmmaster.c
> @@ -719,6 +719,7 @@ lookup:
>   	if (tmpres) {
>   		int dropping_ref = 0;
>
> +		tmpres->state |= DLM_LOCK_RES_IN_USE;
>   		spin_unlock(&dlm->spinlock);
>
>   		spin_lock(&tmpres->spinlock);
> @@ -731,8 +732,10 @@ lookup:
>   		if (tmpres->owner == dlm->node_num) {
>   			BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF);
>   			dlm_lockres_grab_inflight_ref(dlm, tmpres);
> -		} else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF)
> +		} else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) {
> +			tmpres->state &= ~DLM_LOCK_RES_IN_USE;
>   			dropping_ref = 1;
> +		}
>   		spin_unlock(&tmpres->spinlock);
>
>   		/* wait until done messaging the master, drop our ref to allow
> diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
> index fb0be6c..2b82e0b 100644
> --- a/fs/ocfs2/dlm/dlmthread.c
> +++ b/fs/ocfs2/dlm/dlmthread.c
> @@ -93,6 +93,7 @@ int __dlm_lockres_has_locks(struct dlm_lock_resource *res)
>   int __dlm_lockres_unused(struct dlm_lock_resource *res)
>   {
>   	if (!__dlm_lockres_has_locks(res) &&
> +	    !(res->state & DLM_LOCK_RES_IN_USE) &&
>   	    (list_empty(&res->dirty) && !(res->state & DLM_LOCK_RES_DIRTY))) {
>   		/* try not to scan the bitmap unless the first two
>   		 * conditions are already true */
>    

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
  2010-06-17 15:06   ` Sunil Mushran
  2010-06-17 16:56     ` Srinivas Eeda
@ 2010-06-18  2:37     ` Wengang Wang
  2010-06-18 16:37       ` Sunil Mushran
  1 sibling, 1 reply; 24+ messages in thread
From: Wengang Wang @ 2010-06-18  2:37 UTC (permalink / raw)
  To: ocfs2-devel

On 10-06-17 08:06, Sunil Mushran wrote:
> On 06/15/2010 11:06 PM, Wengang Wang wrote:
> >still the question.
> >If you have sent DEREF request to the master, and the lockres became in-use
> >again, then the lockres remains in the hash table and also in the purge list.
> >So
> >1) If this node is the last ref, there is a possibility that the master
> >purged the lockres after receiving DEREF request from this node. In this
> >case, when this node does dlmlock_remote(), the lockres won't be found on the
> >master. How to deal with it?
> >
> >2) The lockres on this node is going to be purged again, it means it will send
> >secondary DEREFs to the master. This is not good I think.
> >
> >A thought is setting lockres->owner to DLM_LOCK_RES_OWNER_UNKNOWN after
> >sending a DEREF request against this lockres. Also redo the master request
> >before locking on it.
> 
> The fix we are working towards is to ensure that we set
> DLM_LOCK_RES_DROPPING_REF once we are determined
> to purge the lockres. As in, we should not let go of the spinlock
> before we have either set the flag or decided against purging
> that resource.
> 
> Once the flag is set, new users looking up the resource via
> dlm_get_lock_resource() will notice the flag and will then wait
> for that flag to be cleared before looking up the lockres hash
> again. If all goes well, the lockres will not be found (because it
> has since been unhashed) and it will be forced to go thru the
> full mastery process.

That is ideal.
In many cases the lockres is not obtained via dlm_get_lock_resource(), but
via dlm_lookup_lockres()/__dlm_lookup_lockres(), which doesn't set the new
IN_USE state directly. dlm_lookup_lockres() takes and drops
dlm->spinlock, and some callers of __dlm_lookup_lockres() drop the
spinlock as soon as they get the lockres. Such paths access the lockres
later, after dropping dlm->spinlock and res->spinlock.
So there is a window in which dlm_thread() gets a chance to take
dlm->spinlock and res->spinlock and set the DROPPING_REF state.
So whether new users can get the lockres depends on how "new" it is. If
a caller finds the lockres after the DROPPING_REF state is set, it works
well. But if it finds it before DROPPING_REF is set, that won't protect
the lockres from purging, since even if it "gets" the lockres, the lockres
can still be in an unused state.
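
For example, the kind of path I have in mind looks like this (purely
schematic, not a particular call site):

static void lookup_path_example(struct dlm_ctxt *dlm,
				const char *name, unsigned int namelen)
{
	struct dlm_lock_resource *res;

	/* dlm_lookup_lockres() takes and drops dlm->spinlock itself */
	res = dlm_lookup_lockres(dlm, name, namelen);
	if (!res)
		return;

	/*
	 * Window: res may still look "unused" here, so dlm_thread can
	 * take dlm->spinlock + res->spinlock, set DROPPING_REF and start
	 * purging before this path makes the lockres busy.
	 */

	spin_lock(&res->spinlock);
	/* ... use res; too late if DROPPING_REF was set in the window ... */
	spin_unlock(&res->spinlock);

	dlm_lockres_put(res);
}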

regards,
wengang.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [Ocfs2-devel] [PATCH 2/2] ocfs2: o2dlm fix race in purge lockres and newlock (orabug 9094491)
  2010-06-18  2:11   ` Sunil Mushran
@ 2010-06-18 16:32     ` Srinivas Eeda
  0 siblings, 0 replies; 24+ messages in thread
From: Srinivas Eeda @ 2010-06-18 16:32 UTC (permalink / raw)
  To: ocfs2-devel

On 6/17/2010 7:11 PM, Sunil Mushran wrote:
> This patch looks ok on the surface. Should be usable. But checking into
> the repo is another matter.
ok, I won't check it in, but will give this as a one-off fix for now.
>
> My problem is that this flag is very much like inflight_locks but
> is not the same. inflight_locks are taken only on lockres' that are
> mastered by the node. This new flag does the same but for non
> mastered locks too. A better solution will be to merge the two.
> And that means cleaning up inflight_locks.
will look into this.
>
> The reason for the NAK for check in is that the code is adding
> more messiness to an area that is already very messy.
>
> Sunil
>
> On 06/15/2010 09:43 PM, Srinivas Eeda wrote:
>> This patch fixes the following hole.
>> dlmlock tries to create a new lock on a lockres that is on purge 
>> list. It calls
>> dlm_get_lockresource and later adds a lock to blocked list. But in 
>> this window,
>> dlm_thread can purge the lockres and unhash it. This will cause a 
>> BUG, as when
>> the AST comes back from the master lockres is not found
>>
>> This patch marks the lockres with a new state DLM_LOCK_RES_IN_USE 
>> which would
>> protect lockres from dlm_thread purging it.
>>
>> Signed-off-by: Srinivas Eeda<srinivas.eeda@oracle.com>
>> ---
>>   fs/ocfs2/dlm/dlmcommon.h |    1 +
>>   fs/ocfs2/dlm/dlmlock.c   |    4 ++++
>>   fs/ocfs2/dlm/dlmmaster.c |    5 ++++-
>>   fs/ocfs2/dlm/dlmthread.c |    1 +
>>   4 files changed, 10 insertions(+), 1 deletions(-)
>>
>> diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
>> index 0102be3..6cf3c08 100644
>> --- a/fs/ocfs2/dlm/dlmcommon.h
>> +++ b/fs/ocfs2/dlm/dlmcommon.h
>> @@ -279,6 +279,7 @@ static inline void __dlm_set_joining_node(struct 
>> dlm_ctxt *dlm,
>>   #define DLM_LOCK_RES_DIRTY                0x00000008
>>   #define DLM_LOCK_RES_IN_PROGRESS          0x00000010
>>   #define DLM_LOCK_RES_MIGRATING            0x00000020
>> +#define DLM_LOCK_RES_IN_USE               0x00000100
>>   #define DLM_LOCK_RES_DROPPING_REF         0x00000040
>>   #define DLM_LOCK_RES_BLOCK_DIRTY          0x00001000
>>   #define DLM_LOCK_RES_SETREF_INPROG        0x00002000
>> diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
>> index 7333377..501ac40 100644
>> --- a/fs/ocfs2/dlm/dlmlock.c
>> +++ b/fs/ocfs2/dlm/dlmlock.c
>> @@ -134,6 +134,8 @@ static enum dlm_status dlmlock_master(struct 
>> dlm_ctxt *dlm,
>>       if (status != DLM_NORMAL&&
>>       lock->ml.node != dlm->node_num) {
>>           /* erf.  state changed after lock was dropped. */
>> +        /* DLM_LOCK_RES_IN_USE is set in dlm_get_lock_resource */
>> +        res->state&= ~DLM_LOCK_RES_IN_USE;
>>           spin_unlock(&res->spinlock);
>>           dlm_error(status);
>>           return status;
>> @@ -180,6 +182,7 @@ static enum dlm_status dlmlock_master(struct 
>> dlm_ctxt *dlm,
>>               kick_thread = 1;
>>           }
>>       }
>> +    res->state&= ~DLM_LOCK_RES_IN_USE;
>>       /* reduce the inflight count, this may result in the lockres
>>        * being purged below during calc_usage */
>>       if (lock->ml.node == dlm->node_num)
>> @@ -246,6 +249,7 @@ static enum dlm_status dlmlock_remote(struct 
>> dlm_ctxt *dlm,
>>
>>       spin_lock(&res->spinlock);
>>       res->state&= ~DLM_LOCK_RES_IN_PROGRESS;
>> +    res->state&= ~DLM_LOCK_RES_IN_USE;
>>       lock->lock_pending = 0;
>>       if (status != DLM_NORMAL) {
>>           if (status == DLM_RECOVERING&&
>> diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
>> index 9289b43..f0f2d97 100644
>> --- a/fs/ocfs2/dlm/dlmmaster.c
>> +++ b/fs/ocfs2/dlm/dlmmaster.c
>> @@ -719,6 +719,7 @@ lookup:
>>       if (tmpres) {
>>           int dropping_ref = 0;
>>
>> +        tmpres->state |= DLM_LOCK_RES_IN_USE;
>>           spin_unlock(&dlm->spinlock);
>>
>>           spin_lock(&tmpres->spinlock);
>> @@ -731,8 +732,10 @@ lookup:
>>           if (tmpres->owner == dlm->node_num) {
>>               BUG_ON(tmpres->state&  DLM_LOCK_RES_DROPPING_REF);
>>               dlm_lockres_grab_inflight_ref(dlm, tmpres);
>> -        } else if (tmpres->state&  DLM_LOCK_RES_DROPPING_REF)
>> +        } else if (tmpres->state&  DLM_LOCK_RES_DROPPING_REF) {
>> +            tmpres->state&= ~DLM_LOCK_RES_IN_USE;
>>               dropping_ref = 1;
>> +        }
>>           spin_unlock(&tmpres->spinlock);
>>
>>           /* wait until done messaging the master, drop our ref to allow
>> diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
>> index fb0be6c..2b82e0b 100644
>> --- a/fs/ocfs2/dlm/dlmthread.c
>> +++ b/fs/ocfs2/dlm/dlmthread.c
>> @@ -93,6 +93,7 @@ int __dlm_lockres_has_locks(struct 
>> dlm_lock_resource *res)
>>   int __dlm_lockres_unused(struct dlm_lock_resource *res)
>>   {
>>       if (!__dlm_lockres_has_locks(res)&&
>> +        !(res->state&  DLM_LOCK_RES_IN_USE)&&
>>       (list_empty(&res->dirty)&&  !(res->state&  DLM_LOCK_RES_DIRTY))) {
>>           /* try not to scan the bitmap unless the first two
>>            * conditions are already true */
>>    
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
  2010-06-18  2:37     ` Wengang Wang
@ 2010-06-18 16:37       ` Sunil Mushran
  2010-06-21  1:40         ` Wengang Wang
  0 siblings, 1 reply; 24+ messages in thread
From: Sunil Mushran @ 2010-06-18 16:37 UTC (permalink / raw)
  To: ocfs2-devel

On 06/17/2010 07:37 PM, Wengang Wang wrote:
> On 10-06-17 08:06, Sunil Mushran wrote:
>    
>> On 06/15/2010 11:06 PM, Wengang Wang wrote:
>>      
>>> still the question.
>>> If you have sent DEREF request to the master, and the lockres became in-use
>>> again, then the lockres remains in the hash table and also in the purge list.
>>> So
>>> 1) If this node is the last ref, there is a possibility that the master
>>> purged the lockres after receiving DEREF request from this node. In this
>>> case, when this node does dlmlock_remote(), the lockres won't be found on the
>>> master. How to deal with it?
>>>
>>> 2) The lockres on this node is going to be purged again, it means it will send
>>> secondary DEREFs to the master. This is not good I think.
>>>
>>> A thought is setting lockres->owner to DLM_LOCK_RES_OWNER_UNKNOWN after
>>> sending a DEREF request against this lockres. Also redo the master request
>>> before locking on it.
>>>        
>> The fix we are working towards is to ensure that we set
>> DLM_LOCK_RES_DROPPING_REF once we are determined
>> to purge the lockres. As in, we should not let go of the spinlock
>> before we have either set the flag or decided against purging
>> that resource.
>>
>> Once the flag is set, new users looking up the resource via
>> dlm_get_lock_resource() will notice the flag and will then wait
>> for that flag to be cleared before looking up the lockres hash
>> again. If all goes well, the lockres will not be found (because it
>> has since been unhashed) and it will be forced to go thru the
>> full mastery process.
>>      
> That is ideal.
> In many cases the lockres is not got via dlm_get_lock_resource(), but
> via dlm_lookup_lockres()/__dlm_lookup_lockres, which doesn't set the new
> IN-USE state, directly. dlm_lookup_lockres() takes and drops
> dlm->spinlock. And some of caller of __dlm_lookup_lockres() drops the
> spinlock as soon as it got the lockres. Such paths access the lockres
> later after dropping dlm->spinlock and res->spinlock.
> So there is a window that dlm_thread() get a chance to take the
> dlm->spinlock and res->spinlock and set the DROPPING_REF state.
> So whether new users can get the lockres depends on how "new" it is. If
> finds the lockres after DROPPING_REF state is set, sure it works well. But
> if it find it before DROPPING_REF is set, it won't protect the lockres
> from purging since even it "gets" the lockres, the lockres can still in
> unused state.
>    

dlm_lookup_lockres() and friends just look up the lockres hash.
dlm_get_lock_resource() also calls it. It in turn is called by dlmlock()
to find and/or create the lockres and create a lock on that resource.

The other calls to dlm_lookup_lockres() are from handlers, and those
handlers can only be tickled if a lock already exists. And if a lock
exists, then we cannot be purging the lockres.

The one exception is the create_lock handler and that only comes
into play on the lockres master. The inflight ref blocks removal of
such lockres in the window before the lock is created.
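
Condensed, that window protection looks something like this (a sketch
pieced together from dlm_get_lock_resource() and dlmlock_master(), not
literal code):

	/* on the master, at lookup time, under res->spinlock: */
	spin_lock(&tmpres->spinlock);
	if (tmpres->owner == dlm->node_num) {
		BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF);
		/* pins the lockres until the lock is actually attached */
		dlm_lockres_grab_inflight_ref(dlm, tmpres);
	}
	spin_unlock(&tmpres->spinlock);

	/* ... later, in dlmlock_master(), once the lock is on a list: ... */
	if (lock->ml.node == dlm->node_num)
		dlm_lockres_drop_inflight_ref(dlm, res);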

DROPPING_REF is only valid for non-master nodes. As in, only
a non-master node has to send a deref message to the master node.

Confused? Well, I think this needs to be documented. I guess I will
do that after I am done with the global heartbeat business.

Sunil

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
  2010-06-18 16:37       ` Sunil Mushran
@ 2010-06-21  1:40         ` Wengang Wang
  0 siblings, 0 replies; 24+ messages in thread
From: Wengang Wang @ 2010-06-21  1:40 UTC (permalink / raw)
  To: ocfs2-devel

On 10-06-18 09:37, Sunil Mushran wrote:
> On 06/17/2010 07:37 PM, Wengang Wang wrote:
> >On 10-06-17 08:06, Sunil Mushran wrote:
> >>On 06/15/2010 11:06 PM, Wengang Wang wrote:
> >>>still the question.
> >>>If you have sent DEREF request to the master, and the lockres became in-use
> >>>again, then the lockres remains in the hash table and also in the purge list.
> >>>So
> >>>1) If this node is the last ref, there is a possibility that the master
> >>>purged the lockres after receiving DEREF request from this node. In this
> >>>case, when this node does dlmlock_remote(), the lockres won't be found on the
> >>>master. How to deal with it?
> >>>
> >>>2) The lockres on this node is going to be purged again, it means it will send
> >>>secondary DEREFs to the master. This is not good I think.
> >>>
> >>>A thought is setting lockres->owner to DLM_LOCK_RES_OWNER_UNKNOWN after
> >>>sending a DEREF request against this lockres. Also redo the master request
> >>>before locking on it.
> >>The fix we are working towards is to ensure that we set
> >>DLM_LOCK_RES_DROPPING_REF once we are determined
> >>to purge the lockres. As in, we should not let go of the spinlock
> >>before we have either set the flag or decided against purging
> >>that resource.
> >>
> >>Once the flag is set, new users looking up the resource via
> >>dlm_get_lock_resource() will notice the flag and will then wait
> >>for that flag to be cleared before looking up the lockres hash
> >>again. If all goes well, the lockres will not be found (because it
> >>has since been unhashed) and it will be forced to go thru the
> >>full mastery process.
> >That is ideal.
> >In many cases the lockres is not got via dlm_get_lock_resource(), but
> >via dlm_lookup_lockres()/__dlm_lookup_lockres, which doesn't set the new
> >IN-USE state, directly. dlm_lookup_lockres() takes and drops
> >dlm->spinlock. And some of caller of __dlm_lookup_lockres() drops the
> >spinlock as soon as it got the lockres. Such paths access the lockres
> >later after dropping dlm->spinlock and res->spinlock.
> >So there is a window that dlm_thread() get a chance to take the
> >dlm->spinlock and res->spinlock and set the DROPPING_REF state.
> >So whether new users can get the lockres depends on how "new" it is. If
> >finds the lockres after DROPPING_REF state is set, sure it works well. But
> >if it find it before DROPPING_REF is set, it won't protect the lockres
> >from purging since even it "gets" the lockres, the lockres can still in
> >unused state.
> 
> dlm_lookup_lockres() and friends just look up the lockres hash.
> dlm_get_lock_resource() also calls it. It in turn is called by dlmlock()
> to find and/or create lockres and create a lock on that resource.

Yes you are right.
> The other calls to dlm_lookup_lockres() are from handlers and those
> handlers can only be tickled if a lock already exists. And if a lock
> exists, then we cannot be purging the lockres.
> 
> The one exception is the create_lock handler and that only comes
> into play on the lockres master. The inflight ref blocks removal of
> such lockres in the window before the lock is created.

I think there is another exception, dlm_mig_lockres_handler(). Could you check it
in my email (in this thread, to Srini, Message-ID:
<20100617110548.GA3178@laptop.us.oracle.com>)?

> DROPPING_REF is only valid for non-master nodes. As in, only
> a non-master node has to send a deref message to the master node.

Yes I know.
> Confused? Well, I think this needs to be documented. I guess I will
> do that after I am done with the global heartbeat business.

No, I am clear. Well, a document will be greatly helpful!

regards,
wengang.

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2010-06-21  1:40 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-06-16  4:43 [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist Srinivas Eeda
2010-06-16  4:43 ` [Ocfs2-devel] [PATCH 2/2] ocfs2: o2dlm fix race in purge lockres and newlock (orabug 9094491) Srinivas Eeda
2010-06-18  2:11   ` Sunil Mushran
2010-06-18 16:32     ` Srinivas Eeda
2010-06-16  6:06 ` [Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist Wengang Wang
2010-06-17  8:53   ` Srinivas Eeda
2010-06-17 11:05     ` Wengang Wang
2010-06-17 15:06   ` Sunil Mushran
2010-06-17 16:56     ` Srinivas Eeda
2010-06-18  2:37     ` Wengang Wang
2010-06-18 16:37       ` Sunil Mushran
2010-06-21  1:40         ` Wengang Wang
2010-06-17  1:39 ` Joel Becker
2010-06-17  8:32   ` Srinivas Eeda
2010-06-17  9:08     ` Joel Becker
2010-06-17  1:44 ` Sunil Mushran
2010-06-17  6:05   ` Wengang Wang
2010-06-17  8:32   ` Joel Becker
2010-06-17  8:35     ` Srinivas Eeda
2010-06-17 14:48       ` Sunil Mushran
2010-06-17 16:55         ` Srinivas Eeda
2010-06-17 19:31           ` Sunil Mushran
2010-06-17 19:28         ` Joel Becker
2010-06-17 23:34           ` Sunil Mushran
