All of lore.kernel.org
 help / color / mirror / Atom feed
* [Ocfs2-devel] [PATCH 3/3] ocfs2:freeze-thaw: make it work
@ 2010-01-09 18:00 Wengang Wang
  2010-01-16  2:22 ` Sunil Mushran
  0 siblings, 1 reply; 15+ messages in thread
From: Wengang Wang @ 2010-01-09 18:00 UTC (permalink / raw)
  To: ocfs2-devel

adds freeze_fs()/unfreeze_fs() for ocfs2 so that it supports freeze/thaw.

Authored-by: Tiger Yang <tiger.yang@oracle.com>
Authored-by: Wengang Wang <wen.gang.wang@oracle.com>
Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
---
 fs/ocfs2/dlmglue.c |  108 +++++++++++++++++++++++++++++++++++++++++++++++-
 fs/ocfs2/dlmglue.h |    1 +
 fs/ocfs2/journal.c |    1 +
 fs/ocfs2/ocfs2.h   |    5 ++
 fs/ocfs2/super.c   |  117 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 231 insertions(+), 1 deletions(-)

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index cf88af2..b1b8690 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3928,10 +3928,116 @@ void ocfs2_set_locking_protocol(void)
 	ocfs2_stack_glue_set_locking_protocol(&lproto);
 }
 
+/*
+ * This is only ever run on behalf of another node.
+ */
+void ocfs2_freeze_worker(struct work_struct *work)
+{
+	struct super_block *sb;
+	int err = 0, err2;
+	struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
+					       osb_freeze_work);
+
+	mlog(0, "flags=0x%lx, frozen=%d\n", osb->osb_flags, osb->sb->s_frozen);
+
+	sb = freeze_bdev(osb->sb->s_bdev);
+	if (IS_ERR(sb)) {
+		/* the error is returned by ocfs2_freeze_fs() */
+		err = PTR_ERR(sb);
+		if (err == -ENOTSUPP) {
+			/* we got freeze request but we don't support it */
+			BUG();
+		} else {
+			/* we don't issue cluster lock in ocfs2_freeze_fs() for
+			 * a freeze request(from other node). no other error
+			 * should be returned.
+			 */
+			BUG();
+		}
+	}
+
+	osb->frozen_by_master = 1;
+	spin_lock(&osb->osb_lock);
+	osb->osb_flags &= ~OCFS2_OSB_FREEZE_INPROG;
+	spin_unlock(&osb->osb_lock);
+
+	/* kick downconvert thread */
+	ocfs2_wake_downconvert_thread(osb);
+
+	/* waiting for THAW */
+	err = ocfs2_freeze_lock(osb, 0);
+	if (err)
+		mlog(ML_ERROR, "getting PR on freeze_lock failed,"
+		     "but going to thaw block device %s\n",  osb->dev_str);
+
+	err2 = thaw_bdev(osb->sb->s_bdev, osb->sb);
+	if (err2) {
+		if (err2 == -ENOTSUPP) {
+			/* we got freeze request but we don't support it */
+			BUG();
+		}
+		printk(KERN_WARNING "ocfs2: Thawing %s failed\n", osb->dev_str);
+	}
+
+	osb->frozen_by_master = 0;
+
+	if (!err)
+		ocfs2_freeze_unlock(osb, 0);
+}
+	
+static void ocfs2_queue_freeze_worker(struct ocfs2_super *osb)
+{
+	spin_lock(&osb->osb_lock);
+	if (!(osb->osb_flags & OCFS2_OSB_FREEZE_INPROG)) {
+		osb->osb_flags |= OCFS2_OSB_FREEZE_INPROG;
+		queue_work(ocfs2_wq, &osb->osb_freeze_work);
+	}
+	spin_unlock(&osb->osb_lock);
+}
+
 static int ocfs2_check_freeze_downconvert(struct ocfs2_lock_res *lockres,
 					  int new_level)
 {
-	return 1; //change me
+	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
+	struct super_block *sb = osb->sb;
+
+	mlog(0, "flags=0x%lx, frozen=%d, level=%d, newlevel=%d\n",
+	     osb->osb_flags, sb->s_frozen, lockres->l_level, new_level);
+
+	if (new_level == LKM_PRMODE) {
+		/* other node is during mount or is waiting for THAW */
+		if (sb->s_frozen) {
+			/* this node didn't thaw yet, try later */
+			return 0;
+		} else {
+			/* this node has unlocked freeze_locky, it's ok to
+			 * downconvert it.
+			 */
+			return 1;
+		}
+	}
+
+	/* now new_level is NL. other node wants to freeze cluster.
+	 * the request shouldn't come when this node is still holding freeze_lock
+	 */
+	BUG_ON(lockres->l_ex_holders);
+
+	/* now this node is holding PR or is caching PR */
+	if (lockres->l_ro_holders) {
+		/* this node is during mount or is thawing, try later */
+		return 0;
+	}
+
+	/* now this node is caching PR, it's Ok to freeze for the request */
+	if (sb->s_frozen &&
+	    !(osb->osb_flags & OCFS2_OSB_FREEZE_INPROG)) {
+		/* ok, this node is frozen for the request */
+		return 1;
+	}
+
+	/* queue a work to do freeze for the request */
+	ocfs2_queue_freeze_worker(osb);
+	return 0;
 }
 
 static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 8687b81..96e0dae 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -168,6 +168,7 @@ void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb);
 struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void);
 void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug);
 
+void ocfs2_freeze_worker(struct work_struct *work);
 /* To set the locking protocol on module initialization */
 void ocfs2_set_locking_protocol(void);
 #endif	/* DLMGLUE_H */
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index bf34c49..45c5bfe 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -355,6 +355,7 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
 	if (ocfs2_is_hard_readonly(osb))
 		return ERR_PTR(-EROFS);
 
+	vfs_check_frozen(osb->sb, SB_FREEZE_TRANS);
 	BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
 	BUG_ON(max_buffs <= 0);
 
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index bf66978..62e1819 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -254,6 +254,7 @@ enum ocfs2_mount_options
 #define OCFS2_OSB_HARD_RO			0x0002
 #define OCFS2_OSB_ERROR_FS			0x0004
 #define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED	0x0008
+#define OCFS2_OSB_FREEZE_INPROG			0x0010
 
 #define OCFS2_DEFAULT_ATIME_QUANTUM		60
 
@@ -356,6 +357,8 @@ struct ocfs2_super
 	struct ocfs2_lock_res osb_rename_lockres;
 	struct ocfs2_lock_res osb_nfs_sync_lockres;
 	struct ocfs2_lock_res osb_freeze_lockres;
+	int frozen_by_master;		/* a flag. 1 if FS is frozen on behalf
+					 * of another node */
 	struct ocfs2_dlm_debug *osb_dlm_debug;
 
 	struct dentry *osb_debug_root;
@@ -394,6 +397,8 @@ struct ocfs2_super
 	unsigned int			*osb_orphan_wipes;
 	wait_queue_head_t		osb_wipe_event;
 
+	/* osb_freeze_work is protected by osb->s_bdev->bd_fsfreeze_mutex */
+	struct work_struct		osb_freeze_work;
 	struct ocfs2_orphan_scan	osb_orphan_scan;
 
 	/* used to protect metaecc calculation check of xattr. */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 9127760..4493163 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -135,6 +135,10 @@ static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend);
 static int ocfs2_enable_quotas(struct ocfs2_super *osb);
 static void ocfs2_disable_quotas(struct ocfs2_super *osb);
 static int ocfs2_freeze_lock_supported(struct ocfs2_super *osb);
+static int is_kernel_thread(void);
+static int ocfs2_freeze_fs(struct super_block *sb);
+static int is_freeze_master(struct ocfs2_super *osb);
+static int ocfs2_unfreeze_fs(struct super_block *sb);
 
 static const struct super_operations ocfs2_sops = {
 	.statfs		= ocfs2_statfs,
@@ -149,6 +153,8 @@ static const struct super_operations ocfs2_sops = {
 	.show_options   = ocfs2_show_options,
 	.quota_read	= ocfs2_quota_read,
 	.quota_write	= ocfs2_quota_write,
+	.freeze_fs	= ocfs2_freeze_fs,
+	.unfreeze_fs	= ocfs2_unfreeze_fs,
 };
 
 enum {
@@ -2167,6 +2173,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	INIT_WORK(&osb->dentry_lock_work, ocfs2_drop_dl_inodes);
 	osb->dentry_lock_list = NULL;
 
+	INIT_WORK(&osb->osb_freeze_work, ocfs2_freeze_worker);
+
 	/* get some pseudo constants for clustersize bits */
 	osb->s_clustersize_bits =
 		le32_to_cpu(di->id2.i_super.s_clustersize_bits);
@@ -2525,5 +2533,114 @@ void __ocfs2_abort(struct super_block* sb,
 	ocfs2_handle_error(sb);
 }
 
+static int is_kernel_thread()
+{
+	if (current->flags & PF_KTHREAD)
+		return 1;
+	return 0;
+}
+
+/* ocfs2_freeze_fs()/ocfs2_unfreeze_fs() are always called by freeze_bdev()/
+ * thaw_bdev(). bdev->bd_fsfreeze_mutex is used for synchronization. an extra
+ * ocfs2 mutex is not needed.
+ */
+static int ocfs2_freeze_fs(struct super_block *sb)
+{
+	int ret = 0;
+	struct ocfs2_super *osb = OCFS2_SB(sb);
+
+	mlog(0, "flags=0x%lx, frozen=%d\n", osb->osb_flags, osb->sb->s_frozen);
+
+	if (!ocfs2_freeze_lock_supported(osb)) {
+		return -ENOTSUPP;
+	}
+
+	printk(KERN_INFO "ocfs2: Request to freeze block device (%s)\n",
+	       osb->dev_str);
+
+	/* cluster lock is issued only when this is the IOCTL process.(other
+	 * case ocfs2_freeze_fs() is called in ocfs2_wq thread)
+	 */
+
+	if (!is_kernel_thread()) {
+		/* this is ioctl thread, issues cluster lock */
+		ret = ocfs2_freeze_lock(osb, 1);
+		if (ret) {
+			mlog_errno(ret);
+		} else {
+			printk(KERN_INFO "ocfs2: Block device (%s) frozen by local\n",
+			       osb->dev_str);
+		}
+	} else {
+		/* this is ocfs2_wq kernel thread. we do freeze on behalf of
+		 * the requesting node, don't issue cluster lock again.
+		 */
+		printk(KERN_INFO "ocfs2: Block device (%s) frozen by remote\n",
+		       osb->dev_str);
+	}
+
+	return ret;
+}
+
+static int is_freeze_master(struct ocfs2_super *osb)
+{
+	BUG_ON(osb->osb_freeze_lockres.l_ex_holders > 1);
+
+	if (osb->osb_freeze_lockres.l_ex_holders)
+		return 1;
+
+	return 0;
+}
+
+static int ocfs2_unfreeze_fs(struct super_block *sb)
+{
+	struct ocfs2_super *osb = OCFS2_SB(sb);
+
+	mlog(0, "flags=0x%lx, frozen=%d\n", osb->osb_flags, osb->sb->s_frozen);
+
+	if (!ocfs2_freeze_lock_supported(osb))
+		return -ENOTSUPP;
+
+	if (!is_kernel_thread()) {
+		/* this is the ioctl user thread. */
+		if (!is_freeze_master(osb)) {
+			/* THAW ioctl on a node other than the one on with cluster
+			 * is frozen. don't thaw in the case. returns -EINVAL so
+			 * that osb->sb->s_bdev->bd_fsfreeze_count can be decreased.
+			 */
+
+			if (!osb->frozen_by_master) {
+				/* this is from a nested cross cluster thaw
+				 * case:
+				 * frozen from another node(node A)
+				 * frozen from this node(not suppored though)
+				 * thawed from node A
+				 * thawed from this node(coming here)
+				 *
+				 * thaw this node only.
+				 */
+				printk(KERN_INFO "ocfs2: Block device (%s) thawed"
+				       " by local\n", osb->dev_str);
+				goto bail;
+			}
+
+			/* now the cluster still frozen by another node,
+			 * fails this request
+			 */
+			printk(KERN_INFO "ocfs2: thawing %s on invalid node\n",
+			       osb->dev_str);
+			return -EINVAL;
+		}
+		ocfs2_freeze_unlock(osb, 1);
+		printk(KERN_INFO "ocfs2: Block device (%s) thawed by local\n", osb->dev_str);
+	} else {
+		/* this is ocfs2_wq kernel thread. nothing to do. */
+		printk(KERN_INFO "ocfs2: Block device (%s) thawed by remote\n", osb->dev_str);
+	}
+bail:
+	return 0;
+}
+
+
 module_init(ocfs2_init);
 module_exit(ocfs2_exit);
-- 
1.6.5.2

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Ocfs2-devel] [PATCH 3/3] ocfs2:freeze-thaw: make it work
  2010-01-09 18:00 [Ocfs2-devel] [PATCH 3/3] ocfs2:freeze-thaw: make it work Wengang Wang
@ 2010-01-16  2:22 ` Sunil Mushran
  2010-01-18 13:06   ` Wengang Wang
  0 siblings, 1 reply; 15+ messages in thread
From: Sunil Mushran @ 2010-01-16  2:22 UTC (permalink / raw)
  To: ocfs2-devel

Apart from this, you have to handle edge conditions. Like umount
and mount. What if the freeze is issued just before a node is about
to umount. Or, another node tries to mount the volume while it is
frozen.

Also, recovery. What if a node dies when a node is frozen. Think
how you will handle that.

Sunil

Wengang Wang wrote:
> adds freeze_fs()/unfreeze_fs() for ocfs2 so that it supports freeze/thaw.
>
> Authored-by: Tiger Yang <tiger.yang@oracle.com>
> Authored-by: Wengang Wang <wen.gang.wang@oracle.com>
> Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
> ---
>  fs/ocfs2/dlmglue.c |  108 +++++++++++++++++++++++++++++++++++++++++++++++-
>  fs/ocfs2/dlmglue.h |    1 +
>  fs/ocfs2/journal.c |    1 +
>  fs/ocfs2/ocfs2.h   |    5 ++
>  fs/ocfs2/super.c   |  117 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 231 insertions(+), 1 deletions(-)
>
> diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
> index cf88af2..b1b8690 100644
> --- a/fs/ocfs2/dlmglue.c
> +++ b/fs/ocfs2/dlmglue.c
> @@ -3928,10 +3928,116 @@ void ocfs2_set_locking_protocol(void)
>  	ocfs2_stack_glue_set_locking_protocol(&lproto);
>  }
>  
> +/*
> + * This is only ever run on behalf of another node.
> + */
> +void ocfs2_freeze_worker(struct work_struct *work)
> +{
> +	struct super_block *sb;
> +	int err = 0, err2;
> +	struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
> +					       osb_freeze_work);
> +
> +	mlog(0, "flags=0x%lx, frozen=%d\n", osb->osb_flags, osb->sb->s_frozen);
> +
> +	sb = freeze_bdev(osb->sb->s_bdev);
> +	if (IS_ERR(sb)) {
> +		/* the error is returned by ocfs2_freeze_fs() */
> +		err = PTR_ERR(sb);
> +		if (err == -ENOTSUPP) {
> +			/* we got freeze request but we don't support it */
> +			BUG();
> +		} else {
> +			/* we don't issue cluster lock in ocfs2_freeze_fs() for
> +			 * a freeze request(from other node). no other error
> +			 * should be returned.
> +			 */
> +			BUG();
> +		}
> +	}
>   

Why not one BUG. This needs some more thought. See if
we can gracefully handle these errors.

BUG should be reserved to detect code errors. Or, really weird
errors we don't expect. Like memory corruption. This is a simple
error. We should try to handle this gracefully.

> +
> +	osb->frozen_by_master = 1;
> +	spin_lock(&osb->osb_lock);
> +	osb->osb_flags &= ~OCFS2_OSB_FREEZE_INPROG;
> +	spin_unlock(&osb->osb_lock);
> +
> +	/* kick downconvert thread */
> +	ocfs2_wake_downconvert_thread(osb);
> +
> +	/* waiting for THAW */
> +	err = ocfs2_freeze_lock(osb, 0);
> +	if (err)
> +		mlog(ML_ERROR, "getting PR on freeze_lock failed,"
> +		     "but going to thaw block device %s\n",  osb->dev_str);
> +
> +	err2 = thaw_bdev(osb->sb->s_bdev, osb->sb);
> +	if (err2) {
> +		if (err2 == -ENOTSUPP) {
> +			/* we got freeze request but we don't support it */
> +			BUG();
> +		}
> +		printk(KERN_WARNING "ocfs2: Thawing %s failed\n", osb->dev_str);
> +	}
> +
> +	osb->frozen_by_master = 0;
> +
> +	if (!err)
> +		ocfs2_freeze_unlock(osb, 0);
> +}
> +	
> +static void ocfs2_queue_freeze_worker(struct ocfs2_super *osb)
> +{
> +	spin_lock(&osb->osb_lock);
> +	if (!(osb->osb_flags & OCFS2_OSB_FREEZE_INPROG)) {
> +		osb->osb_flags |= OCFS2_OSB_FREEZE_INPROG;
> +		queue_work(ocfs2_wq, &osb->osb_freeze_work);
> +	}
> +	spin_unlock(&osb->osb_lock);
> +}
> +
>  static int ocfs2_check_freeze_downconvert(struct ocfs2_lock_res *lockres,
>  					  int new_level)
>  {
> -	return 1; //change me
> +	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
> +	struct super_block *sb = osb->sb;
> +
> +	mlog(0, "flags=0x%lx, frozen=%d, level=%d, newlevel=%d\n",
> +	     osb->osb_flags, sb->s_frozen, lockres->l_level, new_level);
> +
> +	if (new_level == LKM_PRMODE) {
> +		/* other node is during mount or is waiting for THAW */
> +		if (sb->s_frozen) {
> +			/* this node didn't thaw yet, try later */
> +			return 0;
> +		} else {
> +			/* this node has unlocked freeze_locky, it's ok to
> +			 * downconvert it.
> +			 */
> +			return 1;
> +		}
> +	}
> +
> +	/* now new_level is NL. other node wants to freeze cluster.
> +	 * the request shouldn't come when this node is still holding freeze_lock
> +	 */
> +	BUG_ON(lockres->l_ex_holders);
> +
> +	/* now this node is holding PR or is caching PR */
> +	if (lockres->l_ro_holders) {
> +		/* this node is during mount or is thawing, try later */
> +		return 0;
> +	}
> +
> +	/* now this node is caching PR, it's Ok to freeze for the request */
> +	if (sb->s_frozen &&
> +	    !(osb->osb_flags & OCFS2_OSB_FREEZE_INPROG)) {
> +		/* ok, this node is frozen for the request */
> +		return 1;
> +	}
> +
> +	/* queue a work to do freeze for the request */
> +	ocfs2_queue_freeze_worker(osb);
> +	return 0;
>  }
>  
>  static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
> diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
> index 8687b81..96e0dae 100644
> --- a/fs/ocfs2/dlmglue.h
> +++ b/fs/ocfs2/dlmglue.h
> @@ -168,6 +168,7 @@ void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb);
>  struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void);
>  void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug);
>  
> +void ocfs2_freeze_worker(struct work_struct *work);
>  /* To set the locking protocol on module initialization */
>  void ocfs2_set_locking_protocol(void);
>  #endif	/* DLMGLUE_H */
> diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
> index bf34c49..45c5bfe 100644
> --- a/fs/ocfs2/journal.c
> +++ b/fs/ocfs2/journal.c
> @@ -355,6 +355,7 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
>  	if (ocfs2_is_hard_readonly(osb))
>  		return ERR_PTR(-EROFS);
>  
> +	vfs_check_frozen(osb->sb, SB_FREEZE_TRANS);
>  	BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
>  	BUG_ON(max_buffs <= 0);
>  
> diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
> index bf66978..62e1819 100644
> --- a/fs/ocfs2/ocfs2.h
> +++ b/fs/ocfs2/ocfs2.h
> @@ -254,6 +254,7 @@ enum ocfs2_mount_options
>  #define OCFS2_OSB_HARD_RO			0x0002
>  #define OCFS2_OSB_ERROR_FS			0x0004
>  #define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED	0x0008
> +#define OCFS2_OSB_FREEZE_INPROG			0x0010
>  
>  #define OCFS2_DEFAULT_ATIME_QUANTUM		60
>  
> @@ -356,6 +357,8 @@ struct ocfs2_super
>  	struct ocfs2_lock_res osb_rename_lockres;
>  	struct ocfs2_lock_res osb_nfs_sync_lockres;
>  	struct ocfs2_lock_res osb_freeze_lockres;
> +	int frozen_by_master;		/* a flag. 1 if FS is frozen on behalf
> +					 * of another node */
>  	struct ocfs2_dlm_debug *osb_dlm_debug;
>  
>  	struct dentry *osb_debug_root;
> @@ -394,6 +397,8 @@ struct ocfs2_super
>  	unsigned int			*osb_orphan_wipes;
>  	wait_queue_head_t		osb_wipe_event;
>  
> +	/* osb_freeze_work is protected by osb->s_bdev->bd_fsfreeze_mutex */
> +	struct work_struct		osb_freeze_work;
>  	struct ocfs2_orphan_scan	osb_orphan_scan;
>  
>  	/* used to protect metaecc calculation check of xattr. */
> diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
> index 9127760..4493163 100644
> --- a/fs/ocfs2/super.c
> +++ b/fs/ocfs2/super.c
> @@ -135,6 +135,10 @@ static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend);
>  static int ocfs2_enable_quotas(struct ocfs2_super *osb);
>  static void ocfs2_disable_quotas(struct ocfs2_super *osb);
>  static int ocfs2_freeze_lock_supported(struct ocfs2_super *osb);
> +static int is_kernel_thread(void);
> +static int ocfs2_freeze_fs(struct super_block *sb);
> +static int is_freeze_master(struct ocfs2_super *osb);
> +static int ocfs2_unfreeze_fs(struct super_block *sb);
>  
>  static const struct super_operations ocfs2_sops = {
>  	.statfs		= ocfs2_statfs,
> @@ -149,6 +153,8 @@ static const struct super_operations ocfs2_sops = {
>  	.show_options   = ocfs2_show_options,
>  	.quota_read	= ocfs2_quota_read,
>  	.quota_write	= ocfs2_quota_write,
> +	.freeze_fs	= ocfs2_freeze_fs,
> +	.unfreeze_fs	= ocfs2_unfreeze_fs,
>  };
>  
>  enum {
> @@ -2167,6 +2173,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
>  	INIT_WORK(&osb->dentry_lock_work, ocfs2_drop_dl_inodes);
>  	osb->dentry_lock_list = NULL;
>  
> +	INIT_WORK(&osb->osb_freeze_work, ocfs2_freeze_worker);
> +
>  	/* get some pseudo constants for clustersize bits */
>  	osb->s_clustersize_bits =
>  		le32_to_cpu(di->id2.i_super.s_clustersize_bits);
> @@ -2525,5 +2533,114 @@ void __ocfs2_abort(struct super_block* sb,
>  	ocfs2_handle_error(sb);
>  }
>  
> +static int is_kernel_thread()
> +{
> +	if (current->flags & PF_KTHREAD)
> +		return 1;
> +	return 0;
> +}
>   

static inline
return (current->flags & PF_KTHREAD);

> +
> +/* ocfs2_freeze_fs()/ocfs2_unfreeze_fs() are always called by freeze_bdev()/
> + * thaw_bdev(). bdev->bd_fsfreeze_mutex is used for synchronization. an extra
> + * ocfs2 mutex is not needed.
> + */
> +static int ocfs2_freeze_fs(struct super_block *sb)
> +{
> +	int ret = 0;
> +	struct ocfs2_super *osb = OCFS2_SB(sb);
> +
> +	mlog(0, "flags=0x%lx, frozen=%d\n", osb->osb_flags, osb->sb->s_frozen);
> +
> +	if (!ocfs2_freeze_lock_supported(osb)) {
> +		return -ENOTSUPP;
> +	}
> +
> +	printk(KERN_INFO "ocfs2: Request to freeze block device (%s)\n",
> +	       osb->dev_str);
>   

remove this printk

> +
> +	/* cluster lock is issued only when this is the IOCTL process.(other
> +	 * case ocfs2_freeze_fs() is called in ocfs2_wq thread)
> +	 */
> +
> +	if (!is_kernel_thread()) {
> +		/* this is ioctl thread, issues cluster lock */
> +		ret = ocfs2_freeze_lock(osb, 1);
> +		if (ret) {
> +			mlog_errno(ret);
> +		} else {
> +			printk(KERN_INFO "ocfs2: Block device (%s) frozen by local\n",
> +			       osb->dev_str);
> +		}
> +	} else {
> +		/* this is ocfs2_wq kernel thread. we do freeze on behalf of
> +		 * the requesting node, don't issue cluster lock again.
> +		 */
> +		printk(KERN_INFO "ocfs2: Block device (%s) frozen by remote\n",
> +		       osb->dev_str);
> +	}
>   

Make these KERN_NOTICEs.

> +
> +	return ret;
> +}
> +
> +static int is_freeze_master(struct ocfs2_super *osb)
> +{
> +	BUG_ON(osb->osb_freeze_lockres.l_ex_holders > 1);
> +
> +	if (osb->osb_freeze_lockres.l_ex_holders)
> +		return 1;
> +
> +	return 0;
> +}
> +
> +static int ocfs2_unfreeze_fs(struct super_block *sb)
> +{
> +	struct ocfs2_super *osb = OCFS2_SB(sb);
> +
> +	mlog(0, "flags=0x%lx, frozen=%d\n", osb->osb_flags, osb->sb->s_frozen);
> +
> +	if (!ocfs2_freeze_lock_supported(osb))
> +		return -ENOTSUPP;
>   

This check should move into the if below because then we can
make it a BUG_ON for the remote case.

> +
> +	if (!is_kernel_thread()) {
> +		/* this is the ioctl user thread. */
> +		if (!is_freeze_master(osb)) {
> +			/* THAW ioctl on a node other than the one on with cluster
> +			 * is frozen. don't thaw in the case. returns -EINVAL so
> +			 * that osb->sb->s_bdev->bd_fsfreeze_count can be decreased.
> +			 */
> +
> +			if (!osb->frozen_by_master) {
> +				/* this is from a nested cross cluster thaw
> +				 * case:
> +				 * frozen from another node(node A)
> +				 * frozen from this node(not suppored though)
> +				 * thawed from node A
> +				 * thawed from this node(coming here)
> +				 *
> +				 * thaw this node only.
> +				 */
>   

If we don't support nested freeze, then why add this code.

> +				printk(KERN_INFO "ocfs2: Block device (%s) thawed"
> +				       " by local\n", osb->dev_str);
> +				goto bail;
> +			}
> +
> +			/* now the cluster still frozen by another node,
> +			 * fails this request
> +			 */
> +			printk(KERN_INFO "ocfs2: thawing %s on invalid node\n",
> +			       osb->dev_str);
> +			return -EINVAL;
> +		}
>   

EINVAL is enough. No need for printks.

> +		ocfs2_freeze_unlock(osb, 1);
> +		printk(KERN_INFO "ocfs2: Block device (%s) thawed by local\n", osb->dev_str);
> +	} else {
> +		/* this is ocfs2_wq kernel thread. nothing to do. */
> +		printk(KERN_INFO "ocfs2: Block device (%s) thawed by remote\n", osb->dev_str);
> +	}
> +bail:
> +	return 0;
> +}
> +
> +
>  module_init(ocfs2_init);
>  module_exit(ocfs2_exit);
>   

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Ocfs2-devel] [PATCH 3/3] ocfs2:freeze-thaw: make it work
  2010-01-16  2:22 ` Sunil Mushran
@ 2010-01-18 13:06   ` Wengang Wang
  2010-01-18 16:17     ` Wengang Wang
  2010-01-20  0:50     ` Sunil Mushran
  0 siblings, 2 replies; 15+ messages in thread
From: Wengang Wang @ 2010-01-18 13:06 UTC (permalink / raw)
  To: ocfs2-devel

Hi Sunil,

On 10-01-15 18:22, Sunil Mushran wrote:
> Apart from this, you have to handle edge conditions. Like umount
> and mount. What if the freeze is issued just before a node is about
> to umount. Or, another node tries to mount the volume while it is
> frozen.

Yes, there is a problem when unmounting the volume while it's frozen.

There is no problem if a node tries to mount the volume while it's frozen;
it just waits for the cluster to be thawed. --though it may time out if the
volume is not thawed in time.
>
> Also, recovery. What if a node dies when a node is frozen. Think
> how you will handle that.
>
I don't think there is a problem if a node dies while the cluster is frozen.
If the dead node is the master node, the cluster is thawed. --this
should be a note/limitation in the man page, I think.
If it's a non-master node, it's just fine; the dead node performs no updates
on the volume.

So what's your concern?

>> +	sb = freeze_bdev(osb->sb->s_bdev);
>> +	if (IS_ERR(sb)) {
>> +		/* the error is returned by ocfs2_freeze_fs() */
>> +		err = PTR_ERR(sb);
>> +		if (err == -ENOTSUPP) {
>> +			/* we got freeze request but we don't support it */
>> +			BUG();
>> +		} else {
>> +			/* we don't issue cluster lock in ocfs2_freeze_fs() for
>> +			 * a freeze request(from other node). no other error
>> +			 * should be returned.
>> +			 */
>> +			BUG();
>> +		}
>> +	}
>>   
>
> Why not one BUG. This needs some more thought. See if
> we can gracefully handle these errors.
It's more readable; an alternative would be to log the errno before BUG().
Maybe my comments for those cases are not clear.

>
> BUG should be reserved to detect code errors. Or, really weird
> errors we don't expect. Like memory corruption. This is a simple
> error. We should try to handle this gracefully.
>
Actually, freeze_bdev() doesn't return errors except the error
returned by freeze_fs().
If ocfs2_freeze_worker() is called, ocfs2_freeze_fs() returning
-ENOTSUPP is a logic bug. And in this case we don't take the cluster lock in
ocfs2_freeze_fs(), so no other error is returned; thus it's also a logic
bug if any other error is returned.
In other words, freeze_bdev() shouldn't return an error here, so I think the two
BUG()s are no problem.


>>  +static int is_kernel_thread()
>> +{
>> +	if (current->flags & PF_KTHREAD)
>> +		return 1;
>> +	return 0;
>> +}
>>   
>
> static inline
> return (current->flags & PF_KTHREAD);

Ok. though it's not called heavily.
>> +static int ocfs2_freeze_fs(struct super_block *sb)
>> +{
>> +	int ret = 0;
>> +	struct ocfs2_super *osb = OCFS2_SB(sb);
>> +
>> +	mlog(0, "flags=0x%lx, frozen=%d\n", osb->osb_flags, osb->sb->s_frozen);
>> +
>> +	if (!ocfs2_freeze_lock_supported(osb)) {
>> +		return -ENOTSUPP;
>> +	}
>> +
>> +	printk(KERN_INFO "ocfs2: Request to freeze block device (%s)\n",
>> +	       osb->dev_str);
>>   
>
> remove this printk

Ok.
>> +
>> +	/* cluster lock is issued only when this is the IOCTL process.(other
>> +	 * case ocfs2_freeze_fs() is called in ocfs2_wq thread)
>> +	 */
>> +
>> +	if (!is_kernel_thread()) {
>> +		/* this is ioctl thread, issues cluster lock */
>> +		ret = ocfs2_freeze_lock(osb, 1);
>> +		if (ret) {
>> +			mlog_errno(ret);
>> +		} else {
>> +			printk(KERN_INFO "ocfs2: Block device (%s) frozen by local\n",
>> +			       osb->dev_str);
>> +		}
>> +	} else {
>> +		/* this is ocfs2_wq kernel thread. we do freeze on behalf of
>> +		 * the requesting node, don't issue cluster lock again.
>> +		 */
>> +		printk(KERN_INFO "ocfs2: Block device (%s) frozen by remote\n",
>> +		       osb->dev_str);
>> +	}
>>   
>
> Make these KERN_NOTICEs.

Ok.
>> +static int ocfs2_unfreeze_fs(struct super_block *sb)
>> +{
>> +	struct ocfs2_super *osb = OCFS2_SB(sb);
>> +
>> +	mlog(0, "flags=0x%lx, frozen=%d\n", osb->osb_flags, osb->sb->s_frozen);
>> +
>> +	if (!ocfs2_freeze_lock_supported(osb))
>> +		return -ENOTSUPP;
>>   
>
> This check should move into the if below because then we can
> make it a BUG_ON for the remote case.
>
Ok.

>> +
>> +	if (!is_kernel_thread()) {
>> +		/* this is the ioctl user thread. */
>> +		if (!is_freeze_master(osb)) {
>> +			/* THAW ioctl on a node other than the one on with cluster
>> +			 * is frozen. don't thaw in the case. returns -EINVAL so
>> +			 * that osb->sb->s_bdev->bd_fsfreeze_count can be decreased.
>> +			 */
>> +
>> +			if (!osb->frozen_by_master) {
>> +				/* this is from a nested cross cluster thaw
>> +				 * case:
>> +				 * frozen from another node(node A)
>> +				 * frozen from this node(not suppored though)
>> +				 * thawed from node A
>> +				 * thawed from this node(coming here)
>> +				 *
>> +				 * thaw this node only.
>> +				 */
>>   
>
> If we don't support nested freeze, then why add this code.

Though we don't support it, we hope to make the operation succeed.
The big pain is that we have no way to know if a nested freeze was done
against the ocfs2 volume: the second freeze doesn't reach ocfs2, it is
handled by the VFS (in freeze_bdev()), so we can't prevent the user from
doing a nested freeze.

We can't return an error. If we do, the thaw fails with the volume still frozen
and no further operation can recover the state.

>
>> +				printk(KERN_INFO "ocfs2: Block device (%s) thawed"
>> +				       " by local\n", osb->dev_str);
>> +				goto bail;
>> +			}
>> +
>> +			/* now the cluster still frozen by another node,
>> +			 * fails this request
>> +			 */
>> +			printk(KERN_INFO "ocfs2: thawing %s on invalid node\n",
>> +			       osb->dev_str);
>> +			return -EINVAL;
>> +		}
>>   
>
> EINVAL is enough. No need for printks.
OK.

regards,
wengang.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Ocfs2-devel] [PATCH 3/3] ocfs2:freeze-thaw: make it work
  2010-01-18 13:06   ` Wengang Wang
@ 2010-01-18 16:17     ` Wengang Wang
  2010-01-20  0:50     ` Sunil Mushran
  1 sibling, 0 replies; 15+ messages in thread
From: Wengang Wang @ 2010-01-18 16:17 UTC (permalink / raw)
  To: ocfs2-devel

On 10-01-18 21:06, Wengang Wang wrote:
> Hi Sunil,
> 
> On 10-01-15 18:22, Sunil Mushran wrote:
> > Apart from this, you have to handle edge conditions. Like umount
> > and mount. What if the freeze is issued just before a node is about
> > to umount. Or, another node tries to mount the volume while it is
> > frozen.
> 
> Yes. there is problem when umount the volume when it's frozen.

The problem is that the umount succeeds but ocfs2_cluster_lock() (NL->PR) is still hanging
there waiting for the thaw.

ocfs2_cluster_lock() is by default a non-noqueue cluster lock. We can use a
noqueue version of ocfs2_freeze_lock(), say ocfs2_freeze_lock_giveup_on_unmount().
In the latter function, it

1) checks if the volume is being unmounted:
	sb->s_flags & MS_ACTIVE
if yes, returns with an error (which error number?).

2) calls ocfs2_cluster_lock() with DLM_LKF_NOQUEUE.
   If ocfs2_cluster_lock() returns -EAGAIN, goes to step 3);
   else returns the value returned by ocfs2_cluster_lock() --0 (lock acquired) or
another error.

3) sleep 5ms then go to step 1). 

We use ocfs2_freeze_lock_giveup_on_unmount() instead of
ocfs2_freeze_lock() in ocfs2_freeze_worker(), so it can bail out when an
unmount comes instead of hanging up the ocfs2_wq work queue.

no test for the above yet. what's your comment?

regards,
wengang.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Ocfs2-devel] [PATCH 3/3] ocfs2:freeze-thaw: make it work
  2010-01-18 13:06   ` Wengang Wang
  2010-01-18 16:17     ` Wengang Wang
@ 2010-01-20  0:50     ` Sunil Mushran
  2010-01-20  4:55       ` Wengang Wang
  1 sibling, 1 reply; 15+ messages in thread
From: Sunil Mushran @ 2010-01-20  0:50 UTC (permalink / raw)
  To: ocfs2-devel

Wengang Wang wrote:

> Yes. there is problem when umount the volume when it's frozen.

Not just that. What if two nodes race. One umounts and one freezes.
As umount is a series of steps, you will have to figure out up to
where it is ok to participate in the freeze and where it is better
to wait for the umount to complete.

You will have to take deadlocks into account. e.g., umounting
flushes the local alloc. If you delay responding to the freeze,
you may deadlock because the node that holds the global bitmap
lock may have frozen the fs by then.

> no problem a node tries mount the volume while it's frozen. it just
> waits for the cluster to be thawed. --sure maybe timeout if the
> volume is not thawed in time.

Again, think of the race conditions. Hint: You may want to take the
superblock lock before taking the freeze lock.

> I don't think there is problem if a node dies when cluster is frozen.
> if the dead node is the master node, the cluster is thawed. --this
> should be a note/limitation to man page I think.
> if it's a non-master node, it's just fine. dead node performs no update
> on the volume.
>
> so what's your concern?

By master you mean the node that does the freeze, right?
If so, then yes.

But if another node dies, we may have to thaw the node then too because
we will have to replay the journal as part of recovery. Think about this.

>>> +	sb = freeze_bdev(osb->sb->s_bdev);
>>> +	if (IS_ERR(sb)) {
>>> +		/* the error is returned by ocfs2_freeze_fs() */
>>> +		err = PTR_ERR(sb);
>>> +		if (err == -ENOTSUPP) {
>>> +			/* we got freeze request but we don't support it */
>>> +			BUG();
>>> +		} else {
>>> +			/* we don't issue cluster lock in ocfs2_freeze_fs() for
>>> +			 * a freeze request(from other node). no other error
>>> +			 * should be returned.
>>> +			 */
>>> +			BUG();
>>> +		}
>>> +	}
>>>   
>> Why not one BUG. This needs some more thought. See if
>> we can gracefully handle these errors.
> It's more readable. or alternative way is log the errno before BUG().
> maybe my comment in the cases are not clear.

First problem is that the comments are redundant. People reading the
code know what ENOTSUPP means. Comment code that is not obvious to the
reader.

Secondly, why BUG? As in, why not handle this more gracefully. I understand
that this is a worker thread in a remote node that has no way of 
returning an
error.

One solution is to fire off a timer in the node doing the freezing that
does a cancel convert after say 30 secs. As in, if it is unable to get the
EX in that time, abort the freeze.

> Though we don't support it, we hope to make the operation succeed.
> the big pain is the we have no way to know if a nested freeze is done
> againt ocfs2 volume. the second freeze doesn't come to ocfs2, it returns
> by vfs(in freeze_bdev()). so we can prevent user to do a nested freeze.
>
> we can't return error. if we do, the thaw fails with volume still frozen
> and no further operation can recover the state.
>

Taking the superblock lock will prevent this.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Ocfs2-devel] [PATCH 3/3] ocfs2:freeze-thaw: make it work
  2010-01-20  0:50     ` Sunil Mushran
@ 2010-01-20  4:55       ` Wengang Wang
  2010-01-20 18:22         ` Sunil Mushran
  2010-01-22  4:22         ` Wengang Wang
  0 siblings, 2 replies; 15+ messages in thread
From: Wengang Wang @ 2010-01-20  4:55 UTC (permalink / raw)
  To: ocfs2-devel

Hi Sunil,

On 10-01-19 16:50, Sunil Mushran wrote:
>> Yes. there is problem when umount the volume when it's frozen.
>
> Not just that. What if two nodes race. One umounts and one freezes.
> As umount is a series of steps, you will have to figure out upto
> where it is ok to participate in the freeze and where it is better
> to wait for the umount to complete.
>
> You will have to take deadlocks into account. e.g., umounting
> flushes the local alloc. If you to delay responding to the freeze,
> you may deadlock because the node that holds the global bitmap
> lock may have been frozen the fs by then.

Yes. needs more considerations.
>
>> no problem a node tries mount the volume while it's frozen. it just
>> waits for the cluster to be thawed. --sure maybe timeout if the
>> volume is not thawed in time.
>
> Again, think of the race conditions. Hint: You may want to take the
> superblock lock before taking the freeze lock.
>

seems we need a noqueue version of the ocfs2_freeze_lock(). timeout it
after some retries.

>> I don't think there is problem if a node dies when cluster is frozen.
>> if the dead node is the master node, the cluster is thawed. --this
>> should be a note/limitation to man page I think.
>> if it's a non-master node, it's just fine. dead node performs no update
>> on the volume.
>>
>> so what's your concern?
>
> By master you mean the node that does the freeze, right?
> If so, then yes.

yes.
> But of another node dies, we may have to thaw the node then too because
> we will have to replay the journal as part of recovery. Think about this.

Agree. ocfs2_sync_fs() ensures the finish of journal committing, but not ensures
the finish of checkpointing.
so it needs replay the journal if a node die.
>
>>>> +	sb = freeze_bdev(osb->sb->s_bdev);
>>>> +	if (IS_ERR(sb)) {
>>>> +		/* the error is returned by ocfs2_freeze_fs() */
>>>> +		err = PTR_ERR(sb);
>>>> +		if (err == -ENOTSUPP) {
>>>> +			/* we got freeze request but we don't support it */
>>>> +			BUG();
>>>> +		} else {
>>>> +			/* we don't issue cluster lock in ocfs2_freeze_fs() for
>>>> +			 * a freeze request(from other node). no other error
>>>> +			 * should be returned.
>>>> +			 */
>>>> +			BUG();
>>>> +		}
>>>> +	}
>>>>   
>>> Why not one BUG. This needs some more thought. See if
>>> we can gracefully handle these errors.
>> It's more readable. or alternative way is log the errno before BUG().
>> maybe my comment in the cases are not clear.
>
> First problem is that the comments are redundant. People reading the
> code know what ENOTSUPP means. Comment code that is not obvious to the
> reader.

Ok.
> Secondly, why BUG? As in, why not handle this more gracefully. I understand
> that this is a worker thread in a remote node that has no way of  
> returning an
> error.
>

Ok, I will consider how to do it gracefully.
> One solution is to fire off a timer in the node doing the freezing that
> does a cancel convert after say 30 secs. As in, if it is unable to get the
> EX in that time, abort the freeze.
>

that leads to the freeze operation lasting at least 30 secs?

>> Though we don't support it, we hope to make the operation succeed.
>> the big pain is the we have no way to know if a nested freeze is done
>> againt ocfs2 volume. the second freeze doesn't come to ocfs2, it returns
>> by vfs(in freeze_bdev()). so we can prevent user to do a nested freeze.
>>
>> we can't return error. if we do, the thaw fails with volume still frozen
>> and no further operation can recover the state.
>>
>
> Taking the superblock lock will prevent this.
I hope you can give more detail on this.

regards,
wengang.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Ocfs2-devel] [PATCH 3/3] ocfs2:freeze-thaw: make it work
  2010-01-20  4:55       ` Wengang Wang
@ 2010-01-20 18:22         ` Sunil Mushran
  2010-01-21  2:18           ` Wengang Wang
  2010-01-27 18:14           ` Wengang Wang
  2010-01-22  4:22         ` Wengang Wang
  1 sibling, 2 replies; 15+ messages in thread
From: Sunil Mushran @ 2010-01-20 18:22 UTC (permalink / raw)
  To: ocfs2-devel

Wengang Wang wrote:
> seems we need a noqueue version of the ocfs2_freeze_lock(). timeout it
> after some retries.

noqueue will not help. Launch a timer to fire off after 30 secs to do
a cancel convert. If freeze succeeds before that, then cancel the timer.

> that leads to the freeze operation last at lease 30 secs?

No. It will wait for max 30 secs for the freeze operation to succeed.
Once frozen, it will remain as is until thawed.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Ocfs2-devel] [PATCH 3/3] ocfs2:freeze-thaw: make it work
  2010-01-20 18:22         ` Sunil Mushran
@ 2010-01-21  2:18           ` Wengang Wang
  2010-01-21  2:32             ` Sunil Mushran
  2010-01-27 18:14           ` Wengang Wang
  1 sibling, 1 reply; 15+ messages in thread
From: Wengang Wang @ 2010-01-21  2:18 UTC (permalink / raw)
  To: ocfs2-devel

Hi Sunil,

On 10-01-20 10:22, Sunil Mushran wrote:
> Wengang Wang wrote:
>> seems we need a noqueue version of the ocfs2_freeze_lock(). timeout it
>> after some retries.
>
> noqueue will not help. Launch a timer to fire off after 30 secs to do
> a cancel convert. If freeze succeeds before that, then cancel the timer.
>
>> that leads to the freeze operation last at lease 30 secs?
>
> No. It will wait for max 30 secs for the freeze operation to succeed.
> Once frozen, it will remain as is until thawed.

then you only meant the timer works for canceling ocfs2_freeze_lock()
for an EX. remember ocfs2_freeze_lock() is also called for a PR waiting
for thawing.

regards,
wengang

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Ocfs2-devel] [PATCH 3/3] ocfs2:freeze-thaw: make it work
  2010-01-21  2:18           ` Wengang Wang
@ 2010-01-21  2:32             ` Sunil Mushran
  0 siblings, 0 replies; 15+ messages in thread
From: Sunil Mushran @ 2010-01-21  2:32 UTC (permalink / raw)
  To: ocfs2-devel

Wengang Wang wrote:
> then you only meant the timer works for canceling ocfs2_freeze_lock()
> for an EX. remember ocfs2_freeze_lock() is also called for a PR waiting
> for thawing.

Yes. Only for EX because that's what the node issuing the freeze
command takes.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Ocfs2-devel] [PATCH 3/3] ocfs2:freeze-thaw: make it work
  2010-01-20  4:55       ` Wengang Wang
  2010-01-20 18:22         ` Sunil Mushran
@ 2010-01-22  4:22         ` Wengang Wang
  2010-01-22 22:35           ` Sunil Mushran
  1 sibling, 1 reply; 15+ messages in thread
From: Wengang Wang @ 2010-01-22  4:22 UTC (permalink / raw)
  To: ocfs2-devel

Hi Sunil,

> >> Though we don't support it, we hope to make the operation succeed.
> >> the big pain is the we have no way to know if a nested freeze is done
> >> againt ocfs2 volume. the second freeze doesn't come to ocfs2, it returns
> >> by vfs(in freeze_bdev()). so we can prevent user to do a nested freeze.
> >>
> >> we can't return error. if we do, the thaw fails with volume still frozen
> >> and no further operation can recover the state.
> >>
> >
> > Taking the superblock lock will prevent this.

could you give me the detail for that?

regards,
wengang.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Ocfs2-devel] [PATCH 3/3] ocfs2:freeze-thaw: make it work
  2010-01-22  4:22         ` Wengang Wang
@ 2010-01-22 22:35           ` Sunil Mushran
  0 siblings, 0 replies; 15+ messages in thread
From: Sunil Mushran @ 2010-01-22 22:35 UTC (permalink / raw)
  To: ocfs2-devel

Wengang Wang wrote:
>>>> Though we don't support it, we hope to make the operation succeed.
>>>> the big pain is the we have no way to know if a nested freeze is done
>>>> againt ocfs2 volume. the second freeze doesn't come to ocfs2, it returns
>>>> by vfs(in freeze_bdev()). so we can prevent user to do a nested freeze.
>>>>
>>>> we can't return error. if we do, the thaw fails with volume still frozen
>>>> and no further operation can recover the state.
>>>>
>>> Taking the superblock lock will prevent this.
>
> could you give me the detail for that?

So is your concern racing freeze requests on multiple nodes?

Multiple freeze requests on a single node should work as is because
we only care about the final thaw().

But if there are multiple freeze requests on multiple nodes, both nodes
will race in freeze_fs(sb). The first node to get the sb lock, will attempt
to take the freeze lock. The loser will wait, but it will get the bast 
on the
freeze lock. In it, it will do mutex_trylock(&sb->bdev->bd_fsfreeze_mutex)
and fail.

One solution is for this node to refuse to downconvert the freeze lock. Wait
for the timeout at which time the freezer node will be forced to cancel 
convert.
At that time the loser node will get the freeze lock and everyone will live
happily ever after. ;)

Get my drift.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Ocfs2-devel] [PATCH 3/3] ocfs2:freeze-thaw: make it work
  2010-01-20 18:22         ` Sunil Mushran
  2010-01-21  2:18           ` Wengang Wang
@ 2010-01-27 18:14           ` Wengang Wang
  2010-01-27 20:09             ` Sunil Mushran
  1 sibling, 1 reply; 15+ messages in thread
From: Wengang Wang @ 2010-01-27 18:14 UTC (permalink / raw)
  To: ocfs2-devel

Sunil,

On 10-01-20 10:22, Sunil Mushran wrote:
> Wengang Wang wrote:
>> seems we need a noqueue version of the ocfs2_freeze_lock(). timeout it
>> after some retries.
>
> noqueue will not help. Launch a timer to fire off after 30 secs to do
> a cancel convert. If freeze succeeds before that, then cancel the timer.

I think the timer is not a very good idea. Canceling an ocfs2 cluster
lock is not complex though it needs lines of code changes. 
By your word, I felt you are meaning that the timer is a separated thing
from ocfs2_cluster_lock(). If so, we have to ensure the ocfs2_cluster_lock()
is really issued before the timer acts. Though 30s sounds good, are
we sure that it is enough in every case? --I think we aren't.
Making sure cluster lock is issued before the timer acts, I guess we
need more code change and I think that is not worthy --it introduces bugs
and it's only for freeze/thaw, not a common demand.

So to avoid very long time waiting of a mount when cluster is frozen, I
want it make tries of no-queue cluster lock. And so does it when acquiring EX.

regards,
wengang.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Ocfs2-devel] [PATCH 3/3] ocfs2:freeze-thaw: make it work
  2010-01-27 18:14           ` Wengang Wang
@ 2010-01-27 20:09             ` Sunil Mushran
  2010-01-28 13:00               ` Wengang Wang
  0 siblings, 1 reply; 15+ messages in thread
From: Sunil Mushran @ 2010-01-27 20:09 UTC (permalink / raw)
  To: ocfs2-devel

Wengang Wang wrote:
> I think the timer is not a very good idea. Canceling an ocfs2 cluster
> lock is not complex though it needs lines of code changes. 
> By your word, I felt you are meaning that the timer is a separated thing
> from ocfs2_cluster_lock(). If so, we have to ensure the ocfs2_cluster_lock()
> is really issued before the timer acts. Though here 30s is good that but are
> we sure that it must be enough at any case? --I think we aren't.
> Making sure cluster lock is issued before the timer acts, I guess we
> need more code change and I think that is not worthy --it introduces bugs
> and it's only for freeze/thaw, not a common demand.
>
> So to avoid very long time waiting of a mount when cluster is frozen, I
> want it make tries of no-queue cluster lock. And so does it when acquiring EX.

noqueue will not work. noqueue means that the master cannot send a bast 
to the
holder. However, our notification scheme relies on the holder getting a 
bast.

Maybe you should ignore the timer bit for the time being. That's a 
separate piece
anycase. Implement the other pieces. We'll get to the cancel convert later.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Ocfs2-devel] [PATCH 3/3] ocfs2:freeze-thaw: make it work
  2010-01-27 20:09             ` Sunil Mushran
@ 2010-01-28 13:00               ` Wengang Wang
  2010-01-28 18:08                 ` Sunil Mushran
  0 siblings, 1 reply; 15+ messages in thread
From: Wengang Wang @ 2010-01-28 13:00 UTC (permalink / raw)
  To: ocfs2-devel

Sunil,

On 10-01-27 12:09, Sunil Mushran wrote:
> Wengang Wang wrote:
>> I think the timer is not a very good idea. Canceling an ocfs2 cluster
>> lock is not complex though it needs lines of code changes. By your 
>> word, I felt you are meaning that the timer is a separated thing
>> from ocfs2_cluster_lock(). If so, we have to ensure the ocfs2_cluster_lock()
>> is really issued before the timer acts. Though here 30s is good that but are
>> we sure that it must be enough at any case? --I think we aren't.
>> Making sure cluster lock is issued before the timer acts, I guess we
>> need more code change and I think that is not worthy --it introduces bugs
>> and it's only for freeze/thaw, not a common demand.
>>
>> So to avoid very long time waiting of a mount when cluster is frozen, I
>> want it make tries of no-queue cluster lock. And so does it when acquiring EX.
>
> noqueue will not work. noqueue means that the master cannot send a bast  
> to the
> holder.

Oh, that's bad.

> However, our notification scheme relies on the holder getting a  
> bast.
>
Yes.

> Maybe you should ignore the timer bit for the time being. That's a  
> separate piece
> anycase. Implement the other pieces. We'll get to the cancel convert later.
cancel convert for what? only the freeze lock or a command interface for
all cluster lock?
I don't know if it's needed for a common cluster lock. But anyway,
seems the freeze/thaw relies on a cancelable(or a timeout version of) cluster
lock. Otherwise, the lock will wait for thaw for ever. If there is a umount at
the time, ocfs2 cleaning up hangs(at flushing work queue). Yes maybe we have a
way to postpone the umount if we are acquiring the PR lock, but that also
means if the cluster is frozen, umount has to wait until cluster thawed(this
is bad).

So, if we have plan to implement cancelable cluster lock(or a timeout version of
cluster lock). I think we'd better do that before supporting freeze/thaw.

regards,
wengang.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Ocfs2-devel] [PATCH 3/3] ocfs2:freeze-thaw: make it work
  2010-01-28 13:00               ` Wengang Wang
@ 2010-01-28 18:08                 ` Sunil Mushran
  0 siblings, 0 replies; 15+ messages in thread
From: Sunil Mushran @ 2010-01-28 18:08 UTC (permalink / raw)
  To: ocfs2-devel

Wengang Wang wrote:
> cancel convert for what? only the freeze lock or a command interface for
> all cluster lock?
> I don't know if it's needed for a commond cluster lock. But anyway,
> seems the freeze/thaw relies on a cancelable(or a timeout version of) cluster
> lock. Otherwise, the lock will wait for thaw for ever. If there is a umount at
> the time, ocfs2 cleaning up hangs(at flushing work queue). Yes maybe we have a
> way to postpone the umount if we are aquiring the PR lock, but that also
> means if the cluster is frozen, umount has to wait until cluster thawed(this
> is bad).
>
> So, if we have plan to implement cancelable cluster lock(or a timeout version of
> cluster lock). I think we'd better do that before supporting freeze/thaw.

No. Please ignore the timer bit for now. Implement the rest first.
We'll get to this later.

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2010-01-28 18:08 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-01-09 18:00 [Ocfs2-devel] [PATCH 3/3] ocfs2:freeze-thaw: make it work Wengang Wang
2010-01-16  2:22 ` Sunil Mushran
2010-01-18 13:06   ` Wengang Wang
2010-01-18 16:17     ` Wengang Wang
2010-01-20  0:50     ` Sunil Mushran
2010-01-20  4:55       ` Wengang Wang
2010-01-20 18:22         ` Sunil Mushran
2010-01-21  2:18           ` Wengang Wang
2010-01-21  2:32             ` Sunil Mushran
2010-01-27 18:14           ` Wengang Wang
2010-01-27 20:09             ` Sunil Mushran
2010-01-28 13:00               ` Wengang Wang
2010-01-28 18:08                 ` Sunil Mushran
2010-01-22  4:22         ` Wengang Wang
2010-01-22 22:35           ` Sunil Mushran

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.