linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 3/3] Add timeout feature
@ 2008-06-30 12:24 Takashi Sato
  2008-07-01  8:10 ` Christoph Hellwig
  0 siblings, 1 reply; 71+ messages in thread
From: Takashi Sato @ 2008-06-30 12:24 UTC (permalink / raw)
  To: akpm, viro
  Cc: linux-ext4, xfs, dm-devel, linux-fsdevel, linux-kernel, axboe,
	mtk.manpages

The timeout feature is added to freeze ioctl.  And new ioctl
to reset the timeout period is added.
o Freeze the filesystem
  int ioctl(int fd, int FIFREEZE, long *timeout_sec)
    fd: The file descriptor of the mountpoint
    FIFREEZE: request code for the freeze
    timeout_sec: the timeout period in seconds
             If it's 0 or 1, the timeout isn't set.
             This special case of "1" is implemented to keep
             the compatibility with XFS applications.
    Return value: 0 if the operation succeeds. Otherwise, -1

o Reset the timeout period
  int ioctl(int fd, int FIFREEZE_RESET_TIMEOUT, long *timeout_sec)
    fd:file descriptor of mountpoint
    FIFREEZE_RESET_TIMEOUT: request code for reset of timeout period
    timeout_sec: new timeout period in seconds
    Return value: 0 if the operation succeeds. Otherwise, -1
    Error number: If the filesystem has already been unfrozen,
                  errno is set to EINVAL.

Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
Signed-off-by: Masayuki Hamaguchi <m-hamaguchi@ys.jp.nec.com>
---
diff -uprN -X linux-2.6.26-rc7.org/Documentation/dontdiff linux-2.6.26-rc7-xfs/drivers/md/dm.c linux-2.6.26-rc7-timeout/
drivers/md/dm.c
--- linux-2.6.26-rc7-xfs/drivers/md/dm.c	2008-06-25 12:06:59.000000000 +0900
+++ linux-2.6.26-rc7-timeout/drivers/md/dm.c	2008-06-25 16:28:40.000000000 +0900
@@ -1407,7 +1407,7 @@ static int lock_fs(struct mapped_device 
 
 	WARN_ON(md->frozen_sb);
 
-	md->frozen_sb = freeze_bdev(md->suspended_bdev);
+	md->frozen_sb = freeze_bdev(md->suspended_bdev, 0);
 	if (IS_ERR(md->frozen_sb)) {
 		r = PTR_ERR(md->frozen_sb);
 		md->frozen_sb = NULL;
diff -uprN -X linux-2.6.26-rc7.org/Documentation/dontdiff linux-2.6.26-rc7-xfs/fs/block_dev.c linux-2.6.26-rc7-timeout/f
s/block_dev.c
--- linux-2.6.26-rc7-xfs/fs/block_dev.c	2008-06-25 12:07:19.000000000 +0900
+++ linux-2.6.26-rc7-timeout/fs/block_dev.c	2008-06-25 16:30:49.000000000 +0900
@@ -285,6 +285,8 @@ static void init_once(struct kmem_cache 
 	INIT_LIST_HEAD(&bdev->bd_holder_list);
 #endif
 	inode_init_once(&ei->vfs_inode);
+	/* Setup freeze timeout function. */
+	INIT_DELAYED_WORK(&bdev->bd_freeze_timeout, freeze_timeout);
 }
 
 static inline void __bd_forget(struct inode *inode)
diff -uprN -X linux-2.6.26-rc7.org/Documentation/dontdiff linux-2.6.26-rc7-xfs/fs/buffer.c linux-2.6.26-rc7-timeout/fs/b
uffer.c
--- linux-2.6.26-rc7-xfs/fs/buffer.c	2008-06-30 12:59:53.000000000 +0900
+++ linux-2.6.26-rc7-timeout/fs/buffer.c	2008-06-27 10:59:55.000000000 +0900
@@ -190,14 +190,18 @@ int fsync_bdev(struct block_device *bdev
 
 /**
  * freeze_bdev  --  lock a filesystem and force it into a consistent state
- * @bdev:	blockdevice to lock
+ * @bdev:              blockdevice to lock
+ * @timeout_msec:      timeout period
  *
  * This takes the block device bd_mount_sem to make sure no new mounts
  * happen on bdev until thaw_bdev() is called.
  * If a superblock is found on this device, we take the s_umount semaphore
  * on it to make sure nobody unmounts until the snapshot creation is done.
+ * If timeout_msec is bigger than 0, this registers the delayed work for
+ * timeout of the freeze feature.
  */
-struct super_block *freeze_bdev(struct block_device *bdev)
+struct super_block *freeze_bdev(struct block_device *bdev,
+				unsigned int timeout_msec)
 {
 	struct super_block *sb;
 
@@ -234,6 +238,10 @@ struct super_block *freeze_bdev(struct b
 	}
 
 	sync_blockdev(bdev);
+	/* Setup unfreeze timer. */
+	if (timeout_msec > 0)
+		add_freeze_timeout(bdev, timeout_msec);
+
 	clear_bit(BD_FREEZE_OP, &bdev->bd_state);
 
 	return sb;	/* thaw_bdev releases s->s_umount and bd_mount_sem */
@@ -258,6 +266,9 @@ int thaw_bdev(struct block_device *bdev,
 		return 0;
 	}
 
+	/* Delete unfreeze timer. */
+	del_freeze_timeout(bdev);
+
 	if (sb) {
 		BUG_ON(sb->s_bdev != bdev);
 
diff -uprN -X linux-2.6.26-rc7.org/Documentation/dontdiff linux-2.6.26-rc7-xfs/fs/ioctl.c linux-2.6.26-rc7-timeout/fs/io
ctl.c
--- linux-2.6.26-rc7-xfs/fs/ioctl.c	2008-06-25 12:07:20.000000000 +0900
+++ linux-2.6.26-rc7-timeout/fs/ioctl.c	2008-06-27 08:49:58.000000000 +0900
@@ -145,22 +145,47 @@ static int ioctl_fioasync(unsigned int f
  * ioctl_freeze - Freeze the filesystem.
  *
  * @filp:	target file
+ * @argp:       timeout value(sec)
  *
  * Call freeze_bdev() to freeze the filesystem.
  */
-static int ioctl_freeze(struct file *filp)
+static int ioctl_freeze(struct file *filp, int __user *argp)
 {
+	int timeout_sec;
+	unsigned int timeout_msec;
 	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+	int error;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	/* If filesystem doesn't support freeze feature, return. */
+	/* If filesystem doesn't support freeze feature, return EOPNOTSUPP. */
 	if (sb->s_op->write_super_lockfs == NULL)
 		return -EOPNOTSUPP;
 
+	/* If a regular file or a directory isn't specified, return EINVAL. */
+	if (sb->s_bdev == NULL)
+		return -EINVAL;
+
+	/* arg(sec) to tick value. */
+	error = get_user(timeout_sec, argp);
+	if (error != 0)
+		return error;
+
+	if (timeout_sec < 0 || timeout_sec > UINT_MAX/1000)
+		return -EINVAL;
+
+	/*
+	 * If 1 is specified as the timeout period it is changed into 0
+	 * to retain compatibility with XFS's xfs_freeze.
+	 */
+	if (timeout_sec == 1)
+		timeout_sec = 0;
+
+	timeout_msec = timeout_sec * 1000;
+
 	/* Freeze */
-	sb = freeze_bdev(sb->s_bdev);
+	sb = freeze_bdev(sb->s_bdev, timeout_msec);
 	if (IS_ERR(sb))
 		return PTR_ERR(sb);
 	return 0;
@@ -180,11 +205,61 @@ static int ioctl_thaw(struct file *filp)
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
+	/* If a regular file or a directory isn't specified, return EINVAL. */
+	if (sb->s_bdev == NULL)
+		return -EINVAL;
+
 	/* Thaw */
 	return thaw_bdev(sb->s_bdev, sb);
 }
 
 /*
+ * ioctl_freeze_reset_timeout - Reset timeout for freeze.
+ *
+ * @filp:       target file
+ * @argp:       timeout value(sec)
+ *
+ * Reset timeout for freeze.
+ */
+static int
+ioctl_freeze_reset_timeout(struct file *filp, int __user *argp)
+{
+	int timeout_sec;
+	unsigned int timeout_msec;
+	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+	int error;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* If a regular file or a directory isn't specified, return EINVAL. */
+	if (sb->s_bdev == NULL)
+		return -EINVAL;
+
+	/* arg(sec) to tick value */
+	error = get_user(timeout_sec, argp);
+	if (error)
+		return error;
+
+	if (timeout_sec <= 0 || timeout_sec > UINT_MAX/1000)
+		return -EINVAL;
+
+	timeout_msec = timeout_sec * 1000;
+
+	if (test_and_set_bit(BD_FREEZE_OP, &sb->s_bdev->bd_state))
+		return -EBUSY;
+	if (sb->s_frozen == SB_UNFROZEN) {
+		clear_bit(BD_FREEZE_OP, &sb->s_bdev->bd_state);
+		return -EINVAL;
+	}
+	/* setup unfreeze timer */
+	add_freeze_timeout(sb->s_bdev, timeout_msec);
+	clear_bit(BD_FREEZE_OP, &sb->s_bdev->bd_state);
+
+	return 0;
+}
+
+/*
  * When you add any new common ioctls to the switches above and below
  * please update compat_sys_ioctl() too.
  *
@@ -227,13 +302,17 @@ int do_vfs_ioctl(struct file *filp, unsi
 		break;
 
 	case FIFREEZE:
-		error = ioctl_freeze(filp);
+		error = ioctl_freeze(filp, argp);
 		break;
 
 	case FITHAW:
 		error = ioctl_thaw(filp);
 		break;
 
+	case FIFREEZE_RESET_TIMEOUT:
+		error = ioctl_freeze_reset_timeout(filp, argp);
+		break;
+
 	default:
 		if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
 			error = file_ioctl(filp, cmd, arg);
diff -uprN -X linux-2.6.26-rc7.org/Documentation/dontdiff linux-2.6.26-rc7-xfs/fs/super.c linux-2.6.26-rc7-timeout/fs/su
per.c
--- linux-2.6.26-rc7-xfs/fs/super.c	2008-06-30 16:41:48.000000000 +0900
+++ linux-2.6.26-rc7-timeout/fs/super.c	2008-06-30 17:30:38.000000000 +0900
@@ -980,3 +980,60 @@ struct vfsmount *kern_mount_data(struct 
 }
 
 EXPORT_SYMBOL_GPL(kern_mount_data);
+
+/*
+ * freeze_timeout - Thaw the filesystem.
+ *
+ * @work:	work queue (delayed_work.work)
+ *
+ * Called by the delayed work when elapsing the timeout period.
+ * Thaw the filesystem.
+ */
+void freeze_timeout(struct work_struct *work)
+{
+	struct block_device *bd = container_of(work,
+			struct block_device, bd_freeze_timeout.work);
+	struct super_block *sb = get_super(bd);
+
+	thaw_bdev(bd, sb);
+
+	if (sb)
+		drop_super(sb);
+}
+EXPORT_SYMBOL_GPL(freeze_timeout);
+
+/*
+ * add_freeze_timeout - Add timeout for freeze.
+ *
+ * @bdev:		block device struct
+ * @timeout_msec:	timeout period
+ *
+ * Add the delayed work for freeze timeout to the delayed work queue.
+ */
+void add_freeze_timeout(struct block_device *bdev, unsigned int timeout_msec)
+{
+	s64 timeout_jiffies = msecs_to_jiffies(timeout_msec);
+
+	/* Set delayed work queue */
+	cancel_delayed_work_sync(&bdev->bd_freeze_timeout);
+	schedule_delayed_work(&bdev->bd_freeze_timeout, timeout_jiffies);
+}
+
+/*
+ * del_freeze_timeout - Delete timeout for freeze.
+ *
+ * @bdev:	block device struct
+ *
+ * Delete the delayed work for freeze timeout from the delayed work queue.
+ */
+void del_freeze_timeout(struct block_device *bdev)
+{
+	/*
+	 * It's possible that the delayed work task (freeze_timeout()) calls
+	 * del_freeze_timeout().  If the delayed work task calls
+	 * cancel_delayed_work_sync((), the deadlock will occur.
+	 * So we need this check (delayed_work_pending()).
+	 */
+	if (delayed_work_pending(&bdev->bd_freeze_timeout))
+		cancel_delayed_work_sync(&bdev->bd_freeze_timeout);
+}
diff -uprN -X linux-2.6.26-rc7.org/Documentation/dontdiff linux-2.6.26-rc7-xfs/fs/xfs/xfs_fsops.c linux-2.6.26-rc7-timeo
ut/fs/xfs/xfs_fsops.c
--- linux-2.6.26-rc7-xfs/fs/xfs/xfs_fsops.c	2008-06-25 12:07:19.000000000 +0900
+++ linux-2.6.26-rc7-timeout/fs/xfs/xfs_fsops.c	2008-06-25 16:30:48.000000000 +0900
@@ -619,7 +619,7 @@ xfs_fs_goingdown(
 {
 	switch (inflags) {
 	case XFS_FSOP_GOING_FLAGS_DEFAULT: {
-		struct super_block *sb = freeze_bdev(mp->m_super->s_bdev);
+		struct super_block *sb = freeze_bdev(mp->m_super->s_bdev, 0);
 
 		if (sb && !IS_ERR(sb)) {
 			xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
diff -uprN -X linux-2.6.26-rc7.org/Documentation/dontdiff linux-2.6.26-rc7-xfs/include/linux/buffer_head.h linux-2.6.26-
rc7-timeout/include/linux/buffer_head.h
--- linux-2.6.26-rc7-xfs/include/linux/buffer_head.h	2008-06-25 12:07:24.000000000 +0900
+++ linux-2.6.26-rc7-timeout/include/linux/buffer_head.h	2008-06-27 11:11:38.000000000 +0900
@@ -170,7 +170,8 @@ int sync_blockdev(struct block_device *b
 void __wait_on_buffer(struct buffer_head *);
 wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
 int fsync_bdev(struct block_device *);
-struct super_block *freeze_bdev(struct block_device *);
+struct super_block *freeze_bdev(struct block_device *,
+				unsigned int timeout_msec);
 int thaw_bdev(struct block_device *, struct super_block *);
 int fsync_super(struct super_block *);
 int fsync_no_super(struct block_device *);
diff -uprN -X linux-2.6.26-rc7.org/Documentation/dontdiff linux-2.6.26-rc7-xfs/include/linux/fs.h linux-2.6.26-rc7-timeo
ut/include/linux/fs.h
--- linux-2.6.26-rc7-xfs/include/linux/fs.h	2008-06-30 16:42:11.000000000 +0900
+++ linux-2.6.26-rc7-timeout/include/linux/fs.h	2008-06-30 16:43:13.000000000 +0900
@@ -8,6 +8,7 @@
 
 #include <linux/limits.h>
 #include <linux/ioctl.h>
+#include <linux/workqueue.h>
 
 /*
  * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -225,6 +226,7 @@ extern int dir_notify_enable;
 #define FIGETBSZ   _IO(0x00,2)	/* get the block size used for bmap */
 #define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
 #define FITHAW		_IOWR('X', 120, int)	/* Thaw */
+#define	FIFREEZE_RESET_TIMEOUT	_IO(0x00, 3)	/* Reset freeze timeout */
 
 #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
 #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
@@ -559,6 +561,8 @@ struct block_device {
 
 	/* State of the block device. (Used by freeze feature) */
 	unsigned long		bd_state;
+	/* Delayed work for freeze */
+	struct delayed_work     bd_freeze_timeout;
 };
 
 /*
@@ -2147,5 +2151,10 @@ int proc_nr_files(struct ctl_table *tabl
 
 int get_filesystem_list(char * buf);
 
+extern void add_freeze_timeout(struct block_device *bdev,
+				unsigned int timeout_msec);
+extern void del_freeze_timeout(struct block_device *bdev);
+extern void freeze_timeout(struct work_struct *work);
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_FS_H */

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-06-30 12:24 [PATCH 3/3] Add timeout feature Takashi Sato
@ 2008-07-01  8:10 ` Christoph Hellwig
  2008-07-01 10:52   ` [dm-devel] " Alasdair G Kergon
  2008-07-07 11:07   ` Pavel Machek
  0 siblings, 2 replies; 71+ messages in thread
From: Christoph Hellwig @ 2008-07-01  8:10 UTC (permalink / raw)
  To: Takashi Sato
  Cc: akpm, viro, linux-ext4, xfs, dm-devel, linux-fsdevel,
	linux-kernel, axboe, mtk.manpages

I still disagree with this whole patch.  There is not reason to let
the freeze request timeout - an auto-unfreezing will only confuse the
hell out of the caller.  The only reason where the current XFS freeze
call can hang and this would be theoretically useful is when the
filesystem is already frozen by someone else, but this should be fixed
by refusing to do the second freeze, as suggested in my comment to patch
1.


^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [dm-devel] Re: [PATCH 3/3] Add timeout feature
  2008-07-01  8:10 ` Christoph Hellwig
@ 2008-07-01 10:52   ` Alasdair G Kergon
  2008-07-03 12:11     ` Takashi Sato
  2008-07-07 11:07   ` Pavel Machek
  1 sibling, 1 reply; 71+ messages in thread
From: Alasdair G Kergon @ 2008-07-01 10:52 UTC (permalink / raw)
  To: Takashi Sato
  Cc: Christoph Hellwig, axboe, mtk.manpages, linux-kernel, xfs,
	dm-devel, viro, linux-fsdevel, akpm, linux-ext4

On Tue, Jul 01, 2008 at 04:10:26AM -0400, Christoph Hellwig wrote:
> I still disagree with this whole patch.  

Same here - if you want a timeout, what stops you from implementing it in a
userspace process?  If your concern is that the process might die without
thawing the filesystem, take a look at the userspace LVM/multipath code for
ideas - lock into memory, disable OOM killer, run from ramdisk etc.
In practice, those techniques seem to be good enough.

> call can hang and this would be theoretically useful is when the
> filesystem is already frozen by someone else, but this should be fixed
> by refusing to do the second freeze, as suggested in my comment to patch
> 1.

Similarly if a device-mapper device is involved, how should the following
sequence behave - A, B or C?

1. dmsetup suspend (freezes)
2. FIFREEZE
3. FITHAW
4. dmsetup resume (thaws)

A:
  1 succeeds, freezes
  2 succeeds, remains frozen
  3 succeeds, remains frozen
  4 succeeds, thaws

B:
  1 succeeds, freezes
  2 fails, remains frozen
  3 shouldn't be called because 2 failed but if it is: succeeds, thaws
  4 succeeds (already thawed, but still does the device-mapper parts)

C:
  1 succeeds, freezes
  2 fails, remains frozen
  3 fails (because device-mapper owns the freeze/thaw), remains frozen 
  4 succeeds, thaws

Alasdair
-- 
agk@redhat.com

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [dm-devel] Re: [PATCH 3/3] Add timeout feature
  2008-07-01 10:52   ` [dm-devel] " Alasdair G Kergon
@ 2008-07-03 12:11     ` Takashi Sato
  2008-07-03 12:47       ` Alasdair G Kergon
  2008-07-03 14:45       ` Eric Sandeen
  0 siblings, 2 replies; 71+ messages in thread
From: Takashi Sato @ 2008-07-03 12:11 UTC (permalink / raw)
  To: Christoph Hellwig, Alasdair G Kergon
  Cc: linux-ext4, Andrew Morton, linux-fsdevel, viro, dm-devel, xfs,
	linux-kernel, mtk.manpages, axboe

Hi Christoph and Alasdair,

> On Tue, Jul 01, 2008 at 04:10:26AM -0400, Christoph Hellwig wrote:
>> I still disagree with this whole patch.
>
> Same here - if you want a timeout, what stops you from implementing it in a
> userspace process?  If your concern is that the process might die without
> thawing the filesystem, take a look at the userspace LVM/multipath code for
> ideas - lock into memory, disable OOM killer, run from ramdisk etc.
> In practice, those techniques seem to be good enough.

If the freezer accesses the frozen filesystem and causes a deadlock,
the above ideas can't solve it.  The timeout is useful to solve such a deadlock.
If you don't need the timeout, you can disable it by specifying "0" as the
timeout period.

> Similarly if a device-mapper device is involved, how should the following
> sequence behave - A, B or C?
>
> 1. dmsetup suspend (freezes)
> 2. FIFREEZE
> 3. FITHAW
> 4. dmsetup resume (thaws)
[...]
> C:
>  1 succeeds, freezes
>  2 fails, remains frozen
>  3 fails (because device-mapper owns the freeze/thaw), remains frozen
>  4 succeeds, thaws

I think C is appropriate and the following change makes it possible.
How do you think?

1. Add the new bit flag(BD_FREEZE_DM) in block_device.bd_state.
   It means that the volume is frozen by the device-mapper.

2. Operate and check this bit flag as followings.

  - Bit operations in the device-mapper's freeze/thaw
    FREEZE:
      dm_suspend():   set BD_FREEZE_DM
        freeze_bdev():set BD_FREEZE_OP

    THAW:
        thaw_bdev():   clear BD_FREEZE_OP
      dm_resume():    clear BD_FREEZE_DM

  -  Checks in FIFREEZE/FITHAW
    FREEZE:
      ioctl_freeze(): Not need to check BD_FREEZE_DM
        freeze_bdev():set BD_FREEZE_OP

    THAW:
      ioctl_thaw():   If BD_FREEZE_DM is set, fail, otherwise, call thaw_bdev()
        thaw_bdev():   clear BD_FREEZE_OP

Cheers, Takashi

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [dm-devel] Re: [PATCH 3/3] Add timeout feature
  2008-07-03 12:11     ` Takashi Sato
@ 2008-07-03 12:47       ` Alasdair G Kergon
  2008-07-03 22:11         ` Dave Chinner
  2008-07-03 14:45       ` Eric Sandeen
  1 sibling, 1 reply; 71+ messages in thread
From: Alasdair G Kergon @ 2008-07-03 12:47 UTC (permalink / raw)
  To: Takashi Sato
  Cc: Christoph Hellwig, linux-ext4, Andrew Morton, linux-fsdevel,
	viro, dm-devel, xfs, linux-kernel, mtk.manpages, axboe

On Thu, Jul 03, 2008 at 09:11:05PM +0900, Takashi Sato wrote:
> If the freezer accesses the frozen filesystem and causes a deadlock,
> the above ideas can't solve it

But you could also say that if the 'freezer' process accesses the frozen
filesystem and deadlocks then that's just a bug and that userspace code
should be fixed and there's no need to introduce the complexity of a
timeout parameter.

> >Similarly if a device-mapper device is involved, how should the following
> >sequence behave - A, B or C?
> >
> >1. dmsetup suspend (freezes)
> >2. FIFREEZE
> >3. FITHAW
> >4. dmsetup resume (thaws)
> [...]
> >C:
> > 1 succeeds, freezes
> > 2 fails, remains frozen
> > 3 fails (because device-mapper owns the freeze/thaw), remains frozen
> > 4 succeeds, thaws
> 
> I think C is appropriate and the following change makes it possible.
> How do you think?
 
The point I'm trying to make here is:
  Under what real-world circumstances might multiple concurrent freezing
  attempts occur, and which of A, B or C (or other variations) would be
  the most appropriate way of handling such situations?

A common example is people running xfs_freeze followed by an lvm command
which also attempts to freeze the filesystem.

I can see a case for B or C, but personally I prefer A:

> > 1 succeeds, freezes
> > 2 succeeds, remains frozen
> > 3 succeeds, remains frozen
> > 4 succeeds, thaws

Alasdair
-- 
agk@redhat.com

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [dm-devel] Re: [PATCH 3/3] Add timeout feature
  2008-07-03 12:11     ` Takashi Sato
  2008-07-03 12:47       ` Alasdair G Kergon
@ 2008-07-03 14:45       ` Eric Sandeen
  1 sibling, 0 replies; 71+ messages in thread
From: Eric Sandeen @ 2008-07-03 14:45 UTC (permalink / raw)
  To: Takashi Sato
  Cc: Christoph Hellwig, Alasdair G Kergon, linux-ext4, Andrew Morton,
	linux-fsdevel, viro, dm-devel, xfs, linux-kernel, mtk.manpages,
	axboe

Takashi Sato wrote:
> Hi Christoph and Alasdair,
> 
>> On Tue, Jul 01, 2008 at 04:10:26AM -0400, Christoph Hellwig wrote:
>>> I still disagree with this whole patch.
>> Same here - if you want a timeout, what stops you from implementing it in a
>> userspace process?  If your concern is that the process might die without
>> thawing the filesystem, take a look at the userspace LVM/multipath code for
>> ideas - lock into memory, disable OOM killer, run from ramdisk etc.
>> In practice, those techniques seem to be good enough.
> 
> If the freezer accesses the frozen filesystem and causes a deadlock,
> the above ideas can't solve it.  The timeout is useful to solve such a deadlock.
> If you don't need the timeout, you can disable it by specifying "0" as the
> timeout period.
> 
>> Similarly if a device-mapper device is involved, how should the following
>> sequence behave - A, B or C?
>>
>> 1. dmsetup suspend (freezes)
>> 2. FIFREEZE
>> 3. FITHAW
>> 4. dmsetup resume (thaws)
> [...]
>> C:
>>  1 succeeds, freezes
>>  2 fails, remains frozen
>>  3 fails (because device-mapper owns the freeze/thaw), remains frozen
>>  4 succeeds, thaws
> 
> I think C is appropriate and the following change makes it possible.
> How do you think?
> 
> 1. Add the new bit flag(BD_FREEZE_DM) in block_device.bd_state.
>    It means that the volume is frozen by the device-mapper.

Will we add a new bit/flag for every possible subysstem that may call
freeze/thaw?  This seems odd to me.

They are different paths to the same underlying mechanism; it should not
matter if it is an existing freeze from DM or via FIFREEZE or via the
xfs ioctl, or any other mechanism should it?  I don't think this generic
interface should use any flag named *_DM, personally.

It seems that nested freeze requests must be handled in a generic way
regardless of what initiates any of the requests?

Refcounting freezes as Alasdair suggests seems to make sense to me, i.e.
freeze, freeze, thaw, thaw leads to:

>> > > 1 (freeze) succeeds, freezes (frozen++)
>> > > 2 (freeze) succeeds, remains frozen (frozen++)
>> > > 3 (thaw) succeeds, remains frozen (frozen--)
>> > > 4 (thaw) succeeds, thaws (frozen--)

that way each caller of freeze is guaranteed that the fs is frozen at
least until they call thaw?

Thanks,
-Eric

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [dm-devel] Re: [PATCH 3/3] Add timeout feature
  2008-07-03 12:47       ` Alasdair G Kergon
@ 2008-07-03 22:11         ` Dave Chinner
  2008-07-04 12:08           ` Takashi Sato
  0 siblings, 1 reply; 71+ messages in thread
From: Dave Chinner @ 2008-07-03 22:11 UTC (permalink / raw)
  To: Takashi Sato, Christoph Hellwig, linux-ext4, Andrew Morton,
	linux-fsdevel, viro, dm-devel, xfs, linux-kernel, mtk.manpages,
	axboe

On Thu, Jul 03, 2008 at 01:47:10PM +0100, Alasdair G Kergon wrote:
> On Thu, Jul 03, 2008 at 09:11:05PM +0900, Takashi Sato wrote:
> > If the freezer accesses the frozen filesystem and causes a deadlock,
> > the above ideas can't solve it
> 
> But you could also say that if the 'freezer' process accesses the frozen
> filesystem and deadlocks then that's just a bug and that userspace code
> should be fixed and there's no need to introduce the complexity of a
> timeout parameter.

Seconded - that was also my primary objection to the timeout code.

> The point I'm trying to make here is:
>   Under what real-world circumstances might multiple concurrent freezing
>   attempts occur, and which of A, B or C (or other variations) would be
>   the most appropriate way of handling such situations?
> 
> A common example is people running xfs_freeze followed by an lvm command
> which also attempts to freeze the filesystem.

Yes, I've seen that reported a number of times.

> I can see a case for B or C, but personally I prefer A:
> 
> > > 1 succeeds, freezes
> > > 2 succeeds, remains frozen
> > > 3 succeeds, remains frozen
> > > 4 succeeds, thaws

Agreed, though I'd modify the definition of that case to be "remain
frozen until the last thaw occurs". That has the advantage that
it's relatively simple to implement with just a counter...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [dm-devel] Re: [PATCH 3/3] Add timeout feature
  2008-07-03 22:11         ` Dave Chinner
@ 2008-07-04 12:08           ` Takashi Sato
  0 siblings, 0 replies; 71+ messages in thread
From: Takashi Sato @ 2008-07-04 12:08 UTC (permalink / raw)
  To: Eric Sandeen, Alasdair G Kergon, Dave Chinner
  Cc: dm-devel, linux-fsdevel, Andrew Morton, viro, linux-ext4, xfs,
	Christoph Hellwig, linux-kernel, axboe, mtk.manpages

Hi Alasdair, Eric and Dave,

> On Thu, Jul 03, 2008 at 01:47:10PM +0100, Alasdair G Kergon wrote:
>> On Thu, Jul 03, 2008 at 09:11:05PM +0900, Takashi Sato wrote:
>> > If the freezer accesses the frozen filesystem and causes a deadlock,
>> > the above ideas can't solve it
>>
>> But you could also say that if the 'freezer' process accesses the frozen
>> filesystem and deadlocks then that's just a bug and that userspace code
>> should be fixed and there's no need to introduce the complexity of a
>> timeout parameter.
>
> Seconded - that was also my primary objection to the timeout code.

I will consider removing the timeout.

>> The point I'm trying to make here is:
>>   Under what real-world circumstances might multiple concurrent freezing
>>   attempts occur, and which of A, B or C (or other variations) would be
>>   the most appropriate way of handling such situations?
>>
>> A common example is people running xfs_freeze followed by an lvm command
>> which also attempts to freeze the filesystem.
>
> Yes, I've seen that reported a number of times.
>
>> I can see a case for B or C, but personally I prefer A:
>>
>> > > 1 succeeds, freezes
>> > > 2 succeeds, remains frozen
>> > > 3 succeeds, remains frozen
>> > > 4 succeeds, thaws
>
> Agreed, though I'd modify the definition of that case to be "remain
> frozen until the last thaw occurs". That has the advantage that
> it's relatively simple to implement with just a counter...

I agree this idea.
But I have one concern. When device-mapper's freeze follows FIFREEZE,
can device-mapper freeze only device-mapper's part correctly?
And when device-mapper's thaw follows FITHAW,
can device-mapper thaw only device-mapper's part?

Cheers, Takashi 

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-01  8:10 ` Christoph Hellwig
  2008-07-01 10:52   ` [dm-devel] " Alasdair G Kergon
@ 2008-07-07 11:07   ` Pavel Machek
  2008-07-08 23:10     ` Dave Chinner
  1 sibling, 1 reply; 71+ messages in thread
From: Pavel Machek @ 2008-07-07 11:07 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Takashi Sato, akpm, viro, linux-ext4, xfs, dm-devel,
	linux-fsdevel, linux-kernel, axboe, mtk.manpages

Hi!

> I still disagree with this whole patch.  There is not reason to let
> the freeze request timeout - an auto-unfreezing will only confuse the
> hell out of the caller.  The only reason where the current XFS freeze
> call can hang and this would be theoretically useful is when the

What happens when someone dirties so much data that vm swaps out
whatever process that frozen the filesystem?

I though that was why the timeout was there...
							Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-07 11:07   ` Pavel Machek
@ 2008-07-08 23:10     ` Dave Chinner
  2008-07-08 23:20       ` Pavel Machek
  0 siblings, 1 reply; 71+ messages in thread
From: Dave Chinner @ 2008-07-08 23:10 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Christoph Hellwig, Takashi Sato, akpm, viro, linux-ext4, xfs,
	dm-devel, linux-fsdevel, linux-kernel, axboe, mtk.manpages

On Mon, Jul 07, 2008 at 01:07:31PM +0200, Pavel Machek wrote:
> Hi!
> 
> > I still disagree with this whole patch.  There is not reason to let
> > the freeze request timeout - an auto-unfreezing will only confuse the
> > hell out of the caller.  The only reason where the current XFS freeze
> > call can hang and this would be theoretically useful is when the
> 
> What happens when someone dirties so much data that vm swaps out
> whatever process that frozen the filesystem?

a) you can't dirty a frozen filesystem - by definition a frozen
   filesystem is a *clean filesystem* and *cannot be dirtied*.
b) Swap doesn't write through the filesystem
c) you can still read from a frozen filesystem to page your
   executableѕ in.
d) if dirtying another unfrozen filesystem swaps out your
   application so it can't run, then there's a major VM bug.
   Regardless, until the app completes it is relying on the
   filesystem being frozen, so it better remain frozen....

> I though that was why the timeout was there...

Not that I know of. 

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-08 23:10     ` Dave Chinner
@ 2008-07-08 23:20       ` Pavel Machek
  2008-07-09  0:52         ` Dave Chinner
  0 siblings, 1 reply; 71+ messages in thread
From: Pavel Machek @ 2008-07-08 23:20 UTC (permalink / raw)
  To: Christoph Hellwig, Takashi Sato, akpm, viro, linux-ext4, xfs,
	dm-devel, linux-fsdevel, linux-kernel, axboe, mtk.manpages

On Wed 2008-07-09 09:10:27, Dave Chinner wrote:
> On Mon, Jul 07, 2008 at 01:07:31PM +0200, Pavel Machek wrote:
> > Hi!
> > 
> > > I still disagree with this whole patch.  There is not reason to let
> > > the freeze request timeout - an auto-unfreezing will only confuse the
> > > hell out of the caller.  The only reason where the current XFS freeze
> > > call can hang and this would be theoretically useful is when the
> > 
> > What happens when someone dirties so much data that vm swaps out
> > whatever process that frozen the filesystem?
> 
> a) you can't dirty a frozen filesystem - by definition a frozen
>    filesystem is a *clean filesystem* and *cannot be dirtied*.

Can you stop me?

mmap("/some/huge_file", MAP_SHARED);

then write to memory mapping?

> b) Swap doesn't write through the filesystem
> c) you can still read from a frozen filesystem to page your
>    executable?? in.

atime modification should mean dirty data, right? And dirty data mean
memory pressure, right? 

> d) if dirtying another unfrozen filesystem swaps out your
                 ~~~~~~~
>    application so it can't run, then there's a major VM bug.
>    Regardless, until the app completes it is relying on the
>    filesystem being frozen, so it better remain frozen....

Agreed. With emphasis on "another".

> > I though that was why the timeout was there...
> 
> Not that I know of. 

Ok, lets see how you deal with mmap.
								Pavel

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-08 23:20       ` Pavel Machek
@ 2008-07-09  0:52         ` Dave Chinner
  2008-07-09  1:09           ` Theodore Tso
  2008-07-09 20:44           ` Pavel Machek
  0 siblings, 2 replies; 71+ messages in thread
From: Dave Chinner @ 2008-07-09  0:52 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Christoph Hellwig, Takashi Sato, akpm, viro, linux-ext4, xfs,
	dm-devel, linux-fsdevel, linux-kernel, axboe, mtk.manpages

On Wed, Jul 09, 2008 at 01:20:31AM +0200, Pavel Machek wrote:
> On Wed 2008-07-09 09:10:27, Dave Chinner wrote:
> > On Mon, Jul 07, 2008 at 01:07:31PM +0200, Pavel Machek wrote:
> > > Hi!
> > > 
> > > > I still disagree with this whole patch.  There is not reason to let
> > > > the freeze request timeout - an auto-unfreezing will only confuse the
> > > > hell out of the caller.  The only reason where the current XFS freeze
> > > > call can hang and this would be theoretically useful is when the
> > > 
> > > What happens when someone dirties so much data that vm swaps out
> > > whatever process that frozen the filesystem?
> > 
> > a) you can't dirty a frozen filesystem - by definition a frozen
> >    filesystem is a *clean filesystem* and *cannot be dirtied*.
> 
> Can you stop me?
> 
> mmap("/some/huge_file", MAP_SHARED);
> 
> then write to memory mapping?

Sure - we can put a hook in ->page_mkwrite() to prevent it.  We
don't right now because nobody in the real world really cares if one
half of a concurrent user data change is in the old snapshot or the
new one......

> > b) Swap doesn't write through the filesystem
> > c) you can still read from a frozen filesystem to page your
> >    executable?? in.
> 
> atime modification should mean dirty data, right?

Metadata, not data. If that's really a problem (and it never has
been for XFS because we always allow in memory changes to atime)
then touch_atime could be easily changed to avoid this...

> And dirty data mean
> memory pressure, right? 

If you walk enough inodes while the filesystem is frozen, it
theoretically could happen.  Typically a filesystem is only for a
few seconds at a time so in the real world this has never, ever been
a problem.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09  0:52         ` Dave Chinner
@ 2008-07-09  1:09           ` Theodore Tso
  2008-07-09  4:21             ` Brad Boyer
  2008-07-09  6:13             ` Miklos Szeredi
  2008-07-09 20:44           ` Pavel Machek
  1 sibling, 2 replies; 71+ messages in thread
From: Theodore Tso @ 2008-07-09  1:09 UTC (permalink / raw)
  To: Pavel Machek, Christoph Hellwig, Takashi Sato, akpm, viro,
	linux-ext4, xfs, dm-devel, linux-fsdevel, linux-kernel, axboe,
	mtk.manpages

On Wed, Jul 09, 2008 at 10:52:54AM +1000, Dave Chinner wrote:
> If you walk enough inodes while the filesystem is frozen, it
> theoretically could happen.  Typically a filesystem is only for a
> few seconds at a time so in the real world this has never, ever been
> a problem.

I had argued for the timeout (and so it's mostly my fault that
Takashi-San included it as a feature) mainly because I was (and still
amm) deeply paranoid about the competence of the application
programers who might use this feature.  I could see them screwing up
leaving it locked forever --- perhaps when their program core dumps or
when the user types ^C and they forgot to install a signal handler,
leaving the filesystem frozen forever.

In the meantime, user applications that try to create files on that
filesystem, or write to files already opened when the filesystem are
frozen will accumulate dirty pages in the page cache, until the system
finally falls over.

Think about some of the evil perpetrated by hal and the userspace
suspend-resume scripts (and how much complexity with random XML
fragments getting parsed by various dbus plugins), and tell me with a
straight face that you would trust these modern-day desktop
application writers with this interface.  Because they *will* find
some interesting way to (ab)use it.....

Also, I didn't think the extra code complexity to implements timeouts
was *that* bad --- it seemed fairly small for the functionality.

Best regards,

						- Ted

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09  1:09           ` Theodore Tso
@ 2008-07-09  4:21             ` Brad Boyer
  2008-07-09  6:13             ` Miklos Szeredi
  1 sibling, 0 replies; 71+ messages in thread
From: Brad Boyer @ 2008-07-09  4:21 UTC (permalink / raw)
  To: Theodore Tso, Pavel Machek, Christoph Hellwig, Takashi Sato,
	akpm, viro, linux-ext4, xfs, dm-devel, linux-fsdevel,
	linux-kernel, axboe, mtk.manpages

On Tue, Jul 08, 2008 at 09:09:22PM -0400, Theodore Tso wrote:
> I had argued for the timeout (and so it's mostly my fault that
> Takashi-San included it as a feature) mainly because I was (and still
> amm) deeply paranoid about the competence of the application
> programers who might use this feature.  I could see them screwing up
> leaving it locked forever --- perhaps when their program core dumps or
> when the user types ^C and they forgot to install a signal handler,
> leaving the filesystem frozen forever.
> 
> In the meantime, user applications that try to create files on that
> filesystem, or write to files already opened when the filesystem are
> frozen will accumulate dirty pages in the page cache, until the system
> finally falls over.
> 
> Think about some of the evil perpetrated by hal and the userspace
> suspend-resume scripts (and how much complexity with random XML
> fragments getting parsed by various dbus plugins), and tell me with a
> straight face that you would trust these modern-day desktop
> application writers with this interface.  Because they *will* find
> some interesting way to (ab)use it.....
> 
> Also, I didn't think the extra code complexity to implements timeouts
> was *that* bad --- it seemed fairly small for the functionality.

Just as an extra point of reference, VxFS supports a freeze/thaw by
ioctl very similar to this including a timeout in seconds. This
means someone else thought it was a useful feature.

http://sfdoccentral.symantec.com/sf/5.0/linux/manpages/vxfs/vxfsio_7.html

	Brad Boyer
	flar@allandria.com


^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09  1:09           ` Theodore Tso
  2008-07-09  4:21             ` Brad Boyer
@ 2008-07-09  6:13             ` Miklos Szeredi
  2008-07-09  6:16               ` Christoph Hellwig
  1 sibling, 1 reply; 71+ messages in thread
From: Miklos Szeredi @ 2008-07-09  6:13 UTC (permalink / raw)
  To: tytso
  Cc: pavel, hch, t-sato, akpm, viro, linux-ext4, xfs, dm-devel,
	linux-fsdevel, linux-kernel, axboe, mtk.manpages

On Tue, 8 Jul 2008, Theodore Tso wrote:
> On Wed, Jul 09, 2008 at 10:52:54AM +1000, Dave Chinner wrote:
> > If you walk enough inodes while the filesystem is frozen, it
> > theoretically could happen.  Typically a filesystem is only for a
> > few seconds at a time so in the real world this has never, ever been
> > a problem.
> 
> I had argued for the timeout (and so it's mostly my fault that
> Takashi-San included it as a feature) mainly because I was (and still
> amm) deeply paranoid about the competence of the application
> programers who might use this feature.  I could see them screwing up
> leaving it locked forever --- perhaps when their program core dumps or
> when the user types ^C and they forgot to install a signal handler,
> leaving the filesystem frozen forever.

A much better way to deal with that would be to limit the freeze to
the lifetime of the process somehow.  E.g. the freeze ioctl sets a bit
in struct file (I'm sure we have some available) and release on the
file checks this bit and thaws the filesystem.

This would mean that freeze and thaw will have to be done on the same
file descriptor, but this isn't unreasonable to expect, is it?

Miklos

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09  6:13             ` Miklos Szeredi
@ 2008-07-09  6:16               ` Christoph Hellwig
  2008-07-09  6:22                 ` Miklos Szeredi
  0 siblings, 1 reply; 71+ messages in thread
From: Christoph Hellwig @ 2008-07-09  6:16 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: tytso, pavel, hch, t-sato, akpm, viro, linux-ext4, xfs, dm-devel,
	linux-fsdevel, linux-kernel, axboe, mtk.manpages

On Wed, Jul 09, 2008 at 08:13:21AM +0200, Miklos Szeredi wrote:
> This would mean that freeze and thaw will have to be done on the same
> file descriptor, but this isn't unreasonable to expect, is it?

It is certainly not the current use case, where you run one command
to freeze the filesystem and another one to unfreeze it.


^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09  6:16               ` Christoph Hellwig
@ 2008-07-09  6:22                 ` Miklos Szeredi
  2008-07-09  6:41                   ` Arjan van de Ven
  0 siblings, 1 reply; 71+ messages in thread
From: Miklos Szeredi @ 2008-07-09  6:22 UTC (permalink / raw)
  To: hch
  Cc: miklos, tytso, pavel, hch, t-sato, akpm, viro, linux-ext4, xfs,
	dm-devel, linux-fsdevel, linux-kernel, axboe, mtk.manpages

On Wed, 9 Jul 2008, Christoph Hellwig wrote:
> On Wed, Jul 09, 2008 at 08:13:21AM +0200, Miklos Szeredi wrote:
> > This would mean that freeze and thaw will have to be done on the same
> > file descriptor, but this isn't unreasonable to expect, is it?
> 
> It is certainly not the current use case, where you run one command
> to freeze the filesystem and another one to unfreeze it.

So instead of

  freeze_fs mountpoint
  backup-command
  unfreeze_fs mountpoint

the user would have do to

  run_freezed mountpoint backup-command

I find the second one nicer, regardless of any reliability issues.

Miklos

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09  6:22                 ` Miklos Szeredi
@ 2008-07-09  6:41                   ` Arjan van de Ven
  2008-07-09  6:48                     ` Miklos Szeredi
  2008-07-09  6:59                     ` Dave Chinner
  0 siblings, 2 replies; 71+ messages in thread
From: Arjan van de Ven @ 2008-07-09  6:41 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: hch, miklos, tytso, pavel, t-sato, akpm, viro, linux-ext4, xfs,
	dm-devel, linux-fsdevel, linux-kernel, axboe, mtk.manpages

On Wed, 09 Jul 2008 08:22:56 +0200
Miklos Szeredi <miklos@szeredi.hu> wrote:

> On Wed, 9 Jul 2008, Christoph Hellwig wrote:
> > On Wed, Jul 09, 2008 at 08:13:21AM +0200, Miklos Szeredi wrote:
> > > This would mean that freeze and thaw will have to be done on the
> > > same file descriptor, but this isn't unreasonable to expect, is
> > > it?
> > 
> > It is certainly not the current use case, where you run one command
> > to freeze the filesystem and another one to unfreeze it.
> 
> So instead of
> 
>   freeze_fs mountpoint
>   backup-command
>   unfreeze_fs mountpoint
> 
> the user would have do to
> 
>   run_freezed mountpoint backup-command
> 
> I find the second one nicer, regardless of any reliability issues.

nah he needs to do

make_snapshot ; backup-command ; unref_snapshot.

freezing isn't the right solution for the backup problem ;)


-- 
If you want to reach me at my work email, use arjan@linux.intel.com
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09  6:41                   ` Arjan van de Ven
@ 2008-07-09  6:48                     ` Miklos Szeredi
  2008-07-09  6:55                       ` Arjan van de Ven
  2008-07-09  6:59                     ` Dave Chinner
  1 sibling, 1 reply; 71+ messages in thread
From: Miklos Szeredi @ 2008-07-09  6:48 UTC (permalink / raw)
  To: arjan
  Cc: miklos, hch, miklos, tytso, pavel, t-sato, akpm, viro,
	linux-ext4, xfs, dm-devel, linux-fsdevel, linux-kernel, axboe,
	mtk.manpages

On Tue, 8 Jul 2008, Arjan van de Ven wrote:
> nah he needs to do
> 
> make_snapshot ; backup-command ; unref_snapshot.
> 
> freezing isn't the right solution for the backup problem ;)

Confused, what's freezing _is_ for then?  Patch description says:

  Currently, ext3 in mainline Linux doesn't have the freeze feature which
  suspends write requests.  So, we cannot take a backup which keeps
  the filesystem's consistency with the storage device's features
  (snapshot and replication) while it is mounted.
  In many case, a commercial filesystem (e.g. VxFS) has
  the freeze feature and it would be used to get the consistent backup.
  If Linux's standard filesytem ext3 has the freeze feature, we can do it
  without a commercial filesystem.

Miklos

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09  6:48                     ` Miklos Szeredi
@ 2008-07-09  6:55                       ` Arjan van de Ven
  2008-07-09  7:08                         ` Miklos Szeredi
  2008-07-09  7:13                         ` Dave Chinner
  0 siblings, 2 replies; 71+ messages in thread
From: Arjan van de Ven @ 2008-07-09  6:55 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: miklos, hch, tytso, pavel, t-sato, akpm, viro, linux-ext4, xfs,
	dm-devel, linux-fsdevel, linux-kernel, axboe, mtk.manpages

On Wed, 09 Jul 2008 08:48:42 +0200
Miklos Szeredi <miklos@szeredi.hu> wrote:

> On Tue, 8 Jul 2008, Arjan van de Ven wrote:
> > nah he needs to do
> > 
> > make_snapshot ; backup-command ; unref_snapshot.
> > 
> > freezing isn't the right solution for the backup problem ;)
> 
> Confused, what's freezing _is_ for then?  Patch description says:
> 
>   Currently, ext3 in mainline Linux doesn't have the freeze feature
> which suspends write requests.  So, we cannot take a backup which
> keeps the filesystem's consistency with the storage device's features
>   (snapshot and replication) while it is mounted.

I tihnk the idea there is

freeze . do the snapshot op . unfreeze . make backup of snapshot

one can argue about the need of doing the first 3 steps via a userland
loop; it sure sounds like one needs to be really careful to not do any
writes to the fs from the app that does snapshots (and that includes
doing any syscalls in the kernel that allocate memory.. just because
that already could cause unrelated data to be written from inside the
app. Not fun.)


-- 
If you want to reach me at my work email, use arjan@linux.intel.com
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09  6:41                   ` Arjan van de Ven
  2008-07-09  6:48                     ` Miklos Szeredi
@ 2008-07-09  6:59                     ` Dave Chinner
  2008-07-09  7:13                       ` Miklos Szeredi
  1 sibling, 1 reply; 71+ messages in thread
From: Dave Chinner @ 2008-07-09  6:59 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Miklos Szeredi, hch, tytso, pavel, t-sato, akpm, viro,
	linux-ext4, xfs, dm-devel, linux-fsdevel, linux-kernel, axboe,
	mtk.manpages

On Tue, Jul 08, 2008 at 11:41:20PM -0700, Arjan van de Ven wrote:
> On Wed, 09 Jul 2008 08:22:56 +0200
> Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > On Wed, Jul 09, 2008 at 08:13:21AM +0200, Miklos Szeredi wrote:
> > > > This would mean that freeze and thaw will have to be done on the
> > > > same file descriptor, but this isn't unreasonable to expect, is
> > > > it?
> > > 
> > > It is certainly not the current use case, where you run one command
> > > to freeze the filesystem and another one to unfreeze it.
> > 
> > So instead of
> > 
> >   freeze_fs mountpoint
> >   backup-command
> >   unfreeze_fs mountpoint
> > 
> > the user would have do to
> > 
> >   run_freezed mountpoint backup-command
> > 
> > I find the second one nicer, regardless of any reliability issues.
> 
> nah he needs to do
> 
> make_snapshot ; backup-command ; unref_snapshot.
> 
> freezing isn't the right solution for the backup problem ;)

You're forgetting that to take a snapshot you generally need to
freeze the filesystem. ;) i.e:

freeze; make_snapshot; unfreeze
backup-command
unref_snapshot

Yes, dm_snapshot does the freeze/snapshot/unfreeze for you, but the
point is there is a freeze in the example you gave.

The argument against Miklos' version is that there may be multiple
commands to execute while the fs is frozen.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09  6:55                       ` Arjan van de Ven
@ 2008-07-09  7:08                         ` Miklos Szeredi
  2008-07-09 20:48                           ` Pavel Machek
  2008-07-09  7:13                         ` Dave Chinner
  1 sibling, 1 reply; 71+ messages in thread
From: Miklos Szeredi @ 2008-07-09  7:08 UTC (permalink / raw)
  To: arjan
  Cc: miklos, miklos, hch, tytso, pavel, t-sato, akpm, viro,
	linux-ext4, xfs, dm-devel, linux-fsdevel, linux-kernel, axboe,
	mtk.manpages

On Tue, 8 Jul 2008, Arjan van de Ven wrote:
> I tihnk the idea there is
> 
> freeze . do the snapshot op . unfreeze . make backup of snapshot

Ah, so then my proposal would become

  run_frozen mountpoint do-snapshot
  do-backup
  release-snapshot

and if they are afraid of deadlocks they can just implement the
timeout in userspace:

  run_frozen -t timeout mountpoint do-snapshot

'run_frozen' can be a trivial 30 line app, that can be guaranteed not
to deadlock.

> one can argue about the need of doing the first 3 steps via a userland
> loop; it sure sounds like one needs to be really careful to not do any
> writes to the fs from the app that does snapshots (and that includes
> doing any syscalls in the kernel that allocate memory.. just because
> that already could cause unrelated data to be written from inside the
> app. Not fun.)

Userland always has to be careful when messing with raw devices.  That
alone is not a reason to put the snapshotting facility in kernel IMO.

Miklos

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09  6:59                     ` Dave Chinner
@ 2008-07-09  7:13                       ` Miklos Szeredi
  2008-07-09  7:33                         ` Dave Chinner
  0 siblings, 1 reply; 71+ messages in thread
From: Miklos Szeredi @ 2008-07-09  7:13 UTC (permalink / raw)
  To: david
  Cc: arjan, miklos, hch, tytso, pavel, t-sato, akpm, viro, linux-ext4,
	xfs, dm-devel, linux-fsdevel, linux-kernel, axboe, mtk.manpages

On Wed, 9 Jul 2008, Dave Chinner wrote:
> The argument against Miklos' version is that there may be multiple
> commands to execute while the fs is frozen.

Which is what a shell is for ;)

Miklos

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09  6:55                       ` Arjan van de Ven
  2008-07-09  7:08                         ` Miklos Szeredi
@ 2008-07-09  7:13                         ` Dave Chinner
  2008-07-09 11:09                           ` Theodore Tso
  2008-07-09 13:53                           ` Arjan van de Ven
  1 sibling, 2 replies; 71+ messages in thread
From: Dave Chinner @ 2008-07-09  7:13 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Miklos Szeredi, hch, tytso, pavel, t-sato, akpm, viro,
	linux-ext4, xfs, dm-devel, linux-fsdevel, linux-kernel, axboe,
	mtk.manpages

On Tue, Jul 08, 2008 at 11:55:02PM -0700, Arjan van de Ven wrote:
> On Wed, 09 Jul 2008 08:48:42 +0200
> Miklos Szeredi <miklos@szeredi.hu> wrote:
> 
> > On Tue, 8 Jul 2008, Arjan van de Ven wrote:
> > > nah he needs to do
> > > 
> > > make_snapshot ; backup-command ; unref_snapshot.
> > > 
> > > freezing isn't the right solution for the backup problem ;)
> > 
> > Confused, what's freezing _is_ for then?  Patch description says:
> > 
> >   Currently, ext3 in mainline Linux doesn't have the freeze feature
> > which suspends write requests.  So, we cannot take a backup which
> > keeps the filesystem's consistency with the storage device's features
> >   (snapshot and replication) while it is mounted.
> 
> I tihnk the idea there is
> 
> freeze . do the snapshot op . unfreeze . make backup of snapshot
> 
> one can argue about the need of doing the first 3 steps via a userland
> loop; it sure sounds like one needs to be really careful to not do any
> writes to the fs from the app that does snapshots (and that includes
> doing any syscalls in the kernel that allocate memory.. just because
> that already could cause unrelated data to be written from inside the
> app. Not fun.)

Bloody hell! Doesn't *anyone* understand that a frozen filesystem is
*clean*? That the process of freezing it ensures all dirty data and
metadata is written out before the freeze completes? And that once
frozen, it can't be dirtied until unfrozen?

That's 3 (or is it 4 - maybe 5 now that I think about it) different
ppl in 24 hours that have made this same broken argument about
being unable to write back dirty data on a frozen filesystem......

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09  7:13                       ` Miklos Szeredi
@ 2008-07-09  7:33                         ` Dave Chinner
  2008-07-09  8:11                           ` Miklos Szeredi
  0 siblings, 1 reply; 71+ messages in thread
From: Dave Chinner @ 2008-07-09  7:33 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: arjan, hch, tytso, pavel, t-sato, akpm, viro, linux-ext4, xfs,
	dm-devel, linux-fsdevel, linux-kernel, axboe, mtk.manpages

On Wed, Jul 09, 2008 at 09:13:34AM +0200, Miklos Szeredi wrote:
> On Wed, 9 Jul 2008, Dave Chinner wrote:
> > The argument against Miklos' version is that there may be multiple
> > commands to execute while the fs is frozen.
> 
> Which is what a shell is for ;)

Yeah, weĺl, with your method I ca't tell a user to:

	# xfs_freeze -f /mntpt
	# xfs_db -r -c 'sb 0' -c 'p rootino' /dev/foo
	128
	# xfs_db -r -c 'ino 128' -c 'p core.format' /dev/foo
	core.format = 2 (extents)
	# xfs_db .....
	.....
	# xfs_freeze -u /mntpt

i.e. using the freeze to force all metadata to disk and
prevent it from changing while doing interactive debugging
of some problem.

Yes, a one-shot freeze/unfreeze mechanism works for some
use cases. The point is that it does not work for them all.....

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09  7:33                         ` Dave Chinner
@ 2008-07-09  8:11                           ` Miklos Szeredi
  2008-07-09 11:15                             ` Dave Chinner
  0 siblings, 1 reply; 71+ messages in thread
From: Miklos Szeredi @ 2008-07-09  8:11 UTC (permalink / raw)
  To: david
  Cc: miklos, arjan, hch, tytso, pavel, t-sato, akpm, viro, linux-ext4,
	xfs, dm-devel, linux-fsdevel, linux-kernel, axboe, mtk.manpages

On Wed, 9 Jul 2008, Dave Chinner wrote:
> On Wed, Jul 09, 2008 at 09:13:34AM +0200, Miklos Szeredi wrote:
> > On Wed, 9 Jul 2008, Dave Chinner wrote:
> > > The argument against Miklos' version is that there may be multiple
> > > commands to execute while the fs is frozen.
> > 
> > Which is what a shell is for ;)
> 
> Yeah, weĺl, with your method I ca't tell a user to:
> 
> 	# xfs_freeze -f /mntpt
> 	# xfs_db -r -c 'sb 0' -c 'p rootino' /dev/foo
> 	128
> 	# xfs_db -r -c 'ino 128' -c 'p core.format' /dev/foo
> 	core.format = 2 (extents)
> 	# xfs_db .....
> 	.....
> 	# xfs_freeze -u /mntpt
> 
> i.e. using the freeze to force all metadata to disk and
> prevent it from changing while doing interactive debugging
> of some problem.

	# run_freeze /mntpt /bin/bash
	# ...
	# ^D

It's the same, no?

Miklos

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09  7:13                         ` Dave Chinner
@ 2008-07-09 11:09                           ` Theodore Tso
  2008-07-09 11:49                             ` Dave Chinner
  2008-07-09 13:53                           ` Arjan van de Ven
  1 sibling, 1 reply; 71+ messages in thread
From: Theodore Tso @ 2008-07-09 11:09 UTC (permalink / raw)
  To: Arjan van de Ven, Miklos Szeredi, hch, pavel, t-sato, akpm, viro,
	linux-ext4, xfs, dm-devel, linux-fsdevel, linux-kernel, axboe,
	mtk.manpages

> 
> Bloody hell! Doesn't *anyone* understand that a frozen filesystem is
> *clean*? That the process of freezing it ensures all dirty data and
> metadata is written out before the freeze completes? And that once
> frozen, it can't be dirtied until unfrozen?

What do you mean by "it can't be diritied until unfrozen".  What
happens if I have a kernel compilation happening on a filesystem which
I am trying to freeze?   Does

(a) the freeze fail (because the checks equivalent to what happens
when you remount a filesystem read-only happen)?

(b) The process gets a kill -9 when it tries to write a file on the
frozen filesystem?

(c) The process gets a kill -STOP when it tries to write
to a file on the frozen filesystem?  

(d) The process won't fail, but just continue to run, filling the page
cache with dirty pages that can't be written out because the
filesystem is frozen?

If the answer is (b) or (c), and if you don't have a timeout, and the
backup process which has frozen the filesystem tries to write to the
filesystem, hilarity will ensue....

> That's 3 (or is it 4 - maybe 5 now that I think about it) different
> ppl in 24 hours that have made this same broken argument about
> being unable to write back dirty data on a frozen filesystem......

It's not a question of writing back dirty data, it's the fact that you
*can't*, leading to the page cache filling up wirth dirty data,
leading eventually to the OOM killer running --- and since the last
time I tried suggesting that if the process holding the file
descriptor freezing the filesystem, that idea got shot down (I see
it's been suggested again), if that happens, there is going to be no
other recovery path other than the Big Red Button.

      	       	    	       	       	   - Ted

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09  8:11                           ` Miklos Szeredi
@ 2008-07-09 11:15                             ` Dave Chinner
  0 siblings, 0 replies; 71+ messages in thread
From: Dave Chinner @ 2008-07-09 11:15 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: arjan, hch, tytso, pavel, t-sato, akpm, viro, linux-ext4, xfs,
	dm-devel, linux-fsdevel, linux-kernel, axboe, mtk.manpages

On Wed, Jul 09, 2008 at 10:11:01AM +0200, Miklos Szeredi wrote:
> On Wed, 9 Jul 2008, Dave Chinner wrote:
> > On Wed, Jul 09, 2008 at 09:13:34AM +0200, Miklos Szeredi wrote:
> > > On Wed, 9 Jul 2008, Dave Chinner wrote:
> > > > The argument against Miklos' version is that there may be multiple
> > > > commands to execute while the fs is frozen.
> > > 
> > > Which is what a shell is for ;)
> > 
> > Yeah, weĺl, with your method I ca't tell a user to:
> > 
> > 	# xfs_freeze -f /mntpt
> > 	# xfs_db -r -c 'sb 0' -c 'p rootino' /dev/foo
> > 	128
> > 	# xfs_db -r -c 'ino 128' -c 'p core.format' /dev/foo
> > 	core.format = 2 (extents)
> > 	# xfs_db .....
> > 	.....
> > 	# xfs_freeze -u /mntpt
> > 
> > i.e. using the freeze to force all metadata to disk and
> > prevent it from changing while doing interactive debugging
> > of some problem.
> 
> 	# run_freeze /mntpt /bin/bash
> 	# ...
> 	# ^D
> 
> It's the same, no?

For that case, yeah. But it's a horrible hack - if that's the
best we can come up with for this freeze/unfreeze then we've
already lost.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09 11:09                           ` Theodore Tso
@ 2008-07-09 11:49                             ` Dave Chinner
  2008-07-09 12:24                               ` Theodore Tso
                                                 ` (2 more replies)
  0 siblings, 3 replies; 71+ messages in thread
From: Dave Chinner @ 2008-07-09 11:49 UTC (permalink / raw)
  To: Theodore Tso, Arjan van de Ven, Miklos Szeredi, hch, pavel,
	t-sato, akpm, viro, linux-ext4, xfs, dm-devel, linux-fsdevel,
	linux-kernel, axboe, mtk.manpages

On Wed, Jul 09, 2008 at 07:09:00AM -0400, Theodore Tso wrote:
> > 
> > Bloody hell! Doesn't *anyone* understand that a frozen filesystem is
> > *clean*? That the process of freezing it ensures all dirty data and
> > metadata is written out before the freeze completes? And that once
> > frozen, it can't be dirtied until unfrozen?
> 
> What do you mean by "it can't be diritied until unfrozen".  What
> happens if I have a kernel compilation happening on a filesystem which
> I am trying to freeze?   Does

> (a) the freeze fail (because the checks equivalent to what happens
> when you remount a filesystem read-only happen)?
> 
> (b) The process gets a kill -9 when it tries to write a file on the
> frozen filesystem?
> 
> (c) The process gets a kill -STOP when it tries to write
> to a file on the frozen filesystem?  
> 
> (d) The process won't fail, but just continue to run, filling the page
> cache with dirty pages that can't be written out because the
> filesystem is frozen?

(e) none of the above.  The kernel compilation will appear to pause
until the filesystem is unfrozen. No other visible effect should
occur. It will get blocked in a write or filesystem transaction
because the fs is frozen.

Look at vfs_check_frozen() - any call to that will block if the
filesystem is frozen or being frozen. The generic hook is in
__generic_file_aio_write_nolock() and various other filesystems have
calls in their specific write paths (fuse, ntfs, ocfs2, xfs, xip) to
do this.

For all other modifications, filesystem specific methods of
blocking transactions are used. XFS uses vfs_check_frozen() in
xfs_trans_alloc(), ext3 (and probably ocfs2) do it via
their ->write_super_lockfs method calling journal_lock_updates(),
ext4 via jbd2_lock_updates() and so on....

When the filesystem is unfrozen the journal is unlocked and
anything sleeping on the vfs_check_frozen() waitqueue is
woken.....

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09 11:49                             ` Dave Chinner
@ 2008-07-09 12:24                               ` Theodore Tso
  2008-07-09 12:59                                 ` Olaf Frączyk
  2008-07-09 13:55                               ` Arjan van de Ven
  2008-07-09 13:58                               ` jim owens
  2 siblings, 1 reply; 71+ messages in thread
From: Theodore Tso @ 2008-07-09 12:24 UTC (permalink / raw)
  To: Arjan van de Ven, Miklos Szeredi, hch, pavel, t-sato, akpm, viro,
	linux-ext4, xfs, dm-devel, linux-fsdevel, linux-kernel, axboe,
	mtk.manpages

On Wed, Jul 09, 2008 at 09:49:58PM +1000, Dave Chinner wrote:
> (e) none of the above.  The kernel compilation will appear to pause
> until the filesystem is unfrozen. No other visible effect should
> occur. It will get blocked in a write or filesystem transaction
> because the fs is frozen.

So if the process which froze the filesystem accidentally tries
writing to a log file (or database file containing the backup
information, or whatever) that happens to be on the filesystem that is
frozen, that process will get blocked and you end up in a deadlock;
did I get that right?

						- Ted

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09 12:24                               ` Theodore Tso
@ 2008-07-09 12:59                                 ` Olaf Frączyk
  2008-07-09 13:57                                   ` Arjan van de Ven
  0 siblings, 1 reply; 71+ messages in thread
From: Olaf Frączyk @ 2008-07-09 12:59 UTC (permalink / raw)
  To: Theodore Tso
  Cc: Arjan van de Ven, Miklos Szeredi, hch, pavel, t-sato, akpm, viro,
	linux-ext4, xfs, dm-devel, linux-fsdevel, linux-kernel, axboe,
	mtk.manpages

On Wed, 2008-07-09 at 08:24 -0400, Theodore Tso wrote:
> On Wed, Jul 09, 2008 at 09:49:58PM +1000, Dave Chinner wrote:
> > (e) none of the above.  The kernel compilation will appear to pause
> > until the filesystem is unfrozen. No other visible effect should
> > occur. It will get blocked in a write or filesystem transaction
> > because the fs is frozen.
> 
> So if the process which froze the filesystem accidentally tries
> writing to a log file (or database file containing the backup
> information, or whatever) that happens to be on the filesystem that is
> frozen, that process will get blocked and you end up in a deadlock;
> did I get that right?
Where do you see the deadlock?
The process doesn't have a lock on filesystem or something. You can
always unfreeze from another process.

Regards,

Olaf
-- 
Olaf Frączyk <olaf@cbk.poznan.pl>


^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09  7:13                         ` Dave Chinner
  2008-07-09 11:09                           ` Theodore Tso
@ 2008-07-09 13:53                           ` Arjan van de Ven
  1 sibling, 0 replies; 71+ messages in thread
From: Arjan van de Ven @ 2008-07-09 13:53 UTC (permalink / raw)
  To: Dave Chinner
  Cc: Miklos Szeredi, hch, tytso, pavel, t-sato, akpm, viro,
	linux-ext4, xfs, dm-devel, linux-fsdevel, linux-kernel, axboe,
	mtk.manpages

On Wed, 9 Jul 2008 17:13:46 +1000
Dave Chinner <david@fromorbit.com> wrote:

> > one can argue about the need of doing the first 3 steps via a
> > userland loop; it sure sounds like one needs to be really careful
> > to not do any writes to the fs from the app that does snapshots
> > (and that includes doing any syscalls in the kernel that allocate
> > memory.. just because that already could cause unrelated data to be
> > written from inside the app. Not fun.)
> 
> Bloody hell! Doesn't *anyone* understand that a frozen filesystem is
> *clean*? That the process of freezing it ensures all dirty data and
> metadata is written out before the freeze completes? And that once
> frozen, it can't be dirtied until unfrozen?
> 
> That's 3 (or is it 4 - maybe 5 now that I think about it) different
> ppl in 24 hours that have made this same broken argument about
> being unable to write back dirty data on a frozen filesystem......

unless you also freeze the system as a whole (in a 'refrigerator
suspend' way).. the "clean" part is just about a nanosecond long. After
that stuff gets dirty again (you're doing that sendfile to receive more
packets from the FTP upload etc etc).
Sure you can pause those. But there's a real risk that you end up
pausing the app that you want to unfreeze the fs (via the memory
allocation->direct reclaim path). And no, mlock doesn't help.
Especially with delayed allocation, where data writes will cause
metadata activity, this is not just theory.

-- 
If you want to reach me at my work email, use arjan@linux.intel.com
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09 11:49                             ` Dave Chinner
  2008-07-09 12:24                               ` Theodore Tso
@ 2008-07-09 13:55                               ` Arjan van de Ven
  2008-07-09 13:58                               ` jim owens
  2 siblings, 0 replies; 71+ messages in thread
From: Arjan van de Ven @ 2008-07-09 13:55 UTC (permalink / raw)
  To: Dave Chinner
  Cc: Theodore Tso, Miklos Szeredi, hch, pavel, t-sato, akpm, viro,
	linux-ext4, xfs, dm-devel, linux-fsdevel, linux-kernel, axboe,
	mtk.manpages

On Wed, 9 Jul 2008 21:49:58 +1000
Dave Chinner <david@fromorbit.com> wrote:

> 
> (e) none of the above.  The kernel compilation will appear to pause
> until the filesystem is unfrozen. No other visible effect should
> occur. It will get blocked in a write or filesystem transaction
> because the fs is frozen.
> 
> Look at vfs_check_frozen() - any call to that will block if the
> filesystem is frozen or being frozen. The generic hook is in
> __generic_file_aio_write_nolock() and various other filesystems have
> calls in their specific write paths (fuse, ntfs, ocfs2, xfs, xip) to
> do this.

yeah and mmap doesn't happen
> 
> For all other modifications, filesystem specific methods of
> blocking transactions are used. XFS uses vfs_check_frozen() in
> xfs_trans_alloc(), ext3 (and probably ocfs2) do it via
> their ->write_super_lockfs method calling journal_lock_updates(),
> ext4 via jbd2_lock_updates() and so on....

and what if it's the process that you need to unfreeze the fs later?
Good luck.

-- 
If you want to reach me at my work email, use arjan@linux.intel.com
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09 12:59                                 ` Olaf Frączyk
@ 2008-07-09 13:57                                   ` Arjan van de Ven
  0 siblings, 0 replies; 71+ messages in thread
From: Arjan van de Ven @ 2008-07-09 13:57 UTC (permalink / raw)
  To: Olaf Frączyk
  Cc: Theodore Tso, Miklos Szeredi, hch, pavel, t-sato, akpm, viro,
	linux-ext4, xfs, dm-devel, linux-fsdevel, linux-kernel, axboe,
	mtk.manpages

On Wed, 09 Jul 2008 14:59:20 +0200
Olaf Frączyk <olaf@cbk.poznan.pl> wrote:

> On Wed, 2008-07-09 at 08:24 -0400, Theodore Tso wrote:
> > On Wed, Jul 09, 2008 at 09:49:58PM +1000, Dave Chinner wrote:
> > > (e) none of the above.  The kernel compilation will appear to
> > > pause until the filesystem is unfrozen. No other visible effect
> > > should occur. It will get blocked in a write or filesystem
> > > transaction because the fs is frozen.
> > 
> > So if the process which froze the filesystem accidentally tries
> > writing to a log file (or database file containing the backup
> > information, or whatever) that happens to be on the filesystem that
> > is frozen, that process will get blocked and you end up in a
> > deadlock; did I get that right?
> Where do you see the deadlock?
> The process doesn't have a lock on filesystem or something. You can
> always unfreeze from another process.
> 

if it's one of your main filesystems... good luck starting a shell
without writing a single thing to disk... FAIL.

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09 11:49                             ` Dave Chinner
  2008-07-09 12:24                               ` Theodore Tso
  2008-07-09 13:55                               ` Arjan van de Ven
@ 2008-07-09 13:58                               ` jim owens
  2008-07-09 14:13                                 ` jim owens
                                                   ` (2 more replies)
  2 siblings, 3 replies; 71+ messages in thread
From: jim owens @ 2008-07-09 13:58 UTC (permalink / raw)
  To: linux-fsdevel
  Cc: Dave Chinner, Theodore Tso, Arjan van de Ven, Miklos Szeredi,
	hch, pavel, t-sato, akpm, viro, linux-ext4, xfs, dm-devel,
	linux-kernel, axboe, mtk.manpages

Jumping into the battle...

Advfs implemented freezefs and thawfs in 2001 so here is
the design rational from a commercial unix view.

Note -  We already had built-in snapshots for local disk
consistent backups so some choices might be different on Linux.

NEED - provide way for SAN and hardware raid storage to do
its snapshot/copy function while the system was in-use and
get an image that could mount cleanly.  Without freeze, at
a minimum we usually needed filesystem metadata recovery
to run, worst case is completely unusable snapshits :)

freezefs() is single-level:

    ENOTSUPPOTED - by any other fs
    EOK - done
    EINPROGRESS
    EALREADY

As implemented, freezefs only ensures the metadata is
consistent so the filesystem copy can mount anywhere.

This means ONLY SOME metadata (or no metadata) is flushed and
then all metadata updates are stopped.  User/kernel writes
to already allocated file pages WILL go to a frozen disk.

It also means writers that need storage allocation (not
delaloc or existing) and things that semantically must
force on-disk updates will hang during the freeze.

These semantics meet the need and has the advantage of the
best perfomance.  The design specification for freezefs
provided flags on the api to add more consistency options
later if they were desired:
   - flush all dirty metadata
   - flush all existing dirty file data
   - prevent new dirty file data to disk
but they would all add to the "kill the system" problem.

freezefs has the timeout argument and the default timeout
is a system config parameter:
   > 0 specifies the timeout value
   = 0 uses the default timeout
   < 0 disable timeout

A program could call the freezefs/thawfs api, but the
only current use is the separate commands

# freezefs
#   [do your hardware raid stuff]
# thawfs

This is either operator driven or script/cron driven
because hardware raid providers (especially our own)
are really unfriendly and not helpful.

NUMBER ONE RULE - freeze must not hang/crash the system
because that defeats the customer reason for wanting it.

WHY A TIMEOUT - need a way for operator to abort the
freeze because with a frozen filesystem they may not
even be able to do a login to thaw it!

Users get pissed when the system is hung for a long
time and our experience with SAN devices is that their
response to commands is often unreasonably long.

In addition to the user controllable timeout mechanism,
we internally implement AUTO-THAW in the filesystem
whenever necessary to prevent a kernel hang/crash.

If an AUTO-THAW occurs, we post to the log and an
event manager so the user knows the snapshot is bad.

jim



^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09 13:58                               ` jim owens
@ 2008-07-09 14:13                                 ` jim owens
  2008-07-13 12:06                                 ` Pavel Machek
  2008-07-14 13:12                                 ` Takashi Sato
  2 siblings, 0 replies; 71+ messages in thread
From: jim owens @ 2008-07-09 14:13 UTC (permalink / raw)
  To: linux-fsdevel
  Cc: Dave Chinner, Theodore Tso, Arjan van de Ven, Miklos Szeredi,
	hch, pavel, t-sato, akpm, viro, linux-ext4, xfs, dm-devel,
	linux-kernel, axboe, mtk.manpages

Oops..

 > WHY A TIMEOUT - need a way for operator to abort the
 > freeze

instead of "abort" I should have said "limit" because
it is really proactive control (so they will not get
called in the middle of the night by pissed users).

I forgot to make it clear that TIMEOUT is the same as AUTO-THAW
in logging errors so the adnin knows they have a bad snapshot and
can do it again.

I also forgot to say that our customers all say they can deal
with retrying a snapshot, but not with unknown bad snapshots
and most definitely not with killing their 24/7 operations!

jim

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09  0:52         ` Dave Chinner
  2008-07-09  1:09           ` Theodore Tso
@ 2008-07-09 20:44           ` Pavel Machek
  1 sibling, 0 replies; 71+ messages in thread
From: Pavel Machek @ 2008-07-09 20:44 UTC (permalink / raw)
  To: Christoph Hellwig, Takashi Sato, akpm, viro, linux-ext4, xfs,
	dm-devel, linux-fsdevel, linux-kernel, axboe, mtk.manpages

Hi!

> > > > > I still disagree with this whole patch.  There is not reason to let
> > > > > the freeze request timeout - an auto-unfreezing will only confuse the
> > > > > hell out of the caller.  The only reason where the current XFS freeze
> > > > > call can hang and this would be theoretically useful is when the
> > > > 
> > > > What happens when someone dirties so much data that vm swaps out
> > > > whatever process that frozen the filesystem?
> > > 
> > > a) you can't dirty a frozen filesystem - by definition a frozen
> > >    filesystem is a *clean filesystem* and *cannot be dirtied*.
> > 
> > Can you stop me?
> > 
> > mmap("/some/huge_file", MAP_SHARED);
> > 
> > then write to memory mapping?
> 
> Sure - we can put a hook in ->page_mkwrite() to prevent it.  We
> don't right now because nobody in the real world really cares if one
> half of a concurrent user data change is in the old snapshot or the
> new one......
> 
> > > b) Swap doesn't write through the filesystem
> > > c) you can still read from a frozen filesystem to page your
> > >    executable?? in.
> > 
> > atime modification should mean dirty data, right?
> 
> Metadata, not data. If that's really a problem (and it never has
> been for XFS because we always allow in memory changes to atime)
> then touch_atime could be easily changed to avoid this...
> 
> > And dirty data mean
> > memory pressure, right? 
> 
> If you walk enough inodes while the filesystem is frozen, it
> theoretically could happen.  Typically a filesystem is only for a
> few seconds at a time so in the real world this has never, ever been
> a problem.

So we have freezing interface that does not really freeze, and
that can break the system when filesystem is frozen for too long...
:-(.

Maybe you could use process freezer -- cgroup people are adding
userspace interface to that -- to solve those... but that would mean
stopping everyone but thread doing freezing...

								Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09  7:08                         ` Miklos Szeredi
@ 2008-07-09 20:48                           ` Pavel Machek
  0 siblings, 0 replies; 71+ messages in thread
From: Pavel Machek @ 2008-07-09 20:48 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: arjan, hch, tytso, t-sato, akpm, viro, linux-ext4, xfs, dm-devel,
	linux-fsdevel, linux-kernel, axboe, mtk.manpages

On Wed 2008-07-09 09:08:07, Miklos Szeredi wrote:
> On Tue, 8 Jul 2008, Arjan van de Ven wrote:
> > I tihnk the idea there is
> > 
> > freeze . do the snapshot op . unfreeze . make backup of snapshot
> 
> Ah, so then my proposal would become
> 
>   run_frozen mountpoint do-snapshot
>   do-backup
>   release-snapshot
> 
> and if they are afraid of deadlocks they can just implement the
> timeout in userspace:
> 
>   run_frozen -t timeout mountpoint do-snapshot
> 
> 'run_frozen' can be a trivial 30 line app, that can be guaranteed not
> to deadlock.

Userland apps can be swapped out and need kernel memory allocations
during syscalls.

I bet even sleep(30) uses kmalloc internally.

So yes, even trivial applications can deadlock.

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09 13:58                               ` jim owens
  2008-07-09 14:13                                 ` jim owens
@ 2008-07-13 12:06                                 ` Pavel Machek
  2008-07-13 17:15                                   ` jim owens
  2008-07-14 13:12                                 ` Takashi Sato
  2 siblings, 1 reply; 71+ messages in thread
From: Pavel Machek @ 2008-07-13 12:06 UTC (permalink / raw)
  To: jim owens
  Cc: linux-fsdevel, Dave Chinner, Theodore Tso, Arjan van de Ven,
	Miklos Szeredi, hch, t-sato, akpm, viro, linux-ext4, xfs,
	dm-devel, linux-kernel, axboe, mtk.manpages

Hi!

> NEED - provide way for SAN and hardware raid storage to do
> its snapshot/copy function while the system was in-use and
> get an image that could mount cleanly.  Without freeze, at
> a minimum we usually needed filesystem metadata recovery
> to run, worst case is completely unusable snapshits :)
>
> freezefs() is single-level:
>
>    ENOTSUPPOTED - by any other fs
>    EOK - done
>    EINPROGRESS
>    EALREADY
>
> As implemented, freezefs only ensures the metadata is
> consistent so the filesystem copy can mount anywhere.
>
> This means ONLY SOME metadata (or no metadata) is flushed and
> then all metadata updates are stopped.  User/kernel writes
> to already allocated file pages WILL go to a frozen disk.

That's the difference here. They do write file data, and thus avoid
mmap()-writes problem.

...and they _still_ provide auto-thaw.
								Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-13 12:06                                 ` Pavel Machek
@ 2008-07-13 17:15                                   ` jim owens
  2008-07-14  6:36                                     ` Pavel Machek
  0 siblings, 1 reply; 71+ messages in thread
From: jim owens @ 2008-07-13 17:15 UTC (permalink / raw)
  To: Pavel Machek
  Cc: linux-fsdevel, Dave Chinner, Theodore Tso, Arjan van de Ven,
	Miklos Szeredi, hch, t-sato, akpm, viro, linux-ext4, xfs,
	dm-devel, linux-kernel, axboe, mtk.manpages

Pavel Machek wrote:

>>This means ONLY SOME metadata (or no metadata) is flushed and
>>then all metadata updates are stopped.  User/kernel writes
>>to already allocated file pages WILL go to a frozen disk.
> 
> That's the difference here. They do write file data, and thus avoid
> mmap()-writes problem.
> 
> ...and they _still_ provide auto-thaw.
> 								Pavel

One of the hardest things to make people understand is that
stopping file data writes in the filesystem during a freeze
is not just dangerous, it is also __worthless__ unless you
have a complete "user environment freeze" mechanism.

In a real 24/7 environment, the DB and application stack
may be poorly glued together stuff from multiple vendors.

And unless each independent component has a freeze and they
can all be coordinated, the data in the pipeline is never
stable enough to say "if you stop all writes to disk and
take a snapshot, this is the same as an orderly shutdown,
backup, restore, and startup".

If you need to stop applications before a freeze, there
is no reason to implement "stop writing file data to disk".

The only real way to make it work (and what the smart apps
do) is to have application "checkpoint" commands so they
can roll-back to a stable point from the snapshot while
allowing new user activity to proceed.

People who don't have checkpoints or some other way to
make their environment stable with a transitioning snapshot
must stop all user activity before snapshotting and have
maintenance windows defined to do that.

jim

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-13 17:15                                   ` jim owens
@ 2008-07-14  6:36                                     ` Pavel Machek
  2008-07-14 13:17                                       ` jim owens
  0 siblings, 1 reply; 71+ messages in thread
From: Pavel Machek @ 2008-07-14  6:36 UTC (permalink / raw)
  To: jim owens
  Cc: linux-fsdevel, Dave Chinner, Theodore Tso, Arjan van de Ven,
	Miklos Szeredi, hch, t-sato, akpm, viro, linux-ext4, xfs,
	dm-devel, linux-kernel, axboe, mtk.manpages

On Sun 2008-07-13 13:15:43, jim owens wrote:
> Pavel Machek wrote:
>
>>> This means ONLY SOME metadata (or no metadata) is flushed and
>>> then all metadata updates are stopped.  User/kernel writes
>>> to already allocated file pages WILL go to a frozen disk.
>>
>> That's the difference here. They do write file data, and thus avoid
>> mmap()-writes problem.
>>
>> ...and they _still_ provide auto-thaw.
>> 								Pavel
>
> One of the hardest things to make people understand is that
> stopping file data writes in the filesystem during a freeze
> is not just dangerous, it is also __worthless__ unless you
> have a complete "user environment freeze" mechanism.

Eh?

> And unless each independent component has a freeze and they
> can all be coordinated, the data in the pipeline is never
> stable enough to say "if you stop all writes to disk and
> take a snapshot, this is the same as an orderly shutdown,
> backup, restore, and startup".

If you also freeze data writes, at least snapshot is not worse than
_unexpected_ shutdown.

And apps should already be ready for unexpected shutdowns (using fsync
etc).

								Pavel

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-09 13:58                               ` jim owens
  2008-07-09 14:13                                 ` jim owens
  2008-07-13 12:06                                 ` Pavel Machek
@ 2008-07-14 13:12                                 ` Takashi Sato
  2008-07-14 14:04                                   ` jim owens
  2 siblings, 1 reply; 71+ messages in thread
From: Takashi Sato @ 2008-07-14 13:12 UTC (permalink / raw)
  To: jim owens
  Cc: mtk.manpages, axboe, linux-kernel, dm-devel, xfs, linux-ext4,
	viro, akpm, pavel, linux-fsdevel, hch, Miklos Szeredi,
	Arjan van de Ven, Theodore Tso, Dave Chinner

Hi,

jim owens wrote:
> In addition to the user controllable timeout mechanism,
> we internally implement AUTO-THAW in the filesystem
> whenever necessary to prevent a kernel hang/crash.
>
> If an AUTO-THAW occurs, we post to the log and an
> event manager so the user knows the snapshot is bad.

I am interested in AUTO-THAW.
What is the difference between the timeout and AUTO-THAW?
When the kernel detects a deadlock, does it occur to solve it?

Cheers, Takashi

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-14  6:36                                     ` Pavel Machek
@ 2008-07-14 13:17                                       ` jim owens
  0 siblings, 0 replies; 71+ messages in thread
From: jim owens @ 2008-07-14 13:17 UTC (permalink / raw)
  To: Pavel Machek
  Cc: linux-fsdevel, Dave Chinner, Theodore Tso, Arjan van de Ven,
	Miklos Szeredi, hch, t-sato, akpm, viro, linux-ext4, xfs,
	dm-devel, linux-kernel, axboe, mtk.manpages

Pavel Machek wrote:

> If you also freeze data writes, at least snapshot is not worse than
> _unexpected_ shutdown.

True.  But the key point is also that stopping file writes is
__no better__ than an unexpected shutdown for applications.

Once kernel people accept that it is identical to an unknown
shutdown state, they realize that as you said...

> And apps should already be ready for unexpected shutdowns (using fsync
> etc).

The people writing the code outside the kernel are the ones
responsible for the logic of handling "we don't know what
application writes made it to disk".

Remember that immediately after fsync(), the next write
can make the file inconsistent.  For example, look at this
simple sales processing database type sequence:

    write(sale_last_name_file, "Mackek"
    write(sale_first_name_file, "Pavel")
    fsync(sale_last_name_file)
    fsync(sale_first_name_file)
       ...
    write(sale_last_name_file, "Owens")
    write(sale_first_name_file, "Jim")
    fsync(sale_last_name_file)

FREEZEFS [defined to NOT allow file data writes]

So if we restart from that snapshot, the sales order
thinks the customer is "Pavel Owens"... unless there
is some mechanism such as seqence numbers that tie
the files together.  And if they have such a mechanism
then it doesn't matter if we allow even more writes
such as:

    write(sale_first_name_file, "Ted")

There just is no way inside the kernel to know a
freeze was triggered at a stable application point
such that data before the freeze must be on disk
and data after the freez must not go to disk.

This issue is not unique to freeze, it is also
present with filesystems that have snapshots and
cow/versioning built in.

jim

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-07-14 13:12                                 ` Takashi Sato
@ 2008-07-14 14:04                                   ` jim owens
  0 siblings, 0 replies; 71+ messages in thread
From: jim owens @ 2008-07-14 14:04 UTC (permalink / raw)
  To: Takashi Sato
  Cc: mtk.manpages, axboe, linux-kernel, dm-devel, xfs, linux-ext4,
	viro, akpm, pavel, linux-fsdevel, hch, Miklos Szeredi,
	Arjan van de Ven, Theodore Tso, Dave Chinner

Takashi Sato wrote:

> What is the difference between the timeout and AUTO-THAW?
> When the kernel detects a deadlock, does it occur to solve it?

TIMEOUT is a user-specified limit for the freeze.  It is
not a deadlock preventer or deadlock breaker.  The reason
it exists is:

    - middle of the night (low but not zero users)
    - cron triggers freeze and hardware snapshot
    - san is overloaded by tape copy traffic so
      hardware will take 2 hours to ack snapshot done
    - user "company president" tries to create a report
      needed for an AM meeting with bankers
    - with so few users, system will just patiently
      wait for hardware to finish
    - after 10 minutes "company president" pages
      admin, admin's boss, and "IT vice president"
      in a real unhappy mood

AUTO-THAW is simply a name for the effect of all deadlock
preventer and deadlock breaker code that the kernel has
in the freeze implementation paths... if that code would
unfreeze the filesystem.  We also implemented deadlock
preventer code that does not thaw the freeze.

None of the AUTO-THAW code is there to stop a stupid
userspace program caller of freeze.  It handles things
like "a system in our cluster is going down so we
must have this filesystem unfrozen or the whole
cluster will crash".   In places where there could be
a kernel deadlock we made it "lock-only-if-non-blocking"
and if we could not wait to retry later, the failure
to lock would trigger an immediate unfreeze.

Deadlock prevention needs code in critical paths in more
than just filesystems.  Sometimes this is as simple as
an "I can't wait on freeze" flag added to a vm-filesystem
interface.

Timers just don't work for keeping the kernel alive
because they don't trigger on resource exhaustion.

jim

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-10-09 10:12               ` Takashi Sato
@ 2008-10-09 10:18                 ` Christoph Hellwig
  0 siblings, 0 replies; 71+ messages in thread
From: Christoph Hellwig @ 2008-10-09 10:18 UTC (permalink / raw)
  To: Takashi Sato
  Cc: Eric Sandeen, Christoph Hellwig, Ric Wheeler, Andrew Morton,
	Oleg Nesterov, linux-fsdevel, dm-devel, viro, linux-ext4, xfs,
	axboe, mtk.manpages, linux-kernel

On Thu, Oct 09, 2008 at 07:12:17PM +0900, Takashi Sato wrote:
> I think we need the timeout for the case someone dirties so much data
> with mmap, hence the freeze process is swapped out and cannot unfreeze.

That is not supposed to happen.  That's why write blocks early on a
frozen filesystem (the shared mmap write path is currently missing the
check, but that's a rather small patch)


^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-09-29 14:45             ` Eric Sandeen
  2008-09-29 22:08               ` jim owens
  2008-10-05 10:00               ` Pavel Machek
@ 2008-10-09 10:12               ` Takashi Sato
  2008-10-09 10:18                 ` Christoph Hellwig
  2 siblings, 1 reply; 71+ messages in thread
From: Takashi Sato @ 2008-10-09 10:12 UTC (permalink / raw)
  To: Eric Sandeen, Christoph Hellwig
  Cc: Ric Wheeler, Andrew Morton, Oleg Nesterov, linux-fsdevel,
	dm-devel, viro, linux-ext4, xfs, axboe, mtk.manpages,
	linux-kernel

Hi,

Eric Sandeen wrote:
> Christoph Hellwig wrote:
>> On Mon, Sep 29, 2008 at 09:36:04AM -0500, Eric Sandeen wrote:
>>> Christoph Hellwig wrote:
>>>> On Fri, Sep 26, 2008 at 05:52:35PM +0900, Takashi Sato wrote:
>>>>> I think that your concern is that the freezer cannot recognize the occurrence
>>>>> of a timeout and it continues the backup process and the backup data is
>>>>> corrupted finally.
>>>> What timeout should happen?  the freeze ioctl must not return until the
>>>> filesystem is a clean state and all writes are blocked.
>>> The suggestion was that *UN*freeze would return ETIMEDOUT if the
>>> filesystem had already unfrozen itself, I think.  That way you know that
>>> the snapshot you just took is worthless, at least.
>>
>> But why would the filesystem every unfreeze itself?  That defeats the
>> whole point of freezing it.
>
> I agree.  Was just trying to clarify the above point.
>
> But there have been what, 12 submissions now, with the unfreeze timeout
> in place so it's a persistent theme ;)
>
> Perhaps a demonstration of just how easy (or not easy) it is to deadlock
> a filesystem by freezing the root might be in order, at least.
>
> And even if it is relatively easy, I still maintain that it is the
> administrator's role to not inflict damage on the machine being
> administered.  There are a lot of potentially dangerous tools at root's
> disposal; why this particular one needs a nanny I'm still not quite sure.

I think we need the timeout for the case someone dirties so much data
with mmap, hence the freeze process is swapped out and cannot unfreeze.

Cheers, Takashi

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-09-29 14:45             ` Eric Sandeen
  2008-09-29 22:08               ` jim owens
@ 2008-10-05 10:00               ` Pavel Machek
  2008-10-09 10:12               ` Takashi Sato
  2 siblings, 0 replies; 71+ messages in thread
From: Pavel Machek @ 2008-10-05 10:00 UTC (permalink / raw)
  To: Eric Sandeen
  Cc: Christoph Hellwig, Takashi Sato, Ric Wheeler, Andrew Morton,
	Oleg Nesterov, linux-fsdevel, dm-devel, viro, linux-ext4, xfs,
	axboe, mtk.manpages, linux-kernel

On Mon 2008-09-29 09:45:31, Eric Sandeen wrote:
> Christoph Hellwig wrote:
> > On Mon, Sep 29, 2008 at 09:36:04AM -0500, Eric Sandeen wrote:
> >> Christoph Hellwig wrote:
> >>> On Fri, Sep 26, 2008 at 05:52:35PM +0900, Takashi Sato wrote:
> >>>> I think that your concern is that the freezer cannot recognize the occurrence
> >>>> of a timeout and it continues the backup process and the backup data is
> >>>> corrupted finally.
> >>> What timeout should happen?  the freeze ioctl must not return until the
> >>> filesystem is a clean state and all writes are blocked.
> >> The suggestion was that *UN*freeze would return ETIMEDOUT if the
> >> filesystem had already unfrozen itself, I think.  That way you know that
> >> the snapshot you just took is worthless, at least.
> > 
> > But why would the filesystem every unfreeze itself?  That defeats the
> > whole point of freezing it.
> 
> I agree.  Was just trying to clarify the above point.
> 
> But there have been what, 12 submissions now, with the unfreeze timeout
> in place so it's a persistent theme ;)
> 
> Perhaps a demonstration of just how easy (or not easy) it is to deadlock
> a filesystem by freezing the root might be in order, at least.
> 
> And even if it is relatively easy, I still maintain that it is the
> administrator's role to not inflict damage on the machine being
> administered.  There are a lot of potentially dangerous tools at root's
> disposal; why this particular one needs a nanny I'm still not quite sure.

Can you docuument what administrator must not do for freezing to be
safe?

What if so much dirty data accumulates on frozen filesystem that
there's not enough memory for the unfreeze tool?

									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-09-29 14:45             ` Eric Sandeen
@ 2008-09-29 22:08               ` jim owens
  2008-10-05 10:00               ` Pavel Machek
  2008-10-09 10:12               ` Takashi Sato
  2 siblings, 0 replies; 71+ messages in thread
From: jim owens @ 2008-09-29 22:08 UTC (permalink / raw)
  To: Eric Sandeen
  Cc: Christoph Hellwig, Takashi Sato, Ric Wheeler, Andrew Morton,
	Oleg Nesterov, linux-fsdevel, dm-devel, viro, linux-ext4, xfs,
	axboe, mtk.manpages, linux-kernel

Eric Sandeen wrote:
 > Christoph Hellwig wrote:
 >> But why would the filesystem every unfreeze itself?  That defeats the
 >> whole point of freezing it.
 >
 > I agree.  Was just trying to clarify the above point.
 >
 > But there have been what, 12 submissions now, with the unfreeze timeout
 > in place so it's a persistent theme ;)
 >
 > Perhaps a demonstration of just how easy (or not easy) it is to deadlock
 > a filesystem by freezing the root might be in order, at least.
 >
 > And even if it is relatively easy, I still maintain that it is the
 > administrator's role to not inflict damage on the machine being
 > administered.  There are a lot of potentially dangerous tools at root's
 > disposal; why this particular one needs a nanny I'm still not quite sure.

Since this patch hit fsdev, there have been an equal number
of supporters and opponents of the timeout.

I'm not opposed to the timeout on the API, but I don't think
it is needed if we have a system configurable timeout (default
is no timeout) that can be changed by an admin.

My experience is that a timeout is not needed protect against
a stupid admin or against software bugs.

The justification for a timeout as far as I am concerned
is so the admin can log in and reset hung hardware.  If we
think there is no chance of forcing the external device that
went to sleep to respond so the system can continue to be used,
then I don't think a timeout has any valid use.

My timeout desire is based on some past SAN behavior and
I'm OK if people argue those devices should just be fixed.
But we argued the same thing and were ignored because bad
device behavior did not stop people from buying them.

jim

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-09-29 14:37           ` Christoph Hellwig
@ 2008-09-29 14:45             ` Eric Sandeen
  2008-09-29 22:08               ` jim owens
                                 ` (2 more replies)
  0 siblings, 3 replies; 71+ messages in thread
From: Eric Sandeen @ 2008-09-29 14:45 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Takashi Sato, Ric Wheeler, Andrew Morton, Oleg Nesterov,
	linux-fsdevel, dm-devel, viro, linux-ext4, xfs, axboe,
	mtk.manpages, linux-kernel

Christoph Hellwig wrote:
> On Mon, Sep 29, 2008 at 09:36:04AM -0500, Eric Sandeen wrote:
>> Christoph Hellwig wrote:
>>> On Fri, Sep 26, 2008 at 05:52:35PM +0900, Takashi Sato wrote:
>>>> I think that your concern is that the freezer cannot recognize the occurrence
>>>> of a timeout and it continues the backup process and the backup data is
>>>> corrupted finally.
>>> What timeout should happen?  the freeze ioctl must not return until the
>>> filesystem is a clean state and all writes are blocked.
>> The suggestion was that *UN*freeze would return ETIMEDOUT if the
>> filesystem had already unfrozen itself, I think.  That way you know that
>> the snapshot you just took is worthless, at least.
> 
> But why would the filesystem every unfreeze itself?  That defeats the
> whole point of freezing it.

I agree.  Was just trying to clarify the above point.

But there have been what, 12 submissions now, with the unfreeze timeout
in place so it's a persistent theme ;)

Perhaps a demonstration of just how easy (or not easy) it is to deadlock
a filesystem by freezing the root might be in order, at least.

And even if it is relatively easy, I still maintain that it is the
administrator's role to not inflict damage on the machine being
administered.  There are a lot of potentially dangerous tools at root's
disposal; why this particular one needs a nanny I'm still not quite sure.

-Eric

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-09-29 14:36         ` Eric Sandeen
@ 2008-09-29 14:37           ` Christoph Hellwig
  2008-09-29 14:45             ` Eric Sandeen
  0 siblings, 1 reply; 71+ messages in thread
From: Christoph Hellwig @ 2008-09-29 14:37 UTC (permalink / raw)
  To: Eric Sandeen
  Cc: Christoph Hellwig, Takashi Sato, Ric Wheeler, Andrew Morton,
	Oleg Nesterov, linux-fsdevel, dm-devel, viro, linux-ext4, xfs,
	axboe, mtk.manpages, linux-kernel

On Mon, Sep 29, 2008 at 09:36:04AM -0500, Eric Sandeen wrote:
> Christoph Hellwig wrote:
> > On Fri, Sep 26, 2008 at 05:52:35PM +0900, Takashi Sato wrote:
> >> I think that your concern is that the freezer cannot recognize the occurrence
> >> of a timeout and it continues the backup process and the backup data is
> >> corrupted finally.
> > 
> > What timeout should happen?  the freeze ioctl must not return until the
> > filesystem is a clean state and all writes are blocked.
> 
> The suggestion was that *UN*freeze would return ETIMEDOUT if the
> filesystem had already unfrozen itself, I think.  That way you know that
> the snapshot you just took is worthless, at least.

But why would the filesystem every unfreeze itself?  That defeats the
whole point of freezing it.


^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-09-29 14:13       ` Christoph Hellwig
@ 2008-09-29 14:36         ` Eric Sandeen
  2008-09-29 14:37           ` Christoph Hellwig
  0 siblings, 1 reply; 71+ messages in thread
From: Eric Sandeen @ 2008-09-29 14:36 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Takashi Sato, Ric Wheeler, Andrew Morton, Oleg Nesterov,
	linux-fsdevel, dm-devel, viro, linux-ext4, xfs, axboe,
	mtk.manpages, linux-kernel

Christoph Hellwig wrote:
> On Fri, Sep 26, 2008 at 05:52:35PM +0900, Takashi Sato wrote:
>> I think that your concern is that the freezer cannot recognize the occurrence
>> of a timeout and it continues the backup process and the backup data is
>> corrupted finally.
> 
> What timeout should happen?  the freeze ioctl must not return until the
> filesystem is a clean state and all writes are blocked.

The suggestion was that *UN*freeze would return ETIMEDOUT if the
filesystem had already unfrozen itself, I think.  That way you know that
the snapshot you just took is worthless, at least.

I'm still not really sold on the timeout, but I think the above was the
intent.

-Eric

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-09-26  8:52     ` Takashi Sato
  2008-09-26 10:58       ` Ric Wheeler
  2008-09-26 12:35       ` Valdis.Kletnieks
@ 2008-09-29 14:13       ` Christoph Hellwig
  2008-09-29 14:36         ` Eric Sandeen
  2 siblings, 1 reply; 71+ messages in thread
From: Christoph Hellwig @ 2008-09-29 14:13 UTC (permalink / raw)
  To: Takashi Sato
  Cc: Ric Wheeler, Christoph Hellwig, Andrew Morton, Oleg Nesterov,
	linux-fsdevel, dm-devel, viro, linux-ext4, xfs, axboe,
	mtk.manpages, linux-kernel

On Fri, Sep 26, 2008 at 05:52:35PM +0900, Takashi Sato wrote:
> I think that your concern is that the freezer cannot recognize the occurrence
> of a timeout and it continues the backup process and the backup data is
> corrupted finally.

What timeout should happen?  the freeze ioctl must not return until the
filesystem is a clean state and all writes are blocked.


^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-09-26 10:58       ` Ric Wheeler
@ 2008-09-29 11:11         ` Takashi Sato
  0 siblings, 0 replies; 71+ messages in thread
From: Takashi Sato @ 2008-09-29 11:11 UTC (permalink / raw)
  To: Ric Wheeler, Christoph Hellwig
  Cc: Andrew Morton, Oleg Nesterov, linux-fsdevel, dm-devel, viro,
	linux-ext4, xfs, axboe, mtk.manpages, linux-kernel

Hi Ric and Christoph,

Ric Wheeler wrote:
>>>> And as with all previous posting I still fundamentally disagree about
>>>> the need of this functionality.  We don't need a timeout for freezing.
>>>
>>> I agree with Christoph here, I think that the timeout is unneeded.
>>
>> I think that your concern is that the freezer cannot recognize the occurrence
>> of a timeout and it continues the backup process and the backup data is
>> corrupted finally.
>> If the freezer can recognize it by the unfreeze ioctl's errono, will your concern
>> be solved?
>> If so, I will implement it.
>>
>> Cheers, Takashi
>>
> I think that is certainly part a big part of my concern.
>
> Also note that the timeout seems to be quite low relative to say the standard timeout for a SCSI 
> device (30 seconds worst case).
>
> In general, I am quite supportive of the patch series and think that this is a great addition.

Thank you for your comments.
Christoph, do you have any comments about this solution?

If it's OK, I will change the freeze patch so that the unfreeze ioctl sets
ETIMEDOUT to errno when the timeout occurs.

Cheers, Takashi

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-09-26  8:52     ` Takashi Sato
  2008-09-26 10:58       ` Ric Wheeler
@ 2008-09-26 12:35       ` Valdis.Kletnieks
  2008-09-29 14:13       ` Christoph Hellwig
  2 siblings, 0 replies; 71+ messages in thread
From: Valdis.Kletnieks @ 2008-09-26 12:35 UTC (permalink / raw)
  To: Takashi Sato
  Cc: Ric Wheeler, Christoph Hellwig, Andrew Morton, Oleg Nesterov,
	linux-fsdevel, dm-devel, viro, linux-ext4, xfs, axboe,
	mtk.manpages, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 562 bytes --]

On Fri, 26 Sep 2008 17:52:35 +0900, Takashi Sato said:

(Sorry, am reading the threads out of temporal sequence....)

> I think that your concern is that the freezer cannot recognize the occurrence
> of a timeout and it continues the backup process and the backup data is
> corrupted finally.
> If the freezer can recognize it by the unfreeze ioctl's errono, will your concern
>  be solved?
> If so, I will implement it.

That would also address my concerns about merging patches 8 and 10 of the
other patch series (because patch 10 wouldn't be needed then)...


[-- Attachment #2: Type: application/pgp-signature, Size: 226 bytes --]

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-09-26  8:52     ` Takashi Sato
@ 2008-09-26 10:58       ` Ric Wheeler
  2008-09-29 11:11         ` Takashi Sato
  2008-09-26 12:35       ` Valdis.Kletnieks
  2008-09-29 14:13       ` Christoph Hellwig
  2 siblings, 1 reply; 71+ messages in thread
From: Ric Wheeler @ 2008-09-26 10:58 UTC (permalink / raw)
  To: Takashi Sato
  Cc: Christoph Hellwig, Andrew Morton, Oleg Nesterov, linux-fsdevel,
	dm-devel, viro, linux-ext4, xfs, axboe, mtk.manpages,
	linux-kernel

Takashi Sato wrote:
> Hi,
>
> Ric Wheeler wrote:
>> Christoph Hellwig wrote:
>>> On Mon, Sep 08, 2008 at 08:53:37PM +0900, Takashi Sato wrote:
>>>
>>>> The timeout feature is added to "freeze ioctl" to solve a deadlock
>>>> when the freezer accesses a frozen filesystem. And new ioctl
>>>> to reset the timeout period is added to extend the timeout period.
>>>> For example, the freezer resets the timeout period to 10 seconds 
>>>> every 5
>>>> seconds.  In this approach, even if the freezer causes a deadlock by
>>>> accessing the frozen filesystem, it will be solved by the timeout
>>>> in 10 seconds and the freezer will be able to recognize that
>>>> at the next reset of timeout period.
>>>>
>>>
>>> And as with all previous posting I still fundamentally disagree about
>>> the need of this functionality.  We don't need a timeout for freezing.
>>
>> I agree with Christoph here, I think that the timeout is unneeded.
>
> I think that your concern is that the freezer cannot recognize the 
> occurrence
> of a timeout and it continues the backup process and the backup data is
> corrupted finally.
> If the freezer can recognize it by the unfreeze ioctl's errono, will 
> your concern
> be solved?
> If so, I will implement it.
>
> Cheers, Takashi
>
I think that is certainly part a big part of my concern.

Also note that the timeout seems to be quite low relative to say the 
standard timeout for a SCSI device (30 seconds worst case).

In general, I am quite supportive of the patch series and think that 
this is a great addition.

Thanks!

Ric



^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-09-25 21:06   ` Ric Wheeler
@ 2008-09-26  8:52     ` Takashi Sato
  2008-09-26 10:58       ` Ric Wheeler
                         ` (2 more replies)
  0 siblings, 3 replies; 71+ messages in thread
From: Takashi Sato @ 2008-09-26  8:52 UTC (permalink / raw)
  To: Ric Wheeler, Christoph Hellwig
  Cc: Andrew Morton, Oleg Nesterov, linux-fsdevel, dm-devel, viro,
	linux-ext4, xfs, axboe, mtk.manpages, linux-kernel

Hi,

Ric Wheeler wrote:
> Christoph Hellwig wrote:
>> On Mon, Sep 08, 2008 at 08:53:37PM +0900, Takashi Sato wrote:
>>
>>> The timeout feature is added to "freeze ioctl" to solve a deadlock
>>> when the freezer accesses a frozen filesystem. And new ioctl
>>> to reset the timeout period is added to extend the timeout period.
>>> For example, the freezer resets the timeout period to 10 seconds every 5
>>> seconds.  In this approach, even if the freezer causes a deadlock by
>>> accessing the frozen filesystem, it will be solved by the timeout
>>> in 10 seconds and the freezer will be able to recognize that
>>> at the next reset of timeout period.
>>>
>>
>> And as with all previous posting I still fundamentally disagree about
>> the need of this functionality.  We don't need a timeout for freezing.
>
> I agree with Christoph here, I think that the timeout is unneeded.

I think that your concern is that the freezer cannot recognize the occurrence
of a timeout and it continues the backup process and the backup data is
corrupted finally.
If the freezer can recognize it by the unfreeze ioctl's errono, will your concern
 be solved?
If so, I will implement it.

Cheers, Takashi


^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-09-08 17:11 ` Christoph Hellwig
@ 2008-09-25 21:06   ` Ric Wheeler
  2008-09-26  8:52     ` Takashi Sato
  0 siblings, 1 reply; 71+ messages in thread
From: Ric Wheeler @ 2008-09-25 21:06 UTC (permalink / raw)
  To: Christoph Hellwig, Takashi Sato
  Cc: Andrew Morton, Oleg Nesterov, linux-fsdevel, dm-devel, viro,
	linux-ext4, xfs, axboe, mtk.manpages, linux-kernel

Christoph Hellwig wrote:
> On Mon, Sep 08, 2008 at 08:53:37PM +0900, Takashi Sato wrote:
>   
>> The timeout feature is added to "freeze ioctl" to solve a deadlock
>> when the freezer accesses a frozen filesystem. And new ioctl
>> to reset the timeout period is added to extend the timeout period.
>> For example, the freezer resets the timeout period to 10 seconds every 5
>> seconds.  In this approach, even if the freezer causes a deadlock by
>> accessing the frozen filesystem, it will be solved by the timeout
>> in 10 seconds and the freezer will be able to recognize that
>> at the next reset of timeout period.
>>     
>
> And as with all previous posting I still fundamentally disagree about
> the need of this functionality.  We don't need a timeout for freezing.
>
>   

I agree with Christoph here, I think that the timeout is unneeded.

Regards,

Ric


^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-09-08 11:53 Takashi Sato
@ 2008-09-08 17:11 ` Christoph Hellwig
  2008-09-25 21:06   ` Ric Wheeler
  0 siblings, 1 reply; 71+ messages in thread
From: Christoph Hellwig @ 2008-09-08 17:11 UTC (permalink / raw)
  To: Takashi Sato
  Cc: Andrew Morton, Christoph Hellwig, Oleg Nesterov, linux-fsdevel,
	dm-devel, viro, linux-ext4, xfs, axboe, mtk.manpages,
	linux-kernel

On Mon, Sep 08, 2008 at 08:53:37PM +0900, Takashi Sato wrote:
> The timeout feature is added to "freeze ioctl" to solve a deadlock
> when the freezer accesses a frozen filesystem. And new ioctl
> to reset the timeout period is added to extend the timeout period.
> For example, the freezer resets the timeout period to 10 seconds every 5
> seconds.  In this approach, even if the freezer causes a deadlock by
> accessing the frozen filesystem, it will be solved by the timeout
> in 10 seconds and the freezer will be able to recognize that
> at the next reset of timeout period.

And as with all previous posting I still fundamentally disagree about
the need of this functionality.  We don't need a timeout for freezing.


^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 3/3] Add timeout feature
@ 2008-09-08 11:53 Takashi Sato
  2008-09-08 17:11 ` Christoph Hellwig
  0 siblings, 1 reply; 71+ messages in thread
From: Takashi Sato @ 2008-09-08 11:53 UTC (permalink / raw)
  To: Andrew Morton, Christoph Hellwig, Oleg Nesterov
  Cc: linux-fsdevel, dm-devel, viro, linux-ext4, xfs, axboe,
	mtk.manpages, linux-kernel

The timeout feature is added to "freeze ioctl" to solve a deadlock
when the freezer accesses a frozen filesystem. And new ioctl
to reset the timeout period is added to extend the timeout period.
For example, the freezer resets the timeout period to 10 seconds every 5
seconds.  In this approach, even if the freezer causes a deadlock by
accessing the frozen filesystem, it will be solved by the timeout
in 10 seconds and the freezer will be able to recognize that
at the next reset of timeout period.
o Freeze the filesystem
  int ioctl(int fd, int FIFREEZE, long *timeout_sec)
    fd: The file descriptor of the mountpoint
    FIFREEZE: request code for the freeze
    timeout_sec: the timeout period in seconds
             If it's 0 or 1, the timeout isn't set.
             This special case of "1" is implemented to keep
             the compatibility with XFS applications.
    Return value: 0 if the operation succeeds. Otherwise, -1

o Reset the timeout period
  int ioctl(int fd, int FIFREEZE_RESET_TIMEOUT, long *timeout_sec)
    fd:file descriptor of mountpoint
    FIFREEZE_RESET_TIMEOUT: request code for reset of timeout period
    timeout_sec: new timeout period in seconds
    Return value: 0 if the operation succeeds. Otherwise, -1
    Error number: If the filesystem has already been unfrozen,
                  errno is set to EINVAL.

Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
Signed-off-by: Masayuki Hamaguchi <m-hamaguchi@ys.jp.nec.com>
---
 drivers/md/dm.c             |    2 -
 fs/block_dev.c              |    2 +
 fs/buffer.c                 |   44 ++++++++++++++++++++++++---
 fs/ioctl.c                  |   71 ++++++++++++++++++++++++++++++++++++++++++--
 fs/super.c                  |   37 ++++++++++++++++++++++
 fs/xfs/xfs_fsops.c          |    2 -
 include/linux/buffer_head.h |    4 +-
 include/linux/fs.h          |    8 ++++
 8 files changed, 159 insertions(+), 11 deletions(-)

diff -uprN -X linux-2.6.27-rc5.org/Documentation/dontdiff linux-2.6.27-rc5-xfs/drivers/md/dm.c linux-2.6.27-rc5-timeout/
drivers/md/dm.c
--- linux-2.6.27-rc5-xfs/drivers/md/dm.c	2008-09-05 19:59:59.000000000 +0900
+++ linux-2.6.27-rc5-timeout/drivers/md/dm.c	2008-09-05 17:47:47.000000000 +0900
@@ -1451,7 +1451,7 @@ static int lock_fs(struct mapped_device 
 
 	WARN_ON(md->frozen_sb);
 
-	md->frozen_sb = freeze_bdev(md->suspended_bdev);
+	md->frozen_sb = freeze_bdev(md->suspended_bdev, 0);
 	if (IS_ERR(md->frozen_sb)) {
 		r = PTR_ERR(md->frozen_sb);
 		md->frozen_sb = NULL;
diff -uprN -X linux-2.6.27-rc5.org/Documentation/dontdiff linux-2.6.27-rc5-xfs/fs/block_dev.c linux-2.6.27-rc5-timeout/f
s/block_dev.c
--- linux-2.6.27-rc5-xfs/fs/block_dev.c	2008-09-05 20:00:29.000000000 +0900
+++ linux-2.6.27-rc5-timeout/fs/block_dev.c	2008-09-05 17:47:47.000000000 +0900
@@ -287,6 +287,8 @@ static void init_once(void *foo)
 	inode_init_once(&ei->vfs_inode);
 	/* Initialize mutex for freeze. */
 	mutex_init(&bdev->bd_fsfreeze_mutex);
+	/* Setup freeze timeout function. */
+	INIT_DELAYED_WORK(&bdev->bd_fsfreeze_timeout, fsfreeze_timeout);
 }
 
 static inline void __bd_forget(struct inode *inode)
diff -uprN -X linux-2.6.27-rc5.org/Documentation/dontdiff linux-2.6.27-rc5-xfs/fs/buffer.c linux-2.6.27-rc5-timeout/fs/b
uffer.c
--- linux-2.6.27-rc5-xfs/fs/buffer.c	2008-09-05 20:23:13.000000000 +0900
+++ linux-2.6.27-rc5-timeout/fs/buffer.c	2008-09-05 20:43:40.000000000 +0900
@@ -190,26 +190,36 @@ int fsync_bdev(struct block_device *bdev
 
 /**
  * freeze_bdev  --  lock a filesystem and force it into a consistent state
- * @bdev:	blockdevice to lock
+ * @bdev:              blockdevice to lock
+ * @timeout_msec:      timeout period
  *
  * This takes the block device bd_mount_sem to make sure no new mounts
  * happen on bdev until thaw_bdev() is called.
  * If a superblock is found on this device, we take the s_umount semaphore
  * on it to make sure nobody unmounts until the snapshot creation is done.
+ * If timeout_msec is bigger than 0, this registers the delayed work for
+ * timeout of the freeze feature.
  * The reference counter (bd_fsfreeze_count) guarantees that only the last
  * unfreeze process can unfreeze the frozen filesystem actually when multiple
  * freeze requests arrive simultaneously. It counts up in freeze_bdev() and
  * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze
  * actually.
  */
-struct super_block *freeze_bdev(struct block_device *bdev)
+struct super_block *freeze_bdev(struct block_device *bdev,
+				unsigned int timeout_msec)
 {
 	struct super_block *sb;
 
 	mutex_lock(&bdev->bd_fsfreeze_mutex);
 	if (bdev->bd_fsfreeze_count > 0) {
-		bdev->bd_fsfreeze_count++;
-		sb = get_super(bdev);
+		if ((delayed_work_pending(&bdev->bd_fsfreeze_timeout))
+				|| (timeout_msec != 0))
+			sb = ERR_PTR(-EBUSY);
+		else {
+			bdev->bd_fsfreeze_count++;
+			sb = get_super(bdev);
+		}
+
 		mutex_unlock(&bdev->bd_fsfreeze_mutex);
 		return sb;
 	}
@@ -233,6 +243,10 @@ struct super_block *freeze_bdev(struct b
 	}
 
 	sync_blockdev(bdev);
+	/* Setup unfreeze timer. */
+	if (timeout_msec > 0)
+		add_fsfreeze_timeout(bdev, timeout_msec);
+
 	mutex_unlock(&bdev->bd_fsfreeze_mutex);
 
 	return sb;	/* thaw_bdev releases s->s_umount and bd_mount_sem */
@@ -248,6 +262,22 @@ EXPORT_SYMBOL(freeze_bdev);
  */
 int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 {
+	return thaw_bdev_core(bdev, sb, 1);
+}
+EXPORT_SYMBOL(thaw_bdev);
+
+/**
+ * thaw_bdev_core  -- unlock filesystem and delete timeout task
+ * @bdev:		blockdevice to unlock
+ * @sb:			associated superblock
+ * @del_timeout_task:	If it is 0 then don't delete timeout task else delete
+ *
+ * Unlocks the filesystem and marks it writeable again after freeze_bdev().
+ * And If del_timeout_task is 0 then don't delete timeout task else delete.
+ */
+int thaw_bdev_core(struct block_device *bdev,
+				struct super_block *sb, int del_timeout_task)
+{
 	mutex_lock(&bdev->bd_fsfreeze_mutex);
 	if (!bdev->bd_fsfreeze_count) {
 		mutex_unlock(&bdev->bd_fsfreeze_mutex);
@@ -262,6 +292,10 @@ int thaw_bdev(struct block_device *bdev,
 		return 0;
 	}
 
+	/* Delete unfreeze timer. */
+	if (del_timeout_task)
+		cancel_delayed_work_sync(&bdev->bd_fsfreeze_timeout);
+
 	if (sb) {
 		BUG_ON(sb->s_bdev != bdev);
 
@@ -277,7 +311,7 @@ int thaw_bdev(struct block_device *bdev,
 	mutex_unlock(&bdev->bd_fsfreeze_mutex);
 	return 0;
 }
-EXPORT_SYMBOL(thaw_bdev);
+EXPORT_SYMBOL(thaw_bdev_core);
 
 /*
  * Various filesystems appear to want __find_get_block to be non-blocking.
diff -uprN -X linux-2.6.27-rc5.org/Documentation/dontdiff linux-2.6.27-rc5-xfs/fs/ioctl.c linux-2.6.27-rc5-timeout/fs/io
ctl.c
--- linux-2.6.27-rc5-xfs/fs/ioctl.c	2008-09-05 20:22:46.000000000 +0900
+++ linux-2.6.27-rc5-timeout/fs/ioctl.c	2008-09-05 17:47:47.000000000 +0900
@@ -141,9 +141,12 @@ static int ioctl_fioasync(unsigned int f
 	return error;
 }
 
-static int ioctl_freeze(struct file *filp)
+static int ioctl_freeze(struct file *filp, int __user *argp)
 {
+	int timeout_sec;
+	unsigned int timeout_msec;
 	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+	int error;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -156,8 +159,25 @@ static int ioctl_freeze(struct file *fil
 	if (sb->s_bdev == NULL)
 		return -EINVAL;
 
+	/* arg(sec) to tick value. */
+	error = get_user(timeout_sec, argp);
+	if (error != 0)
+		return error;
+
+	if (timeout_sec < 0 || timeout_sec > UINT_MAX/1000)
+		return -EINVAL;
+
+	/*
+	 * If 1 is specified as the timeout period it is changed into 0
+	 * to retain compatibility with XFS's xfs_freeze.
+	 */
+	if (timeout_sec == 1)
+		timeout_sec = 0;
+
+	timeout_msec = timeout_sec * 1000;
+
 	/* Freeze */
-	sb = freeze_bdev(sb->s_bdev);
+	sb = freeze_bdev(sb->s_bdev, timeout_msec);
 	if (IS_ERR(sb))
 		return PTR_ERR(sb);
 	return 0;
@@ -178,6 +198,47 @@ static int ioctl_thaw(struct file *filp)
 	return thaw_bdev(sb->s_bdev, sb);
 }
 
+static int
+ioctl_freeze_reset_timeout(struct file *filp, int __user *argp)
+{
+	int timeout_sec;
+	unsigned int timeout_msec;
+	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+	struct block_device *bdev = sb->s_bdev;
+	int error;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* If a blockdevice-backed filesystem isn't specified, return EINVAL. */
+	if (bdev == NULL)
+		return -EINVAL;
+
+	/* arg(sec) to tick value */
+	error = get_user(timeout_sec, argp);
+	if (error)
+		return error;
+
+	if (timeout_sec <= 1 || timeout_sec > UINT_MAX/1000)
+		return -EINVAL;
+
+	timeout_msec = timeout_sec * 1000;
+
+	mutex_lock(&bdev->bd_fsfreeze_mutex);
+	if (!bdev->bd_fsfreeze_count) {
+		mutex_unlock(&bdev->bd_fsfreeze_mutex);
+		return -EINVAL;
+	} else if (!delayed_work_pending(&bdev->bd_fsfreeze_timeout)) {
+		mutex_unlock(&bdev->bd_fsfreeze_mutex);
+		return -EBUSY;
+	}
+	/* setup unfreeze timer */
+	add_fsfreeze_timeout(bdev, timeout_msec);
+	mutex_unlock(&bdev->bd_fsfreeze_mutex);
+
+	return 0;
+}
+
 /*
  * When you add any new common ioctls to the switches above and below
  * please update compat_sys_ioctl() too.
@@ -221,13 +282,17 @@ int do_vfs_ioctl(struct file *filp, unsi
 		break;
 
 	case FIFREEZE:
-		error = ioctl_freeze(filp);
+		error = ioctl_freeze(filp, argp);
 		break;
 
 	case FITHAW:
 		error = ioctl_thaw(filp);
 		break;
 
+	case FIFREEZE_RESET_TIMEOUT:
+		error = ioctl_freeze_reset_timeout(filp, argp);
+		break;
+
 	default:
 		if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
 			error = file_ioctl(filp, cmd, arg);
diff -uprN -X linux-2.6.27-rc5.org/Documentation/dontdiff linux-2.6.27-rc5-xfs/fs/super.c linux-2.6.27-rc5-timeout/fs/su
per.c
--- linux-2.6.27-rc5-xfs/fs/super.c	2008-09-05 20:13:58.000000000 +0900
+++ linux-2.6.27-rc5-timeout/fs/super.c	2008-09-05 17:47:47.000000000 +0900
@@ -981,3 +981,40 @@ struct vfsmount *kern_mount_data(struct 
 }
 
 EXPORT_SYMBOL_GPL(kern_mount_data);
+
+/*
+ * fsfreeze_timeout - Thaw the filesystem.
+ *
+ * @work:	work queue (delayed_work.work)
+ *
+ * Called by the delayed work when elapsing the timeout period.
+ * Thaw the filesystem.
+ */
+void fsfreeze_timeout(struct work_struct *work)
+{
+	struct block_device *bd = container_of(work,
+			struct block_device, bd_fsfreeze_timeout.work);
+	struct super_block *sb = get_super(bd);
+
+	thaw_bdev_core(bd, sb, 0);
+
+	if (sb)
+		drop_super(sb);
+}
+
+/*
+ * add_fsfreeze_timeout - Add timeout for freeze.
+ *
+ * @bdev:		block device struct
+ * @timeout_msec:	timeout period
+ *
+ * Add the delayed work for freeze timeout to the delayed work queue.
+ */
+void add_fsfreeze_timeout(struct block_device *bdev, unsigned int timeout_msec)
+{
+	s64 timeout_jiffies = msecs_to_jiffies(timeout_msec);
+
+	/* Set delayed work queue */
+	cancel_delayed_work_sync(&bdev->bd_fsfreeze_timeout);
+	schedule_delayed_work(&bdev->bd_fsfreeze_timeout, timeout_jiffies);
+}
diff -uprN -X linux-2.6.27-rc5.org/Documentation/dontdiff linux-2.6.27-rc5-xfs/fs/xfs/xfs_fsops.c linux-2.6.27-rc5-timeo
ut/fs/xfs/xfs_fsops.c
--- linux-2.6.27-rc5-xfs/fs/xfs/xfs_fsops.c	2008-09-05 20:14:45.000000000 +0900
+++ linux-2.6.27-rc5-timeout/fs/xfs/xfs_fsops.c	2008-09-05 17:47:47.000000000 +0900
@@ -621,7 +621,7 @@ xfs_fs_goingdown(
 {
 	switch (inflags) {
 	case XFS_FSOP_GOING_FLAGS_DEFAULT: {
-		struct super_block *sb = freeze_bdev(mp->m_super->s_bdev);
+		struct super_block *sb = freeze_bdev(mp->m_super->s_bdev, 0);
 
 		if (sb && !IS_ERR(sb)) {
 			xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
diff -uprN -X linux-2.6.27-rc5.org/Documentation/dontdiff linux-2.6.27-rc5-xfs/include/linux/buffer_head.h linux-2.6.27-
rc5-timeout/include/linux/buffer_head.h
--- linux-2.6.27-rc5-xfs/include/linux/buffer_head.h	2008-09-05 20:16:12.000000000 +0900
+++ linux-2.6.27-rc5-timeout/include/linux/buffer_head.h	2008-09-05 17:47:47.000000000 +0900
@@ -169,8 +169,10 @@ int sync_blockdev(struct block_device *b
 void __wait_on_buffer(struct buffer_head *);
 wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
 int fsync_bdev(struct block_device *);
-struct super_block *freeze_bdev(struct block_device *);
+struct super_block *freeze_bdev(struct block_device *,
+				unsigned int timeout_msec);
 int thaw_bdev(struct block_device *, struct super_block *);
+int thaw_bdev_core(struct block_device *, struct super_block *, int);
 int fsync_super(struct super_block *);
 int fsync_no_super(struct block_device *);
 struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block,
diff -uprN -X linux-2.6.27-rc5.org/Documentation/dontdiff linux-2.6.27-rc5-xfs/include/linux/fs.h linux-2.6.27-rc5-timeo
ut/include/linux/fs.h
--- linux-2.6.27-rc5-xfs/include/linux/fs.h	2008-09-05 20:18:21.000000000 +0900
+++ linux-2.6.27-rc5-timeout/include/linux/fs.h	2008-09-05 17:47:47.000000000 +0900
@@ -8,6 +8,7 @@
 
 #include <linux/limits.h>
 #include <linux/ioctl.h>
+#include <linux/workqueue.h>
 
 /*
  * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -228,6 +229,7 @@ extern int dir_notify_enable;
 #define FIGETBSZ   _IO(0x00,2)	/* get the block size used for bmap */
 #define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
 #define FITHAW		_IOWR('X', 120, int)	/* Thaw */
+#define	FIFREEZE_RESET_TIMEOUT	_IO(0x00, 3)	/* Reset freeze timeout */
 
 #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
 #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
@@ -581,6 +583,8 @@ struct block_device {
 	int			bd_fsfreeze_count;
 	/* Mutex for freeze */
 	struct mutex		bd_fsfreeze_mutex;
+	/* Delayed work for freeze */
+	struct delayed_work	bd_fsfreeze_timeout;
 };
 
 /*
@@ -2160,5 +2164,9 @@ int proc_nr_files(struct ctl_table *tabl
 
 int get_filesystem_list(char * buf);
 
+extern void add_fsfreeze_timeout(struct block_device *bdev,
+				unsigned int timeout_msec);
+extern void fsfreeze_timeout(struct work_struct *work);
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_FS_H */

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-08-21 20:20 ` Andrew Morton
  2008-08-22 18:16   ` Christoph Hellwig
  2008-08-24 17:03   ` Oleg Nesterov
@ 2008-08-29  9:39   ` Takashi Sato
  2 siblings, 0 replies; 71+ messages in thread
From: Takashi Sato @ 2008-08-29  9:39 UTC (permalink / raw)
  To: Andrew Morton, Oleg Nesterov
  Cc: linux-fsdevel, dm-devel, viro, linux-ext4, xfs,
	Christoph Hellwig, axboe, mtk.manpages, linux-kernel,
	Oleg Nesterov

Hi, Andrew and Oleg.

Andrew Morton wrote:
>On Mon, 18 Aug 2008 21:28:56 +0900
>Takashi Sato <t-sato@yk.jp.nec.com> wrote:
>
>> The timeout feature is added to freeze ioctl.  And new ioctl
>> to reset the timeout period is added.
>> o Freeze the filesystem
>>   int ioctl(int fd, int FIFREEZE, long *timeout_sec)
>>     fd: The file descriptor of the mountpoint
>>     FIFREEZE: request code for the freeze
>>     timeout_sec: the timeout period in seconds
>>              If it's 0 or 1, the timeout isn't set.
>>              This special case of "1" is implemented to keep
>>              the compatibility with XFS applications.
>>     Return value: 0 if the operation succeeds. Otherwise, -1
>> 
>> o Reset the timeout period
>>   int ioctl(int fd, int FIFREEZE_RESET_TIMEOUT, long *timeout_sec)
>>     fd:file descriptor of mountpoint
>>     FIFREEZE_RESET_TIMEOUT: request code for reset of timeout period
>>     timeout_sec: new timeout period in seconds
>>     Return value: 0 if the operation succeeds. Otherwise, -1
>>     Error number: If the filesystem has already been unfrozen,
>>                   errno is set to EINVAL.
>
>I don't think the changelogs actually explained why this feature is
>being added?

I will fix the changelogs as following.

-----------------------------------------------------------
The timeout feature is added to "freeze ioctl" to solve a deadlock
when the freezer accesses a frozen filesystem. And new ioctl
to reset the timeout period is added to extend the timeout period.
For example, the freezer resets the timeout period to 10 seconds every 5
seconds.  In this approach, even if the freezer causes a deadlock by
accessing the frozen filesystem, it will be solved by the timeout
in 10 seconds and the freezer will be able to recognize that
at the next reset of timeout period.
-----------------------------------------------------------

>
>Which userspace tools are expected to send these ioctls?  Something in
>util-linux?  dm-utils?  Are patches to those packages planned?

I think the management software for the storage device
will use these ioctls to take a consistent backup.

>>
>> ...
>>
>>  /*
>> + * ioctl_freeze_reset_timeout - Reset timeout for freeze.
>> + *
>> + * @filp:       target file
>> + * @argp:       timeout value(sec)
>> + *
>> + * Reset timeout for freeze.
>> + */
>> +static int
>> +ioctl_freeze_reset_timeout(struct file *filp, int __user *argp)
>> +{
>> +    int timeout_sec;
>> +    unsigned int timeout_msec;
>> +    struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
>> +    struct block_device *bdev = sb->s_bdev;
>> +    int error;
>> +
>> +    if (!capable(CAP_SYS_ADMIN))
>> +            return -EPERM;
>> +
>> +    /* If a regular file or a directory isn't specified, return EINVAL. */
>> +    if (bdev == NULL)
>> +            return -EINVAL;
>> +
>> +    /* arg(sec) to tick value */
>> +    error = get_user(timeout_sec, argp);
>> +    if (error)
>> +            return error;
>> +
>> +    if (timeout_sec <= 0 || timeout_sec > UINT_MAX/1000)
>> +            return -EINVAL;
>> +
>> +    timeout_msec = timeout_sec * 1000;
>> +
>> +    down(&bdev->bd_freeze_sem);
>> +    if (!bdev->bd_freeze_count) {
>> +            up(&bdev->bd_freeze_sem);
>> +            return -EINVAL;
>> +    }
>> +    /* setup unfreeze timer */
>> +    add_freeze_timeout(bdev, timeout_msec);
>> +    up(&bdev->bd_freeze_sem);
>> +
>> +    return 0;
>> +}
>
>This duplicates quite a bit of code from ioctl_freeze().  Can this be
>cleaned up?

I will clean it up.

>> +/*
>>   * When you add any new common ioctls to the switches above and below
>>   * please update compat_sys_ioctl() too.
>>   *
>> @@ -235,13 +302,17 @@ int do_vfs_ioctl(struct file *filp, unsi
>>              break;
>>  
>>      case FIFREEZE:
>> -            error = ioctl_freeze(filp);
>> +            error = ioctl_freeze(filp, argp);
>>              break;
>>  
>>      case FITHAW:
>>              error = ioctl_thaw(filp);
>>              break;
>>  
>> +    case FIFREEZE_RESET_TIMEOUT:
>> +            error = ioctl_freeze_reset_timeout(filp, argp);
>> +            break;
>> +
>>      default:
>>              if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
>>                      error = file_ioctl(filp, cmd, arg);
>>
>> ...
>>
>>  EXPORT_SYMBOL_GPL(kern_mount_data);
>> +
>> +/*
>> + * freeze_timeout - Thaw the filesystem.
>> + *
>> + * @work:   work queue (delayed_work.work)
>> + *
>> + * Called by the delayed work when elapsing the timeout period.
>> + * Thaw the filesystem.
>> + */
>> +void freeze_timeout(struct work_struct *work)
>> +{
>> +    struct block_device *bd = container_of(work,
>> +                    struct block_device, bd_freeze_timeout.work);
>> +    struct super_block *sb = get_super(bd);
>> +
>> +    thaw_bdev(bd, sb);
>> +
>> +    if (sb)
>> +            drop_super(sb);
>> +}
>> +EXPORT_SYMBOL_GPL(freeze_timeout);
>
>I can't see why this was exported.

It isn't needed, I will delete it.

>> +/*
>> + * add_freeze_timeout - Add timeout for freeze.
>> + *
>> + * @bdev:           block device struct
>> + * @timeout_msec:   timeout period
>> + *
>> + * Add the delayed work for freeze timeout to the delayed work queue.
>> + */
>> +void add_freeze_timeout(struct block_device *bdev, unsigned int timeout_msec)
>> +{
>> +    s64 timeout_jiffies = msecs_to_jiffies(timeout_msec);
>> +
>> +    /* Set delayed work queue */
>> +    cancel_delayed_work_sync(&bdev->bd_freeze_timeout);
>> +    schedule_delayed_work(&bdev->bd_freeze_timeout, timeout_jiffies);
>> +}
>
>I don't particularly like the names of these new global symbols.  The
>kernel already has a "freezer" thing, part of power-management. 
>Introducing another one is a bit confusing.
>
>otoh, freezer seems to have consistently used "freezer", so the 'r'
>arguable saves us.
>
>Still, I'd have thought that "fsfreeze" would have been a clearer, more
>specific identifier for the whole project.

I will rename the names of these global symbols.
For example, "add_fsfreeze_timeout"...

>> +/*
>> + * del_freeze_timeout - Delete timeout for freeze.
>> + *
>> + * @bdev:   block device struct
>> + *
>> + * Delete the delayed work for freeze timeout from the delayed work queue.
>> + */
>> +void del_freeze_timeout(struct block_device *bdev)
>> +{
>> +    /*
>> +     * It's possible that the delayed work task (freeze_timeout()) calls
>> +     * del_freeze_timeout().  If the delayed work task calls
>> +     * cancel_delayed_work_sync((), the deadlock will occur.
>> +     * So we need this check (delayed_work_pending()).
>> +     */
>> +    if (delayed_work_pending(&bdev->bd_freeze_timeout))
>> +            cancel_delayed_work_sync(&bdev->bd_freeze_timeout);
>> +}
>
>So if the calling task is keventd via run_workqueue() then
>delayed_work_pending() should return false due to run_workqueue()
>ordering, so we avoid the deadlock.
>
>Seems a bit racy if some other process starts the delayed-work while
>this function is running but I guess the new semaphore prevents that.
>
>Perhaps cancel_delayed_work_sync() shouldn't hang up if called from the
>work handler?

I think so.

In my current implementation,
the delayed work task calls thaw_bdev() to unfreeze a filesystem
and it calls del_freeze_timeout().  So, the deadlock occurs.
But I've found that the delayed work task doesn't need to call
del_freeze_timeout() because it is removed
from the delayed work queue automatically after the work finishes.
So I will fix thaw_bdev() so that it doesn't call
del_freeze_timeout() only when it's called by the delayed work task.
And I will delete delayed_work_pending() in del_freeze_timeout().

Cheers, Takashi

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-08-21 20:20 ` Andrew Morton
  2008-08-22 18:16   ` Christoph Hellwig
@ 2008-08-24 17:03   ` Oleg Nesterov
  2008-08-29  9:39   ` Takashi Sato
  2 siblings, 0 replies; 71+ messages in thread
From: Oleg Nesterov @ 2008-08-24 17:03 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Takashi Sato, linux-fsdevel, dm-devel, viro, linux-ext4, xfs,
	hch, axboe, mtk.manpages, linux-kernel

On 08/21, Andrew Morton wrote:
>
> On Mon, 18 Aug 2008 21:28:56 +0900
> Takashi Sato <t-sato@yk.jp.nec.com> wrote:
>
> > +void del_freeze_timeout(struct block_device *bdev)
> > +{
> > +	/*
> > +	 * It's possible that the delayed work task (freeze_timeout()) calls
> > +	 * del_freeze_timeout().  If the delayed work task calls
> > +	 * cancel_delayed_work_sync((), the deadlock will occur.
> > +	 * So we need this check (delayed_work_pending()).
> > +	 */
> > +	if (delayed_work_pending(&bdev->bd_freeze_timeout))
> > +		cancel_delayed_work_sync(&bdev->bd_freeze_timeout);
> > +}

I don't understand this patch, but the code above looks strange to me...

Let's suppose del_freeze_timeout() is called by ioctl_thaw()->thaw_bdev().
Now,

	IF delayed_work_pending() == T

		we can deadlock if the timer expires before
		cancel_delayed_work_sync() cancels it?
		in that case we are going to wait for this work,
		but freeze_timeout()->thaw_bdev() will block
		on ->bd_freeze_sem, no?

	ELSE

		we don't really flush the work, it is possible
		the timer has already expired and the work
		is pending. It will run later.

Perhaps this all is correct, but in that case, why can't we just do

	void del_freeze_timeout(struct block_device *bdev)
	{
		cancel_delayed_work(&bdev->bd_freeze_timeout);
	}

?

> Perhaps cancel_delayed_work_sync() shouldn't hang up if called from the
> work handler?

This is trivial,

	--- kernel/workqueue.c
	+++ kernel/workqueue.c
	@@ -516,6 +516,9 @@ static void wait_on_cpu_work(struct cpu_
		struct wq_barrier barr;
		int running = 0;
	 
	+	if (cwq->thread == current)
	+		return;
	+
		spin_lock_irq(&cwq->lock);
		if (unlikely(cwq->current_work == work)) {
			insert_wq_barrier(cwq, &barr, cwq->worklist.next);

but do we really need this?

We have a similar hack in flush_cpu_workqueue(), and we are going
to kill it once we fix the callers.

I dunno.

Oleg.


^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-08-21 20:20 ` Andrew Morton
@ 2008-08-22 18:16   ` Christoph Hellwig
  2008-08-24 17:03   ` Oleg Nesterov
  2008-08-29  9:39   ` Takashi Sato
  2 siblings, 0 replies; 71+ messages in thread
From: Christoph Hellwig @ 2008-08-22 18:16 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Takashi Sato, linux-fsdevel, dm-devel, viro, linux-ext4, xfs,
	hch, axboe, mtk.manpages, linux-kernel, Oleg Nesterov

On Thu, Aug 21, 2008 at 01:20:06PM -0700, Andrew Morton wrote:
> I don't think the changelogs actually explained why this feature is
> being added?
> 
> Which userspace tools are expected to send these ioctls?  Something in
> util-linux?  dm-utils?  Are patches to those packages planned?

Currently the only surspace using freeze and thaw is xfs_freeze from
xfsprogs, which would work for various other filesystems that implement
->write_super_lockfs now instead of just XFS with patch 1.

The freeze stuff in this third patch isn't and won't be used by
xfs_freeze and doesn't make all that much sense (and we already had a
lot of previous discussion on this..)


^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-08-18 12:28 Takashi Sato
@ 2008-08-21 20:20 ` Andrew Morton
  2008-08-22 18:16   ` Christoph Hellwig
                     ` (2 more replies)
  0 siblings, 3 replies; 71+ messages in thread
From: Andrew Morton @ 2008-08-21 20:20 UTC (permalink / raw)
  To: Takashi Sato
  Cc: linux-fsdevel, dm-devel, viro, linux-ext4, xfs, hch, axboe,
	mtk.manpages, linux-kernel, Oleg Nesterov

On Mon, 18 Aug 2008 21:28:56 +0900
Takashi Sato <t-sato@yk.jp.nec.com> wrote:

> The timeout feature is added to freeze ioctl.  And new ioctl
> to reset the timeout period is added.
> o Freeze the filesystem
>   int ioctl(int fd, int FIFREEZE, long *timeout_sec)
>     fd: The file descriptor of the mountpoint
>     FIFREEZE: request code for the freeze
>     timeout_sec: the timeout period in seconds
>              If it's 0 or 1, the timeout isn't set.
>              This special case of "1" is implemented to keep
>              the compatibility with XFS applications.
>     Return value: 0 if the operation succeeds. Otherwise, -1
> 
> o Reset the timeout period
>   int ioctl(int fd, int FIFREEZE_RESET_TIMEOUT, long *timeout_sec)
>     fd:file descriptor of mountpoint
>     FIFREEZE_RESET_TIMEOUT: request code for reset of timeout period
>     timeout_sec: new timeout period in seconds
>     Return value: 0 if the operation succeeds. Otherwise, -1
>     Error number: If the filesystem has already been unfrozen,
>                   errno is set to EINVAL.

I don't think the changelogs actually explained why this feature is
being added?

Which userspace tools are expected to send these ioctls?  Something in
util-linux?  dm-utils?  Are patches to those packages planned?

>
> ...
>
>  /*
> + * ioctl_freeze_reset_timeout - Reset timeout for freeze.
> + *
> + * @filp:       target file
> + * @argp:       timeout value(sec)
> + *
> + * Reset timeout for freeze.
> + */
> +static int
> +ioctl_freeze_reset_timeout(struct file *filp, int __user *argp)
> +{
> +	int timeout_sec;
> +	unsigned int timeout_msec;
> +	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
> +	struct block_device *bdev = sb->s_bdev;
> +	int error;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	/* If a regular file or a directory isn't specified, return EINVAL. */
> +	if (bdev == NULL)
> +		return -EINVAL;
> +
> +	/* arg(sec) to tick value */
> +	error = get_user(timeout_sec, argp);
> +	if (error)
> +		return error;
> +
> +	if (timeout_sec <= 0 || timeout_sec > UINT_MAX/1000)
> +		return -EINVAL;
> +
> +	timeout_msec = timeout_sec * 1000;
> +
> +	down(&bdev->bd_freeze_sem);
> +	if (!bdev->bd_freeze_count) {
> +		up(&bdev->bd_freeze_sem);
> +		return -EINVAL;
> +	}
> +	/* setup unfreeze timer */
> +	add_freeze_timeout(bdev, timeout_msec);
> +	up(&bdev->bd_freeze_sem);
> +
> +	return 0;
> +}

This duplicates quite a bit of code from ioctl_freeze().  Can this be
cleaned up?


> +/*
>   * When you add any new common ioctls to the switches above and below
>   * please update compat_sys_ioctl() too.
>   *
> @@ -235,13 +302,17 @@ int do_vfs_ioctl(struct file *filp, unsi
>  		break;
>  
>  	case FIFREEZE:
> -		error = ioctl_freeze(filp);
> +		error = ioctl_freeze(filp, argp);
>  		break;
>  
>  	case FITHAW:
>  		error = ioctl_thaw(filp);
>  		break;
>  
> +	case FIFREEZE_RESET_TIMEOUT:
> +		error = ioctl_freeze_reset_timeout(filp, argp);
> +		break;
> +
>  	default:
>  		if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
>  			error = file_ioctl(filp, cmd, arg);
>
> ...
>
>  EXPORT_SYMBOL_GPL(kern_mount_data);
> +
> +/*
> + * freeze_timeout - Thaw the filesystem.
> + *
> + * @work:	work queue (delayed_work.work)
> + *
> + * Called by the delayed work when elapsing the timeout period.
> + * Thaw the filesystem.
> + */
> +void freeze_timeout(struct work_struct *work)
> +{
> +	struct block_device *bd = container_of(work,
> +			struct block_device, bd_freeze_timeout.work);
> +	struct super_block *sb = get_super(bd);
> +
> +	thaw_bdev(bd, sb);
> +
> +	if (sb)
> +		drop_super(sb);
> +}
> +EXPORT_SYMBOL_GPL(freeze_timeout);

I can't see why this was exported.

> +/*
> + * add_freeze_timeout - Add timeout for freeze.
> + *
> + * @bdev:		block device struct
> + * @timeout_msec:	timeout period
> + *
> + * Add the delayed work for freeze timeout to the delayed work queue.
> + */
> +void add_freeze_timeout(struct block_device *bdev, unsigned int timeout_msec)
> +{
> +	s64 timeout_jiffies = msecs_to_jiffies(timeout_msec);
> +
> +	/* Set delayed work queue */
> +	cancel_delayed_work_sync(&bdev->bd_freeze_timeout);
> +	schedule_delayed_work(&bdev->bd_freeze_timeout, timeout_jiffies);
> +}

I don't particularly like the names of these new global symbols.  The
kernel already has a "freezer" thing, part of power-management. 
Introducing another one is a bit confusing.

otoh, freezer seems to have consistently used "freezer", so the 'r'
arguable saves us.

Still, I'd have thought that "fsfreeze" would have been a clearer, more
specific identifier for the whole project.

> +/*
> + * del_freeze_timeout - Delete timeout for freeze.
> + *
> + * @bdev:	block device struct
> + *
> + * Delete the delayed work for freeze timeout from the delayed work queue.
> + */
> +void del_freeze_timeout(struct block_device *bdev)
> +{
> +	/*
> +	 * It's possible that the delayed work task (freeze_timeout()) calls
> +	 * del_freeze_timeout().  If the delayed work task calls
> +	 * cancel_delayed_work_sync((), the deadlock will occur.
> +	 * So we need this check (delayed_work_pending()).
> +	 */
> +	if (delayed_work_pending(&bdev->bd_freeze_timeout))
> +		cancel_delayed_work_sync(&bdev->bd_freeze_timeout);
> +}

So if the calling task is keventd via run_workqueue() then
delayed_work_pending() should return false due to run_workqueue()
ordering, so we avoid the deadlock.

Seems a bit racy if some other process starts the delayed-work while
this function is running but I guess the new semaphore prevents that.

Perhaps cancel_delayed_work_sync() shouldn't hang up if called from the
work handler?



^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 3/3] Add timeout feature
@ 2008-08-18 12:28 Takashi Sato
  2008-08-21 20:20 ` Andrew Morton
  0 siblings, 1 reply; 71+ messages in thread
From: Takashi Sato @ 2008-08-18 12:28 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-fsdevel, dm-devel, viro, linux-ext4, xfs,
	Christoph Hellwig, axboe, mtk.manpages, linux-kernel

The timeout feature is added to freeze ioctl.  And new ioctl
to reset the timeout period is added.
o Freeze the filesystem
  int ioctl(int fd, int FIFREEZE, long *timeout_sec)
    fd: The file descriptor of the mountpoint
    FIFREEZE: request code for the freeze
    timeout_sec: the timeout period in seconds
             If it's 0 or 1, the timeout isn't set.
             This special case of "1" is implemented to keep
             the compatibility with XFS applications.
    Return value: 0 if the operation succeeds. Otherwise, -1

o Reset the timeout period
  int ioctl(int fd, int FIFREEZE_RESET_TIMEOUT, long *timeout_sec)
    fd:file descriptor of mountpoint
    FIFREEZE_RESET_TIMEOUT: request code for reset of timeout period
    timeout_sec: new timeout period in seconds
    Return value: 0 if the operation succeeds. Otherwise, -1
    Error number: If the filesystem has already been unfrozen,
                  errno is set to EINVAL.

Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
Signed-off-by: Masayuki Hamaguchi <m-hamaguchi@ys.jp.nec.com>
---
 drivers/md/dm.c             |    2 -
 fs/block_dev.c              |    2 +
 fs/buffer.c                 |   16 +++++++--
 fs/ioctl.c                  |   77 ++++++++++++++++++++++++++++++++++++++++++--
 fs/super.c                  |   57 ++++++++++++++++++++++++++++++++
 fs/xfs/xfs_fsops.c          |    2 -
 include/linux/buffer_head.h |    3 +
 include/linux/fs.h          |   10 +++++
 8 files changed, 160 insertions(+), 9 deletions(-)

diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2-xfs/drivers/md/dm.c linux-2.6.27-rc2-timeout/
drivers/md/dm.c
--- linux-2.6.27-rc2-xfs/drivers/md/dm.c	2008-08-07 09:00:27.000000000 +0900
+++ linux-2.6.27-rc2-timeout/drivers/md/dm.c	2008-08-07 09:14:30.000000000 +0900
@@ -1451,7 +1451,7 @@ static int lock_fs(struct mapped_device 
 
 	WARN_ON(md->frozen_sb);
 
-	md->frozen_sb = freeze_bdev(md->suspended_bdev);
+	md->frozen_sb = freeze_bdev(md->suspended_bdev, 0);
 	if (IS_ERR(md->frozen_sb)) {
 		r = PTR_ERR(md->frozen_sb);
 		md->frozen_sb = NULL;
diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2-xfs/fs/block_dev.c linux-2.6.27-rc2-timeout/f
s/block_dev.c
--- linux-2.6.27-rc2-xfs/fs/block_dev.c	2008-08-07 09:00:29.000000000 +0900
+++ linux-2.6.27-rc2-timeout/fs/block_dev.c	2008-08-07 09:14:30.000000000 +0900
@@ -287,6 +287,8 @@ static void init_once(void *foo)
 	inode_init_once(&ei->vfs_inode);
 	/* Initialize semaphore for freeze. */
 	sema_init(&bdev->bd_freeze_sem, 1);
+	/* Setup freeze timeout function. */
+	INIT_DELAYED_WORK(&bdev->bd_freeze_timeout, freeze_timeout);
 }
 
 static inline void __bd_forget(struct inode *inode)
diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2-xfs/fs/buffer.c linux-2.6.27-rc2-timeout/fs/b
uffer.c
--- linux-2.6.27-rc2-xfs/fs/buffer.c	2008-08-07 09:00:29.000000000 +0900
+++ linux-2.6.27-rc2-timeout/fs/buffer.c	2008-08-07 09:14:30.000000000 +0900
@@ -190,14 +190,18 @@ int fsync_bdev(struct block_device *bdev
 
 /**
  * freeze_bdev  --  lock a filesystem and force it into a consistent state
- * @bdev:	blockdevice to lock
+ * @bdev:              blockdevice to lock
+ * @timeout_msec:      timeout period
  *
  * This takes the block device bd_mount_sem to make sure no new mounts
  * happen on bdev until thaw_bdev() is called.
  * If a superblock is found on this device, we take the s_umount semaphore
  * on it to make sure nobody unmounts until the snapshot creation is done.
+ * If timeout_msec is bigger than 0, this registers the delayed work for
+ * timeout of the freeze feature.
  */
-struct super_block *freeze_bdev(struct block_device *bdev)
+struct super_block *freeze_bdev(struct block_device *bdev,
+				unsigned int timeout_msec)
 {
 	struct super_block *sb;
 
@@ -228,8 +232,11 @@ struct super_block *freeze_bdev(struct b
 	}
 
 	sync_blockdev(bdev);
-	up(&bdev->bd_freeze_sem);
+	/* Setup unfreeze timer. */
+	if (timeout_msec > 0)
+		add_freeze_timeout(bdev, timeout_msec);
 
+	up(&bdev->bd_freeze_sem);
 	return sb;	/* thaw_bdev releases s->s_umount and bd_mount_sem */
 }
 EXPORT_SYMBOL(freeze_bdev);
@@ -255,6 +262,9 @@ int thaw_bdev(struct block_device *bdev,
 		return 0;
 	}
 
+	/* Delete unfreeze timer. */
+	del_freeze_timeout(bdev);
+
 	if (sb) {
 		BUG_ON(sb->s_bdev != bdev);
 
diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2-xfs/fs/ioctl.c linux-2.6.27-rc2-timeout/fs/io
ctl.c
--- linux-2.6.27-rc2-xfs/fs/ioctl.c	2008-08-07 09:00:29.000000000 +0900
+++ linux-2.6.27-rc2-timeout/fs/ioctl.c	2008-08-07 09:14:30.000000000 +0900
@@ -145,12 +145,16 @@ static int ioctl_fioasync(unsigned int f
  * ioctl_freeze - Freeze the filesystem.
  *
  * @filp:	target file
+ * @argp:       timeout value(sec)
  *
  * Call freeze_bdev() to freeze the filesystem.
  */
-static int ioctl_freeze(struct file *filp)
+static int ioctl_freeze(struct file *filp, int __user *argp)
 {
+	int timeout_sec;
+	unsigned int timeout_msec;
 	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+	int error;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -163,8 +167,25 @@ static int ioctl_freeze(struct file *fil
 	if (sb->s_bdev == NULL)
 		return -EINVAL;
 
+	/* arg(sec) to tick value. */
+	error = get_user(timeout_sec, argp);
+	if (error != 0)
+		return error;
+
+	if (timeout_sec < 0 || timeout_sec > UINT_MAX/1000)
+		return -EINVAL;
+
+	/*
+	 * If 1 is specified as the timeout period it is changed into 0
+	 * to retain compatibility with XFS's xfs_freeze.
+	 */
+	if (timeout_sec == 1)
+		timeout_sec = 0;
+
+	timeout_msec = timeout_sec * 1000;
+
 	/* Freeze */
-	sb = freeze_bdev(sb->s_bdev);
+	sb = freeze_bdev(sb->s_bdev, timeout_msec);
 	if (IS_ERR(sb))
 		return PTR_ERR(sb);
 	return 0;
@@ -193,6 +214,52 @@ static int ioctl_thaw(struct file *filp)
 }
 
 /*
+ * ioctl_freeze_reset_timeout - Reset timeout for freeze.
+ *
+ * @filp:       target file
+ * @argp:       timeout value(sec)
+ *
+ * Reset timeout for freeze.
+ */
+static int
+ioctl_freeze_reset_timeout(struct file *filp, int __user *argp)
+{
+	int timeout_sec;
+	unsigned int timeout_msec;
+	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+	struct block_device *bdev = sb->s_bdev;
+	int error;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* If a regular file or a directory isn't specified, return EINVAL. */
+	if (bdev == NULL)
+		return -EINVAL;
+
+	/* arg(sec) to tick value */
+	error = get_user(timeout_sec, argp);
+	if (error)
+		return error;
+
+	if (timeout_sec <= 0 || timeout_sec > UINT_MAX/1000)
+		return -EINVAL;
+
+	timeout_msec = timeout_sec * 1000;
+
+	down(&bdev->bd_freeze_sem);
+	if (!bdev->bd_freeze_count) {
+		up(&bdev->bd_freeze_sem);
+		return -EINVAL;
+	}
+	/* setup unfreeze timer */
+	add_freeze_timeout(bdev, timeout_msec);
+	up(&bdev->bd_freeze_sem);
+
+	return 0;
+}
+
+/*
  * When you add any new common ioctls to the switches above and below
  * please update compat_sys_ioctl() too.
  *
@@ -235,13 +302,17 @@ int do_vfs_ioctl(struct file *filp, unsi
 		break;
 
 	case FIFREEZE:
-		error = ioctl_freeze(filp);
+		error = ioctl_freeze(filp, argp);
 		break;
 
 	case FITHAW:
 		error = ioctl_thaw(filp);
 		break;
 
+	case FIFREEZE_RESET_TIMEOUT:
+		error = ioctl_freeze_reset_timeout(filp, argp);
+		break;
+
 	default:
 		if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
 			error = file_ioctl(filp, cmd, arg);
diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2-xfs/fs/super.c linux-2.6.27-rc2-timeout/fs/su
per.c
--- linux-2.6.27-rc2-xfs/fs/super.c	2008-08-07 09:00:29.000000000 +0900
+++ linux-2.6.27-rc2-timeout/fs/super.c	2008-08-07 09:14:30.000000000 +0900
@@ -981,3 +981,60 @@ struct vfsmount *kern_mount_data(struct 
 }
 
 EXPORT_SYMBOL_GPL(kern_mount_data);
+
+/*
+ * freeze_timeout - Thaw the filesystem.
+ *
+ * @work:	work queue (delayed_work.work)
+ *
+ * Called by the delayed work when elapsing the timeout period.
+ * Thaw the filesystem.
+ */
+void freeze_timeout(struct work_struct *work)
+{
+	struct block_device *bd = container_of(work,
+			struct block_device, bd_freeze_timeout.work);
+	struct super_block *sb = get_super(bd);
+
+	thaw_bdev(bd, sb);
+
+	if (sb)
+		drop_super(sb);
+}
+EXPORT_SYMBOL_GPL(freeze_timeout);
+
+/*
+ * add_freeze_timeout - Add timeout for freeze.
+ *
+ * @bdev:		block device struct
+ * @timeout_msec:	timeout period
+ *
+ * Add the delayed work for freeze timeout to the delayed work queue.
+ */
+void add_freeze_timeout(struct block_device *bdev, unsigned int timeout_msec)
+{
+	s64 timeout_jiffies = msecs_to_jiffies(timeout_msec);
+
+	/* Set delayed work queue */
+	cancel_delayed_work_sync(&bdev->bd_freeze_timeout);
+	schedule_delayed_work(&bdev->bd_freeze_timeout, timeout_jiffies);
+}
+
+/*
+ * del_freeze_timeout - Delete timeout for freeze.
+ *
+ * @bdev:	block device struct
+ *
+ * Delete the delayed work for freeze timeout from the delayed work queue.
+ */
+void del_freeze_timeout(struct block_device *bdev)
+{
+	/*
+	 * It's possible that the delayed work task (freeze_timeout()) calls
+	 * del_freeze_timeout().  If the delayed work task calls
+	 * cancel_delayed_work_sync((), the deadlock will occur.
+	 * So we need this check (delayed_work_pending()).
+	 */
+	if (delayed_work_pending(&bdev->bd_freeze_timeout))
+		cancel_delayed_work_sync(&bdev->bd_freeze_timeout);
+}
diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2-xfs/fs/xfs/xfs_fsops.c linux-2.6.27-rc2-timeo
ut/fs/xfs/xfs_fsops.c
--- linux-2.6.27-rc2-xfs/fs/xfs/xfs_fsops.c	2008-08-07 09:00:29.000000000 +0900
+++ linux-2.6.27-rc2-timeout/fs/xfs/xfs_fsops.c	2008-08-07 09:14:30.000000000 +0900
@@ -621,7 +621,7 @@ xfs_fs_goingdown(
 {
 	switch (inflags) {
 	case XFS_FSOP_GOING_FLAGS_DEFAULT: {
-		struct super_block *sb = freeze_bdev(mp->m_super->s_bdev);
+		struct super_block *sb = freeze_bdev(mp->m_super->s_bdev, 0);
 
 		if (sb && !IS_ERR(sb)) {
 			xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2-xfs/include/linux/buffer_head.h linux-2.6.27-
rc2-timeout/include/linux/buffer_head.h
--- linux-2.6.27-rc2-xfs/include/linux/buffer_head.h	2008-08-07 09:00:39.000000000 +0900
+++ linux-2.6.27-rc2-timeout/include/linux/buffer_head.h	2008-08-07 09:14:30.000000000 +0900
@@ -169,7 +169,8 @@ int sync_blockdev(struct block_device *b
 void __wait_on_buffer(struct buffer_head *);
 wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
 int fsync_bdev(struct block_device *);
-struct super_block *freeze_bdev(struct block_device *);
+struct super_block *freeze_bdev(struct block_device *,
+				unsigned int timeout_msec);
 int thaw_bdev(struct block_device *, struct super_block *);
 int fsync_super(struct super_block *);
 int fsync_no_super(struct block_device *);
diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2-xfs/include/linux/fs.h linux-2.6.27-rc2-timeo
ut/include/linux/fs.h
--- linux-2.6.27-rc2-xfs/include/linux/fs.h	2008-08-07 09:00:39.000000000 +0900
+++ linux-2.6.27-rc2-timeout/include/linux/fs.h	2008-08-07 09:14:30.000000000 +0900
@@ -8,6 +8,7 @@
 
 #include <linux/limits.h>
 #include <linux/ioctl.h>
+#include <linux/workqueue.h>
 
 /*
  * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -228,6 +229,7 @@ extern int dir_notify_enable;
 #define FIGETBSZ   _IO(0x00,2)	/* get the block size used for bmap */
 #define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
 #define FITHAW		_IOWR('X', 120, int)	/* Thaw */
+#define	FIFREEZE_RESET_TIMEOUT	_IO(0x00, 3)	/* Reset freeze timeout */
 
 #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
 #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
@@ -576,10 +578,13 @@ struct block_device {
 	 * care to not mess up bd_private for that case.
 	 */
 	unsigned long		bd_private;
+
 	/* The counter of freeze processes */
 	int			bd_freeze_count;
 	/* Semaphore for freeze */
 	struct semaphore	bd_freeze_sem;
+	/* Delayed work for freeze */
+	struct delayed_work	bd_freeze_timeout;
 };
 
 /*
@@ -2159,5 +2164,10 @@ int proc_nr_files(struct ctl_table *tabl
 
 int get_filesystem_list(char * buf);
 
+extern void add_freeze_timeout(struct block_device *bdev,
+				unsigned int timeout_msec);
+extern void del_freeze_timeout(struct block_device *bdev);
+extern void freeze_timeout(struct work_struct *work);
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_FS_H */

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 3/3] Add timeout feature
@ 2008-07-22  9:36 Takashi Sato
  0 siblings, 0 replies; 71+ messages in thread
From: Takashi Sato @ 2008-07-22  9:36 UTC (permalink / raw)
  To: dm-devel, linux-fsdevel, Andrew Morton, viro, linux-ext4, xfs,
	Christoph Hellwig, axboe, mtk.manpages
  Cc: linux-kernel

The timeout feature is added to freeze ioctl.  And new ioctl
to reset the timeout period is added.
o Freeze the filesystem
  int ioctl(int fd, int FIFREEZE, long *timeout_sec)
    fd: The file descriptor of the mountpoint
    FIFREEZE: request code for the freeze
    timeout_sec: the timeout period in seconds
             If it's 0 or 1, the timeout isn't set.
             This special case of "1" is implemented to keep
             the compatibility with XFS applications.
    Return value: 0 if the operation succeeds. Otherwise, -1

o Reset the timeout period
  int ioctl(int fd, int FIFREEZE_RESET_TIMEOUT, long *timeout_sec)
    fd:file descriptor of mountpoint
    FIFREEZE_RESET_TIMEOUT: request code for reset of timeout period
    timeout_sec: new timeout period in seconds
    Return value: 0 if the operation succeeds. Otherwise, -1
    Error number: If the filesystem has already been unfrozen,
                  errno is set to EINVAL.

Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
Signed-off-by: Masayuki Hamaguchi <m-hamaguchi@ys.jp.nec.com>
---
 drivers/md/dm.c             |    2 -
 fs/block_dev.c              |    2 +
 fs/buffer.c                 |   16 +++++++--
 fs/ioctl.c                  |   77 ++++++++++++++++++++++++++++++++++++++++++--
 fs/super.c                  |   57 ++++++++++++++++++++++++++++++++
 fs/xfs/xfs_fsops.c          |    2 -
 include/linux/buffer_head.h |    3 +
 include/linux/fs.h          |   10 +++++
 8 files changed, 160 insertions(+), 9 deletions(-)

diff -uprN -X linux-2.6.26.org/Documentation/dontdiff linux-2.6.26-xfs/drivers/md/dm.c linux-2.6.26-timeout/drivers/md/d
m.c
--- linux-2.6.26-xfs/drivers/md/dm.c	2008-07-17 11:33:03.000000000 +0900
+++ linux-2.6.26-timeout/drivers/md/dm.c	2008-07-17 11:36:00.000000000 +0900
@@ -1407,7 +1407,7 @@ static int lock_fs(struct mapped_device 
 
 	WARN_ON(md->frozen_sb);
 
-	md->frozen_sb = freeze_bdev(md->suspended_bdev);
+	md->frozen_sb = freeze_bdev(md->suspended_bdev, 0);
 	if (IS_ERR(md->frozen_sb)) {
 		r = PTR_ERR(md->frozen_sb);
 		md->frozen_sb = NULL;
diff -uprN -X linux-2.6.26.org/Documentation/dontdiff linux-2.6.26-xfs/fs/block_dev.c linux-2.6.26-timeout/fs/block_dev.
c
--- linux-2.6.26-xfs/fs/block_dev.c	2008-07-18 15:53:51.000000000 +0900
+++ linux-2.6.26-timeout/fs/block_dev.c	2008-07-18 17:12:22.000000000 +0900
@@ -287,6 +287,8 @@ static void init_once(struct kmem_cache 
 	inode_init_once(&ei->vfs_inode);
 	/* Initialize semaphore for freeze. */
 	sema_init(&bdev->bd_freeze_sem, 1);
+	/* Setup freeze timeout function. */
+	INIT_DELAYED_WORK(&bdev->bd_freeze_timeout, freeze_timeout);
 }
 
 static inline void __bd_forget(struct inode *inode)
diff -uprN -X linux-2.6.26.org/Documentation/dontdiff linux-2.6.26-xfs/fs/buffer.c linux-2.6.26-timeout/fs/buffer.c
--- linux-2.6.26-xfs/fs/buffer.c	2008-07-18 15:53:51.000000000 +0900
+++ linux-2.6.26-timeout/fs/buffer.c	2008-07-18 17:17:50.000000000 +0900
@@ -190,14 +190,18 @@ int fsync_bdev(struct block_device *bdev
 
 /**
  * freeze_bdev  --  lock a filesystem and force it into a consistent state
- * @bdev:	blockdevice to lock
+ * @bdev:              blockdevice to lock
+ * @timeout_msec:      timeout period
  *
  * This takes the block device bd_mount_sem to make sure no new mounts
  * happen on bdev until thaw_bdev() is called.
  * If a superblock is found on this device, we take the s_umount semaphore
  * on it to make sure nobody unmounts until the snapshot creation is done.
+ * If timeout_msec is bigger than 0, this registers the delayed work for
+ * timeout of the freeze feature.
  */
-struct super_block *freeze_bdev(struct block_device *bdev)
+struct super_block *freeze_bdev(struct block_device *bdev,
+				unsigned int timeout_msec)
 {
 	struct super_block *sb;
 
@@ -228,8 +232,11 @@ struct super_block *freeze_bdev(struct b
 	}
 
 	sync_blockdev(bdev);
-	up(&bdev->bd_freeze_sem);
+	/* Setup unfreeze timer. */
+	if (timeout_msec > 0)
+		add_freeze_timeout(bdev, timeout_msec);
 
+	up(&bdev->bd_freeze_sem);
 	return sb;	/* thaw_bdev releases s->s_umount and bd_mount_sem */
 }
 EXPORT_SYMBOL(freeze_bdev);
@@ -255,6 +262,9 @@ int thaw_bdev(struct block_device *bdev,
 		return 0;
 	}
 
+	/* Delete unfreeze timer. */
+	del_freeze_timeout(bdev);
+
 	if (sb) {
 		BUG_ON(sb->s_bdev != bdev);
 
diff -uprN -X linux-2.6.26.org/Documentation/dontdiff linux-2.6.26-xfs/fs/ioctl.c linux-2.6.26-timeout/fs/ioctl.c
--- linux-2.6.26-xfs/fs/ioctl.c	2008-07-18 21:57:20.000000000 +0900
+++ linux-2.6.26-timeout/fs/ioctl.c	2008-07-22 11:50:42.000000000 +0900
@@ -145,12 +145,16 @@ static int ioctl_fioasync(unsigned int f
  * ioctl_freeze - Freeze the filesystem.
  *
  * @filp:	target file
+ * @argp:       timeout value(sec)
  *
  * Call freeze_bdev() to freeze the filesystem.
  */
-static int ioctl_freeze(struct file *filp)
+static int ioctl_freeze(struct file *filp, int __user *argp)
 {
+	int timeout_sec;
+	unsigned int timeout_msec;
 	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+	int error;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -163,8 +167,25 @@ static int ioctl_freeze(struct file *fil
 	if (sb->s_bdev == NULL)
 		return -EINVAL;
 
+	/* arg(sec) to tick value. */
+	error = get_user(timeout_sec, argp);
+	if (error != 0)
+		return error;
+
+	if (timeout_sec < 0 || timeout_sec > UINT_MAX/1000)
+		return -EINVAL;
+
+	/*
+	 * If 1 is specified as the timeout period it is changed into 0
+	 * to retain compatibility with XFS's xfs_freeze.
+	 */
+	if (timeout_sec == 1)
+		timeout_sec = 0;
+
+	timeout_msec = timeout_sec * 1000;
+
 	/* Freeze */
-	sb = freeze_bdev(sb->s_bdev);
+	sb = freeze_bdev(sb->s_bdev, timeout_msec);
 	if (IS_ERR(sb))
 		return PTR_ERR(sb);
 	return 0;
@@ -193,6 +214,52 @@ static int ioctl_thaw(struct file *filp)
 }
 
 /*
+ * ioctl_freeze_reset_timeout - Reset timeout for freeze.
+ *
+ * @filp:       target file
+ * @argp:       timeout value(sec)
+ *
+ * Reset timeout for freeze.
+ */
+static int
+ioctl_freeze_reset_timeout(struct file *filp, int __user *argp)
+{
+	int timeout_sec;
+	unsigned int timeout_msec;
+	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+	struct block_device *bdev = sb->s_bdev;
+	int error;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* If a regular file or a directory isn't specified, return EINVAL. */
+	if (bdev == NULL)
+		return -EINVAL;
+
+	/* arg(sec) to tick value */
+	error = get_user(timeout_sec, argp);
+	if (error)
+		return error;
+
+	if (timeout_sec <= 0 || timeout_sec > UINT_MAX/1000)
+		return -EINVAL;
+
+	timeout_msec = timeout_sec * 1000;
+
+	down(&bdev->bd_freeze_sem);
+	if (!bdev->bd_freeze_count) {
+		up(&bdev->bd_freeze_sem);
+		return -EINVAL;
+	}
+	/* setup unfreeze timer */
+	add_freeze_timeout(bdev, timeout_msec);
+	up(&bdev->bd_freeze_sem);
+
+	return 0;
+}
+
+/*
  * When you add any new common ioctls to the switches above and below
  * please update compat_sys_ioctl() too.
  *
@@ -235,13 +302,17 @@ int do_vfs_ioctl(struct file *filp, unsi
 		break;
 
 	case FIFREEZE:
-		error = ioctl_freeze(filp);
+		error = ioctl_freeze(filp, argp);
 		break;
 
 	case FITHAW:
 		error = ioctl_thaw(filp);
 		break;
 
+	case FIFREEZE_RESET_TIMEOUT:
+		error = ioctl_freeze_reset_timeout(filp, argp);
+		break;
+
 	default:
 		if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
 			error = file_ioctl(filp, cmd, arg);
diff -uprN -X linux-2.6.26.org/Documentation/dontdiff linux-2.6.26-xfs/fs/super.c linux-2.6.26-timeout/fs/super.c
--- linux-2.6.26-xfs/fs/super.c	2008-07-17 11:33:13.000000000 +0900
+++ linux-2.6.26-timeout/fs/super.c	2008-07-17 11:36:00.000000000 +0900
@@ -980,3 +980,60 @@ struct vfsmount *kern_mount_data(struct 
 }
 
 EXPORT_SYMBOL_GPL(kern_mount_data);
+
+/*
+ * freeze_timeout - Thaw the filesystem.
+ *
+ * @work:	work queue (delayed_work.work)
+ *
+ * Called by the delayed work when elapsing the timeout period.
+ * Thaw the filesystem.
+ */
+void freeze_timeout(struct work_struct *work)
+{
+	struct block_device *bd = container_of(work,
+			struct block_device, bd_freeze_timeout.work);
+	struct super_block *sb = get_super(bd);
+
+	thaw_bdev(bd, sb);
+
+	if (sb)
+		drop_super(sb);
+}
+EXPORT_SYMBOL_GPL(freeze_timeout);
+
+/*
+ * add_freeze_timeout - Add timeout for freeze.
+ *
+ * @bdev:		block device struct
+ * @timeout_msec:	timeout period
+ *
+ * Add the delayed work for freeze timeout to the delayed work queue.
+ */
+void add_freeze_timeout(struct block_device *bdev, unsigned int timeout_msec)
+{
+	s64 timeout_jiffies = msecs_to_jiffies(timeout_msec);
+
+	/* Set delayed work queue */
+	cancel_delayed_work_sync(&bdev->bd_freeze_timeout);
+	schedule_delayed_work(&bdev->bd_freeze_timeout, timeout_jiffies);
+}
+
+/*
+ * del_freeze_timeout - Delete timeout for freeze.
+ *
+ * @bdev:	block device struct
+ *
+ * Delete the delayed work for freeze timeout from the delayed work queue.
+ */
+void del_freeze_timeout(struct block_device *bdev)
+{
+	/*
+	 * It's possible that the delayed work task (freeze_timeout()) calls
+	 * del_freeze_timeout().  If the delayed work task calls
+	 * cancel_delayed_work_sync((), the deadlock will occur.
+	 * So we need this check (delayed_work_pending()).
+	 */
+	if (delayed_work_pending(&bdev->bd_freeze_timeout))
+		cancel_delayed_work_sync(&bdev->bd_freeze_timeout);
+}
diff -uprN -X linux-2.6.26.org/Documentation/dontdiff linux-2.6.26-xfs/fs/xfs/xfs_fsops.c linux-2.6.26-timeout/fs/xfs/xf
s_fsops.c
--- linux-2.6.26-xfs/fs/xfs/xfs_fsops.c	2008-07-17 11:33:13.000000000 +0900
+++ linux-2.6.26-timeout/fs/xfs/xfs_fsops.c	2008-07-17 11:36:00.000000000 +0900
@@ -619,7 +619,7 @@ xfs_fs_goingdown(
 {
 	switch (inflags) {
 	case XFS_FSOP_GOING_FLAGS_DEFAULT: {
-		struct super_block *sb = freeze_bdev(mp->m_super->s_bdev);
+		struct super_block *sb = freeze_bdev(mp->m_super->s_bdev, 0);
 
 		if (sb && !IS_ERR(sb)) {
 			xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
diff -uprN -X linux-2.6.26.org/Documentation/dontdiff linux-2.6.26-xfs/include/linux/buffer_head.h linux-2.6.26-timeout/
include/linux/buffer_head.h
--- linux-2.6.26-xfs/include/linux/buffer_head.h	2008-07-17 11:33:14.000000000 +0900
+++ linux-2.6.26-timeout/include/linux/buffer_head.h	2008-07-17 11:36:00.000000000 +0900
@@ -170,7 +170,8 @@ int sync_blockdev(struct block_device *b
 void __wait_on_buffer(struct buffer_head *);
 wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
 int fsync_bdev(struct block_device *);
-struct super_block *freeze_bdev(struct block_device *);
+struct super_block *freeze_bdev(struct block_device *,
+				unsigned int timeout_msec);
 int thaw_bdev(struct block_device *, struct super_block *);
 int fsync_super(struct super_block *);
 int fsync_no_super(struct block_device *);
diff -uprN -X linux-2.6.26.org/Documentation/dontdiff linux-2.6.26-xfs/include/linux/fs.h linux-2.6.26-timeout/include/l
inux/fs.h
--- linux-2.6.26-xfs/include/linux/fs.h	2008-07-18 15:53:51.000000000 +0900
+++ linux-2.6.26-timeout/include/linux/fs.h	2008-07-18 21:51:41.000000000 +0900
@@ -8,6 +8,7 @@
 
 #include <linux/limits.h>
 #include <linux/ioctl.h>
+#include <linux/workqueue.h>
 
 /*
  * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -226,6 +227,7 @@ extern int dir_notify_enable;
 #define FIGETBSZ   _IO(0x00,2)	/* get the block size used for bmap */
 #define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
 #define FITHAW		_IOWR('X', 120, int)	/* Thaw */
+#define	FIFREEZE_RESET_TIMEOUT	_IO(0x00, 3)	/* Reset freeze timeout */
 
 #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
 #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
@@ -550,10 +552,13 @@ struct block_device {
 	 * care to not mess up bd_private for that case.
 	 */
 	unsigned long		bd_private;
+
 	/* The counter of freeze processes */
 	int			bd_freeze_count;
 	/* Semaphore for freeze */
 	struct semaphore	bd_freeze_sem;
+	/* Delayed work for freeze */
+	struct delayed_work	bd_freeze_timeout;
 };
 
 /*
@@ -2140,5 +2145,10 @@ int proc_nr_files(struct ctl_table *tabl
 
 int get_filesystem_list(char * buf);
 
+extern void add_freeze_timeout(struct block_device *bdev,
+				unsigned int timeout_msec);
+extern void del_freeze_timeout(struct block_device *bdev);
+extern void freeze_timeout(struct work_struct *work);
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_FS_H */

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-06-29 23:13       ` Takashi Sato
@ 2008-06-30  0:01         ` Andrew Morton
  0 siblings, 0 replies; 71+ messages in thread
From: Andrew Morton @ 2008-06-30  0:01 UTC (permalink / raw)
  To: Takashi Sato
  Cc: viro, linux-ext4, xfs, dm-devel, linux-fsdevel, linux-kernel,
	axboe, mtk.manpages

On Mon, 30 Jun 2008 08:13:07 +0900 "Takashi Sato" <t-sato@yk.jp.nec.com> wrote:

> > It's much better to use NULL here rather than literal zero because the
> > reader of this code can then say "ah-hah, we're passing in a pointer". 
> > Whereas plain old "0" could be a pointer or a scalar.
> 
> The second argument's type of freeze_bdev() is "long", not pointer as below.
> struct super_block *freeze_bdev(struct block_device *, long timeout_msec);

oh, ok, I goofed, sorry.

> So "0" is reasonable, isn't it?

yup.

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-06-27 18:57     ` Andrew Morton
@ 2008-06-29 23:13       ` Takashi Sato
  2008-06-30  0:01         ` Andrew Morton
  0 siblings, 1 reply; 71+ messages in thread
From: Takashi Sato @ 2008-06-29 23:13 UTC (permalink / raw)
  To: Andrew Morton
  Cc: viro, linux-ext4, xfs, dm-devel, linux-fsdevel, linux-kernel,
	axboe, mtk.manpages

Hi,

>> >>  case XFS_FSOP_GOING_FLAGS_DEFAULT: {
>> >> - struct super_block *sb = freeze_bdev(mp->m_super->s_bdev);
>> >> + struct super_block *sb = freeze_bdev(mp->m_super->s_bdev, 0);
>> >
>> > Using NULL here is clearer and will, I expect, avoid a sparse warning.
>> 
>> I checked it but I couldn't find a sparse warning in xfs_fsops.c.
>> Can you tell me how to use NULL?
> 
> struct super_block *sb = freeze_bdev(mp->m_super->s_bdev, NULL);
> 
> :)
> 
> It's much better to use NULL here rather than literal zero because the
> reader of this code can then say "ah-hah, we're passing in a pointer". 
> Whereas plain old "0" could be a pointer or a scalar.

The second argument's type of freeze_bdev() is "long", not pointer as below.
struct super_block *freeze_bdev(struct block_device *, long timeout_msec);

So "0" is reasonable, isn't it?

> We should always use NULL to represent a null pointer in the kernel. 
> The one acceptable exception is when testing for nullness:
> 
> if (ptr1)
> if (!ptr2)
> 
> Often people will use
> 
> if (ptr1 != NULL)
> if (ptr2 == NULL)
> 
> in this case as well.  (I prefer the shorter version personally, but
> either is OK).


^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-06-27 11:33   ` Takashi Sato
@ 2008-06-27 18:57     ` Andrew Morton
  2008-06-29 23:13       ` Takashi Sato
  0 siblings, 1 reply; 71+ messages in thread
From: Andrew Morton @ 2008-06-27 18:57 UTC (permalink / raw)
  To: Takashi Sato
  Cc: viro, linux-ext4, xfs, dm-devel, linux-fsdevel, linux-kernel,
	axboe, mtk.manpages

On Fri, 27 Jun 2008 20:33:58 +0900
"Takashi Sato" <t-sato@yk.jp.nec.com> wrote:

> >>  case XFS_FSOP_GOING_FLAGS_DEFAULT: {
> >> - struct super_block *sb = freeze_bdev(mp->m_super->s_bdev);
> >> + struct super_block *sb = freeze_bdev(mp->m_super->s_bdev, 0);
> >
> > Using NULL here is clearer and will, I expect, avoid a sparse warning.
> 
> I checked it but I couldn't find a sparse warning in xfs_fsops.c.
> Can you tell me how to use NULL?

	struct super_block *sb = freeze_bdev(mp->m_super->s_bdev, NULL);

:)

It's much better to use NULL here rather than literal zero because the
reader of this code can then say "ah-hah, we're passing in a pointer". 
Whereas plain old "0" could be a pointer or a scalar.

We should always use NULL to represent a null pointer in the kernel. 
The one acceptable exception is when testing for nullness:

	if (ptr1)
		if (!ptr2)

Often people will use

	if (ptr1 != NULL)
		if (ptr2 == NULL)

in this case as well.  (I prefer the shorter version personally, but
either is OK).



^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-06-24 22:09 ` Andrew Morton
@ 2008-06-27 11:33   ` Takashi Sato
  2008-06-27 18:57     ` Andrew Morton
  0 siblings, 1 reply; 71+ messages in thread
From: Takashi Sato @ 2008-06-27 11:33 UTC (permalink / raw)
  To: Andrew Morton
  Cc: viro, linux-ext4, xfs, dm-devel, linux-fsdevel, linux-kernel,
	axboe, mtk.manpages

Hi,

>> o Reset the timeout period
>>   int ioctl(int fd, int FIFREEZE_RESET_TIMEOUT, long *timeval)
>>     fd:file descriptor of mountpoint
>>     FIFREEZE_RESET_TIMEOUT: request code for reset of timeout period
>>     timeval: new timeout period in seconds
>>     Return value: 0 if the operation succeeds. Otherwise, -1
>>     Error number: If the filesystem has already been unfrozen,
>>                   errno is set to EINVAL.
>
> Please avoid the use of the term `timeval' here.  That term is closely
> associated with `struct timeval', and these are not being used here.  A
> better identifier would be timeout_secs - it tells the reader what it
> does, and it tells the reader what units it is in.  And when it comes
> to "time", it is very useful to tell people which units are being used,
> because there are many different ones.

OK.
I will use "timeout_secs" in the next version to match the source code.

>> -static int ioctl_freeze(struct file *filp)
>> +static int ioctl_freeze(struct file *filp, unsigned long arg)
>>  {
>> +    long timeout_sec;
>> +    long timeout_msec;
>>      struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
>> +    int error;
>>
>>      if (!capable(CAP_SYS_ADMIN))
>>          return -EPERM;
>> @@ -159,8 +163,27 @@ static int ioctl_freeze(struct file *fil
>>      if (sb->s_op->write_super_lockfs == NULL)
>>          return -EOPNOTSUPP;
>>
>> +    /* arg(sec) to tick value. */
>> +    error = get_user(timeout_sec, (long __user *) arg);>
> uh-oh, compat problems.
>
> A 32-bit application running under a 32-bit kernel will need to provide
> a 32-bit quantity.
>
> A 32-bit application running under a 64-bit kernel will need to provide
> a 64-bit quantity.
>
> I suggest that you go through the entire patch and redo this part of
> the kernel ABI in terms of __u32, and avoid any mention of "long" in
> the kernel<->userspace interface.

I agree.
I will replace long with a 32bit integer type.

>> +    /* arg(sec) to tick value. */
>> +    error = get_user(timeout_sec, (long __user *) arg);
>> +    if (timeout_sec < 0)
>> +        return -EINVAL;
>> +    else if (timeout_sec < 2)
>> +        timeout_sec = 0;> The `else' is unneeded.
>
> It would be clearer to code this all as:
>
> if (timeout_sec < 0)
>     return -EINVAL;
>
> if (timeout_sec == 1) {
>     /*
>      * If 1 is specified as the timeout period it is changed into 0
>      * to retain compatibility with XFS's xfs_freeze.
>      */
>     timeout_sec = 0;
> }

OK.

>> +    if (error)
>> +        return error;
>> +    timeout_msec = timeout_sec * 1000;
>> +    if (timeout_msec < 0)
>> +        return -EINVAL;
>
> um, OK, but timeout_msec could have overflowed during the
> multiplication and could be complete garbage.  I guess it doesn't
> matter much, but a cleaner approach might be:
>
> if (timeout_sec > LONG_MAX/1000)
>     return -EINVAL;
>
> or something like that.

OK.

> But otoh, why do the multiplication at all?  If we're able to handle
> the timeout down to the millisecond level, why not offer that
> resolution to userspace?  Offer the increased time granularity to the
> application?

I think there is no case the user specifies it in millisecond,
so the second granularity is more comfortable.

>> +    if (sb) {
>
> Can this happen?

I have found that sb == NULL never happen
but sb->s_bdev == NULL can happen when we call this ioctl
for a device file (not a directory or a regular file).
So I will add the following check.
if (sb->s_bdev == NULL)
    return -EINVAL;

>> +    if (test_and_set_bit(BD_FREEZE_OP, &sb->s_bdev->bd_state))
>> +        return -EBUSY;
>> +    if (sb->s_frozen == SB_UNFROZEN) {
>> +        clear_bit(BD_FREEZE_OP, &sb->s_bdev->bd_state);
>> +        return -EINVAL;
>> +    }
>> +    /* setup unfreeze timer */
>> +    if (timeout_msec > 0)
>
> What does this test do?  afacit it's special-casing the timeout_secs==0
> case.  Was this feature documented?  What does it do?

There is no special case of timeout_secs==0.
I will remove this check.

>> +void add_freeze_timeout(struct block_device *bdev, long timeout_msec)
>> +{
>> +    s64 timeout_jiffies = msecs_to_jiffies(timeout_msec);
>> +
>> +    /* Set delayed work queue */
>> +    cancel_delayed_work(&bdev->bd_freeze_timeout);
>
> Should this have used cancel_delayed_work_sync()?

Exactly.
I will replace cancel_delayed_work with cancel_delayed_work_sync.

>> +void del_freeze_timeout(struct block_device *bdev)
>> +{
>> + if (delayed_work_pending(&bdev->bd_freeze_timeout))
>
> Is this test needed?

It's not necessary because cancel_delayed_work_sync checks it itself.
I will remove it.

>> --- linux-2.6.26-rc7-xfs/fs/xfs/xfs_fsops.c 2008-06-23 11:55:15.000000000 +0900
>> +++ linux-2.6.26-rc7-timeout/fs/xfs/xfs_fsops.c 2008-06-23 11:56:47.000000000 +0900
>> @@ -619,7 +619,7 @@ xfs_fs_goingdown(
>>  {
>>  switch (inflags) {
>>  case XFS_FSOP_GOING_FLAGS_DEFAULT: {
>> - struct super_block *sb = freeze_bdev(mp->m_super->s_bdev);
>> + struct super_block *sb = freeze_bdev(mp->m_super->s_bdev, 0);
>
> Using NULL here is clearer and will, I expect, avoid a sparse warning.

I checked it but I couldn't find a sparse warning in xfs_fsops.c.
Can you tell me how to use NULL?

Cheers, Takashi

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 3/3] Add timeout feature
  2008-06-24  7:00 Takashi Sato
@ 2008-06-24 22:09 ` Andrew Morton
  2008-06-27 11:33   ` Takashi Sato
  0 siblings, 1 reply; 71+ messages in thread
From: Andrew Morton @ 2008-06-24 22:09 UTC (permalink / raw)
  To: Takashi Sato
  Cc: viro, linux-ext4, xfs, dm-devel, linux-fsdevel, linux-kernel,
	axboe, mtk.manpages

On Tue, 24 Jun 2008 16:00:56 +0900
Takashi Sato <t-sato@yk.jp.nec.com> wrote:

> The timeout feature is added to freeze ioctl.  And new ioctl
> to reset the timeout period is added.
> o Freeze the filesystem
>   int ioctl(int fd, int FIFREEZE, long *timeval)
>     fd: The file descriptor of the mountpoint
>     FIFREEZE: request code for the freeze
>     timeval: the timeout period in seconds
>              If it's 0 or 1, the timeout isn't set.
>              This special case of "1" is implemented to keep
>              the compatibility with XFS applications.
>     Return value: 0 if the operation succeeds. Otherwise, -1
> 
> o Reset the timeout period
>   int ioctl(int fd, int FIFREEZE_RESET_TIMEOUT, long *timeval)
>     fd:file descriptor of mountpoint
>     FIFREEZE_RESET_TIMEOUT: request code for reset of timeout period
>     timeval: new timeout period in seconds
>     Return value: 0 if the operation succeeds. Otherwise, -1
>     Error number: If the filesystem has already been unfrozen,
>                   errno is set to EINVAL.
> 

Please avoid the use of the term `timeval' here.  That term is closely
associated with `struct timeval', and these are not being used here.  A
better identifier would be timeout_secs - it tells the reader what it
does, and it tells the reader what units it is in.  And when it comes
to "time", it is very useful to tell people which units are being used,
because there are many different ones.

>
> ...
>
> --- linux-2.6.26-rc7-xfs/fs/buffer.c	2008-06-23 11:55:15.000000000 +0900
> +++ linux-2.6.26-rc7-timeout/fs/buffer.c	2008-06-23 11:56:47.000000000 +0900
> @@ -190,14 +190,17 @@ int fsync_bdev(struct block_device *bdev
>  
>  /**
>   * freeze_bdev  --  lock a filesystem and force it into a consistent state
> - * @bdev:	blockdevice to lock
> + * @bdev:              blockdevice to lock
> + * @timeout_msec:      timeout period
>   *
>   * This takes the block device bd_mount_sem to make sure no new mounts
>   * happen on bdev until thaw_bdev() is called.
>   * If a superblock is found on this device, we take the s_umount semaphore
>   * on it to make sure nobody unmounts until the snapshot creation is done.
> + * If timeout_msec is bigger than 0, this registers the delayed work for
> + * timeout of the freeze feature.
>   */
> -struct super_block *freeze_bdev(struct block_device *bdev)
> +struct super_block *freeze_bdev(struct block_device *bdev, long timeout_msec)

timeout_msec is good.

>  {
>  	struct super_block *sb;
>  
> @@ -234,6 +237,10 @@ struct super_block *freeze_bdev(struct b
>  	}
>  
>  	sync_blockdev(bdev);
> +	/* Setup unfreeze timer. */
> +	if (timeout_msec > 0)
> +		add_freeze_timeout(bdev, timeout_msec);
> +
>  	clear_bit(BD_FREEZE_OP, &bdev->bd_state);
>  
>  	return sb;	/* thaw_bdev releases s->s_umount and bd_mount_sem */
> @@ -258,6 +265,9 @@ int thaw_bdev(struct block_device *bdev,
>  		return 0;
>  	}
>  
> +	/* Delete unfreeze timer. */
> +	del_freeze_timeout(bdev);
> +
>  	if (sb) {
>  		BUG_ON(sb->s_bdev != bdev);
>  
> diff -uprN -X linux-2.6.26-rc7.org/Documentation/dontdiff linux-2.6.26-rc7-xfs/fs/ioctl.c linux-2.6.26-rc7-timeout/fs/io
> ctl.c
> --- linux-2.6.26-rc7-xfs/fs/ioctl.c	2008-06-23 11:55:15.000000000 +0900
> +++ linux-2.6.26-rc7-timeout/fs/ioctl.c	2008-06-23 11:56:47.000000000 +0900
> @@ -145,12 +145,16 @@ static int ioctl_fioasync(unsigned int f
>   * ioctl_freeze - Freeze the filesystem.
>   *
>   * @filp:	target file
> + * @argp:       timeout value(sec)
>   *
>   * Call freeze_bdev() to freeze the filesystem.
>   */
> -static int ioctl_freeze(struct file *filp)
> +static int ioctl_freeze(struct file *filp, unsigned long arg)
>  {
> +	long timeout_sec;
> +	long timeout_msec;
>  	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
> +	int error;
>  
>  	if (!capable(CAP_SYS_ADMIN))
>  		return -EPERM;
> @@ -159,8 +163,27 @@ static int ioctl_freeze(struct file *fil
>  	if (sb->s_op->write_super_lockfs == NULL)
>  		return -EOPNOTSUPP;
>  
> +	/* arg(sec) to tick value. */
> +	error = get_user(timeout_sec, (long __user *) arg);

uh-oh, compat problems.

A 32-bit application running under a 32-bit kernel will need to provide
a 32-bit quantity.

A 32-bit application running under a 64-bit kernel will need to provide
a 64-bit quantity.

I suggest that you go through the entire patch and redo this part of
the kernel ABI in terms of __u32, and avoid any mention of "long" in
the kernel<->userspace interface.

> +	if (error != 0)
> +		return error;
> +	/*
> +	 * If 1 is specified as the timeout period,
> +	 * it will be changed into 0 to keep the compatibility
> +	 * of XFS application(xfs_freeze).
> +	 */
> +	if (timeout_sec < 0)
> +		return -EINVAL;
> +	else if (timeout_sec < 2)
> +		timeout_sec = 0;

The `else' is unneeded.

It would be clearer to code this all as:

	if (timeout_sec < 0)
		return -EINVAL;

	if (timeout_sec == 1) {
		/*
		 * If 1 is specified as the timeout period it is changed into 0
		 * to retain compatibility with XFS's xfs_freeze.
		 */
		timeout_sec = 0;
	}
		
	
> +	timeout_msec = timeout_sec * 1000;
> +	/* overflow case */
> +	if (timeout_msec < 0)
> +		return -EINVAL;
> +
>  	/* Freeze */
> -	sb = freeze_bdev(sb->s_bdev);
> +	sb = freeze_bdev(sb->s_bdev, timeout_msec);
>  	if (IS_ERR(sb))
>  		return PTR_ERR(sb);
>  	return 0;
> @@ -185,6 +208,51 @@ static int ioctl_thaw(struct file *filp)
>  }
>  
>  /*
> + * ioctl_freeze_reset_timeout - Reset timeout for freeze.
> + *
> + * @filp:       target file
> + * @argp:       timeout value(sec)
> + *
> + * Rest timeout for freeze.

typo

> + */
> +static int
> +ioctl_freeze_reset_timeout(struct file *filp, unsigned long arg)
> +{
> +	long timeout_sec;
> +	long timeout_msec;
> +	struct super_block *sb
> +			= filp->f_path.dentry->d_inode->i_sb;

Unneeded linebreak there.

> +	int error;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	/* arg(sec) to tick value */
> +	error = get_user(timeout_sec, (long __user *) arg);

compat issues.

> +	if (error)
> +		return error;
> +	timeout_msec = timeout_sec * 1000;
> +	if (timeout_msec < 0)
> +		return -EINVAL;

um, OK, but timeout_msec could have overflowed during the
multiplication and could be complete garbage.  I guess it doesn't
matter much, but a cleaner approach might be:

	if (timeout_sec > LONG_MAX/1000)
		return -EINVAL;

or something like that.

But otoh, why do the multiplication at all?  If we're able to handle
the timeout down to the millisecond level, why not offer that
resolution to userspace?  Offer the increased time granularity to the
application?

> +	if (sb) {

Can this happen?

> +		if (test_and_set_bit(BD_FREEZE_OP, &sb->s_bdev->bd_state))
> +			return -EBUSY;
> +		if (sb->s_frozen == SB_UNFROZEN) {
> +			clear_bit(BD_FREEZE_OP, &sb->s_bdev->bd_state);
> +			return -EINVAL;
> +		}
> +		/* setup unfreeze timer */
> +		if (timeout_msec > 0)

What does this test do?  afacit it's special-casing the timeout_secs==0
case.  Was this feature documented?  What does it do?

> +			add_freeze_timeout(sb->s_bdev,
> +					timeout_msec);
> +		clear_bit(BD_FREEZE_OP, &sb->s_bdev->bd_state);
> +	}
> +
> +	return 0;
> +}
> +
>
> ...
>
> +void add_freeze_timeout(struct block_device *bdev, long timeout_msec)
> +{
> +	s64 timeout_jiffies = msecs_to_jiffies(timeout_msec);
> +
> +	/* Set delayed work queue */
> +	cancel_delayed_work(&bdev->bd_freeze_timeout);

Should this have used cancel_delayed_work_sync()?

> +	schedule_delayed_work(&bdev->bd_freeze_timeout, timeout_jiffies);
> +}
> +
> +/*
> + * del_freeze_timeout - Delete timeout for freeze.
> + *
> + * @bdev:	block device struct
> + *
> + * Delete the delayed work for freeze timeout from the delayed work queue.
> + */
> +void del_freeze_timeout(struct block_device *bdev)
> +{
> +	if (delayed_work_pending(&bdev->bd_freeze_timeout))

Is this test needed?

> +		cancel_delayed_work(&bdev->bd_freeze_timeout);

cancel_delayed_work_sync()?

> +}
> diff -uprN -X linux-2.6.26-rc7.org/Documentation/dontdiff linux-2.6.26-rc7-xfs/fs/xfs/xfs_fsops.c linux-2.6.26-rc7-timeo
> ut/fs/xfs/xfs_fsops.c
> --- linux-2.6.26-rc7-xfs/fs/xfs/xfs_fsops.c	2008-06-23 11:55:15.000000000 +0900
> +++ linux-2.6.26-rc7-timeout/fs/xfs/xfs_fsops.c	2008-06-23 11:56:47.000000000 +0900
> @@ -619,7 +619,7 @@ xfs_fs_goingdown(
>  {
>  	switch (inflags) {
>  	case XFS_FSOP_GOING_FLAGS_DEFAULT: {
> -		struct super_block *sb = freeze_bdev(mp->m_super->s_bdev);
> +		struct super_block *sb = freeze_bdev(mp->m_super->s_bdev, 0);

Using NULL here is clearer and will, I expect, avoid a sparse warning.

>
> ...
>


^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 3/3] Add timeout feature
@ 2008-06-24  7:00 Takashi Sato
  2008-06-24 22:09 ` Andrew Morton
  0 siblings, 1 reply; 71+ messages in thread
From: Takashi Sato @ 2008-06-24  7:00 UTC (permalink / raw)
  To: akpm, viro
  Cc: linux-ext4, xfs, dm-devel, linux-fsdevel, linux-kernel, axboe,
	mtk.manpages

The timeout feature is added to freeze ioctl.  And new ioctl
to reset the timeout period is added.
o Freeze the filesystem
  int ioctl(int fd, int FIFREEZE, long *timeval)
    fd: The file descriptor of the mountpoint
    FIFREEZE: request code for the freeze
    timeval: the timeout period in seconds
             If it's 0 or 1, the timeout isn't set.
             This special case of "1" is implemented to keep
             the compatibility with XFS applications.
    Return value: 0 if the operation succeeds. Otherwise, -1

o Reset the timeout period
  int ioctl(int fd, int FIFREEZE_RESET_TIMEOUT, long *timeval)
    fd:file descriptor of mountpoint
    FIFREEZE_RESET_TIMEOUT: request code for reset of timeout period
    timeval: new timeout period in seconds
    Return value: 0 if the operation succeeds. Otherwise, -1
    Error number: If the filesystem has already been unfrozen,
                  errno is set to EINVAL.

Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
Signed-off-by: Masayuki Hamaguchi <m-hamaguchi@ys.jp.nec.com>
---
 drivers/md/dm.c             |    2 -
 fs/block_dev.c              |    2 +
 fs/buffer.c                 |   14 ++++++-
 fs/ioctl.c                  |   78 ++++++++++++++++++++++++++++++++++++++++++--
 fs/super.c                  |   51 ++++++++++++++++++++++++++++
 fs/xfs/xfs_fsops.c          |    2 -
 include/linux/buffer_head.h |    2 -
 include/linux/fs.h          |    8 ++++
 8 files changed, 151 insertions(+), 8 deletions(-)

diff -uprN -X linux-2.6.26-rc7.org/Documentation/dontdiff linux-2.6.26-rc7-xfs/drivers/md/dm.c linux-2.6.26-rc7-timeout/
drivers/md/dm.c
--- linux-2.6.26-rc7-xfs/drivers/md/dm.c	2008-06-23 11:55:11.000000000 +0900
+++ linux-2.6.26-rc7-timeout/drivers/md/dm.c	2008-06-23 11:56:47.000000000 +0900
@@ -1407,7 +1407,7 @@ static int lock_fs(struct mapped_device 
 
 	WARN_ON(md->frozen_sb);
 
-	md->frozen_sb = freeze_bdev(md->suspended_bdev);
+	md->frozen_sb = freeze_bdev(md->suspended_bdev, 0);
 	if (IS_ERR(md->frozen_sb)) {
 		r = PTR_ERR(md->frozen_sb);
 		md->frozen_sb = NULL;
diff -uprN -X linux-2.6.26-rc7.org/Documentation/dontdiff linux-2.6.26-rc7-xfs/fs/block_dev.c linux-2.6.26-rc7-timeout/f
s/block_dev.c
--- linux-2.6.26-rc7-xfs/fs/block_dev.c	2008-06-23 11:55:15.000000000 +0900
+++ linux-2.6.26-rc7-timeout/fs/block_dev.c	2008-06-23 11:56:47.000000000 +0900
@@ -285,6 +285,8 @@ static void init_once(struct kmem_cache 
 	INIT_LIST_HEAD(&bdev->bd_holder_list);
 #endif
 	inode_init_once(&ei->vfs_inode);
+	/* Setup freeze timeout function. */
+	INIT_DELAYED_WORK(&bdev->bd_freeze_timeout, freeze_timeout);
 }
 
 static inline void __bd_forget(struct inode *inode)
diff -uprN -X linux-2.6.26-rc7.org/Documentation/dontdiff linux-2.6.26-rc7-xfs/fs/buffer.c linux-2.6.26-rc7-timeout/fs/b
uffer.c
--- linux-2.6.26-rc7-xfs/fs/buffer.c	2008-06-23 11:55:15.000000000 +0900
+++ linux-2.6.26-rc7-timeout/fs/buffer.c	2008-06-23 11:56:47.000000000 +0900
@@ -190,14 +190,17 @@ int fsync_bdev(struct block_device *bdev
 
 /**
  * freeze_bdev  --  lock a filesystem and force it into a consistent state
- * @bdev:	blockdevice to lock
+ * @bdev:              blockdevice to lock
+ * @timeout_msec:      timeout period
  *
  * This takes the block device bd_mount_sem to make sure no new mounts
  * happen on bdev until thaw_bdev() is called.
  * If a superblock is found on this device, we take the s_umount semaphore
  * on it to make sure nobody unmounts until the snapshot creation is done.
+ * If timeout_msec is bigger than 0, this registers the delayed work for
+ * timeout of the freeze feature.
  */
-struct super_block *freeze_bdev(struct block_device *bdev)
+struct super_block *freeze_bdev(struct block_device *bdev, long timeout_msec)
 {
 	struct super_block *sb;
 
@@ -234,6 +237,10 @@ struct super_block *freeze_bdev(struct b
 	}
 
 	sync_blockdev(bdev);
+	/* Setup unfreeze timer. */
+	if (timeout_msec > 0)
+		add_freeze_timeout(bdev, timeout_msec);
+
 	clear_bit(BD_FREEZE_OP, &bdev->bd_state);
 
 	return sb;	/* thaw_bdev releases s->s_umount and bd_mount_sem */
@@ -258,6 +265,9 @@ int thaw_bdev(struct block_device *bdev,
 		return 0;
 	}
 
+	/* Delete unfreeze timer. */
+	del_freeze_timeout(bdev);
+
 	if (sb) {
 		BUG_ON(sb->s_bdev != bdev);
 
diff -uprN -X linux-2.6.26-rc7.org/Documentation/dontdiff linux-2.6.26-rc7-xfs/fs/ioctl.c linux-2.6.26-rc7-timeout/fs/io
ctl.c
--- linux-2.6.26-rc7-xfs/fs/ioctl.c	2008-06-23 11:55:15.000000000 +0900
+++ linux-2.6.26-rc7-timeout/fs/ioctl.c	2008-06-23 11:56:47.000000000 +0900
@@ -145,12 +145,16 @@ static int ioctl_fioasync(unsigned int f
  * ioctl_freeze - Freeze the filesystem.
  *
  * @filp:	target file
+ * @argp:       timeout value(sec)
  *
  * Call freeze_bdev() to freeze the filesystem.
  */
-static int ioctl_freeze(struct file *filp)
+static int ioctl_freeze(struct file *filp, unsigned long arg)
 {
+	long timeout_sec;
+	long timeout_msec;
 	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+	int error;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -159,8 +163,27 @@ static int ioctl_freeze(struct file *fil
 	if (sb->s_op->write_super_lockfs == NULL)
 		return -EOPNOTSUPP;
 
+	/* arg(sec) to tick value. */
+	error = get_user(timeout_sec, (long __user *) arg);
+	if (error != 0)
+		return error;
+	/*
+	 * If 1 is specified as the timeout period,
+	 * it will be changed into 0 to keep the compatibility
+	 * of XFS application(xfs_freeze).
+	 */
+	if (timeout_sec < 0)
+		return -EINVAL;
+	else if (timeout_sec < 2)
+		timeout_sec = 0;
+
+	timeout_msec = timeout_sec * 1000;
+	/* overflow case */
+	if (timeout_msec < 0)
+		return -EINVAL;
+
 	/* Freeze */
-	sb = freeze_bdev(sb->s_bdev);
+	sb = freeze_bdev(sb->s_bdev, timeout_msec);
 	if (IS_ERR(sb))
 		return PTR_ERR(sb);
 	return 0;
@@ -185,6 +208,51 @@ static int ioctl_thaw(struct file *filp)
 }
 
 /*
+ * ioctl_freeze_reset_timeout - Reset timeout for freeze.
+ *
+ * @filp:       target file
+ * @argp:       timeout value(sec)
+ *
+ * Rest timeout for freeze.
+ */
+static int
+ioctl_freeze_reset_timeout(struct file *filp, unsigned long arg)
+{
+	long timeout_sec;
+	long timeout_msec;
+	struct super_block *sb
+			= filp->f_path.dentry->d_inode->i_sb;
+	int error;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* arg(sec) to tick value */
+	error = get_user(timeout_sec, (long __user *) arg);
+	if (error)
+		return error;
+	timeout_msec = timeout_sec * 1000;
+	if (timeout_msec < 0)
+		return -EINVAL;
+
+	if (sb) {
+		if (test_and_set_bit(BD_FREEZE_OP, &sb->s_bdev->bd_state))
+			return -EBUSY;
+		if (sb->s_frozen == SB_UNFROZEN) {
+			clear_bit(BD_FREEZE_OP, &sb->s_bdev->bd_state);
+			return -EINVAL;
+		}
+		/* setup unfreeze timer */
+		if (timeout_msec > 0)
+			add_freeze_timeout(sb->s_bdev,
+					timeout_msec);
+		clear_bit(BD_FREEZE_OP, &sb->s_bdev->bd_state);
+	}
+
+	return 0;
+}
+
+/*
  * When you add any new common ioctls to the switches above and below
  * please update compat_sys_ioctl() too.
  *
@@ -227,13 +295,17 @@ int do_vfs_ioctl(struct file *filp, unsi
 		break;
 
 	case FIFREEZE:
-		error = ioctl_freeze(filp);
+		error = ioctl_freeze(filp, arg);
 		break;
 
 	case FITHAW:
 		error = ioctl_thaw(filp);
 		break;
 
+	case FIFREEZE_RESET_TIMEOUT:
+		error = ioctl_freeze_reset_timeout(filp, arg);
+		break;
+
 	default:
 		if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
 			error = file_ioctl(filp, cmd, arg);
diff -uprN -X linux-2.6.26-rc7.org/Documentation/dontdiff linux-2.6.26-rc7-xfs/fs/super.c linux-2.6.26-rc7-timeout/fs/su
per.c
--- linux-2.6.26-rc7-xfs/fs/super.c	2008-06-23 11:55:15.000000000 +0900
+++ linux-2.6.26-rc7-timeout/fs/super.c	2008-06-23 11:56:47.000000000 +0900
@@ -1010,3 +1010,54 @@ struct vfsmount *kern_mount_data(struct 
 }
 
 EXPORT_SYMBOL_GPL(kern_mount_data);
+
+/*
+ * freeze_timeout - Thaw the filesystem.
+ *
+ * @work:	work queue (delayed_work.work)
+ *
+ * Called by the delayed work when elapsing the timeout period.
+ * Thaw the filesystem.
+ */
+void freeze_timeout(struct work_struct *work)
+{
+	struct block_device *bd = container_of(work,
+			struct block_device, bd_freeze_timeout.work);
+	struct super_block *sb = get_super_without_lock(bd);
+
+	thaw_bdev(bd, sb);
+
+	if (sb)
+		put_super(sb);
+}
+EXPORT_SYMBOL_GPL(freeze_timeout);
+
+/*
+ * add_freeze_timeout - Add timeout for freeze.
+ *
+ * @bdev:		block device struct
+ * @timeout_msec:	timeout period
+ *
+ * Add the delayed work for freeze timeout to the delayed work queue.
+ */
+void add_freeze_timeout(struct block_device *bdev, long timeout_msec)
+{
+	s64 timeout_jiffies = msecs_to_jiffies(timeout_msec);
+
+	/* Set delayed work queue */
+	cancel_delayed_work(&bdev->bd_freeze_timeout);
+	schedule_delayed_work(&bdev->bd_freeze_timeout, timeout_jiffies);
+}
+
+/*
+ * del_freeze_timeout - Delete timeout for freeze.
+ *
+ * @bdev:	block device struct
+ *
+ * Delete the delayed work for freeze timeout from the delayed work queue.
+ */
+void del_freeze_timeout(struct block_device *bdev)
+{
+	if (delayed_work_pending(&bdev->bd_freeze_timeout))
+		cancel_delayed_work(&bdev->bd_freeze_timeout);
+}
diff -uprN -X linux-2.6.26-rc7.org/Documentation/dontdiff linux-2.6.26-rc7-xfs/fs/xfs/xfs_fsops.c linux-2.6.26-rc7-timeo
ut/fs/xfs/xfs_fsops.c
--- linux-2.6.26-rc7-xfs/fs/xfs/xfs_fsops.c	2008-06-23 11:55:15.000000000 +0900
+++ linux-2.6.26-rc7-timeout/fs/xfs/xfs_fsops.c	2008-06-23 11:56:47.000000000 +0900
@@ -619,7 +619,7 @@ xfs_fs_goingdown(
 {
 	switch (inflags) {
 	case XFS_FSOP_GOING_FLAGS_DEFAULT: {
-		struct super_block *sb = freeze_bdev(mp->m_super->s_bdev);
+		struct super_block *sb = freeze_bdev(mp->m_super->s_bdev, 0);
 
 		if (sb && !IS_ERR(sb)) {
 			xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
diff -uprN -X linux-2.6.26-rc7.org/Documentation/dontdiff linux-2.6.26-rc7-xfs/include/linux/buffer_head.h linux-2.6.26-
rc7-timeout/include/linux/buffer_head.h
--- linux-2.6.26-rc7-xfs/include/linux/buffer_head.h	2008-06-23 11:55:16.000000000 +0900
+++ linux-2.6.26-rc7-timeout/include/linux/buffer_head.h	2008-06-23 11:56:47.000000000 +0900
@@ -170,7 +170,7 @@ int sync_blockdev(struct block_device *b
 void __wait_on_buffer(struct buffer_head *);
 wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
 int fsync_bdev(struct block_device *);
-struct super_block *freeze_bdev(struct block_device *);
+struct super_block *freeze_bdev(struct block_device *, long timeout_msec);
 int thaw_bdev(struct block_device *, struct super_block *);
 int fsync_super(struct super_block *);
 int fsync_no_super(struct block_device *);
diff -uprN -X linux-2.6.26-rc7.org/Documentation/dontdiff linux-2.6.26-rc7-xfs/include/linux/fs.h linux-2.6.26-rc7-timeo
ut/include/linux/fs.h
--- linux-2.6.26-rc7-xfs/include/linux/fs.h	2008-06-23 11:55:16.000000000 +0900
+++ linux-2.6.26-rc7-timeout/include/linux/fs.h	2008-06-23 11:56:47.000000000 +0900
@@ -8,6 +8,7 @@
 
 #include <linux/limits.h>
 #include <linux/ioctl.h>
+#include <linux/workqueue.h>
 
 /*
  * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -225,6 +226,7 @@ extern int dir_notify_enable;
 #define FIGETBSZ   _IO(0x00,2)	/* get the block size used for bmap */
 #define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
 #define FITHAW		_IOWR('X', 120, int)	/* Thaw */
+#define	FIFREEZE_RESET_TIMEOUT	_IO(0x00, 3)	/* Reset freeze timeout */
 
 #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
 #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
@@ -559,6 +561,8 @@ struct block_device {
 
 	/* State of the block device. (Used by freeze feature) */
 	unsigned long		bd_state;
+	/* Delayed work for freeze */
+	struct delayed_work     bd_freeze_timeout;
 };
 
 /*
@@ -2149,5 +2153,9 @@ int proc_nr_files(struct ctl_table *tabl
 
 int get_filesystem_list(char * buf);
 
+extern void add_freeze_timeout(struct block_device *bdev, long timeout_msec);
+extern void del_freeze_timeout(struct block_device *bdev);
+extern void freeze_timeout(struct work_struct *work);
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_FS_H */

^ permalink raw reply	[flat|nested] 71+ messages in thread

end of thread, other threads:[~2008-10-09 10:18 UTC | newest]

Thread overview: 71+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-06-30 12:24 [PATCH 3/3] Add timeout feature Takashi Sato
2008-07-01  8:10 ` Christoph Hellwig
2008-07-01 10:52   ` [dm-devel] " Alasdair G Kergon
2008-07-03 12:11     ` Takashi Sato
2008-07-03 12:47       ` Alasdair G Kergon
2008-07-03 22:11         ` Dave Chinner
2008-07-04 12:08           ` Takashi Sato
2008-07-03 14:45       ` Eric Sandeen
2008-07-07 11:07   ` Pavel Machek
2008-07-08 23:10     ` Dave Chinner
2008-07-08 23:20       ` Pavel Machek
2008-07-09  0:52         ` Dave Chinner
2008-07-09  1:09           ` Theodore Tso
2008-07-09  4:21             ` Brad Boyer
2008-07-09  6:13             ` Miklos Szeredi
2008-07-09  6:16               ` Christoph Hellwig
2008-07-09  6:22                 ` Miklos Szeredi
2008-07-09  6:41                   ` Arjan van de Ven
2008-07-09  6:48                     ` Miklos Szeredi
2008-07-09  6:55                       ` Arjan van de Ven
2008-07-09  7:08                         ` Miklos Szeredi
2008-07-09 20:48                           ` Pavel Machek
2008-07-09  7:13                         ` Dave Chinner
2008-07-09 11:09                           ` Theodore Tso
2008-07-09 11:49                             ` Dave Chinner
2008-07-09 12:24                               ` Theodore Tso
2008-07-09 12:59                                 ` Olaf Frączyk
2008-07-09 13:57                                   ` Arjan van de Ven
2008-07-09 13:55                               ` Arjan van de Ven
2008-07-09 13:58                               ` jim owens
2008-07-09 14:13                                 ` jim owens
2008-07-13 12:06                                 ` Pavel Machek
2008-07-13 17:15                                   ` jim owens
2008-07-14  6:36                                     ` Pavel Machek
2008-07-14 13:17                                       ` jim owens
2008-07-14 13:12                                 ` Takashi Sato
2008-07-14 14:04                                   ` jim owens
2008-07-09 13:53                           ` Arjan van de Ven
2008-07-09  6:59                     ` Dave Chinner
2008-07-09  7:13                       ` Miklos Szeredi
2008-07-09  7:33                         ` Dave Chinner
2008-07-09  8:11                           ` Miklos Szeredi
2008-07-09 11:15                             ` Dave Chinner
2008-07-09 20:44           ` Pavel Machek
  -- strict thread matches above, loose matches on Subject: below --
2008-09-08 11:53 Takashi Sato
2008-09-08 17:11 ` Christoph Hellwig
2008-09-25 21:06   ` Ric Wheeler
2008-09-26  8:52     ` Takashi Sato
2008-09-26 10:58       ` Ric Wheeler
2008-09-29 11:11         ` Takashi Sato
2008-09-26 12:35       ` Valdis.Kletnieks
2008-09-29 14:13       ` Christoph Hellwig
2008-09-29 14:36         ` Eric Sandeen
2008-09-29 14:37           ` Christoph Hellwig
2008-09-29 14:45             ` Eric Sandeen
2008-09-29 22:08               ` jim owens
2008-10-05 10:00               ` Pavel Machek
2008-10-09 10:12               ` Takashi Sato
2008-10-09 10:18                 ` Christoph Hellwig
2008-08-18 12:28 Takashi Sato
2008-08-21 20:20 ` Andrew Morton
2008-08-22 18:16   ` Christoph Hellwig
2008-08-24 17:03   ` Oleg Nesterov
2008-08-29  9:39   ` Takashi Sato
2008-07-22  9:36 Takashi Sato
2008-06-24  7:00 Takashi Sato
2008-06-24 22:09 ` Andrew Morton
2008-06-27 11:33   ` Takashi Sato
2008-06-27 18:57     ` Andrew Morton
2008-06-29 23:13       ` Takashi Sato
2008-06-30  0:01         ` Andrew Morton

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).