linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] lockfs patch for 2.6
@ 2004-03-09 21:31 Chris Mason
  2004-03-12  9:31 ` Christoph Hellwig
  2004-03-13 13:14 ` Christoph Hellwig
  0 siblings, 2 replies; 19+ messages in thread
From: Chris Mason @ 2004-03-09 21:31 UTC (permalink / raw)
  To: linux-kernel

Hello everyone,

In order to get consistent snapshots with the device mapper code, you
need to sync and lock down any filesystems sitting on top of the
device.  This isn't as critical in the 2.6 code since it can do writable
snapshots, but it is still nice to have things properly synced and
consistent.  

I've had various forms of this against 2.4, the ugly part was always the
locking to make sure a new FS wasn't mounted on the source while the
snapshot was being setup.  Here's my 2.6 version, with the DM code
contributed by Kevin Corry.  The basic idea is to add a semaphore to the
block device that gets used to make sure there are no new mounts.

This needs the rest of Joe's device mapper updates to be useful, but I
wanted to send here for comments on locking and other issues:

-chris

Index: linux.dm/fs/block_dev.c
===================================================================
--- linux.dm.orig/fs/block_dev.c	2004-02-27 15:47:10.796588369 -0500
+++ linux.dm/fs/block_dev.c	2004-02-27 15:48:41.512740351 -0500
@@ -242,6 +242,7 @@
 	{
 		memset(bdev, 0, sizeof(*bdev));
 		sema_init(&bdev->bd_sem, 1);
+		sema_init(&bdev->bd_mount_sem, 1);
 		INIT_LIST_HEAD(&bdev->bd_inodes);
 		INIT_LIST_HEAD(&bdev->bd_list);
 		inode_init_once(&ei->vfs_inode);
Index: linux.dm/fs/super.c
===================================================================
--- linux.dm.orig/fs/super.c	2004-02-27 15:47:11.065508887 -0500
+++ linux.dm/fs/super.c	2004-02-27 15:48:41.521737673 -0500
@@ -292,6 +292,62 @@
 }
 
 /*
+ * triggered by the device mapper code to lock a filesystem and force
+ * it into a consistent state.
+ *
+ * This takes the block device bd_mount_sem to make sure no new mounts
+ * happen on bdev until unlockfs is called.  If a super is found on this
+ * block device, we hould a read lock on the s->s_umount sem to make sure
+ * nobody unmounts until the snapshot creation is done
+ */
+void sync_super_lockfs(struct block_device *bdev) 
+{
+	struct super_block *sb;
+	down(&bdev->bd_mount_sem);
+	sb = get_super(bdev);
+	if (sb) {
+		lock_super(sb);
+		if (sb->s_dirt && sb->s_op->write_super)
+			sb->s_op->write_super(sb);
+		if (sb->s_op->write_super_lockfs)
+			sb->s_op->write_super_lockfs(sb);
+		unlock_super(sb);
+	}
+	/* unlockfs releases s->s_umount and bd_mount_sem */
+}
+
+void unlockfs(struct block_device *bdev)
+{
+	struct list_head *p;
+	/*
+	 * copied from get_super, but we need to
+	 * do special things since lockfs left the
+	 * s_umount sem held
+	 */
+	spin_lock(&sb_lock);
+	list_for_each(p, &super_blocks) {
+		struct super_block *s = sb_entry(p);
+		/*
+		 * if there is a super for this block device
+		 * in the list, get_super must have found it
+		 * during sync_super_lockfs, so our drop_super
+		 * will drop the reference created there.
+		 */
+		if (s->s_bdev == bdev && s->s_root) {
+			spin_unlock(&sb_lock);
+			if (s->s_op->unlockfs)
+				s->s_op->unlockfs(s);
+			drop_super(s);
+			goto unlock;
+		}
+	}
+	spin_unlock(&sb_lock);
+unlock:
+	up(&bdev->bd_mount_sem);
+}
+EXPORT_SYMBOL(unlockfs);
+
+/*
  * Note: check the dirty flag before waiting, so we don't
  * hold up the sync while mounting a device. (The newly
  * mounted device won't need syncing.)
@@ -612,7 +668,14 @@
 	if (IS_ERR(bdev))
 		return (struct super_block *)bdev;
 
+	/*
+	 * once the super is inserted into the list by sget, s_umount
+	 * will protect the lockfs code from trying to start a snapshot
+	 * while we are mounting
+	 */
+	down(&bdev->bd_mount_sem);
 	s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
+	up(&bdev->bd_mount_sem);
 	if (IS_ERR(s))
 		goto out;
 
Index: linux.dm/fs/buffer.c
===================================================================
--- linux.dm.orig/fs/buffer.c	2004-02-27 15:47:36.139106189 -0500
+++ linux.dm/fs/buffer.c	2004-02-27 15:48:41.516739161 -0500
@@ -260,6 +260,17 @@
 	return sync_blockdev(bdev);
 }
 
+int fsync_bdev_lockfs(struct block_device *bdev)
+{
+	int res;
+	res = fsync_bdev(bdev);
+	if (res)
+		return res;
+	sync_super_lockfs(bdev);
+	return sync_blockdev(bdev);
+}
+EXPORT_SYMBOL(fsync_bdev_lockfs);
+
 /*
  * sync everything.  Start out by waking pdflush, because that writes back
  * all queues in parallel.
Index: linux.dm/drivers/md/dm.c
===================================================================
--- linux.dm.orig/drivers/md/dm.c	2004-02-27 15:48:38.171735120 -0500
+++ linux.dm/drivers/md/dm.c	2004-02-27 15:48:41.510740946 -0500
@@ -12,6 +12,7 @@
 #include <linux/moduleparam.h>
 #include <linux/blkpg.h>
 #include <linux/bio.h>
+#include <linux/buffer_head.h>
 #include <linux/mempool.h>
 #include <linux/slab.h>
 
@@ -51,6 +52,7 @@
  */
 #define DMF_BLOCK_IO 0
 #define DMF_SUSPENDED 1
+#define DMF_FS_LOCKED 2
 
 struct mapped_device {
 	struct rw_semaphore lock;
@@ -844,6 +846,24 @@
 	return 0;
 }
 
+static void __lock_disk(struct gendisk *disk)
+{
+	struct block_device *bdev = bdget_disk(disk, 0);
+	if (bdev) {
+		fsync_bdev_lockfs(bdev);
+		bdput(bdev);
+	}
+}
+
+static void __unlock_disk(struct gendisk *disk)
+{
+	struct block_device *bdev = bdget_disk(disk, 0);
+	if (bdev) {
+		unlockfs(bdev);
+		bdput(bdev);
+	}
+}
+
 /*
  * We need to be able to change a mapping table under a mounted
  * filesystem.  For example we might want to move some data in
@@ -855,12 +875,23 @@
 {
 	DECLARE_WAITQUEUE(wait, current);
 
-	down_write(&md->lock);
+	/* Flush I/O to the device. */
+	down_read(&md->lock);
+	if (test_bit(DMF_BLOCK_IO, &md->flags)) {
+		up_read(&md->lock);
+		return -EINVAL;
+	}
+
+	if (!test_and_set_bit(DMF_FS_LOCKED, &md->flags)) {
+		__lock_disk(md->disk);
+	}
+	up_read(&md->lock);
 
 	/*
 	 * First we set the BLOCK_IO flag so no more ios will be
 	 * mapped.
 	 */
+	down_write(&md->lock);
 	if (test_bit(DMF_BLOCK_IO, &md->flags)) {
 		up_write(&md->lock);
 		return -EINVAL;
@@ -910,11 +941,13 @@
 	dm_table_resume_targets(md->map);
 	clear_bit(DMF_SUSPENDED, &md->flags);
 	clear_bit(DMF_BLOCK_IO, &md->flags);
+	clear_bit(DMF_FS_LOCKED, &md->flags);
 
 	def = bio_list_get(&md->deferred);
 	__flush_deferred_io(md, def);
 	up_write(&md->lock);
 
+	__unlock_disk(md->disk);
 	blk_run_queues();
 
 	return 0;
Index: linux.dm/include/linux/fs.h
===================================================================
--- linux.dm.orig/include/linux/fs.h	2004-02-27 15:47:33.193980650 -0500
+++ linux.dm/include/linux/fs.h	2004-02-27 15:48:41.532734400 -0500
@@ -343,6 +343,7 @@
 	struct inode *		bd_inode;	/* will die */
 	int			bd_openers;
 	struct semaphore	bd_sem;	/* open/close mutex */
+	struct semaphore	bd_mount_sem;	/* mount mutex */
 	struct list_head	bd_inodes;
 	void *			bd_holder;
 	int			bd_holders;
@@ -1218,6 +1219,7 @@
 extern int filemap_flush(struct address_space *);
 extern int filemap_fdatawait(struct address_space *);
 extern void sync_supers(void);
+extern void sync_super_lockfs(struct block_device *);
 extern void sync_filesystems(int wait);
 extern void emergency_sync(void);
 extern void emergency_remount(void);
Index: linux.dm/include/linux/buffer_head.h
===================================================================
--- linux.dm.orig/include/linux/buffer_head.h	2004-02-05 16:56:30.000000000 -0500
+++ linux.dm/include/linux/buffer_head.h	2004-02-27 15:48:41.530734995 -0500
@@ -164,6 +164,8 @@
 wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
 void wake_up_buffer(struct buffer_head *bh);
 int fsync_bdev(struct block_device *);
+int fsync_bdev_lockfs(struct block_device *);
+void unlockfs(struct block_device *);
 int fsync_super(struct super_block *);
 int fsync_no_super(struct block_device *);
 struct buffer_head *__find_get_block(struct block_device *, sector_t, int);





^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] lockfs patch for 2.6
  2004-03-09 21:31 [PATCH] lockfs patch for 2.6 Chris Mason
@ 2004-03-12  9:31 ` Christoph Hellwig
  2004-03-12 15:50   ` Chris Mason
  2004-03-13 13:14 ` Christoph Hellwig
  1 sibling, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2004-03-12  9:31 UTC (permalink / raw)
  To: Chris Mason; +Cc: linux-kernel

On Tue, Mar 09, 2004 at 04:31:25PM -0500, Chris Mason wrote:
>  /*
> + * triggered by the device mapper code to lock a filesystem and force
> + * it into a consistent state.
> + *
> + * This takes the block device bd_mount_sem to make sure no new mounts
> + * happen on bdev until unlockfs is called.  If a super is found on this
> + * block device, we hould a read lock on the s->s_umount sem to make sure
> + * nobody unmounts until the snapshot creation is done
> + */
> +void sync_super_lockfs(struct block_device *bdev) 
> +{
> +	struct super_block *sb;
> +	down(&bdev->bd_mount_sem);
> +	sb = get_super(bdev);
> +	if (sb) {
> +		lock_super(sb);
> +		if (sb->s_dirt && sb->s_op->write_super)
> +			sb->s_op->write_super(sb);
> +		if (sb->s_op->write_super_lockfs)
> +			sb->s_op->write_super_lockfs(sb);

Can we please rename write_super_lockfs to a sane name?

freeze_fs/thaw_fs sounds like a good name.

> +void unlockfs(struct block_device *bdev)
> +{
> +	struct list_head *p;
> +	/*
> +	 * copied from get_super, but we need to
> +	 * do special things since lockfs left the
> +	 * s_umount sem held
> +	 */
> +	spin_lock(&sb_lock);
> +	list_for_each(p, &super_blocks) {
> +		struct super_block *s = sb_entry(p);
> +		/*
> +		 * if there is a super for this block device
> +		 * in the list, get_super must have found it
> +		 * during sync_super_lockfs, so our drop_super
> +		 * will drop the reference created there.
> +		 */
> +		if (s->s_bdev == bdev && s->s_root) {
> +			spin_unlock(&sb_lock);
> +			if (s->s_op->unlockfs)
> +				s->s_op->unlockfs(s);
> +			drop_super(s);
> +			goto unlock;
> +		}
> +	}
> +	spin_unlock(&sb_lock);
> +unlock:
> +	up(&bdev->bd_mount_sem);
> +}
> +EXPORT_SYMBOL(unlockfs);

This looks ugly.  What about returning the superblock from the freeze
routine so you can simply pass it into the thaw routine?

> ===================================================================
> --- linux.dm.orig/fs/buffer.c	2004-02-27 15:47:36.139106189 -0500
> +++ linux.dm/fs/buffer.c	2004-02-27 15:48:41.516739161 -0500
> @@ -260,6 +260,17 @@
>  	return sync_blockdev(bdev);
>  }
>  
> +int fsync_bdev_lockfs(struct block_device *bdev)
> +{
> +	int res;
> +	res = fsync_bdev(bdev);
> +	if (res)
> +		return res;
> +	sync_super_lockfs(bdev);
> +	return sync_blockdev(bdev);
> +}
> +EXPORT_SYMBOL(fsync_bdev_lockfs);

This looks grossly misnamed again.  And why do you need to have
sync_super_locks splitted out?  Calling it on it's own doesn't make much
sense.

> --- linux.dm.orig/include/linux/buffer_head.h	2004-02-05 16:56:30.000000000 -0500
> +++ linux.dm/include/linux/buffer_head.h	2004-02-27 15:48:41.530734995 -0500
> @@ -164,6 +164,8 @@
>  wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
>  void wake_up_buffer(struct buffer_head *bh);
>  int fsync_bdev(struct block_device *);
> +int fsync_bdev_lockfs(struct block_device *);
> +void unlockfs(struct block_device *);

Again rather misplaced.  Even a fs not using bufferheads at all would
benefit from the interface.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] lockfs patch for 2.6
  2004-03-12  9:31 ` Christoph Hellwig
@ 2004-03-12 15:50   ` Chris Mason
  2004-03-12 15:51     ` Christoph Hellwig
  0 siblings, 1 reply; 19+ messages in thread
From: Chris Mason @ 2004-03-12 15:50 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-kernel

On Fri, 2004-03-12 at 04:31, Christoph Hellwig wrote:

> Can we please rename write_super_lockfs to a sane name?
> 
> freeze_fs/thaw_fs sounds like a good name.
> 
Sure.

> This looks ugly.  What about returning the superblock from the freeze
> routine so you can simply pass it into the thaw routine?
> 
I like it, will do.

> 
> This looks grossly misnamed again.  And why do you need to have
> sync_super_locks splitted out?  Calling it on it's own doesn't make much
> sense.
> 

Would you like this better:

device mapper code:
	fsync_bdev(bdev);
	s = freeze_fs(bdev);
	< create snap shot >
	thaw_fs(bdev, s);

thaw_fs needs the bdev so it can up the bdev mount semaphore.

-chris



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] lockfs patch for 2.6
  2004-03-12 15:50   ` Chris Mason
@ 2004-03-12 15:51     ` Christoph Hellwig
  0 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2004-03-12 15:51 UTC (permalink / raw)
  To: Chris Mason; +Cc: Christoph Hellwig, linux-kernel

On Fri, Mar 12, 2004 at 10:50:53AM -0500, Chris Mason wrote:
> Would you like this better:
> 
> device mapper code:
> 	fsync_bdev(bdev);
> 	s = freeze_fs(bdev);
> 	< create snap shot >
> 	thaw_fs(bdev, s);

Hmm, I actually thought about moving the fsync_bdev into
freeze_fs, but having it separate might more sense indeed.

> thaw_fs needs the bdev so it can up the bdev mount semaphore.

sb->s_bdev should do it aswell


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] lockfs patch for 2.6
  2004-03-09 21:31 [PATCH] lockfs patch for 2.6 Chris Mason
  2004-03-12  9:31 ` Christoph Hellwig
@ 2004-03-13 13:14 ` Christoph Hellwig
  2004-03-13 15:20   ` Chris Mason
  1 sibling, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2004-03-13 13:14 UTC (permalink / raw)
  To: Chris Mason; +Cc: linux-kernel

On Tue, Mar 09, 2004 at 04:31:25PM -0500, Chris Mason wrote:
> Hello everyone,
> 
> In order to get consistent snapshots with the device mapper code, you
> need to sync and lock down any filesystems sitting on top of the
> device.  This isn't as critical in the 2.6 code since it can do writable
> snapshots, but it is still nice to have things properly synced and
> consistent.  
> 
> I've had various forms of this against 2.4, the ugly part was always the
> locking to make sure a new FS wasn't mounted on the source while the
> snapshot was being setup.  Here's my 2.6 version, with the DM code
> contributed by Kevin Corry.  The basic idea is to add a semaphore to the
> block device that gets used to make sure there are no new mounts.

Okay, I actually took a look at the XFS freeze code and it seems the
current infrastructure doesn't suite XFS very well.

What XFS currently does when freezing is

1.	set a flag in the mount structure that blocks all new writes
2.	flush all file data
3.	set a flag blocking all new transactions
4.	flush any dirty inode state into buffers
5.	push out all buffers to disk
6.	mark the filesystem clean

Now how does this fit into generic freeze/thaw fs structure?

1.	should probably move into the VFS (generic_file_write)

2,4,5	basically is fsync_bdev except that we have no chance to block
transaction that way.  So either we need two calls into the fs or have
some trivial state in the superblock that tells xfs to block transaction,
basically an enum { FS_UNFROZEN, FS_FROZEN_WRITE, FS_FROZEN_FULL };

and a function fs_check_frozen similar to xfs_check_frozen that makes the
caller block until the fs is unfrozen.

Doing it that way would get rid of lots of mess in XFS so I'm all for it :)

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] lockfs patch for 2.6
  2004-03-13 13:14 ` Christoph Hellwig
@ 2004-03-13 15:20   ` Chris Mason
  2004-03-13 16:33     ` Christoph Hellwig
  0 siblings, 1 reply; 19+ messages in thread
From: Chris Mason @ 2004-03-13 15:20 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-kernel

On Sat, 2004-03-13 at 08:14, Christoph Hellwig wrote:
> On Tue, Mar 09, 2004 at 04:31:25PM -0500, Chris Mason wrote:
> > Hello everyone,
> > 
> > In order to get consistent snapshots with the device mapper code, you
> > need to sync and lock down any filesystems sitting on top of the
> > device.  This isn't as critical in the 2.6 code since it can do writable
> > snapshots, but it is still nice to have things properly synced and
> > consistent.  
> > 
> > I've had various forms of this against 2.4, the ugly part was always the
> > locking to make sure a new FS wasn't mounted on the source while the
> > snapshot was being setup.  Here's my 2.6 version, with the DM code
> > contributed by Kevin Corry.  The basic idea is to add a semaphore to the
> > block device that gets used to make sure there are no new mounts.
> 
> Okay, I actually took a look at the XFS freeze code and it seems the
> current infrastructure doesn't suite XFS very well.
> 
> What XFS currently does when freezing is
> 
> 1.	set a flag in the mount structure that blocks all new writes
> 2.	flush all file data
> 3.	set a flag blocking all new transactions
> 4.	flush any dirty inode state into buffers
> 5.	push out all buffers to disk
> 6.	mark the filesystem clean
> 
> Now how does this fit into generic freeze/thaw fs structure?
> 
> 1.	should probably move into the VFS (generic_file_write)
> 
> 2,4,5	basically is fsync_bdev except that we have no chance to block
> transaction that way.  So either we need two calls into the fs or have
> some trivial state in the superblock that tells xfs to block transaction,
> basically an enum { FS_UNFROZEN, FS_FROZEN_WRITE, FS_FROZEN_FULL };
> 
> and a function fs_check_frozen similar to xfs_check_frozen that makes the
> caller block until the fs is unfrozen.
> 
> Doing it that way would get rid of lots of mess in XFS so I'm all for it :)

This is basically how reiserfs does it.  Various critical spots (like
the code to start a transaction) check to see if the FS is frozen.

I'll rework the patch as we've discussed on Friday, if you need it
broken up differently, please let me know.

-chris



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] lockfs patch for 2.6
  2004-03-13 15:20   ` Chris Mason
@ 2004-03-13 16:33     ` Christoph Hellwig
  2004-03-14 14:00       ` Christoph Hellwig
  0 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2004-03-13 16:33 UTC (permalink / raw)
  To: Chris Mason; +Cc: linux-kernel

On Sat, Mar 13, 2004 at 10:20:14AM -0500, Chris Mason wrote:
> This is basically how reiserfs does it.  Various critical spots (like
> the code to start a transaction) check to see if the FS is frozen.
> 
> I'll rework the patch as we've discussed on Friday, if you need it
> broken up differently, please let me know.

Here's the reworked patch I have, ignoring dm but converting the xfs-internal
interfaces that do freezing.  But without an interface change we still need
to do all the flushing a second time inside XFS which I absoutely dislike.

Let me think about how to do this nicer.

--- 1.155/fs/block_dev.c	Fri Mar 12 10:33:01 2004
+++ edited/fs/block_dev.c	Sat Mar 13 15:28:49 2004
@@ -251,6 +251,7 @@
 	{
 		memset(bdev, 0, sizeof(*bdev));
 		sema_init(&bdev->bd_sem, 1);
+		sema_init(&bdev->bd_mount_sem, 1);
 		INIT_LIST_HEAD(&bdev->bd_inodes);
 		INIT_LIST_HEAD(&bdev->bd_list);
 		inode_init_once(&ei->vfs_inode);
===== fs/buffer.c 1.224 vs edited =====
--- 1.224/fs/buffer.c	Sun Mar  7 08:16:11 2004
+++ edited/fs/buffer.c	Sat Mar 13 17:59:13 2004
@@ -260,6 +260,50 @@
 }
 
 /*
+ * triggered by the device mapper code to lock a filesystem and force
+ * it into a consistent state.
+ *
+ * This takes the block device bd_mount_sem to make sure no new mounts
+ * happen on bdev until unlockfs is called.  If a super is found on this
+ * block device, we hould a read lock on the s->s_umount sem to make sure
+ * nobody unmounts until the snapshot creation is done
+ */
+struct super_block *freeze_bdev(struct block_device *bdev)
+{
+	struct super_block *sb;
+
+	down(&bdev->bd_mount_sem);
+	sb = get_super(bdev);
+	if (sb && !(sb->s_flags & MS_RDONLY)) {
+		fsync_super(sb);
+		lock_super(sb);
+		if (sb->s_dirt && sb->s_op->write_super)
+			sb->s_op->write_super(sb);
+		if (sb->s_op->write_super_lockfs)
+			sb->s_op->write_super_lockfs(sb);
+		unlock_super(sb);
+	}
+
+	sync_blockdev(bdev);
+	return sb;	/* thaw_bdev releases s->s_umount and bd_mount_sem */
+}
+EXPORT_SYMBOL(freeze_bdev);
+
+void thaw_bdev(struct block_device *bdev, struct super_block *sb)
+{
+	if (sb) {
+		BUG_ON(sb->s_bdev != bdev);
+
+		if (sb->s_op->unlockfs)
+			sb->s_op->unlockfs(sb);
+		drop_super(sb);
+	}
+
+	up(&bdev->bd_mount_sem);
+}
+EXPORT_SYMBOL(thaw_bdev);
+
+/*
  * sync everything.  Start out by waking pdflush, because that writes back
  * all queues in parallel.
  */
===== fs/super.c 1.115 vs edited =====
--- 1.115/fs/super.c	Fri Mar 12 10:30:24 2004
+++ edited/fs/super.c	Sat Mar 13 15:30:35 2004
@@ -621,7 +621,14 @@
 	if (IS_ERR(bdev))
 		return (struct super_block *)bdev;
 
+	/*
+	 * once the super is inserted into the list by sget, s_umount
+	 * will protect the lockfs code from trying to start a snapshot
+	 * while we are mounting
+	 */
+	down(&bdev->bd_mount_sem);
 	s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
+	up(&bdev->bd_mount_sem);
 	if (IS_ERR(s))
 		goto out;
 
===== fs/xfs/xfs_fsops.c 1.11 vs edited =====
--- 1.11/fs/xfs/xfs_fsops.c	Fri Feb 27 07:28:05 2004
+++ edited/fs/xfs/xfs_fsops.c	Sat Mar 13 18:12:11 2004
@@ -582,15 +582,14 @@
 }
 
 int
-xfs_fs_freeze(
-	xfs_mount_t	*mp)
+xfs_freeze(
+	bhv_desc_t	*bdp)
 {
-	vfs_t		*vfsp;
+	vfs_t		*vfsp = bhvtovfs(bdp);
+	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
 	/*REFERENCED*/
 	int		error;
 
-	vfsp = XFS_MTOVFS(mp);
-
 	/* Stop new writers */
 	xfs_start_freeze(mp, XFS_FREEZE_WRITE);
 
@@ -619,12 +618,11 @@
 	return 0;
 }
 
-int
-xfs_fs_thaw(
-	xfs_mount_t	*mp)
+void
+xfs_thaw(
+	bhv_desc_t	*bdp)
 {
-	xfs_finish_freeze(mp);
-	return 0;
+	xfs_finish_freeze(XFS_BHVTOM(bdp));
 }
 
 int
@@ -632,13 +630,21 @@
 	xfs_mount_t	*mp,
 	__uint32_t	inflags)
 {
-	switch (inflags)
-	{
-	case XFS_FSOP_GOING_FLAGS_DEFAULT:
-		xfs_fs_freeze(mp);
-		xfs_force_shutdown(mp, XFS_FORCE_UMOUNT);
-		xfs_fs_thaw(mp);
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	switch (inflags) {
+	case XFS_FSOP_GOING_FLAGS_DEFAULT: {
+		struct vfs *vfsp = XFS_MTOVFS(mp);
+		struct super_block *sb = freeze_bdev(vfsp->vfs_super->s_bdev);
+
+		if (sb) {
+			xfs_force_shutdown(mp, XFS_FORCE_UMOUNT);
+			thaw_bdev(sb->s_bdev, sb);
+		}
+	
 		break;
+	}
 	case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
 		xfs_force_shutdown(mp, XFS_FORCE_UMOUNT);
 		break;
===== fs/xfs/xfs_fsops.h 1.3 vs edited =====
--- 1.3/fs/xfs/xfs_fsops.h	Fri Feb 27 07:28:05 2004
+++ edited/fs/xfs/xfs_fsops.h	Sat Mar 13 18:08:01 2004
@@ -60,12 +60,12 @@
 	xfs_fsop_resblks_t	*outval);
 
 int
-xfs_fs_freeze(
-	xfs_mount_t		*mp);
+xfs_freeze(
+	struct bhv_desc		*bdp);
 
-int
-xfs_fs_thaw(
-	xfs_mount_t		*mp);
+void
+xfs_thaw(
+	struct bhv_desc		*bdp);
 
 int
 xfs_fs_goingdown(
===== fs/xfs/linux/xfs_ioctl.c 1.21 vs edited =====
--- 1.21/fs/xfs/linux/xfs_ioctl.c	Fri Feb 27 07:28:05 2004
+++ edited/fs/xfs/linux/xfs_ioctl.c	Sat Mar 13 18:23:23 2004
@@ -825,13 +825,14 @@
 	case XFS_IOC_FREEZE:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
-		xfs_fs_freeze(mp);
+
+		freeze_bdev(inode->i_sb->s_bdev);
 		return 0;
 
 	case XFS_IOC_THAW:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
-		xfs_fs_thaw(mp);
+		thaw_bdev(inode->i_sb->s_bdev, inode->i_sb);
 		return 0;
 
 	case XFS_IOC_GOINGDOWN: {
===== fs/xfs/linux/xfs_super.c 1.70 vs edited =====
--- 1.70/fs/xfs/linux/xfs_super.c	Sat Mar  6 04:46:51 2004
+++ edited/fs/xfs/linux/xfs_super.c	Sat Mar 13 17:59:56 2004
@@ -590,14 +590,9 @@
 	struct super_block	*sb)
 {
 	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
-	vnode_t			*vp;
 	int			error;
 
-	if (sb->s_flags & MS_RDONLY)
-		return;
-	VFS_ROOT(vfsp, &vp, error);
-	VOP_IOCTL(vp, LINVFS_GET_IP(vp), NULL, 0, XFS_IOC_FREEZE, 0, error);
-	VN_RELE(vp);
+	VFS_FREEZE(vfsp, error);
 }
 
 STATIC void
@@ -605,12 +600,8 @@
 	struct super_block	*sb)
 {
 	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
-	vnode_t			*vp;
-	int			error;
 
-	VFS_ROOT(vfsp, &vp, error);
-	VOP_IOCTL(vp, LINVFS_GET_IP(vp), NULL, 0, XFS_IOC_THAW, 0, error);
-	VN_RELE(vp);
+	VFS_THAW(vfsp);
 }
 
 STATIC struct dentry *
===== fs/xfs/linux/xfs_vfs.c 1.11 vs edited =====
--- 1.11/fs/xfs/linux/xfs_vfs.c	Wed Mar  3 05:52:57 2004
+++ edited/fs/xfs/linux/xfs_vfs.c	Sat Mar 13 17:57:06 2004
@@ -230,6 +230,30 @@
 	((*bhvtovfsops(next)->vfs_force_shutdown)(next, fl, file, line));
 }
 
+int
+vfs_freeze(
+	struct bhv_desc		*bdp)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->vfs_freeze)
+		next = BHV_NEXT(next);
+	return ((*bhvtovfsops(next)->vfs_freeze)(next));
+}
+
+void
+vfs_thaw(
+	struct bhv_desc		*bdp)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->vfs_thaw)
+		next = BHV_NEXT(next);
+	((*bhvtovfsops(next)->vfs_thaw)(next));
+}
+
 vfs_t *
 vfs_allocate( void )
 {
===== fs/xfs/linux/xfs_vfs.h 1.18 vs edited =====
--- 1.18/fs/xfs/linux/xfs_vfs.h	Fri Jan  9 06:59:58 2004
+++ edited/fs/xfs/linux/xfs_vfs.h	Sat Mar 13 17:55:33 2004
@@ -112,6 +112,8 @@
 typedef void	(*vfs_init_vnode_t)(bhv_desc_t *,
 				struct vnode *, bhv_desc_t *, int);
 typedef void	(*vfs_force_shutdown_t)(bhv_desc_t *, int, char *, int);
+typedef int	(*vfs_freeze_t)(bhv_desc_t *);
+typedef void	(*vfs_thaw_t)(bhv_desc_t *);
 
 typedef struct vfsops {
 	bhv_position_t		vf_position;	/* behavior chain position */
@@ -128,6 +130,8 @@
 	vfs_quotactl_t		vfs_quotactl;	/* disk quota */
 	vfs_init_vnode_t	vfs_init_vnode;	/* initialize a new vnode */
 	vfs_force_shutdown_t	vfs_force_shutdown;	/* crash and burn */
+	vfs_freeze_t		vfs_freeze;	/* freeze fs for snapshot */
+	vfs_thaw_t		vfs_thaw;	/* .. and release it again */
 } vfsops_t;
 
 /*
@@ -147,6 +151,8 @@
 #define VFS_QUOTACTL(v, c,id,p, rv)	((rv) = vfs_quotactl(VHEAD(v), c,id,p))
 #define VFS_INIT_VNODE(v, vp,b,ul)	( vfs_init_vnode(VHEAD(v), vp,b,ul) )
 #define VFS_FORCE_SHUTDOWN(v, fl,f,l)	( vfs_force_shutdown(VHEAD(v), fl,f,l) )
+#define VFS_FREEZE(v, rv)		((rv) = vfs_freeze(VHEAD(v)))
+#define VFS_THAW(v)			( vfs_thaw(VHEAD(v)) )
 
 /*
  * PVFS's.  Operates on behavior descriptor pointers.
@@ -164,6 +170,8 @@
 #define PVFS_QUOTACTL(b, c,id,p, rv)	((rv) = vfs_quotactl(b, c,id,p))
 #define PVFS_INIT_VNODE(b, vp,b2,ul)	( vfs_init_vnode(b, vp,b2,ul) )
 #define PVFS_FORCE_SHUTDOWN(b, fl,f,l)	( vfs_force_shutdown(b, fl,f,l) )
+#define PVFS_FREEZE(b, rv)		((rv) = vfs_freeze(b))
+#define PVFS_THAW(b)			( vfs_thaw(b) )
 
 extern int vfs_mount(bhv_desc_t *, struct xfs_mount_args *, struct cred *);
 extern int vfs_parseargs(bhv_desc_t *, char *, struct xfs_mount_args *, int);
@@ -178,6 +186,8 @@
 extern int vfs_quotactl(bhv_desc_t *, int, int, caddr_t);
 extern void vfs_init_vnode(bhv_desc_t *, struct vnode *, bhv_desc_t *, int);
 extern void vfs_force_shutdown(bhv_desc_t *, int, char *, int);
+extern int vfs_freeze(bhv_desc_t *);
+extern void vfs_thaw(bhv_desc_t *);
 
 typedef struct bhv_vfsops {
 	struct vfsops		bhv_common;
===== include/linux/buffer_head.h 1.46 vs edited =====
--- 1.46/include/linux/buffer_head.h	Tue Jan 20 00:38:11 2004
+++ edited/include/linux/buffer_head.h	Sat Mar 13 15:35:32 2004
@@ -164,6 +164,8 @@
 wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
 void wake_up_buffer(struct buffer_head *bh);
 int fsync_bdev(struct block_device *);
+struct super_block *freeze_bdev(struct block_device *);
+void thaw_bdev(struct block_device *, struct super_block *);
 int fsync_super(struct super_block *);
 int fsync_no_super(struct block_device *);
 struct buffer_head *__find_get_block(struct block_device *, sector_t, int);
===== include/linux/fs.h 1.290 vs edited =====
--- 1.290/include/linux/fs.h	Fri Mar 12 10:32:59 2004
+++ edited/include/linux/fs.h	Sat Mar 13 15:35:44 2004
@@ -344,6 +344,7 @@
 	struct inode *		bd_inode;	/* will die */
 	int			bd_openers;
 	struct semaphore	bd_sem;	/* open/close mutex */
+	struct semaphore	bd_mount_sem;	/* mount mutex */
 	struct list_head	bd_inodes;
 	void *			bd_holder;
 	int			bd_holders;

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] lockfs patch for 2.6
  2004-03-13 16:33     ` Christoph Hellwig
@ 2004-03-14 14:00       ` Christoph Hellwig
  2004-03-14 15:23         ` Chris Mason
  2004-03-14 18:44         ` Andrew Morton
  0 siblings, 2 replies; 19+ messages in thread
From: Christoph Hellwig @ 2004-03-14 14:00 UTC (permalink / raw)
  To: Chris Mason, linux-kernel, linux-xfs

On Sat, Mar 13, 2004 at 04:33:46PM +0000, Christoph Hellwig wrote:
> Here's the reworked patch I have, ignoring dm but converting the xfs-internal
> interfaces that do freezing.  But without an interface change we still need
> to do all the flushing a second time inside XFS which I absoutely dislike.
> 
> Let me think about how to do this nicer.

Okay, here's a new patch that basically moves all the XFS freezing
infrastructure to the VFS, leavin only writing of the log record and some
transaction count handing inside XFS.

I've given it some heavy testing with xfs_freeze, dm is still not updated.

P.S. this patch now kills 16 lines of kernel code summarized :)

--- 1.155/fs/block_dev.c	Fri Mar 12 10:33:01 2004
+++ edited/fs/block_dev.c	Sat Mar 13 15:28:49 2004
@@ -251,6 +251,7 @@
 	{
 		memset(bdev, 0, sizeof(*bdev));
 		sema_init(&bdev->bd_sem, 1);
+		sema_init(&bdev->bd_mount_sem, 1);
 		INIT_LIST_HEAD(&bdev->bd_inodes);
 		INIT_LIST_HEAD(&bdev->bd_list);
 		inode_init_once(&ei->vfs_inode);
===== fs/buffer.c 1.224 vs edited =====
--- 1.224/fs/buffer.c	Sun Mar  7 08:16:11 2004
+++ edited/fs/buffer.c	Sun Mar 14 15:37:00 2004
@@ -260,6 +260,69 @@
 }
 
 /*
+ * triggered by the device mapper code to lock a filesystem and force
+ * it into a consistent state.
+ *
+ * This takes the block device bd_mount_sem to make sure no new mounts
+ * happen on bdev until unlockfs is called.  If a super is found on this
+ * block device, we hould a read lock on the s->s_umount sem to make sure
+ * nobody unmounts until the snapshot creation is done
+ */
+struct super_block *freeze_bdev(struct block_device *bdev)
+{
+	struct super_block *sb;
+
+	down(&bdev->bd_mount_sem);
+	sb = get_super(bdev);
+	if (sb && !(sb->s_flags & MS_RDONLY)) {
+		sb->s_frozen = SB_FREEZE_WRITE;
+		wmb();
+
+		sync_inodes_sb(sb, 0);
+		DQUOT_SYNC(sb);
+
+		sb->s_frozen = SB_FREEZE_TRANS;
+		wmb();
+		
+		lock_super(sb);
+		if (sb->s_dirt && sb->s_op->write_super)
+			sb->s_op->write_super(sb);
+		unlock_super(sb);
+
+		if (sb->s_op->sync_fs)
+			sb->s_op->sync_fs(sb, 1);
+	
+		sync_blockdev(sb->s_bdev);
+		sync_inodes_sb(sb, 1);
+		sync_blockdev(sb->s_bdev);
+
+		if (sb->s_op->write_super_lockfs)
+			sb->s_op->write_super_lockfs(sb);
+	}
+
+	sync_blockdev(bdev);
+	return sb;	/* thaw_bdev releases s->s_umount and bd_mount_sem */
+}
+EXPORT_SYMBOL(freeze_bdev);
+
+void thaw_bdev(struct block_device *bdev, struct super_block *sb)
+{
+	if (sb) {
+		BUG_ON(sb->s_bdev != bdev);
+
+		if (sb->s_op->unlockfs)
+			sb->s_op->unlockfs(sb);
+		sb->s_frozen = SB_UNFROZEN;
+		wmb();
+		wake_up(&sb->s_wait_unfrozen);
+		drop_super(sb);
+	}
+
+	up(&bdev->bd_mount_sem);
+}
+EXPORT_SYMBOL(thaw_bdev);
+
+/*
  * sync everything.  Start out by waking pdflush, because that writes back
  * all queues in parallel.
  */
===== fs/super.c 1.115 vs edited =====
--- 1.115/fs/super.c	Fri Mar 12 10:30:24 2004
+++ edited/fs/super.c	Sun Mar 14 14:24:56 2004
@@ -77,6 +77,7 @@
 		sema_init(&s->s_dquot.dqio_sem, 1);
 		sema_init(&s->s_dquot.dqonoff_sem, 1);
 		init_rwsem(&s->s_dquot.dqptr_sem);
+		init_waitqueue_head(&s->s_wait_unfrozen);
 		s->s_maxbytes = MAX_NON_LFS;
 		s->dq_op = sb_dquot_ops;
 		s->s_qcop = sb_quotactl_ops;
@@ -621,7 +622,14 @@
 	if (IS_ERR(bdev))
 		return (struct super_block *)bdev;
 
+	/*
+	 * once the super is inserted into the list by sget, s_umount
+	 * will protect the lockfs code from trying to start a snapshot
+	 * while we are mounting
+	 */
+	down(&bdev->bd_mount_sem);
 	s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
+	up(&bdev->bd_mount_sem);
 	if (IS_ERR(s))
 		goto out;
 
===== fs/xfs/xfs_fsops.c 1.11 vs edited =====
--- 1.11/fs/xfs/xfs_fsops.c	Fri Feb 27 07:28:05 2004
+++ edited/fs/xfs/xfs_fsops.c	Sun Mar 14 14:03:01 2004
@@ -582,63 +582,25 @@
 }
 
 int
-xfs_fs_freeze(
-	xfs_mount_t	*mp)
-{
-	vfs_t		*vfsp;
-	/*REFERENCED*/
-	int		error;
-
-	vfsp = XFS_MTOVFS(mp);
-
-	/* Stop new writers */
-	xfs_start_freeze(mp, XFS_FREEZE_WRITE);
-
-	/* Flush the refcache */
-	xfs_refcache_purge_mp(mp);
-
-	/* Flush delalloc and delwri data */
-	VFS_SYNC(vfsp, SYNC_DELWRI|SYNC_WAIT, NULL, error);
-
-	/* Pause transaction subsystem */
-	xfs_start_freeze(mp, XFS_FREEZE_TRANS);
-
-	/* Flush any remaining inodes into buffers */
-	VFS_SYNC(vfsp, SYNC_ATTR|SYNC_WAIT, NULL, error);
-
-	/* Push all buffers out to disk */
-	xfs_binval(mp->m_ddev_targp);
-	if (mp->m_rtdev_targp) {
-		xfs_binval(mp->m_rtdev_targp);
-	}
-
-	/* Push the superblock and write an unmount record */
-	xfs_log_unmount_write(mp);
-	xfs_unmountfs_writesb(mp);
-
-	return 0;
-}
-
-int
-xfs_fs_thaw(
-	xfs_mount_t	*mp)
-{
-	xfs_finish_freeze(mp);
-	return 0;
-}
-
-int
 xfs_fs_goingdown(
 	xfs_mount_t	*mp,
 	__uint32_t	inflags)
 {
-	switch (inflags)
-	{
-	case XFS_FSOP_GOING_FLAGS_DEFAULT:
-		xfs_fs_freeze(mp);
-		xfs_force_shutdown(mp, XFS_FORCE_UMOUNT);
-		xfs_fs_thaw(mp);
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	switch (inflags) {
+	case XFS_FSOP_GOING_FLAGS_DEFAULT: {
+		struct vfs *vfsp = XFS_MTOVFS(mp);
+		struct super_block *sb = freeze_bdev(vfsp->vfs_super->s_bdev);
+
+		if (sb) {
+			xfs_force_shutdown(mp, XFS_FORCE_UMOUNT);
+			thaw_bdev(sb->s_bdev, sb);
+		}
+	
 		break;
+	}
 	case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
 		xfs_force_shutdown(mp, XFS_FORCE_UMOUNT);
 		break;
===== fs/xfs/xfs_fsops.h 1.3 vs edited =====
--- 1.3/fs/xfs/xfs_fsops.h	Fri Feb 27 07:28:05 2004
+++ edited/fs/xfs/xfs_fsops.h	Sun Mar 14 14:03:33 2004
@@ -60,14 +60,6 @@
 	xfs_fsop_resblks_t	*outval);
 
 int
-xfs_fs_freeze(
-	xfs_mount_t		*mp);
-
-int
-xfs_fs_thaw(
-	xfs_mount_t		*mp);
-
-int
 xfs_fs_goingdown(
 	xfs_mount_t		*mp,
 	__uint32_t		inflags);
===== fs/xfs/xfs_log.c 1.34 vs edited =====
--- 1.34/fs/xfs/xfs_log.c	Sat Mar  6 04:16:30 2004
+++ edited/fs/xfs/xfs_log.c	Sun Mar 14 13:59:50 2004
@@ -820,7 +820,7 @@
 	xlog_t		*log = mp->m_log;
 	vfs_t		*vfsp = XFS_MTOVFS(mp);
 
-	if (mp->m_frozen || XFS_FORCED_SHUTDOWN(mp) ||
+	if (vfsp->vfs_super->s_frozen || XFS_FORCED_SHUTDOWN(mp) ||
 	    (vfsp->vfs_flag & VFS_RDONLY))
 		return 0;
 
===== fs/xfs/xfs_mount.c 1.41 vs edited =====
--- 1.41/fs/xfs/xfs_mount.c	Fri Feb 27 07:54:47 2004
+++ edited/fs/xfs/xfs_mount.c	Sun Mar 14 14:01:45 2004
@@ -139,9 +139,6 @@
 	 */
 	xfs_trans_ail_init(mp);
 
-	/* Init freeze sync structures */
-	spinlock_init(&mp->m_freeze_lock, "xfs_freeze");
-	init_sv(&mp->m_wait_unfreeze, SV_DEFAULT, "xfs_freeze", 0);
 	atomic_set(&mp->m_active_trans, 0);
 
 	return mp;
@@ -191,8 +188,6 @@
 		VFS_REMOVEBHV(vfsp, &mp->m_bhv);
 	}
 
-	spinlock_destroy(&mp->m_freeze_lock);
-	sv_destroy(&mp->m_wait_unfreeze);
 	kmem_free(mp, sizeof(xfs_mount_t));
 }
 
@@ -1584,60 +1579,4 @@
 	}
 	xfs_mod_sb(tp, fields);
 	xfs_trans_commit(tp, 0, NULL);
-}
-
-/* Functions to lock access out of the filesystem for forced
- * shutdown or snapshot.
- */
-
-void
-xfs_start_freeze(
-	xfs_mount_t	*mp,
-	int		level)
-{
-	unsigned long	s = mutex_spinlock(&mp->m_freeze_lock);
-
-	mp->m_frozen = level;
-	mutex_spinunlock(&mp->m_freeze_lock, s);
-
-	if (level == XFS_FREEZE_TRANS) {
-		while (atomic_read(&mp->m_active_trans) > 0)
-			delay(100);
-	}
-}
-
-void
-xfs_finish_freeze(
-	xfs_mount_t	*mp)
-{
-	unsigned long	s = mutex_spinlock(&mp->m_freeze_lock);
-
-	if (mp->m_frozen) {
-		mp->m_frozen = 0;
-		sv_broadcast(&mp->m_wait_unfreeze);
-	}
-
-	mutex_spinunlock(&mp->m_freeze_lock, s);
-}
-
-void
-xfs_check_frozen(
-	xfs_mount_t	*mp,
-	bhv_desc_t	*bdp,
-	int		level)
-{
-	unsigned long	s;
-
-	if (mp->m_frozen) {
-		s = mutex_spinlock(&mp->m_freeze_lock);
-
-		if (mp->m_frozen < level) {
-			mutex_spinunlock(&mp->m_freeze_lock, s);
-		} else {
-			sv_wait(&mp->m_wait_unfreeze, 0, &mp->m_freeze_lock, s);
-		}
-	}
-
-	if (level == XFS_FREEZE_TRANS)
-		atomic_inc(&mp->m_active_trans);
 }
===== fs/xfs/xfs_mount.h 1.25 vs edited =====
--- 1.25/fs/xfs/xfs_mount.h	Wed Mar  3 05:52:57 2004
+++ edited/fs/xfs/xfs_mount.h	Sun Mar 14 14:02:01 2004
@@ -379,10 +379,6 @@
 	struct xfs_dmops	m_dm_ops;	/* vector of DMI ops */
 	struct xfs_qmops	m_qm_ops;	/* vector of XQM ops */
 	struct xfs_ioops	m_io_ops;	/* vector of I/O ops */
-	lock_t			m_freeze_lock;	/* Lock for m_frozen */
-	uint			m_frozen;	/* FS frozen for shutdown or
-						 * snapshot */
-	sv_t			m_wait_unfreeze;/* waiting to unfreeze */
 	atomic_t		m_active_trans;	/* number trans frozen */
 } xfs_mount_t;
 
@@ -557,16 +553,6 @@
 extern void	xfs_initialize_perag(xfs_mount_t *, int);
 extern void	xfs_xlatesb(void *, struct xfs_sb *, int, xfs_arch_t,
 			__int64_t);
-
-/*
- * Flags for freeze operations.
- */
-#define XFS_FREEZE_WRITE	1
-#define XFS_FREEZE_TRANS	2
-
-extern void	xfs_start_freeze(xfs_mount_t *, int);
-extern void	xfs_finish_freeze(xfs_mount_t *);
-extern void	xfs_check_frozen(xfs_mount_t *, bhv_desc_t *, int);
 
 extern struct vfsops xfs_vfsops;
 extern struct vnodeops xfs_vnodeops;
===== fs/xfs/xfs_trans.c 1.14 vs edited =====
--- 1.14/fs/xfs/xfs_trans.c	Fri Jan  9 00:18:09 2004
+++ edited/fs/xfs/xfs_trans.c	Sun Mar 14 14:01:41 2004
@@ -131,7 +131,9 @@
 	xfs_mount_t	*mp,
 	uint		type)
 {
-	xfs_check_frozen(mp, NULL, XFS_FREEZE_TRANS);
+	vfs_check_frozen(XFS_MTOVFS(mp)->vfs_super, SB_FREEZE_TRANS);
+	atomic_inc(&mp->m_active_trans);
+
 	return (_xfs_trans_alloc(mp, type));
 
 }
===== fs/xfs/xfs_vfsops.c 1.57 vs edited =====
--- 1.57/fs/xfs/xfs_vfsops.c	Wed Mar  3 05:52:58 2004
+++ edited/fs/xfs/xfs_vfsops.c	Sun Mar 14 14:42:21 2004
@@ -1544,6 +1544,11 @@
 		}
 	}
 
+	if (XFS_MTOVFS(mp)->vfs_super->s_frozen == SB_FREEZE_TRANS) {
+		while (atomic_read(&mp->m_active_trans) > 0)
+			delay(100);
+	}
+
 	return XFS_ERROR(last_error);
 }
 
@@ -1853,6 +1858,17 @@
 	return 0;
 }
 
+STATIC void
+xfs_freeze(
+	bhv_desc_t	*bdp)
+{
+	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
+
+	/* Push the superblock and write an unmount record */
+	xfs_log_unmount_write(mp);
+	xfs_unmountfs_writesb(mp);
+}
+
 
 vfsops_t xfs_vfsops = {
 	BHV_IDENTITY_INIT(VFS_BHV_XFS,VFS_POSITION_XFS),
@@ -1869,4 +1885,5 @@
 	.vfs_quotactl		= (vfs_quotactl_t)fs_nosys,
 	.vfs_init_vnode		= xfs_initialize_vnode,
 	.vfs_force_shutdown	= xfs_do_force_shutdown,
+	.vfs_freeze		= xfs_freeze,
 };
===== fs/xfs/linux/xfs_ioctl.c 1.21 vs edited =====
--- 1.21/fs/xfs/linux/xfs_ioctl.c	Fri Feb 27 07:28:05 2004
+++ edited/fs/xfs/linux/xfs_ioctl.c	Sat Mar 13 18:23:23 2004
@@ -825,13 +825,14 @@
 	case XFS_IOC_FREEZE:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
-		xfs_fs_freeze(mp);
+
+		freeze_bdev(inode->i_sb->s_bdev);
 		return 0;
 
 	case XFS_IOC_THAW:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
-		xfs_fs_thaw(mp);
+		thaw_bdev(inode->i_sb->s_bdev, inode->i_sb);
 		return 0;
 
 	case XFS_IOC_GOINGDOWN: {
===== fs/xfs/linux/xfs_lrw.c 1.40 vs edited =====
--- 1.40/fs/xfs/linux/xfs_lrw.c	Wed Mar  3 05:52:57 2004
+++ edited/fs/xfs/linux/xfs_lrw.c	Sun Mar 14 14:47:00 2004
@@ -682,8 +682,6 @@
 	io = &xip->i_iocore;
 	mp = io->io_mount;
 
-	xfs_check_frozen(mp, bdp, XFS_FREEZE_WRITE);
-
 	if (XFS_FORCED_SHUTDOWN(mp)) {
 		return -EIO;
 	}
===== fs/xfs/linux/xfs_super.c 1.70 vs edited =====
--- 1.70/fs/xfs/linux/xfs_super.c	Sat Mar  6 04:46:51 2004
+++ edited/fs/xfs/linux/xfs_super.c	Sun Mar 14 14:43:46 2004
@@ -589,28 +589,7 @@
 linvfs_freeze_fs(
 	struct super_block	*sb)
 {
-	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
-	vnode_t			*vp;
-	int			error;
-
-	if (sb->s_flags & MS_RDONLY)
-		return;
-	VFS_ROOT(vfsp, &vp, error);
-	VOP_IOCTL(vp, LINVFS_GET_IP(vp), NULL, 0, XFS_IOC_FREEZE, 0, error);
-	VN_RELE(vp);
-}
-
-STATIC void
-linvfs_unfreeze_fs(
-	struct super_block	*sb)
-{
-	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
-	vnode_t			*vp;
-	int			error;
-
-	VFS_ROOT(vfsp, &vp, error);
-	VOP_IOCTL(vp, LINVFS_GET_IP(vp), NULL, 0, XFS_IOC_THAW, 0, error);
-	VN_RELE(vp);
+	VFS_FREEZE(LINVFS_GET_VFS(sb));
 }
 
 STATIC struct dentry *
@@ -850,7 +829,6 @@
 	.write_super		= linvfs_write_super,
 	.sync_fs		= linvfs_sync_super,
 	.write_super_lockfs	= linvfs_freeze_fs,
-	.unlockfs		= linvfs_unfreeze_fs,
 	.statfs			= linvfs_statfs,
 	.remount_fs		= linvfs_remount,
 	.show_options		= linvfs_show_options,
===== fs/xfs/linux/xfs_vfs.c 1.11 vs edited =====
--- 1.11/fs/xfs/linux/xfs_vfs.c	Wed Mar  3 05:52:57 2004
+++ edited/fs/xfs/linux/xfs_vfs.c	Sun Mar 14 14:43:15 2004
@@ -230,6 +230,18 @@
 	((*bhvtovfsops(next)->vfs_force_shutdown)(next, fl, file, line));
 }
 
+void
+vfs_freeze(
+	struct bhv_desc		*bdp)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->vfs_freeze)
+		next = BHV_NEXT(next);
+	((*bhvtovfsops(next)->vfs_freeze)(next));
+}
+
 vfs_t *
 vfs_allocate( void )
 {
===== fs/xfs/linux/xfs_vfs.h 1.18 vs edited =====
--- 1.18/fs/xfs/linux/xfs_vfs.h	Fri Jan  9 06:59:58 2004
+++ edited/fs/xfs/linux/xfs_vfs.h	Sun Mar 14 14:43:07 2004
@@ -112,6 +112,7 @@
 typedef void	(*vfs_init_vnode_t)(bhv_desc_t *,
 				struct vnode *, bhv_desc_t *, int);
 typedef void	(*vfs_force_shutdown_t)(bhv_desc_t *, int, char *, int);
+typedef void	(*vfs_freeze_t)(bhv_desc_t *);
 
 typedef struct vfsops {
 	bhv_position_t		vf_position;	/* behavior chain position */
@@ -128,6 +129,7 @@
 	vfs_quotactl_t		vfs_quotactl;	/* disk quota */
 	vfs_init_vnode_t	vfs_init_vnode;	/* initialize a new vnode */
 	vfs_force_shutdown_t	vfs_force_shutdown;	/* crash and burn */
+	vfs_freeze_t		vfs_freeze;	/* freeze fs for snapshot */
 } vfsops_t;
 
 /*
@@ -147,6 +149,7 @@
 #define VFS_QUOTACTL(v, c,id,p, rv)	((rv) = vfs_quotactl(VHEAD(v), c,id,p))
 #define VFS_INIT_VNODE(v, vp,b,ul)	( vfs_init_vnode(VHEAD(v), vp,b,ul) )
 #define VFS_FORCE_SHUTDOWN(v, fl,f,l)	( vfs_force_shutdown(VHEAD(v), fl,f,l) )
+#define VFS_FREEZE(v)			( vfs_freeze(VHEAD(v)) )
 
 /*
  * PVFS's.  Operates on behavior descriptor pointers.
@@ -164,6 +167,7 @@
 #define PVFS_QUOTACTL(b, c,id,p, rv)	((rv) = vfs_quotactl(b, c,id,p))
 #define PVFS_INIT_VNODE(b, vp,b2,ul)	( vfs_init_vnode(b, vp,b2,ul) )
 #define PVFS_FORCE_SHUTDOWN(b, fl,f,l)	( vfs_force_shutdown(b, fl,f,l) )
+#define PVFS_FREEZE(b)			( vfs_freeze(b) )
 
 extern int vfs_mount(bhv_desc_t *, struct xfs_mount_args *, struct cred *);
 extern int vfs_parseargs(bhv_desc_t *, char *, struct xfs_mount_args *, int);
@@ -178,6 +182,7 @@
 extern int vfs_quotactl(bhv_desc_t *, int, int, caddr_t);
 extern void vfs_init_vnode(bhv_desc_t *, struct vnode *, bhv_desc_t *, int);
 extern void vfs_force_shutdown(bhv_desc_t *, int, char *, int);
+extern void vfs_freeze(bhv_desc_t *);
 
 typedef struct bhv_vfsops {
 	struct vfsops		bhv_common;
===== include/linux/buffer_head.h 1.46 vs edited =====
--- 1.46/include/linux/buffer_head.h	Tue Jan 20 00:38:11 2004
+++ edited/include/linux/buffer_head.h	Sat Mar 13 15:35:32 2004
@@ -164,6 +164,8 @@
 wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
 void wake_up_buffer(struct buffer_head *bh);
 int fsync_bdev(struct block_device *);
+struct super_block *freeze_bdev(struct block_device *);
+void thaw_bdev(struct block_device *, struct super_block *);
 int fsync_super(struct super_block *);
 int fsync_no_super(struct block_device *);
 struct buffer_head *__find_get_block(struct block_device *, sector_t, int);
===== include/linux/fs.h 1.290 vs edited =====
--- 1.290/include/linux/fs.h	Fri Mar 12 10:32:59 2004
+++ edited/include/linux/fs.h	Sun Mar 14 14:08:14 2004
@@ -344,6 +344,7 @@
 	struct inode *		bd_inode;	/* will die */
 	int			bd_openers;
 	struct semaphore	bd_sem;	/* open/close mutex */
+	struct semaphore	bd_mount_sem;	/* mount mutex */
 	struct list_head	bd_inodes;
 	void *			bd_holder;
 	int			bd_holders;
@@ -712,6 +713,9 @@
 	struct list_head	s_instances;
 	struct quota_info	s_dquot;	/* Diskquota specific options */
 
+	int			s_frozen;
+	wait_queue_head_t	s_wait_unfrozen;
+
 	char s_id[32];				/* Informational name */
 
 	struct kobject           kobj;          /* anchor for sysfs */
@@ -723,6 +727,18 @@
 	 */
 	struct semaphore s_vfs_rename_sem;	/* Kludge */
 };
+
+/*
+ * Snapshotting support.
+ */
+enum {
+	SB_UNFROZEN = 0,
+	SB_FREEZE_WRITE	= 1,
+	SB_FREEZE_TRANS = 2,
+};
+
+#define vfs_check_frozen(sb, level) \
+	wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
 
 /*
  * Superblock locking.
===== mm/filemap.c 1.225 vs edited =====
--- 1.225/mm/filemap.c	Mon Mar  8 15:21:17 2004
+++ edited/mm/filemap.c	Sun Mar 14 14:16:40 2004
@@ -1746,6 +1746,8 @@
 	unsigned long	seg;
 	char __user	*buf;
 
+	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+
 	ocount = 0;
 	for (seg = 0; seg < nr_segs; seg++) {
 		const struct iovec *iv = &iov[seg];

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] lockfs patch for 2.6
  2004-03-14 14:00       ` Christoph Hellwig
@ 2004-03-14 15:23         ` Chris Mason
  2004-03-26 10:25           ` Christoph Hellwig
  2004-03-14 18:44         ` Andrew Morton
  1 sibling, 1 reply; 19+ messages in thread
From: Chris Mason @ 2004-03-14 15:23 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-kernel, linux-xfs

On Sun, 2004-03-14 at 09:00, Christoph Hellwig wrote:
> On Sat, Mar 13, 2004 at 04:33:46PM +0000, Christoph Hellwig wrote:
> > Here's the reworked patch I have, ignoring dm but converting the xfs-internal
> > interfaces that do freezing.  But without an interface change we still need
> > to do all the flushing a second time inside XFS which I absoutely dislike.
> > 
> > Let me think about how to do this nicer.
> 
> Okay, here's a new patch that basically moves all the XFS freezing
> infrastructure to the VFS, leavin only writing of the log record and some
> transaction count handing inside XFS.
> 
> I've given it some heavy testing with xfs_freeze, dm is still not updated.
> 
> P.S. this patch now kills 16 lines of kernel code summarized :)
> 

It looks good, I'll give it a try.

-chris



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] lockfs patch for 2.6
  2004-03-14 14:00       ` Christoph Hellwig
  2004-03-14 15:23         ` Chris Mason
@ 2004-03-14 18:44         ` Andrew Morton
  2004-03-14 18:52           ` Christoph Hellwig
  2004-03-14 18:56           ` Chris Mason
  1 sibling, 2 replies; 19+ messages in thread
From: Andrew Morton @ 2004-03-14 18:44 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: mason, linux-kernel, linux-xfs

Christoph Hellwig <hch@infradead.org> wrote:
>
> + * This takes the block device bd_mount_sem to make sure no new mounts
>  + * happen on bdev until unlockfs is called.  If a super is found on this
>  + * block device, we hould a read lock on the s->s_umount sem to make sure
>  + * nobody unmounts until the snapshot creation is done
>  + */
>  +struct super_block *freeze_bdev(struct block_device *bdev)

I think you do need s_umount, as the comments say.  But this patch doesn't
touch it.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] lockfs patch for 2.6
  2004-03-14 18:44         ` Andrew Morton
@ 2004-03-14 18:52           ` Christoph Hellwig
  2004-03-14 18:56           ` Chris Mason
  1 sibling, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2004-03-14 18:52 UTC (permalink / raw)
  To: Andrew Morton; +Cc: mason, linux-kernel, linux-xfs

On Sun, Mar 14, 2004 at 10:44:39AM -0800, Andrew Morton wrote:
> > + * This takes the block device bd_mount_sem to make sure no new mounts
> >  + * happen on bdev until unlockfs is called.  If a super is found on this
> >  + * block device, we hould a read lock on the s->s_umount sem to make sure
> >  + * nobody unmounts until the snapshot creation is done
> >  + */
> >  +struct super_block *freeze_bdev(struct block_device *bdev)
> 
> I think you do need s_umount, as the comments say.  But this patch doesn't
> touch it.

get_super takes it for us.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] lockfs patch for 2.6
  2004-03-14 18:44         ` Andrew Morton
  2004-03-14 18:52           ` Christoph Hellwig
@ 2004-03-14 18:56           ` Chris Mason
  1 sibling, 0 replies; 19+ messages in thread
From: Chris Mason @ 2004-03-14 18:56 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Christoph Hellwig, linux-kernel, linux-xfs

On Sun, 2004-03-14 at 13:44, Andrew Morton wrote:
> Christoph Hellwig <hch@infradead.org> wrote:
> >
> > + * This takes the block device bd_mount_sem to make sure no new mounts
> >  + * happen on bdev until unlockfs is called.  If a super is found on this
> >  + * block device, we hould a read lock on the s->s_umount sem to make sure
> >  + * nobody unmounts until the snapshot creation is done
> >  + */
> >  +struct super_block *freeze_bdev(struct block_device *bdev)
> 
> I think you do need s_umount, as the comments say.  But this patch doesn't
> touch it.

get_super gives us a read lock on it.

-chris



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] lockfs patch for 2.6
  2004-03-14 15:23         ` Chris Mason
@ 2004-03-26 10:25           ` Christoph Hellwig
  2004-03-26 13:28             ` Chris Mason
  2004-04-01 20:35             ` Chris Mason
  0 siblings, 2 replies; 19+ messages in thread
From: Christoph Hellwig @ 2004-03-26 10:25 UTC (permalink / raw)
  To: Chris Mason; +Cc: linux-kernel

On Sun, Mar 14, 2004 at 10:23:31AM -0500, Chris Mason wrote:
> > P.S. this patch now kills 16 lines of kernel code summarized :)
> > 
> 
> It looks good, I'll give it a try.

ping?  I've seen you merged the old patch into the suse tree, and having
shipping distros with incompatible APIs doesn't sound exactly like a good
idea..


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] lockfs patch for 2.6
  2004-03-26 10:25           ` Christoph Hellwig
@ 2004-03-26 13:28             ` Chris Mason
  2004-04-01 20:35             ` Chris Mason
  1 sibling, 0 replies; 19+ messages in thread
From: Chris Mason @ 2004-03-26 13:28 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-kernel

On Fri, 2004-03-26 at 05:25, Christoph Hellwig wrote:
> On Sun, Mar 14, 2004 at 10:23:31AM -0500, Chris Mason wrote:
> > > P.S. this patch now kills 16 lines of kernel code summarized :)
> > > 
> > 
> > It looks good, I'll give it a try.
> 
> ping?  I've seen you merged the old patch into the suse tree, and having
> shipping distros with incompatible APIs doesn't sound exactly like a good
> idea..

I'm replacing the old one in the suse tree, I just got sucked into to a
few critical tasks.  Yours is the way to go.

-chris



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] lockfs patch for 2.6
  2004-03-26 10:25           ` Christoph Hellwig
  2004-03-26 13:28             ` Chris Mason
@ 2004-04-01 20:35             ` Chris Mason
  2004-04-01 22:32               ` Kevin Corry
  2004-04-02 20:00               ` Kevin Corry
  1 sibling, 2 replies; 19+ messages in thread
From: Chris Mason @ 2004-04-01 20:35 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-kernel, thornber, kevcorry

[-- Attachment #1: Type: text/plain, Size: 961 bytes --]

On Fri, 2004-03-26 at 05:25, Christoph Hellwig wrote:
> On Sun, Mar 14, 2004 at 10:23:31AM -0500, Chris Mason wrote:
> > > P.S. this patch now kills 16 lines of kernel code summarized :)
> > > 
> > 
> > It looks good, I'll give it a try.
> 
> ping?  I've seen you merged the old patch into the suse tree, and having
> shipping distros with incompatible APIs doesn't sound exactly like a good
> idea..

Christoph's vfs patch looks good, I've stripped out the XFS bits (FS
parts should probably be in different patches) and made one small
change.  freeze/thaw now check to make sure bdev != NULL.

On the device mapper side, I had to add a struct super_block pointer to
the dm struct and changed it to pin the bdev struct during the freeze. 
Before, it could go away due to bdput, leading to deadlock when someone
else got a bdev struct with bd_mount_sem held.

New patches attached, along with an incremental to clearly show my
changes to the dm patch.

-chris


[-- Attachment #2: vfs-lockfs-2 --]
[-- Type: text/plain, Size: 5694 bytes --]

--- 1.225/mm/filemap.c	Mon Mar  8 15:21:17 2004
+++ edited/mm/filemap.c	Sun Mar 14 14:16:40 2004
@@ -1746,6 +1746,8 @@
 	unsigned long	seg;
 	char __user	*buf;
 
+	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+
 	ocount = 0;
 	for (seg = 0; seg < nr_segs; seg++) {
 		const struct iovec *iv = &iov[seg];


Index: linux.t/fs/block_dev.c
===================================================================
--- linux.t.orig/fs/block_dev.c	2004-04-01 08:55:22.000000000 -0500
+++ linux.t/fs/block_dev.c	2004-04-01 09:05:36.000000000 -0500
@@ -251,6 +251,7 @@ static void init_once(void * foo, kmem_c
 	{
 		memset(bdev, 0, sizeof(*bdev));
 		sema_init(&bdev->bd_sem, 1);
+		sema_init(&bdev->bd_mount_sem, 1);
 		INIT_LIST_HEAD(&bdev->bd_inodes);
 		INIT_LIST_HEAD(&bdev->bd_list);
 		inode_init_once(&ei->vfs_inode);
Index: linux.t/fs/super.c
===================================================================
--- linux.t.orig/fs/super.c	2004-04-01 08:54:28.000000000 -0500
+++ linux.t/fs/super.c	2004-04-01 09:05:36.000000000 -0500
@@ -77,6 +77,7 @@ static struct super_block *alloc_super(v
 		sema_init(&s->s_dquot.dqio_sem, 1);
 		sema_init(&s->s_dquot.dqonoff_sem, 1);
 		init_rwsem(&s->s_dquot.dqptr_sem);
+		init_waitqueue_head(&s->s_wait_unfrozen);
 		s->s_maxbytes = MAX_NON_LFS;
 		s->dq_op = sb_dquot_ops;
 		s->s_qcop = sb_quotactl_ops;
@@ -621,7 +622,14 @@ struct super_block *get_sb_bdev(struct f
 	if (IS_ERR(bdev))
 		return (struct super_block *)bdev;
 
+	/*
+	 * once the super is inserted into the list by sget, s_umount
+	 * will protect the lockfs code from trying to start a snapshot
+	 * while we are mounting
+	 */
+	down(&bdev->bd_mount_sem);
 	s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
+	up(&bdev->bd_mount_sem);
 	if (IS_ERR(s))
 		goto out;
 
Index: linux.t/fs/buffer.c
===================================================================
--- linux.t.orig/fs/buffer.c	2004-04-01 08:55:22.000000000 -0500
+++ linux.t/fs/buffer.c	2004-04-01 13:27:01.478991040 -0500
@@ -265,6 +265,73 @@ int fsync_bdev(struct block_device *bdev
 }
 
 /*
+ * triggered by the device mapper code to lock a filesystem and force
+ * it into a consistent state.
+ *
+ * This takes the block device bd_mount_sem to make sure no new mounts
+ * happen on bdev until unlockfs is called.  If a super is found on this
+ * block device, we hould a read lock on the s->s_umount sem to make sure
+ * nobody unmounts until the snapshot creation is done
+ */
+struct super_block *freeze_bdev(struct block_device *bdev)
+{
+	struct super_block *sb;
+
+	if (!bdev)
+		return NULL;
+	down(&bdev->bd_mount_sem);
+	sb = get_super(bdev);
+	if (sb && !(sb->s_flags & MS_RDONLY)) {
+		sb->s_frozen = SB_FREEZE_WRITE;
+		wmb();
+
+		sync_inodes_sb(sb, 0);
+		DQUOT_SYNC(sb);
+
+		sb->s_frozen = SB_FREEZE_TRANS;
+		wmb();
+		
+		lock_super(sb);
+		if (sb->s_dirt && sb->s_op->write_super)
+			sb->s_op->write_super(sb);
+		unlock_super(sb);
+
+		if (sb->s_op->sync_fs)
+			sb->s_op->sync_fs(sb, 1);
+	
+		sync_blockdev(sb->s_bdev);
+		sync_inodes_sb(sb, 1);
+		sync_blockdev(sb->s_bdev);
+
+		if (sb->s_op->write_super_lockfs)
+			sb->s_op->write_super_lockfs(sb);
+	}
+
+	sync_blockdev(bdev);
+	return sb;	/* thaw_bdev releases s->s_umount and bd_mount_sem */
+}
+EXPORT_SYMBOL(freeze_bdev);
+
+void thaw_bdev(struct block_device *bdev, struct super_block *sb)
+{
+	if (!bdev)
+		return;
+	if (sb) {
+		BUG_ON(sb->s_bdev != bdev);
+
+		if (sb->s_op->unlockfs)
+			sb->s_op->unlockfs(sb);
+		sb->s_frozen = SB_UNFROZEN;
+		wmb();
+		wake_up(&sb->s_wait_unfrozen);
+		drop_super(sb);
+	}
+
+	up(&bdev->bd_mount_sem);
+}
+EXPORT_SYMBOL(thaw_bdev);
+
+/*
  * sync everything.  Start out by waking pdflush, because that writes back
  * all queues in parallel.
  */
Index: linux.t/include/linux/fs.h
===================================================================
--- linux.t.orig/include/linux/fs.h	2004-04-01 08:55:22.000000000 -0500
+++ linux.t/include/linux/fs.h	2004-04-01 09:05:36.000000000 -0500
@@ -344,6 +344,7 @@ struct block_device {
 	struct inode *		bd_inode;	/* will die */
 	int			bd_openers;
 	struct semaphore	bd_sem;	/* open/close mutex */
+	struct semaphore	bd_mount_sem;	/* mount mutex */
 	struct list_head	bd_inodes;
 	void *			bd_holder;
 	int			bd_holders;
@@ -722,6 +723,9 @@ struct super_block {
 	struct list_head	s_instances;
 	struct quota_info	s_dquot;	/* Diskquota specific options */
 
+	int			s_frozen;
+	wait_queue_head_t	s_wait_unfrozen;
+
 	char s_id[32];				/* Informational name */
 
 	struct kobject           kobj;          /* anchor for sysfs */
@@ -735,6 +739,18 @@ struct super_block {
 };
 
 /*
+ * Snapshotting support.
+ */
+enum {
+	SB_UNFROZEN = 0,
+	SB_FREEZE_WRITE	= 1,
+	SB_FREEZE_TRANS = 2,
+};
+
+#define vfs_check_frozen(sb, level) \
+	wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
+
+/*
  * Superblock locking.
  */
 static inline void lock_super(struct super_block * sb)
Index: linux.t/include/linux/buffer_head.h
===================================================================
--- linux.t.orig/include/linux/buffer_head.h	2004-02-05 16:56:30.000000000 -0500
+++ linux.t/include/linux/buffer_head.h	2004-04-01 09:05:36.000000000 -0500
@@ -164,6 +164,8 @@ void __wait_on_buffer(struct buffer_head
 wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
 void wake_up_buffer(struct buffer_head *bh);
 int fsync_bdev(struct block_device *);
+struct super_block *freeze_bdev(struct block_device *);
+void thaw_bdev(struct block_device *, struct super_block *);
 int fsync_super(struct super_block *);
 int fsync_no_super(struct block_device *);
 struct buffer_head *__find_get_block(struct block_device *, sector_t, int);

[-- Attachment #3: dm-09-dm-lockfs --]
[-- Type: text/plain, Size: 3216 bytes --]

Lock the filesystem while a device is suspended.  [Kevin Corry, Joe Thornber]
Index: linux.t/drivers/md/dm.c
===================================================================
--- linux.t.orig/drivers/md/dm.c	2004-04-01 13:29:22.000000000 -0500
+++ linux.t/drivers/md/dm.c	2004-04-01 14:54:57.911207304 -0500
@@ -12,6 +12,7 @@
 #include <linux/moduleparam.h>
 #include <linux/blkpg.h>
 #include <linux/bio.h>
+#include <linux/buffer_head.h>
 #include <linux/mempool.h>
 #include <linux/slab.h>
 
@@ -46,6 +47,7 @@ struct target_io {
  */
 #define DMF_BLOCK_IO 0
 #define DMF_SUSPENDED 1
+#define DMF_FS_LOCKED 2
 
 struct mapped_device {
 	struct rw_semaphore lock;
@@ -80,6 +82,11 @@ struct mapped_device {
 	 */
 	uint32_t event_nr;
 	wait_queue_head_t eventq;
+
+	/*
+	 * freeze/thaw support require holding onto a super block
+	 */
+	struct super_block *frozen_sb;
 };
 
 #define MIN_IOS 256
@@ -895,6 +902,52 @@ int dm_swap_table(struct mapped_device *
 }
 
 /*
+ * Functions to lock and unlock any filesystem running on the
+ * device.
+ */
+static int __lock_fs(struct mapped_device *md)
+{
+	struct block_device *bdev;
+
+	if (test_and_set_bit(DMF_FS_LOCKED, &md->flags))
+		return 0;
+
+	bdev = bdget_disk(md->disk, 0);
+	if (!bdev) {
+		DMWARN("bdget failed in __lock_fs");
+		return -ENOMEM;
+	}
+
+	WARN_ON(md->frozen_sb);
+	md->frozen_sb = freeze_bdev(bdev);
+	/* don't bdput right now, we don't want the bdev
+	 * to go away while it is locked.  We'll bdput
+	 * in __unlock_fs
+	 */
+	return 0;
+}
+
+static int __unlock_fs(struct mapped_device *md)
+{
+	struct block_device *bdev;
+
+	if (!test_and_clear_bit(DMF_FS_LOCKED, &md->flags))
+		return 0;
+
+	bdev = bdget_disk(md->disk, 0);
+	if (!bdev) {
+		DMWARN("bdget failed in __unlock_fs");
+		return -ENOMEM;
+	}
+
+	thaw_bdev(bdev, md->frozen_sb);
+	md->frozen_sb = NULL;
+	bdput(bdev);
+	bdput(bdev);
+	return 0;
+}
+
+/*
  * We need to be able to change a mapping table under a mounted
  * filesystem.  For example we might want to move some data in
  * the background.  Before the table can be swapped with
@@ -906,13 +959,27 @@ int dm_suspend(struct mapped_device *md)
 	struct dm_table *map;
 	DECLARE_WAITQUEUE(wait, current);
 
-	down_write(&md->lock);
+	/* Flush I/O to the device. */
+	down_read(&md->lock);
+	if (test_bit(DMF_BLOCK_IO, &md->flags)) {
+		up_read(&md->lock);
+		return -EINVAL;
+	}
+
+	__lock_fs(md);
+	up_read(&md->lock);
 
 	/*
 	 * First we set the BLOCK_IO flag so no more ios will be
 	 * mapped.
 	 */
+	down_write(&md->lock);
 	if (test_bit(DMF_BLOCK_IO, &md->flags)) {
+		/*
+		 * If we get here we know another thread is
+		 * trying to suspend as well, so we leave the fs
+		 * locked for this thread.
+		 */
 		up_write(&md->lock);
 		return -EINVAL;
 	}
@@ -947,6 +1014,7 @@ int dm_suspend(struct mapped_device *md)
 
 	/* were we interrupted ? */
 	if (atomic_read(&md->pending)) {
+		__unlock_fs(md);
 		clear_bit(DMF_BLOCK_IO, &md->flags);
 		up_write(&md->lock);
 		return -EINTR;
@@ -984,6 +1052,7 @@ int dm_resume(struct mapped_device *md)
 	def = bio_list_get(&md->deferred);
 	__flush_deferred_io(md, def);
 	up_write(&md->lock);
+	__unlock_fs(md);
 	dm_table_unplug_all(map);
 	dm_table_put(map);
 

[-- Attachment #4: dm-incr --]
[-- Type: text/x-patch, Size: 851 bytes --]

diff -u linux.t/drivers/md/dm.c linux.t/drivers/md/dm.c
--- linux.t/drivers/md/dm.c	2004-03-16 11:30:55.360478210 -0500
+++ linux.t/drivers/md/dm.c	2004-04-01 14:54:57.911207304 -0500
@@ -82,6 +82,11 @@
 	 */
 	uint32_t event_nr;
 	wait_queue_head_t eventq;
+
+	/*
+	 * freeze/thaw support require holding onto a super block
+	 */
+	struct super_block *frozen_sb;
 };
 
 #define MIN_IOS 256
@@ -913,8 +918,12 @@
 		return -ENOMEM;
 	}
 
-	fsync_bdev_lockfs(bdev);
-	bdput(bdev);
+	WARN_ON(md->frozen_sb);
+	md->frozen_sb = freeze_bdev(bdev);
+	/* don't bdput right now, we don't want the bdev
+	 * to go away while it is locked.  We'll bdput
+	 * in __unlock_fs
+	 */
 	return 0;
 }
 
@@ -931,7 +940,9 @@
 		return -ENOMEM;
 	}
 
-	unlockfs(bdev);
+	thaw_bdev(bdev, md->frozen_sb);
+	md->frozen_sb = NULL;
+	bdput(bdev);
 	bdput(bdev);
 	return 0;
 }

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] lockfs patch for 2.6
  2004-04-01 20:35             ` Chris Mason
@ 2004-04-01 22:32               ` Kevin Corry
  2004-04-02 20:00               ` Kevin Corry
  1 sibling, 0 replies; 19+ messages in thread
From: Kevin Corry @ 2004-04-01 22:32 UTC (permalink / raw)
  To: Chris Mason; +Cc: Christoph Hellwig, linux-kernel, thornber

On Thursday 01 April 2004 2:35 pm, Chris Mason wrote:
> On Fri, 2004-03-26 at 05:25, Christoph Hellwig wrote:
> > On Sun, Mar 14, 2004 at 10:23:31AM -0500, Chris Mason wrote:
> > > > P.S. this patch now kills 16 lines of kernel code summarized :)
> > >
> > > It looks good, I'll give it a try.
> >
> > ping?  I've seen you merged the old patch into the suse tree, and having
> > shipping distros with incompatible APIs doesn't sound exactly like a good
> > idea..
>
> Christoph's vfs patch looks good, I've stripped out the XFS bits (FS
> parts should probably be in different patches) and made one small
> change.  freeze/thaw now check to make sure bdev != NULL.
>
> On the device mapper side, I had to add a struct super_block pointer to
> the dm struct and changed it to pin the bdev struct during the freeze.
> Before, it could go away due to bdput, leading to deadlock when someone
> else got a bdev struct with bd_mount_sem held.
>
> New patches attached, along with an incremental to clearly show my
> changes to the dm patch.
>
> -chris

Thanks Chris! I'll test the new VFS-lock code in the morning.

-- 
Kevin Corry
kevcorry@us.ibm.com
http://evms.sourceforge.net/

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] lockfs patch for 2.6
  2004-04-01 20:35             ` Chris Mason
  2004-04-01 22:32               ` Kevin Corry
@ 2004-04-02 20:00               ` Kevin Corry
  2004-04-02 20:02                 ` Christoph Hellwig
  1 sibling, 1 reply; 19+ messages in thread
From: Kevin Corry @ 2004-04-02 20:00 UTC (permalink / raw)
  To: Chris Mason; +Cc: Christoph Hellwig, linux-kernel, thornber

On Thursday 01 April 2004 2:35 pm, Chris Mason wrote:
> On Fri, 2004-03-26 at 05:25, Christoph Hellwig wrote:
> > On Sun, Mar 14, 2004 at 10:23:31AM -0500, Chris Mason wrote:
> > > > P.S. this patch now kills 16 lines of kernel code summarized :)
> > >
> > > It looks good, I'll give it a try.
> >
> > ping?  I've seen you merged the old patch into the suse tree, and having
> > shipping distros with incompatible APIs doesn't sound exactly like a good
> > idea..
>
> Christoph's vfs patch looks good, I've stripped out the XFS bits (FS
> parts should probably be in different patches) and made one small
> change.  freeze/thaw now check to make sure bdev != NULL.

Does this mean there are patches required for XFS to work properly with
this new VFS-lock patch? I'm getting hangs when suspending a DM device
that contains an XFS filesystem with active I/O. Ext3, Reiser, and JFS
seem to behave as expected.

Backtrace from the "cp" command on the XFS filesystem:

kernel: cp            D C1201BE0     0   784    727               (NOTLB)
kernel: ccf09cd0 00000086 00000000 c1201be0 c1202540 00000017 00000017 ccf09d78
kernel:        00000018 c1201be0 0001d058 bcc411f2 00000040 cf1f2348 cfd3b590 cfd3b79c
kernel:        ccf09000 ccf09d68 00000001 ccf09de4 c0148644 0000d0e4 c2c21000 00000000
kernel: Call Trace:
kernel:  [<c0148644>] generic_file_aio_write_nolock+0xe4/0xe20
kernel:  [<c0124e20>] autoremove_wake_function+0x0/0x50
kernel:  [<c01cb811>] pathrelse+0x31/0x50
kernel:  [<c0124e20>] autoremove_wake_function+0x0/0x50
kernel:  [<c01d722c>] do_journal_end+0x88c/0xbf0
kernel:  [<c0121e80>] default_wake_function+0x0/0x20
kernel:  [<c01d5c3c>] journal_end+0x9c/0xc0
kernel:  [<c02a30c8>] xfs_ichgtime+0xf8/0xfa
kernel:  [<c02cfdc4>] xfs_write+0x264/0x7d0
kernel:  [<c01932ac>] __mark_inode_dirty+0x17c/0x190
kernel:  [<c01471d8>] do_generic_mapping_read+0x128/0x3e0
kernel:  [<c018bfa1>] update_atime+0xd1/0xe0
kernel:  [<c02caff0>] linvfs_write+0xb0/0x120
kernel:  [<c016ba57>] do_sync_write+0x87/0xc0
kernel:  [<c0177ecf>] cp_new_stat64+0x10f/0x130
kernel:  [<c016bb3a>] vfs_write+0xaa/0x130
kernel:  [<c016bc5f>] sys_write+0x3f/0x60
kernel:  [<c043f1cf>] syscall_call+0x7/0xb

Backtrace from the "dmsetup resume" call on that device:

kernel: dm            D C1201BE0     0   788    758               (NOTLB)
kernel: c2f21c58 00000082 00000000 c1201be0 c1202540 00000000 00000000 00000000
kernel:        00000010 c1201be0 00035e9e fd17af8b 00000040 cf85d968 00000000 00000010
kernel:        c0fc210c cf85d7b0 fffeffff c2f21c88 c02e050c c0fa32fc c2f21c98 c0fc2118
kernel: Call Trace:
kernel:  [<c02e050c>] rwsem_down_read_failed+0xbc/0x1b0
kernel:  [<c029d5ec>] .text.lock.xfs_iget+0x71/0x145
kernel:  [<c02bd7bc>] xfs_sync_inodes+0x28c/0xaa0
kernel:  [<c02be2aa>] xfs_syncsub+0x2da/0x2f0
kernel:  [<c02bd526>] xfs_sync+0x26/0x30
kernel:  [<c02d19d1>] vfs_sync+0x41/0x50
kernel:  [<c0296ded>] xfs_fs_freeze+0x3d/0xb0
kernel:  [<c02cd2ac>] xfs_ioctl+0x7bc/0xa60
kernel:  [<c0154d93>] pagevec_lookup_tag+0x33/0x40
kernel:  [<c0146409>] wait_on_page_writeback_range+0x79/0x130
kernel:  [<c018b308>] igrab+0x88/0xa0
kernel:  [<c02d2794>] vn_hold+0x44/0x90
kernel:  [<c02bd32f>] xfs_root+0x1f/0x30
kernel:  [<c02d11e6>] linvfs_freeze_fs+0x66/0x80
kernel:  [<c016d686>] freeze_bdev+0x116/0x140
kernel:  [<c03a747a>] __lock_fs+0x5a/0xb0
kernel:  [<c03a75db>] dm_suspend+0x7b/0x210
kernel:  [<c0121e80>] default_wake_function+0x0/0x20
kernel:  [<c0121e80>] default_wake_function+0x0/0x20
kernel:  [<c03aa164>] __get_name_cell+0x14/0x70
kernel:  [<c03ab26c>] do_resume+0x16c/0x1b0
kernel:  [<c03ac2c6>] ctl_ioctl+0xe6/0x180
kernel:  [<c03ab2b0>] dev_suspend+0x0/0x20
kernel:  [<c0181b82>] sys_ioctl+0x152/0x300
kernel:  [<c043f1cf>] syscall_call+0x7/0xb


-- 
Kevin Corry
kevcorry@us.ibm.com
http://evms.sourceforge.net/

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] lockfs patch for 2.6
  2004-04-02 20:00               ` Kevin Corry
@ 2004-04-02 20:02                 ` Christoph Hellwig
  2004-04-02 20:26                   ` Kevin Corry
  0 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2004-04-02 20:02 UTC (permalink / raw)
  To: Kevin Corry; +Cc: Chris Mason, linux-kernel, thornber

On Fri, Apr 02, 2004 at 02:00:18PM -0600, Kevin Corry wrote:
> > Christoph's vfs patch looks good, I've stripped out the XFS bits (FS
> > parts should probably be in different patches) and made one small
> > change.  freeze/thaw now check to make sure bdev != NULL.
> 
> Does this mean there are patches required for XFS to work properly with
> this new VFS-lock patch? I'm getting hangs when suspending a DM device
> that contains an XFS filesystem with active I/O. Ext3, Reiser, and JFS
> seem to behave as expected.

Yes.  Here's the patch I sent out earlier (xfs bits + common code):

--- 1.155/fs/block_dev.c	Fri Mar 12 10:33:01 2004
+++ edited/fs/block_dev.c	Sat Mar 13 15:28:49 2004
@@ -251,6 +251,7 @@
 	{
 		memset(bdev, 0, sizeof(*bdev));
 		sema_init(&bdev->bd_sem, 1);
+		sema_init(&bdev->bd_mount_sem, 1);
 		INIT_LIST_HEAD(&bdev->bd_inodes);
 		INIT_LIST_HEAD(&bdev->bd_list);
 		inode_init_once(&ei->vfs_inode);
===== fs/buffer.c 1.224 vs edited =====
--- 1.224/fs/buffer.c	Sun Mar  7 08:16:11 2004
+++ edited/fs/buffer.c	Sun Mar 14 15:37:00 2004
@@ -260,6 +260,69 @@
 }
 
 /*
+ * triggered by the device mapper code to lock a filesystem and force
+ * it into a consistent state.
+ *
+ * This takes the block device bd_mount_sem to make sure no new mounts
+ * happen on bdev until unlockfs is called.  If a super is found on this
+ * block device, we hould a read lock on the s->s_umount sem to make sure
+ * nobody unmounts until the snapshot creation is done
+ */
+struct super_block *freeze_bdev(struct block_device *bdev)
+{
+	struct super_block *sb;
+
+	down(&bdev->bd_mount_sem);
+	sb = get_super(bdev);
+	if (sb && !(sb->s_flags & MS_RDONLY)) {
+		sb->s_frozen = SB_FREEZE_WRITE;
+		wmb();
+
+		sync_inodes_sb(sb, 0);
+		DQUOT_SYNC(sb);
+
+		sb->s_frozen = SB_FREEZE_TRANS;
+		wmb();
+		
+		lock_super(sb);
+		if (sb->s_dirt && sb->s_op->write_super)
+			sb->s_op->write_super(sb);
+		unlock_super(sb);
+
+		if (sb->s_op->sync_fs)
+			sb->s_op->sync_fs(sb, 1);
+	
+		sync_blockdev(sb->s_bdev);
+		sync_inodes_sb(sb, 1);
+		sync_blockdev(sb->s_bdev);
+
+		if (sb->s_op->write_super_lockfs)
+			sb->s_op->write_super_lockfs(sb);
+	}
+
+	sync_blockdev(bdev);
+	return sb;	/* thaw_bdev releases s->s_umount and bd_mount_sem */
+}
+EXPORT_SYMBOL(freeze_bdev);
+
+void thaw_bdev(struct block_device *bdev, struct super_block *sb)
+{
+	if (sb) {
+		BUG_ON(sb->s_bdev != bdev);
+
+		if (sb->s_op->unlockfs)
+			sb->s_op->unlockfs(sb);
+		sb->s_frozen = SB_UNFROZEN;
+		wmb();
+		wake_up(&sb->s_wait_unfrozen);
+		drop_super(sb);
+	}
+
+	up(&bdev->bd_mount_sem);
+}
+EXPORT_SYMBOL(thaw_bdev);
+
+/*
  * sync everything.  Start out by waking pdflush, because that writes back
  * all queues in parallel.
  */
===== fs/super.c 1.115 vs edited =====
--- 1.115/fs/super.c	Fri Mar 12 10:30:24 2004
+++ edited/fs/super.c	Sun Mar 14 14:24:56 2004
@@ -77,6 +77,7 @@
 		sema_init(&s->s_dquot.dqio_sem, 1);
 		sema_init(&s->s_dquot.dqonoff_sem, 1);
 		init_rwsem(&s->s_dquot.dqptr_sem);
+		init_waitqueue_head(&s->s_wait_unfrozen);
 		s->s_maxbytes = MAX_NON_LFS;
 		s->dq_op = sb_dquot_ops;
 		s->s_qcop = sb_quotactl_ops;
@@ -621,7 +622,14 @@
 	if (IS_ERR(bdev))
 		return (struct super_block *)bdev;
 
+	/*
+	 * once the super is inserted into the list by sget, s_umount
+	 * will protect the lockfs code from trying to start a snapshot
+	 * while we are mounting
+	 */
+	down(&bdev->bd_mount_sem);
 	s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
+	up(&bdev->bd_mount_sem);
 	if (IS_ERR(s))
 		goto out;
 
===== fs/xfs/xfs_fsops.c 1.11 vs edited =====
--- 1.11/fs/xfs/xfs_fsops.c	Fri Feb 27 07:28:05 2004
+++ edited/fs/xfs/xfs_fsops.c	Sun Mar 14 14:03:01 2004
@@ -582,63 +582,25 @@
 }
 
 int
-xfs_fs_freeze(
-	xfs_mount_t	*mp)
-{
-	vfs_t		*vfsp;
-	/*REFERENCED*/
-	int		error;
-
-	vfsp = XFS_MTOVFS(mp);
-
-	/* Stop new writers */
-	xfs_start_freeze(mp, XFS_FREEZE_WRITE);
-
-	/* Flush the refcache */
-	xfs_refcache_purge_mp(mp);
-
-	/* Flush delalloc and delwri data */
-	VFS_SYNC(vfsp, SYNC_DELWRI|SYNC_WAIT, NULL, error);
-
-	/* Pause transaction subsystem */
-	xfs_start_freeze(mp, XFS_FREEZE_TRANS);
-
-	/* Flush any remaining inodes into buffers */
-	VFS_SYNC(vfsp, SYNC_ATTR|SYNC_WAIT, NULL, error);
-
-	/* Push all buffers out to disk */
-	xfs_binval(mp->m_ddev_targp);
-	if (mp->m_rtdev_targp) {
-		xfs_binval(mp->m_rtdev_targp);
-	}
-
-	/* Push the superblock and write an unmount record */
-	xfs_log_unmount_write(mp);
-	xfs_unmountfs_writesb(mp);
-
-	return 0;
-}
-
-int
-xfs_fs_thaw(
-	xfs_mount_t	*mp)
-{
-	xfs_finish_freeze(mp);
-	return 0;
-}
-
-int
 xfs_fs_goingdown(
 	xfs_mount_t	*mp,
 	__uint32_t	inflags)
 {
-	switch (inflags)
-	{
-	case XFS_FSOP_GOING_FLAGS_DEFAULT:
-		xfs_fs_freeze(mp);
-		xfs_force_shutdown(mp, XFS_FORCE_UMOUNT);
-		xfs_fs_thaw(mp);
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	switch (inflags) {
+	case XFS_FSOP_GOING_FLAGS_DEFAULT: {
+		struct vfs *vfsp = XFS_MTOVFS(mp);
+		struct super_block *sb = freeze_bdev(vfsp->vfs_super->s_bdev);
+
+		if (sb) {
+			xfs_force_shutdown(mp, XFS_FORCE_UMOUNT);
+			thaw_bdev(sb->s_bdev, sb);
+		}
+	
 		break;
+	}
 	case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
 		xfs_force_shutdown(mp, XFS_FORCE_UMOUNT);
 		break;
===== fs/xfs/xfs_fsops.h 1.3 vs edited =====
--- 1.3/fs/xfs/xfs_fsops.h	Fri Feb 27 07:28:05 2004
+++ edited/fs/xfs/xfs_fsops.h	Sun Mar 14 14:03:33 2004
@@ -60,14 +60,6 @@
 	xfs_fsop_resblks_t	*outval);
 
 int
-xfs_fs_freeze(
-	xfs_mount_t		*mp);
-
-int
-xfs_fs_thaw(
-	xfs_mount_t		*mp);
-
-int
 xfs_fs_goingdown(
 	xfs_mount_t		*mp,
 	__uint32_t		inflags);
===== fs/xfs/xfs_log.c 1.34 vs edited =====
--- 1.34/fs/xfs/xfs_log.c	Sat Mar  6 04:16:30 2004
+++ edited/fs/xfs/xfs_log.c	Sun Mar 14 13:59:50 2004
@@ -820,7 +820,7 @@
 	xlog_t		*log = mp->m_log;
 	vfs_t		*vfsp = XFS_MTOVFS(mp);
 
-	if (mp->m_frozen || XFS_FORCED_SHUTDOWN(mp) ||
+	if (vfsp->vfs_super->s_frozen || XFS_FORCED_SHUTDOWN(mp) ||
 	    (vfsp->vfs_flag & VFS_RDONLY))
 		return 0;
 
===== fs/xfs/xfs_mount.c 1.41 vs edited =====
--- 1.41/fs/xfs/xfs_mount.c	Fri Feb 27 07:54:47 2004
+++ edited/fs/xfs/xfs_mount.c	Sun Mar 14 14:01:45 2004
@@ -139,9 +139,6 @@
 	 */
 	xfs_trans_ail_init(mp);
 
-	/* Init freeze sync structures */
-	spinlock_init(&mp->m_freeze_lock, "xfs_freeze");
-	init_sv(&mp->m_wait_unfreeze, SV_DEFAULT, "xfs_freeze", 0);
 	atomic_set(&mp->m_active_trans, 0);
 
 	return mp;
@@ -191,8 +188,6 @@
 		VFS_REMOVEBHV(vfsp, &mp->m_bhv);
 	}
 
-	spinlock_destroy(&mp->m_freeze_lock);
-	sv_destroy(&mp->m_wait_unfreeze);
 	kmem_free(mp, sizeof(xfs_mount_t));
 }
 
@@ -1584,60 +1579,4 @@
 	}
 	xfs_mod_sb(tp, fields);
 	xfs_trans_commit(tp, 0, NULL);
-}
-
-/* Functions to lock access out of the filesystem for forced
- * shutdown or snapshot.
- */
-
-void
-xfs_start_freeze(
-	xfs_mount_t	*mp,
-	int		level)
-{
-	unsigned long	s = mutex_spinlock(&mp->m_freeze_lock);
-
-	mp->m_frozen = level;
-	mutex_spinunlock(&mp->m_freeze_lock, s);
-
-	if (level == XFS_FREEZE_TRANS) {
-		while (atomic_read(&mp->m_active_trans) > 0)
-			delay(100);
-	}
-}
-
-void
-xfs_finish_freeze(
-	xfs_mount_t	*mp)
-{
-	unsigned long	s = mutex_spinlock(&mp->m_freeze_lock);
-
-	if (mp->m_frozen) {
-		mp->m_frozen = 0;
-		sv_broadcast(&mp->m_wait_unfreeze);
-	}
-
-	mutex_spinunlock(&mp->m_freeze_lock, s);
-}
-
-void
-xfs_check_frozen(
-	xfs_mount_t	*mp,
-	bhv_desc_t	*bdp,
-	int		level)
-{
-	unsigned long	s;
-
-	if (mp->m_frozen) {
-		s = mutex_spinlock(&mp->m_freeze_lock);
-
-		if (mp->m_frozen < level) {
-			mutex_spinunlock(&mp->m_freeze_lock, s);
-		} else {
-			sv_wait(&mp->m_wait_unfreeze, 0, &mp->m_freeze_lock, s);
-		}
-	}
-
-	if (level == XFS_FREEZE_TRANS)
-		atomic_inc(&mp->m_active_trans);
 }
===== fs/xfs/xfs_mount.h 1.25 vs edited =====
--- 1.25/fs/xfs/xfs_mount.h	Wed Mar  3 05:52:57 2004
+++ edited/fs/xfs/xfs_mount.h	Sun Mar 14 14:02:01 2004
@@ -379,10 +379,6 @@
 	struct xfs_dmops	m_dm_ops;	/* vector of DMI ops */
 	struct xfs_qmops	m_qm_ops;	/* vector of XQM ops */
 	struct xfs_ioops	m_io_ops;	/* vector of I/O ops */
-	lock_t			m_freeze_lock;	/* Lock for m_frozen */
-	uint			m_frozen;	/* FS frozen for shutdown or
-						 * snapshot */
-	sv_t			m_wait_unfreeze;/* waiting to unfreeze */
 	atomic_t		m_active_trans;	/* number trans frozen */
 } xfs_mount_t;
 
@@ -557,16 +553,6 @@
 extern void	xfs_initialize_perag(xfs_mount_t *, int);
 extern void	xfs_xlatesb(void *, struct xfs_sb *, int, xfs_arch_t,
 			__int64_t);
-
-/*
- * Flags for freeze operations.
- */
-#define XFS_FREEZE_WRITE	1
-#define XFS_FREEZE_TRANS	2
-
-extern void	xfs_start_freeze(xfs_mount_t *, int);
-extern void	xfs_finish_freeze(xfs_mount_t *);
-extern void	xfs_check_frozen(xfs_mount_t *, bhv_desc_t *, int);
 
 extern struct vfsops xfs_vfsops;
 extern struct vnodeops xfs_vnodeops;
===== fs/xfs/xfs_trans.c 1.14 vs edited =====
--- 1.14/fs/xfs/xfs_trans.c	Fri Jan  9 00:18:09 2004
+++ edited/fs/xfs/xfs_trans.c	Sun Mar 14 14:01:41 2004
@@ -131,7 +131,9 @@
 	xfs_mount_t	*mp,
 	uint		type)
 {
-	xfs_check_frozen(mp, NULL, XFS_FREEZE_TRANS);
+	vfs_check_frozen(XFS_MTOVFS(mp)->vfs_super, SB_FREEZE_TRANS);
+	atomic_inc(&mp->m_active_trans);
+
 	return (_xfs_trans_alloc(mp, type));
 
 }
===== fs/xfs/xfs_vfsops.c 1.57 vs edited =====
--- 1.57/fs/xfs/xfs_vfsops.c	Wed Mar  3 05:52:58 2004
+++ edited/fs/xfs/xfs_vfsops.c	Sun Mar 14 14:42:21 2004
@@ -1544,6 +1544,11 @@
 		}
 	}
 
+	if (XFS_MTOVFS(mp)->vfs_super->s_frozen == SB_FREEZE_TRANS) {
+		while (atomic_read(&mp->m_active_trans) > 0)
+			delay(100);
+	}
+
 	return XFS_ERROR(last_error);
 }
 
@@ -1853,6 +1858,17 @@
 	return 0;
 }
 
+STATIC void
+xfs_freeze(
+	bhv_desc_t	*bdp)
+{
+	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
+
+	/* Push the superblock and write an unmount record */
+	xfs_log_unmount_write(mp);
+	xfs_unmountfs_writesb(mp);
+}
+
 
 vfsops_t xfs_vfsops = {
 	BHV_IDENTITY_INIT(VFS_BHV_XFS,VFS_POSITION_XFS),
@@ -1869,4 +1885,5 @@
 	.vfs_quotactl		= (vfs_quotactl_t)fs_nosys,
 	.vfs_init_vnode		= xfs_initialize_vnode,
 	.vfs_force_shutdown	= xfs_do_force_shutdown,
+	.vfs_freeze		= xfs_freeze,
 };
===== fs/xfs/linux/xfs_ioctl.c 1.21 vs edited =====
--- 1.21/fs/xfs/linux/xfs_ioctl.c	Fri Feb 27 07:28:05 2004
+++ edited/fs/xfs/linux/xfs_ioctl.c	Sat Mar 13 18:23:23 2004
@@ -825,13 +825,14 @@
 	case XFS_IOC_FREEZE:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
-		xfs_fs_freeze(mp);
+
+		freeze_bdev(inode->i_sb->s_bdev);
 		return 0;
 
 	case XFS_IOC_THAW:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
-		xfs_fs_thaw(mp);
+		thaw_bdev(inode->i_sb->s_bdev, inode->i_sb);
 		return 0;
 
 	case XFS_IOC_GOINGDOWN: {
===== fs/xfs/linux/xfs_lrw.c 1.40 vs edited =====
--- 1.40/fs/xfs/linux/xfs_lrw.c	Wed Mar  3 05:52:57 2004
+++ edited/fs/xfs/linux/xfs_lrw.c	Sun Mar 14 14:47:00 2004
@@ -682,8 +682,6 @@
 	io = &xip->i_iocore;
 	mp = io->io_mount;
 
-	xfs_check_frozen(mp, bdp, XFS_FREEZE_WRITE);
-
 	if (XFS_FORCED_SHUTDOWN(mp)) {
 		return -EIO;
 	}
===== fs/xfs/linux/xfs_super.c 1.70 vs edited =====
--- 1.70/fs/xfs/linux/xfs_super.c	Sat Mar  6 04:46:51 2004
+++ edited/fs/xfs/linux/xfs_super.c	Sun Mar 14 14:43:46 2004
@@ -589,28 +589,7 @@
 linvfs_freeze_fs(
 	struct super_block	*sb)
 {
-	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
-	vnode_t			*vp;
-	int			error;
-
-	if (sb->s_flags & MS_RDONLY)
-		return;
-	VFS_ROOT(vfsp, &vp, error);
-	VOP_IOCTL(vp, LINVFS_GET_IP(vp), NULL, 0, XFS_IOC_FREEZE, 0, error);
-	VN_RELE(vp);
-}
-
-STATIC void
-linvfs_unfreeze_fs(
-	struct super_block	*sb)
-{
-	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
-	vnode_t			*vp;
-	int			error;
-
-	VFS_ROOT(vfsp, &vp, error);
-	VOP_IOCTL(vp, LINVFS_GET_IP(vp), NULL, 0, XFS_IOC_THAW, 0, error);
-	VN_RELE(vp);
+	VFS_FREEZE(LINVFS_GET_VFS(sb));
 }
 
 STATIC struct dentry *
@@ -850,7 +829,6 @@
 	.write_super		= linvfs_write_super,
 	.sync_fs		= linvfs_sync_super,
 	.write_super_lockfs	= linvfs_freeze_fs,
-	.unlockfs		= linvfs_unfreeze_fs,
 	.statfs			= linvfs_statfs,
 	.remount_fs		= linvfs_remount,
 	.show_options		= linvfs_show_options,
===== fs/xfs/linux/xfs_vfs.c 1.11 vs edited =====
--- 1.11/fs/xfs/linux/xfs_vfs.c	Wed Mar  3 05:52:57 2004
+++ edited/fs/xfs/linux/xfs_vfs.c	Sun Mar 14 14:43:15 2004
@@ -230,6 +230,18 @@
 	((*bhvtovfsops(next)->vfs_force_shutdown)(next, fl, file, line));
 }
 
+void
+vfs_freeze(
+	struct bhv_desc		*bdp)
+{
+	struct bhv_desc		*next = bdp;
+
+	ASSERT(next);
+	while (! (bhvtovfsops(next))->vfs_freeze)
+		next = BHV_NEXT(next);
+	((*bhvtovfsops(next)->vfs_freeze)(next));
+}
+
 vfs_t *
 vfs_allocate( void )
 {
===== fs/xfs/linux/xfs_vfs.h 1.18 vs edited =====
--- 1.18/fs/xfs/linux/xfs_vfs.h	Fri Jan  9 06:59:58 2004
+++ edited/fs/xfs/linux/xfs_vfs.h	Sun Mar 14 14:43:07 2004
@@ -112,6 +112,7 @@
 typedef void	(*vfs_init_vnode_t)(bhv_desc_t *,
 				struct vnode *, bhv_desc_t *, int);
 typedef void	(*vfs_force_shutdown_t)(bhv_desc_t *, int, char *, int);
+typedef void	(*vfs_freeze_t)(bhv_desc_t *);
 
 typedef struct vfsops {
 	bhv_position_t		vf_position;	/* behavior chain position */
@@ -128,6 +129,7 @@
 	vfs_quotactl_t		vfs_quotactl;	/* disk quota */
 	vfs_init_vnode_t	vfs_init_vnode;	/* initialize a new vnode */
 	vfs_force_shutdown_t	vfs_force_shutdown;	/* crash and burn */
+	vfs_freeze_t		vfs_freeze;	/* freeze fs for snapshot */
 } vfsops_t;
 
 /*
@@ -147,6 +149,7 @@
 #define VFS_QUOTACTL(v, c,id,p, rv)	((rv) = vfs_quotactl(VHEAD(v), c,id,p))
 #define VFS_INIT_VNODE(v, vp,b,ul)	( vfs_init_vnode(VHEAD(v), vp,b,ul) )
 #define VFS_FORCE_SHUTDOWN(v, fl,f,l)	( vfs_force_shutdown(VHEAD(v), fl,f,l) )
+#define VFS_FREEZE(v)			( vfs_freeze(VHEAD(v)) )
 
 /*
  * PVFS's.  Operates on behavior descriptor pointers.
@@ -164,6 +167,7 @@
 #define PVFS_QUOTACTL(b, c,id,p, rv)	((rv) = vfs_quotactl(b, c,id,p))
 #define PVFS_INIT_VNODE(b, vp,b2,ul)	( vfs_init_vnode(b, vp,b2,ul) )
 #define PVFS_FORCE_SHUTDOWN(b, fl,f,l)	( vfs_force_shutdown(b, fl,f,l) )
+#define PVFS_FREEZE(b)			( vfs_freeze(b) )
 
 extern int vfs_mount(bhv_desc_t *, struct xfs_mount_args *, struct cred *);
 extern int vfs_parseargs(bhv_desc_t *, char *, struct xfs_mount_args *, int);
@@ -178,6 +182,7 @@
 extern int vfs_quotactl(bhv_desc_t *, int, int, caddr_t);
 extern void vfs_init_vnode(bhv_desc_t *, struct vnode *, bhv_desc_t *, int);
 extern void vfs_force_shutdown(bhv_desc_t *, int, char *, int);
+extern void vfs_freeze(bhv_desc_t *);
 
 typedef struct bhv_vfsops {
 	struct vfsops		bhv_common;
===== include/linux/buffer_head.h 1.46 vs edited =====
--- 1.46/include/linux/buffer_head.h	Tue Jan 20 00:38:11 2004
+++ edited/include/linux/buffer_head.h	Sat Mar 13 15:35:32 2004
@@ -164,6 +164,8 @@
 wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
 void wake_up_buffer(struct buffer_head *bh);
 int fsync_bdev(struct block_device *);
+struct super_block *freeze_bdev(struct block_device *);
+void thaw_bdev(struct block_device *, struct super_block *);
 int fsync_super(struct super_block *);
 int fsync_no_super(struct block_device *);
 struct buffer_head *__find_get_block(struct block_device *, sector_t, int);
===== include/linux/fs.h 1.290 vs edited =====
--- 1.290/include/linux/fs.h	Fri Mar 12 10:32:59 2004
+++ edited/include/linux/fs.h	Sun Mar 14 14:08:14 2004
@@ -344,6 +344,7 @@
 	struct inode *		bd_inode;	/* will die */
 	int			bd_openers;
 	struct semaphore	bd_sem;	/* open/close mutex */
+	struct semaphore	bd_mount_sem;	/* mount mutex */
 	struct list_head	bd_inodes;
 	void *			bd_holder;
 	int			bd_holders;
@@ -712,6 +713,9 @@
 	struct list_head	s_instances;
 	struct quota_info	s_dquot;	/* Diskquota specific options */
 
+	int			s_frozen;
+	wait_queue_head_t	s_wait_unfrozen;
+
 	char s_id[32];				/* Informational name */
 
 	struct kobject           kobj;          /* anchor for sysfs */
@@ -723,6 +727,18 @@
 	 */
 	struct semaphore s_vfs_rename_sem;	/* Kludge */
 };
+
+/*
+ * Snapshotting support.
+ */
+enum {
+	SB_UNFROZEN = 0,
+	SB_FREEZE_WRITE	= 1,
+	SB_FREEZE_TRANS = 2,
+};
+
+#define vfs_check_frozen(sb, level) \
+	wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
 
 /*
  * Superblock locking.
===== mm/filemap.c 1.225 vs edited =====
--- 1.225/mm/filemap.c	Mon Mar  8 15:21:17 2004
+++ edited/mm/filemap.c	Sun Mar 14 14:16:40 2004
@@ -1746,6 +1746,8 @@
 	unsigned long	seg;
 	char __user	*buf;
 
+	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+
 	ocount = 0;
 	for (seg = 0; seg < nr_segs; seg++) {
 		const struct iovec *iv = &iov[seg];

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] lockfs patch for 2.6
  2004-04-02 20:02                 ` Christoph Hellwig
@ 2004-04-02 20:26                   ` Kevin Corry
  0 siblings, 0 replies; 19+ messages in thread
From: Kevin Corry @ 2004-04-02 20:26 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Chris Mason, linux-kernel, thornber

On Friday 02 April 2004 2:02 pm, Christoph Hellwig wrote:
> On Fri, Apr 02, 2004 at 02:00:18PM -0600, Kevin Corry wrote:
> > > Christoph's vfs patch looks good, I've stripped out the XFS bits (FS
> > > parts should probably be in different patches) and made one small
> > > change.  freeze/thaw now check to make sure bdev != NULL.
> >
> > Does this mean there are patches required for XFS to work properly with
> > this new VFS-lock patch? I'm getting hangs when suspending a DM device
> > that contains an XFS filesystem with active I/O. Ext3, Reiser, and JFS
> > seem to behave as expected.
>
> Yes.  Here's the patch I sent out earlier (xfs bits + common code):

Thanks. Sorry I missed your patch the first time around. XFS works as expected 
now.

-- 
Kevin Corry
kevcorry@us.ibm.com
http://evms.sourceforge.net/

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2004-04-02 20:27 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-03-09 21:31 [PATCH] lockfs patch for 2.6 Chris Mason
2004-03-12  9:31 ` Christoph Hellwig
2004-03-12 15:50   ` Chris Mason
2004-03-12 15:51     ` Christoph Hellwig
2004-03-13 13:14 ` Christoph Hellwig
2004-03-13 15:20   ` Chris Mason
2004-03-13 16:33     ` Christoph Hellwig
2004-03-14 14:00       ` Christoph Hellwig
2004-03-14 15:23         ` Chris Mason
2004-03-26 10:25           ` Christoph Hellwig
2004-03-26 13:28             ` Chris Mason
2004-04-01 20:35             ` Chris Mason
2004-04-01 22:32               ` Kevin Corry
2004-04-02 20:00               ` Kevin Corry
2004-04-02 20:02                 ` Christoph Hellwig
2004-04-02 20:26                   ` Kevin Corry
2004-03-14 18:44         ` Andrew Morton
2004-03-14 18:52           ` Christoph Hellwig
2004-03-14 18:56           ` Chris Mason

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).