* [PATCH 0/5] RFC: log all inode updates
@ 2012-02-07 18:10 Christoph Hellwig
  2012-02-07 18:10 ` [PATCH 1/5] xfs: use per-filesystem I/O completion workqueues Christoph Hellwig
  2012-02-07 18:10 ` [PATCH 2/5] xfs: do not require an ioend for new EOF calculation Christoph Hellwig
  0 siblings, 2 replies; 5+ messages in thread
From: Christoph Hellwig @ 2012-02-07 18:10 UTC (permalink / raw)
  To: xfs

This is my current state of the series to log all inode updates.  I
haven't really touched it for a while, and it's been solid in testing.
What is still missing is putting back an fdatasync that is optimized
over fsync, so this isn't quite ready for primetime yet.  But given
that the existing patches shouldn't change, additional review and testing
are more than welcome.


* [PATCH 1/5] xfs: use per-filesystem I/O completion workqueues
  2012-02-07 18:10 [PATCH 0/5] RFC: log all inode updates Christoph Hellwig
@ 2012-02-07 18:10 ` Christoph Hellwig
  2012-02-16  6:57   ` Dave Chinner
  2012-02-07 18:10 ` [PATCH 2/5] xfs: do not require an ioend for new EOF calculation Christoph Hellwig
  1 sibling, 1 reply; 5+ messages in thread
From: Christoph Hellwig @ 2012-02-07 18:10 UTC (permalink / raw)
  To: xfs

The new concurrency managed workqueues are cheap enough that we can create
per-filesystem workqueues instead of global ones.  This allows us to remove
the trylock-or-defer scheme on the ilock, which is no longer helpful once we
hold outstanding log reservations until a size update has finished.

Signed-off-by: Christoph Hellwig <hch@lst.de>

---
 fs/xfs/xfs_aops.c  |   39 ++++++++++-----------------------------
 fs/xfs/xfs_aops.h  |    2 --
 fs/xfs/xfs_buf.c   |   17 -----------------
 fs/xfs/xfs_mount.h |    3 +++
 fs/xfs/xfs_super.c |   43 ++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 55 insertions(+), 49 deletions(-)

Index: xfs/fs/xfs/xfs_aops.c
===================================================================
--- xfs.orig/fs/xfs/xfs_aops.c	2012-01-30 11:54:44.000000000 +0100
+++ xfs/fs/xfs/xfs_aops.c	2012-01-30 11:56:50.096005059 +0100
@@ -126,21 +126,15 @@ static inline bool xfs_ioend_is_append(s
 
 /*
  * Update on-disk file size now that data has been written to disk.
- *
- * This function does not block as blocking on the inode lock in IO completion
- * can lead to IO completion order dependency deadlocks.. If it can't get the
- * inode ilock it will return EAGAIN. Callers must handle this.
  */
-STATIC int
+STATIC void
 xfs_setfilesize(
-	xfs_ioend_t		*ioend)
+	struct xfs_ioend	*ioend)
 {
-	xfs_inode_t		*ip = XFS_I(ioend->io_inode);
+	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
 	xfs_fsize_t		isize;
 
-	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
-		return EAGAIN;
-
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	isize = xfs_ioend_new_eof(ioend);
 	if (isize) {
 		trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
@@ -149,7 +143,6 @@ xfs_setfilesize(
 	}
 
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	return 0;
 }
 
 /*
@@ -163,10 +156,12 @@ xfs_finish_ioend(
 	struct xfs_ioend	*ioend)
 {
 	if (atomic_dec_and_test(&ioend->io_remaining)) {
+		struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
+
 		if (ioend->io_type == IO_UNWRITTEN)
-			queue_work(xfsconvertd_workqueue, &ioend->io_work);
+			queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
 		else if (xfs_ioend_is_append(ioend))
-			queue_work(xfsdatad_workqueue, &ioend->io_work);
+			queue_work(mp->m_data_workqueue, &ioend->io_work);
 		else
 			xfs_destroy_ioend(ioend);
 	}
@@ -207,23 +202,9 @@ xfs_end_io(
 	 * We might have to update the on-disk file size after extending
 	 * writes.
 	 */
-	error = xfs_setfilesize(ioend);
-	ASSERT(!error || error == EAGAIN);
-
+	xfs_setfilesize(ioend);
 done:
-	/*
-	 * If we didn't complete processing of the ioend, requeue it to the
-	 * tail of the workqueue for another attempt later. Otherwise destroy
-	 * it.
-	 */
-	if (error == EAGAIN) {
-		atomic_inc(&ioend->io_remaining);
-		xfs_finish_ioend(ioend);
-		/* ensure we don't spin on blocked ioends */
-		delay(1);
-	} else {
-		xfs_destroy_ioend(ioend);
-	}
+	xfs_destroy_ioend(ioend);
 }
 
 /*
Index: xfs/fs/xfs/xfs_aops.h
===================================================================
--- xfs.orig/fs/xfs/xfs_aops.h	2012-01-04 16:09:18.886530605 +0100
+++ xfs/fs/xfs/xfs_aops.h	2012-01-30 11:56:50.096005059 +0100
@@ -18,8 +18,6 @@
 #ifndef __XFS_AOPS_H__
 #define __XFS_AOPS_H__
 
-extern struct workqueue_struct *xfsdatad_workqueue;
-extern struct workqueue_struct *xfsconvertd_workqueue;
 extern mempool_t *xfs_ioend_pool;
 
 /*
Index: xfs/fs/xfs/xfs_buf.c
===================================================================
--- xfs.orig/fs/xfs/xfs_buf.c	2012-01-24 20:57:27.075957561 +0100
+++ xfs/fs/xfs/xfs_buf.c	2012-01-30 11:56:50.096005059 +0100
@@ -45,8 +45,6 @@ static kmem_zone_t *xfs_buf_zone;
 STATIC int xfsbufd(void *);
 
 static struct workqueue_struct *xfslogd_workqueue;
-struct workqueue_struct *xfsdatad_workqueue;
-struct workqueue_struct *xfsconvertd_workqueue;
 
 #ifdef XFS_BUF_LOCK_TRACKING
 # define XB_SET_OWNER(bp)	((bp)->b_last_holder = current->pid)
@@ -1793,21 +1791,8 @@ xfs_buf_init(void)
 	if (!xfslogd_workqueue)
 		goto out_free_buf_zone;
 
-	xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
-	if (!xfsdatad_workqueue)
-		goto out_destroy_xfslogd_workqueue;
-
-	xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
-						WQ_MEM_RECLAIM, 1);
-	if (!xfsconvertd_workqueue)
-		goto out_destroy_xfsdatad_workqueue;
-
 	return 0;
 
- out_destroy_xfsdatad_workqueue:
-	destroy_workqueue(xfsdatad_workqueue);
- out_destroy_xfslogd_workqueue:
-	destroy_workqueue(xfslogd_workqueue);
  out_free_buf_zone:
 	kmem_zone_destroy(xfs_buf_zone);
  out:
@@ -1817,8 +1802,6 @@ xfs_buf_init(void)
 void
 xfs_buf_terminate(void)
 {
-	destroy_workqueue(xfsconvertd_workqueue);
-	destroy_workqueue(xfsdatad_workqueue);
 	destroy_workqueue(xfslogd_workqueue);
 	kmem_zone_destroy(xfs_buf_zone);
 }
Index: xfs/fs/xfs/xfs_super.c
===================================================================
--- xfs.orig/fs/xfs/xfs_super.c	2012-01-24 20:57:27.000000000 +0100
+++ xfs/fs/xfs/xfs_super.c	2012-01-30 11:56:50.099338392 +0100
@@ -760,6 +760,40 @@ xfs_setup_devices(
 	return 0;
 }
 
+STATIC int
+xfs_init_mount_workqueues(
+	struct xfs_mount	*mp)
+{
+#define XFS_WQ_NAME_LEN		512
+	char			name[XFS_WQ_NAME_LEN];
+
+	snprintf(name, XFS_WQ_NAME_LEN, "xfs-data/%s", mp->m_fsname);
+	mp->m_data_workqueue = alloc_workqueue(name, WQ_MEM_RECLAIM, 1);
+	if (!mp->m_data_workqueue)
+		goto out;
+
+	snprintf(name, XFS_WQ_NAME_LEN, "xfs-conv/%s", mp->m_fsname);
+	mp->m_unwritten_workqueue = alloc_workqueue(name, WQ_MEM_RECLAIM, 1);
+	if (!mp->m_unwritten_workqueue)
+		goto out_destroy_data_iodone_queue;
+
+	return 0;
+
+out_destroy_data_iodone_queue:
+	destroy_workqueue(mp->m_data_workqueue);
+out:
+	return -ENOMEM;
+#undef XFS_WQ_NAME_LEN
+}
+
+STATIC void
+xfs_destroy_mount_workqueues(
+	struct xfs_mount	*mp)
+{
+	destroy_workqueue(mp->m_data_workqueue);
+	destroy_workqueue(mp->m_unwritten_workqueue);
+}
+
 /* Catch misguided souls that try to use this interface on XFS */
 STATIC struct inode *
 xfs_fs_alloc_inode(
@@ -983,6 +1017,7 @@ xfs_fs_put_super(
 	xfs_unmountfs(mp);
 	xfs_freesb(mp);
 	xfs_icsb_destroy_counters(mp);
+	xfs_destroy_mount_workqueues(mp);
 	xfs_close_devices(mp);
 	xfs_free_fsname(mp);
 	kfree(mp);
@@ -1309,10 +1344,14 @@ xfs_fs_fill_super(
 	if (error)
 		goto out_free_fsname;
 
-	error = xfs_icsb_init_counters(mp);
+	error = xfs_init_mount_workqueues(mp);
 	if (error)
 		goto out_close_devices;
 
+	error = xfs_icsb_init_counters(mp);
+	if (error)
+		goto out_destroy_workqueues;
+
 	error = xfs_readsb(mp, flags);
 	if (error)
 		goto out_destroy_counters;
@@ -1375,6 +1414,8 @@ xfs_fs_fill_super(
 	xfs_freesb(mp);
  out_destroy_counters:
 	xfs_icsb_destroy_counters(mp);
+out_destroy_workqueues:
+	xfs_destroy_mount_workqueues(mp);
  out_close_devices:
 	xfs_close_devices(mp);
  out_free_fsname:
Index: xfs/fs/xfs/xfs_mount.h
===================================================================
--- xfs.orig/fs/xfs/xfs_mount.h	2012-01-06 16:07:32.405601416 +0100
+++ xfs/fs/xfs/xfs_mount.h	2012-01-30 11:56:50.099338392 +0100
@@ -211,6 +211,9 @@ typedef struct xfs_mount {
 	struct shrinker		m_inode_shrink;	/* inode reclaim shrinker */
 	int64_t			m_low_space[XFS_LOWSP_MAX];
 						/* low free space thresholds */
+
+	struct workqueue_struct	*m_data_workqueue;
+	struct workqueue_struct	*m_unwritten_workqueue;
 } xfs_mount_t;
 
 /*


* [PATCH 2/5] xfs: do not require an ioend for new EOF calculation
  2012-02-07 18:10 [PATCH 0/5] RFC: log all inode updates Christoph Hellwig
  2012-02-07 18:10 ` [PATCH 1/5] xfs: use per-filesystem I/O completion workqueues Christoph Hellwig
@ 2012-02-07 18:10 ` Christoph Hellwig
  2012-02-16  6:58   ` Dave Chinner
  1 sibling, 1 reply; 5+ messages in thread
From: Christoph Hellwig @ 2012-02-07 18:10 UTC (permalink / raw)
  To: xfs

Replace xfs_ioend_new_eof with a new inline xfs_new_eof helper that
doesn't require an ioend, and is also available outside of xfs_aops.c.

Also make the code a bit clearer by using a normal if statement
instead of a slightly misleading MIN().

Signed-off-by: Christoph Hellwig <hch@lst.de>

---
 fs/xfs/xfs_aops.c  |   24 ++++--------------------
 fs/xfs/xfs_inode.h |   14 ++++++++++++++
 2 files changed, 18 insertions(+), 20 deletions(-)

Index: xfs/fs/xfs/xfs_aops.c
===================================================================
--- xfs.orig/fs/xfs/xfs_aops.c	2011-11-30 11:05:19.260046232 +0100
+++ xfs/fs/xfs/xfs_aops.c	2011-11-30 11:06:07.983115611 +0100
@@ -99,23 +99,6 @@ xfs_destroy_ioend(
 }
 
 /*
- * If the end of the current ioend is beyond the current EOF,
- * return the new EOF value, otherwise zero.
- */
-STATIC xfs_fsize_t
-xfs_ioend_new_eof(
-	xfs_ioend_t		*ioend)
-{
-	xfs_inode_t		*ip = XFS_I(ioend->io_inode);
-	xfs_fsize_t		isize;
-	xfs_fsize_t		bsize;
-
-	bsize = ioend->io_offset + ioend->io_size;
-	isize = MIN(i_size_read(VFS_I(ip)), bsize);
-	return isize > ip->i_d.di_size ? isize : 0;
-}
-
-/*
  * Fast and loose check if this write could update the on-disk inode size.
  */
 static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
@@ -135,7 +118,7 @@ xfs_setfilesize(
 	xfs_fsize_t		isize;
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	isize = xfs_ioend_new_eof(ioend);
+	isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
 	if (isize) {
 		trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
 		ip->i_d.di_size = isize;
@@ -357,6 +340,7 @@ xfs_submit_ioend_bio(
 	xfs_ioend_t		*ioend,
 	struct bio		*bio)
 {
+	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
 	atomic_inc(&ioend->io_remaining);
 	bio->bi_private = ioend;
 	bio->bi_end_io = xfs_end_bio;
@@ -365,8 +349,8 @@ xfs_submit_ioend_bio(
 	 * If the I/O is beyond EOF we mark the inode dirty immediately
 	 * but don't update the inode size until I/O completion.
 	 */
-	if (xfs_ioend_new_eof(ioend))
-		xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
+	if (xfs_new_eof(ip, ioend->io_offset + ioend->io_size))
+		xfs_mark_inode_dirty(ip);
 
 	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
 }
Index: xfs/fs/xfs/xfs_inode.h
===================================================================
--- xfs.orig/fs/xfs/xfs_inode.h	2011-11-30 11:05:16.670060264 +0100
+++ xfs/fs/xfs/xfs_inode.h	2011-11-30 11:06:03.769805103 +0100
@@ -275,6 +275,20 @@ static inline xfs_fsize_t XFS_ISIZE(stru
 }
 
 /*
+ * If this I/O goes past the on-disk inode size update it unless it would
+ * be past the current in-core inode size.
+ */
+static inline xfs_fsize_t
+xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size)
+{
+	xfs_fsize_t i_size = i_size_read(VFS_I(ip));
+
+	if (new_size > i_size)
+		new_size = i_size;
+	return new_size > ip->i_d.di_size ? new_size : 0;
+}
+
+/*
  * i_flags helper functions
  */
 static inline void


* Re: [PATCH 1/5] xfs: use per-filesystem I/O completion workqueues
  2012-02-07 18:10 ` [PATCH 1/5] xfs: use per-filesystem I/O completion workqueues Christoph Hellwig
@ 2012-02-16  6:57   ` Dave Chinner
  0 siblings, 0 replies; 5+ messages in thread
From: Dave Chinner @ 2012-02-16  6:57 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On Tue, Feb 07, 2012 at 01:10:38PM -0500, Christoph Hellwig wrote:
> The new concurrency managed workqueues are cheap enough that we can create
> per-filesystem workqueues instead of global ones.  This allows us to remove
> the trylock-or-defer scheme on the ilock, which is no longer helpful once we
> hold outstanding log reservations until a size update has finished.

Agreed.

And it removes some of the issues with dependencies between
filesystems like loopback-mounted XFS-on-XFS filesystems, though to
fix them completely we also need per-filesystem log workqueues.  Is
it worth converting that one as well in this patch, just to remove
all the global workqueues?


> Signed-off-by: Christoph Hellwig <hch@lst.de>

....
> +STATIC int
> +xfs_init_mount_workqueues(
> +	struct xfs_mount	*mp)
> +{
> +#define XFS_WQ_NAME_LEN		512
> +	char			name[XFS_WQ_NAME_LEN];
> +
> +	snprintf(name, XFS_WQ_NAME_LEN, "xfs-data/%s", mp->m_fsname);
> +	mp->m_data_workqueue = alloc_workqueue(name, WQ_MEM_RECLAIM, 1);
> +	if (!mp->m_data_workqueue)
> +		goto out;

As of b196be8 ("workqueue: make alloc_workqueue() take printf fmt and
args for name"), this dance is not necessary.  Something like:

	mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
					       WQ_MEM_RECLAIM, 1, mp->m_fsname);

will work just fine.
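
With that, the whole name-buffer dance goes away; a minimal sketch of
the init helper on top of such a tree (assuming b196be8 is present)
might look like:

	STATIC int
	xfs_init_mount_workqueues(
		struct xfs_mount	*mp)
	{
		/* per-mount queue for regular data I/O completions */
		mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
				WQ_MEM_RECLAIM, 1, mp->m_fsname);
		if (!mp->m_data_workqueue)
			goto out;

		/* per-mount queue for unwritten extent conversions */
		mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
				WQ_MEM_RECLAIM, 1, mp->m_fsname);
		if (!mp->m_unwritten_workqueue)
			goto out_destroy_data_iodone_queue;

		return 0;

	out_destroy_data_iodone_queue:
		destroy_workqueue(mp->m_data_workqueue);
	out:
		return -ENOMEM;
	}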

Also, I think we want significant parallelism on this workqueue -
having an inode block on an ilock should not stop us from processing
other ioends in the same fs....
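
For example (just a sketch - passing 0 for max_active lets the
workqueue code use its default concurrency limit instead of
restricting the queue to one work item at a time):

	mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
			WQ_MEM_RECLAIM, 0, mp->m_fsname);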

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com


* Re: [PATCH 2/5] xfs: do not require an ioend for new EOF calculation
  2012-02-07 18:10 ` [PATCH 2/5] xfs: do not require an ioend for new EOF calculation Christoph Hellwig
@ 2012-02-16  6:58   ` Dave Chinner
  0 siblings, 0 replies; 5+ messages in thread
From: Dave Chinner @ 2012-02-16  6:58 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On Tue, Feb 07, 2012 at 01:10:39PM -0500, Christoph Hellwig wrote:
> Replace xfs_ioend_new_eof with a new inline xfs_new_eof helper that
> doesn't require an ioend, and is also available outside of xfs_aops.c.
> 
> Also make the code a bit clearer by using a normal if statement
> instead of a slightly misleading MIN().
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks good.

Reviewed-by: Dave Chinner <dchinner@redhat.com>

-- 
Dave Chinner
david@fromorbit.com

