linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: axboe@kernel.dk, tytso@mit.edu, david@fromorbit.com,
	jmoyer@redhat.com, bpm@sgi.com, viro@zeniv.linux.org.uk,
	jack@suse.cz
Cc: linux-fsdevel@vger.kernel.org, hch@infradead.org,
	linux-ext4@vger.kernel.org, linux-kernel@vger.kernel.org,
	xfs@oss.sgi.com, djwong+kernel@djwong.org
Subject: [PATCH 4/9] xfs: honor the O_SYNC flag for aysnchronous direct I/O requests
Date: Mon, 19 Nov 2012 23:51:14 -0800	[thread overview]
Message-ID: <20121120075114.25270.40680.stgit@blackbox.djwong.org> (raw)
In-Reply-To: <20121120074116.24645.36369.stgit@blackbox.djwong.org>

If a file is opened with O_SYNC|O_DIRECT, the drive cache does not get
flushed after the write completion for AIOs.  This patch attempts to fix
that problem by marking an I/O as requiring a cache flush in endio
processing, and then issuing the cache flush after any unwritten extent
conversion is done.

From: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
[darrick.wong@oracle.com: Rework patch to use per-mount workqueues]
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c  |   52 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/xfs/xfs_aops.h  |    1 +
 fs/xfs/xfs_mount.h |    1 +
 fs/xfs/xfs_super.c |    8 ++++++++
 4 files changed, 61 insertions(+), 1 deletion(-)


diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index e57e2da..9cebbb7 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -173,6 +173,24 @@ xfs_setfilesize(
 }
 
 /*
+ * In the case of synchronous, AIO, O_DIRECT writes, we need to flush
+ * the disk cache when the I/O is complete.
+ */
+STATIC bool
+xfs_ioend_needs_cache_flush(
+	struct xfs_ioend	*ioend)
+{
+	struct xfs_inode *ip = XFS_I(ioend->io_inode);
+	struct xfs_mount *mp = ip->i_mount;
+
+	if (!(mp->m_flags & XFS_MOUNT_BARRIER))
+		return false;
+
+	return IS_SYNC(ioend->io_inode) ||
+	       (ioend->io_iocb->ki_filp->f_flags & O_DSYNC);
+}
+
+/*
  * Schedule IO completion handling on the final put of an ioend.
  *
  * If there is no work to do we might as well call it a day and free the
@@ -189,11 +207,30 @@ xfs_finish_ioend(
 			queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
 		else if (ioend->io_append_trans)
 			queue_work(mp->m_data_workqueue, &ioend->io_work);
+		else if (ioend->io_needs_fsync)
+			queue_work(mp->m_aio_blkdev_flush_wq, &ioend->io_work);
 		else
 			xfs_destroy_ioend(ioend);
 	}
 }
 
+STATIC int
+xfs_ioend_force_cache_flush(
+	xfs_ioend_t	*ioend)
+{
+	struct xfs_inode *ip = XFS_I(ioend->io_inode);
+	struct xfs_mount *mp = ip->i_mount;
+	int		err = 0;
+	int		datasync;
+
+	datasync = !IS_SYNC(ioend->io_inode) &&
+		!(ioend->io_iocb->ki_filp->f_flags & __O_SYNC);
+	err = do_xfs_file_fsync(ip, mp, datasync);
+	xfs_destroy_ioend(ioend);
+	/* do_xfs_file_fsync returns -errno. our caller expects positive. */
+	return -err;
+}
+
 /*
  * IO write completion.
  */
@@ -250,12 +287,22 @@ xfs_end_io(
 		error = xfs_setfilesize(ioend);
 		if (error)
 			ioend->io_error = -error;
+	} else if (ioend->io_needs_fsync) {
+		error = xfs_ioend_force_cache_flush(ioend);
+		if (error && ioend->io_result > 0)
+			ioend->io_error = -error;
+		ioend->io_needs_fsync = 0;
 	} else {
 		ASSERT(!xfs_ioend_is_append(ioend));
 	}
 
 done:
-	xfs_destroy_ioend(ioend);
+	/* the honoring of O_SYNC has to be done last */
+	if (ioend->io_needs_fsync) {
+		atomic_inc(&ioend->io_remaining);
+		xfs_finish_ioend(ioend);
+	} else
+		xfs_destroy_ioend(ioend);
 }
 
 /*
@@ -292,6 +339,7 @@ xfs_alloc_ioend(
 	atomic_set(&ioend->io_remaining, 1);
 	ioend->io_isasync = 0;
 	ioend->io_isdirect = 0;
+	ioend->io_needs_fsync = 0;
 	ioend->io_error = 0;
 	ioend->io_list = NULL;
 	ioend->io_type = type;
@@ -1409,6 +1457,8 @@ xfs_end_io_direct_write(
 
 	if (is_async) {
 		ioend->io_isasync = 1;
+		if (xfs_ioend_needs_cache_flush(ioend))
+			ioend->io_needs_fsync = 1;
 		xfs_finish_ioend(ioend);
 	} else {
 		xfs_finish_ioend_sync(ioend);
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index c325abb..e48c7c2 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -47,6 +47,7 @@ typedef struct xfs_ioend {
 	atomic_t		io_remaining;	/* hold count */
 	unsigned int		io_isasync : 1;	/* needs aio_complete */
 	unsigned int		io_isdirect : 1;/* direct I/O */
+	unsigned int		io_needs_fsync : 1; /* aio+dio+o_sync */
 	struct inode		*io_inode;	/* file being written to */
 	struct buffer_head	*io_buffer_head;/* buffer linked list head */
 	struct buffer_head	*io_buffer_tail;/* buffer linked list tail */
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index deee09e..ecd3d2e 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -209,6 +209,7 @@ typedef struct xfs_mount {
 	struct workqueue_struct	*m_data_workqueue;
 	struct workqueue_struct	*m_unwritten_workqueue;
 	struct workqueue_struct	*m_cil_workqueue;
+	struct workqueue_struct *m_aio_blkdev_flush_wq;
 } xfs_mount_t;
 
 /*
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 26a09bd..b05b557 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -863,8 +863,15 @@ xfs_init_mount_workqueues(
 			WQ_MEM_RECLAIM, 0, mp->m_fsname);
 	if (!mp->m_cil_workqueue)
 		goto out_destroy_unwritten;
+
+	mp->m_aio_blkdev_flush_wq = alloc_workqueue("xfs-aio-blkdev-flush/%s",
+			WQ_MEM_RECLAIM, 0, mp->m_fsname);
+	if (!mp->m_aio_blkdev_flush_wq)
+		goto out_destroy_cil_queue;
 	return 0;
 
+out_destroy_cil_queue:
+	destroy_workqueue(mp->m_cil_workqueue);
 out_destroy_unwritten:
 	destroy_workqueue(mp->m_unwritten_workqueue);
 out_destroy_data_iodone_queue:
@@ -877,6 +884,7 @@ STATIC void
 xfs_destroy_mount_workqueues(
 	struct xfs_mount	*mp)
 {
+	destroy_workqueue(mp->m_aio_blkdev_flush_wq);
 	destroy_workqueue(mp->m_cil_workqueue);
 	destroy_workqueue(mp->m_data_workqueue);
 	destroy_workqueue(mp->m_unwritten_workqueue);



  parent reply	other threads:[~2012-11-20  8:16 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-11-20  7:41 [PATCH v1 0/9] fs: fix up AIO+DIO+O_SYNC to actually do the sync part Darrick J. Wong
2012-11-20  7:41 ` [PATCH 1/9] vfs: Handle O_SYNC AIO DIO in generic code properly Darrick J. Wong
2012-11-21 10:08   ` Christoph Hellwig
2012-11-21 16:58     ` Jeff Moyer
2012-11-21 18:29       ` Christoph Hellwig
2012-11-21 18:38         ` Jeff Moyer
2012-11-21 21:37         ` Jan Kara
2012-11-21 23:09           ` Jeffrey Ellis
2012-11-20  7:41 ` [PATCH 2/9] ext4: honor the O_SYNC flag for aysnchronous direct I/O requests Darrick J. Wong
2012-11-20 10:07   ` Jan Kara
2012-11-20 20:02     ` Jeff Moyer
2012-11-21  0:56       ` Jan Kara
2012-11-21 14:09         ` Jeff Moyer
2012-11-21 16:54           ` Jan Kara
2012-11-20  7:41 ` [PATCH 3/9] xfs: factor out everything but the filemap_write_and_wait from xfs_file_fsync Darrick J. Wong
2012-11-20 10:47   ` Dave Chinner
2012-11-21 10:09   ` Christoph Hellwig
2012-11-21 14:10     ` Jeff Moyer
2012-11-20  7:51 ` [PATCH 7/9] ocfs2: Use generic handlers of O_SYNC AIO DIO Darrick J. Wong
2012-11-21 19:32   ` Joel Becker
2012-11-20  7:51 ` [PATCH 5/9] btrfs: " Darrick J. Wong
2012-11-20  7:51 ` [PATCH 6/9] gfs2: " Darrick J. Wong
2012-11-20  7:51 ` Darrick J. Wong [this message]
2012-11-20 10:24   ` [PATCH 4/9] xfs: honor the O_SYNC flag for aysnchronous direct I/O requests Jan Kara
2012-11-20 11:20   ` Dave Chinner
2012-11-20 19:42     ` Jeff Moyer
2012-11-20 20:08       ` Dave Chinner
2012-11-20  7:51 ` [PATCH 8/9] filemap: don't call generic_write_sync for -EIOCBQUEUED Darrick J. Wong
2012-11-20  7:51 ` [PATCH 9/9] blkdev: Fix up AIO+DIO+O_SYNC to do the sync part correctly Darrick J. Wong
2012-11-20 10:15   ` Jan Kara
2012-11-20 20:47     ` Jeff Moyer
2012-11-21  0:57       ` Jan Kara
2012-11-20  8:38 ` [PATCH v1 0/9] fs: fix up AIO+DIO+O_SYNC to actually do the sync part Darrick J. Wong
2012-11-20 14:23 ` Jeff Moyer
2012-11-20 18:57   ` Darrick J. Wong
2012-11-20 19:05     ` Jeff Moyer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20121120075114.25270.40680.stgit@blackbox.djwong.org \
    --to=darrick.wong@oracle.com \
    --cc=axboe@kernel.dk \
    --cc=bpm@sgi.com \
    --cc=david@fromorbit.com \
    --cc=djwong+kernel@djwong.org \
    --cc=hch@infradead.org \
    --cc=jack@suse.cz \
    --cc=jmoyer@redhat.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tytso@mit.edu \
    --cc=viro@zeniv.linux.org.uk \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).