From: Christoph Hellwig <hch@lst.de>
To: xfs@oss.sgi.com
Cc: linux-nvdimm@ml01.01.org, linux-fsdevel@vger.kernel.org
Subject: [PATCH 7/8] xfs: split direct I/O and DAX path
Date: Wed, 22 Jun 2016 17:27:15 +0200 [thread overview]
Message-ID: <1466609236-23801-8-git-send-email-hch@lst.de> (raw)
In-Reply-To: <1466609236-23801-1-git-send-email-hch@lst.de>
So far the DAX code overloaded the direct I/O code path. There is very little
in common between the two, and untangling them allows to clean up both variants.
As a ѕide effect we also get separate trace points for both I/O types.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/xfs_file.c | 139 ++++++++++++++++++++++++++++++++++++++++++-----------
fs/xfs/xfs_trace.h | 2 +
2 files changed, 112 insertions(+), 29 deletions(-)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 24b267d..0e74325 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -262,13 +262,11 @@ xfs_file_dio_aio_read(
else
target = ip->i_mount->m_ddev_targp;
- if (!IS_DAX(inode)) {
- /* DIO must be aligned to device logical sector size */
- if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
- if (iocb->ki_pos == isize)
- return 0;
- return -EINVAL;
- }
+ /* DIO must be aligned to device logical sector size */
+ if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
+ if (iocb->ki_pos == isize)
+ return 0;
+ return -EINVAL;
}
/*
@@ -317,13 +315,37 @@ xfs_file_dio_aio_read(
}
data = *to;
- if (IS_DAX(inode)) {
- ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
- NULL, 0);
- } else {
- ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
- xfs_get_blocks_direct, NULL, NULL, 0);
+ ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
+ xfs_get_blocks_direct, NULL, NULL, 0);
+ if (ret > 0) {
+ iocb->ki_pos += ret;
+ iov_iter_advance(to, ret);
}
+ xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+
+ file_accessed(iocb->ki_filp);
+ return ret;
+}
+
+STATIC ssize_t
+xfs_file_dax_read(
+ struct kiocb *iocb,
+ struct iov_iter *to)
+{
+ struct address_space *mapping = iocb->ki_filp->f_mapping;
+ struct inode *inode = mapping->host;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct iov_iter data = *to;
+ size_t count = iov_iter_count(to);
+ ssize_t ret = 0;
+
+ trace_xfs_file_dax_read(ip, count, iocb->ki_pos);
+
+ if (!count)
+ return 0; /* skip atime */
+
+ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+ ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0);
if (ret > 0) {
iocb->ki_pos += ret;
iov_iter_advance(to, ret);
@@ -356,7 +378,8 @@ xfs_file_read_iter(
struct kiocb *iocb,
struct iov_iter *to)
{
- struct xfs_mount *mp = XFS_I(file_inode(iocb->ki_filp))->i_mount;
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct xfs_mount *mp = XFS_I(inode)->i_mount;
ssize_t ret = 0;
XFS_STATS_INC(mp, xs_read_calls);
@@ -364,7 +387,9 @@ xfs_file_read_iter(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
- if (iocb->ki_flags & IOCB_DIRECT)
+ if (IS_DAX(inode))
+ ret = xfs_file_dax_read(iocb, to);
+ else if (iocb->ki_flags & IOCB_DIRECT)
ret = xfs_file_dio_aio_read(iocb, to);
else
ret = xfs_file_buffered_aio_read(iocb, to);
@@ -586,8 +611,7 @@ xfs_file_dio_aio_write(
mp->m_rtdev_targp : mp->m_ddev_targp;
/* DIO must be aligned to device logical sector size */
- if (!IS_DAX(inode) &&
- ((iocb->ki_pos | count) & target->bt_logical_sectormask))
+ if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
return -EINVAL;
/* "unaligned" here means not aligned to a filesystem block */
@@ -656,14 +680,9 @@ xfs_file_dio_aio_write(
trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
data = *from;
- if (IS_DAX(inode)) {
- ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
- xfs_end_io_direct_write, 0);
- } else {
- ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
- xfs_get_blocks_direct, xfs_end_io_direct_write,
- NULL, DIO_ASYNC_EXTEND);
- }
+ ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
+ xfs_get_blocks_direct, xfs_end_io_direct_write,
+ NULL, DIO_ASYNC_EXTEND);
/* see generic_file_direct_write() for why this is necessary */
if (mapping->nrpages) {
@@ -680,10 +699,70 @@ out:
xfs_rw_iunlock(ip, iolock);
/*
- * No fallback to buffered IO on errors for XFS. DAX can result in
- * partial writes, but direct IO will either complete fully or fail.
+ * No fallback to buffered IO on errors for XFS, direct IO will either
+ * complete fully or fail.
+ */
+ ASSERT(ret < 0 || ret == count);
+ return ret;
+}
+
+STATIC ssize_t
+xfs_file_dax_write(
+ struct kiocb *iocb,
+ struct iov_iter *from)
+{
+ struct address_space *mapping = iocb->ki_filp->f_mapping;
+ struct inode *inode = mapping->host;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ ssize_t ret = 0;
+ int unaligned_io = 0;
+ int iolock;
+ struct iov_iter data;
+
+ /* "unaligned" here means not aligned to a filesystem block */
+ if ((iocb->ki_pos & mp->m_blockmask) ||
+ ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
+ unaligned_io = 1;
+ iolock = XFS_IOLOCK_EXCL;
+ } else if (mapping->nrpages) {
+ iolock = XFS_IOLOCK_EXCL;
+ } else {
+ iolock = XFS_IOLOCK_SHARED;
+ }
+ xfs_rw_ilock(ip, iolock);
+
+ ret = xfs_file_aio_write_checks(iocb, from, &iolock);
+ if (ret)
+ goto out;
+
+ /*
+ * Yes, even DAX files can have page cache attached to them: A zeroed
+ * page is inserted into the pagecache when we have to serve a write
+ * fault on a hole. It should never be dirtied and can simply be
+ * dropped from the pagecache once we get real data for the page.
*/
- ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip)));
+ if (mapping->nrpages) {
+ ret = invalidate_inode_pages2(mapping);
+ WARN_ON_ONCE(ret);
+ }
+
+ if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
+ xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+ iolock = XFS_IOLOCK_SHARED;
+ }
+
+ trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
+
+ data = *from;
+ ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
+ xfs_end_io_direct_write, 0);
+ if (ret > 0) {
+ iocb->ki_pos += ret;
+ iov_iter_advance(from, ret);
+ }
+out:
+ xfs_rw_iunlock(ip, iolock);
return ret;
}
@@ -765,7 +844,9 @@ xfs_file_write_iter(
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
- if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
+ if (IS_DAX(inode))
+ ret = xfs_file_dax_write(iocb, from);
+ else if (iocb->ki_flags & IOCB_DIRECT)
ret = xfs_file_dio_aio_write(iocb, from);
else
ret = xfs_file_buffered_aio_write(iocb, from);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 2504f94..1451690 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1165,8 +1165,10 @@ DEFINE_EVENT(xfs_file_class, name, \
TP_ARGS(ip, count, offset))
DEFINE_RW_EVENT(xfs_file_buffered_read);
DEFINE_RW_EVENT(xfs_file_direct_read);
+DEFINE_RW_EVENT(xfs_file_dax_read);
DEFINE_RW_EVENT(xfs_file_buffered_write);
DEFINE_RW_EVENT(xfs_file_direct_write);
+DEFINE_RW_EVENT(xfs_file_dax_write);
DEFINE_RW_EVENT(xfs_file_splice_read);
DECLARE_EVENT_CLASS(xfs_page_class,
--
2.1.4
next prev parent reply other threads:[~2016-06-22 15:44 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-06-22 15:27 xfs: untangle the direct I/O and DAX path, fix DAX locking Christoph Hellwig
2016-06-22 15:27 ` [PATCH 1/8] xfs: don't pass ioflags around in the ioctl path Christoph Hellwig
2016-06-22 15:27 ` [PATCH 2/8] xfs: kill ioflags Christoph Hellwig
2016-06-22 15:27 ` [PATCH 3/8] xfs: remove s_maxbytes enforcement in xfs_file_read_iter Christoph Hellwig
2016-06-22 15:27 ` [PATCH 4/8] xfs: split xfs_file_read_iter into buffered and direct I/O helpers Christoph Hellwig
2016-06-22 15:27 ` [PATCH 5/8] xfs: stop using generic_file_read_iter for direct I/O Christoph Hellwig
2016-06-22 15:27 ` [PATCH 6/8] xfs: direct calls in the direct I/O path Christoph Hellwig
2016-06-22 15:27 ` Christoph Hellwig [this message]
2016-09-29 2:53 ` [PATCH 7/8] xfs: split direct I/O and DAX path Darrick J. Wong
2016-09-29 8:38 ` aio completions vs file_accessed race, was: " Christoph Hellwig
2016-09-29 20:18 ` Christoph Hellwig
2016-09-29 20:18 ` Christoph Hellwig
2016-09-29 20:33 ` Darrick J. Wong
2016-06-22 15:27 ` [PATCH 8/8] xfs: fix locking for DAX writes Christoph Hellwig
2016-06-23 14:22 ` Boaz Harrosh
2016-06-23 23:24 ` xfs: untangle the direct I/O and DAX path, fix DAX locking Dave Chinner
2016-06-24 1:14 ` Dan Williams
2016-06-24 7:13 ` Dave Chinner
2016-06-24 7:31 ` Christoph Hellwig
2016-06-24 7:26 ` Christoph Hellwig
2016-06-24 23:00 ` Dave Chinner
2016-06-28 13:10 ` Christoph Hellwig
2016-06-28 13:27 ` Boaz Harrosh
2016-06-28 13:39 ` Christoph Hellwig
2016-06-28 13:56 ` Boaz Harrosh
2016-06-28 15:39 ` Christoph Hellwig
2016-06-29 12:23 ` Boaz Harrosh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1466609236-23801-8-git-send-email-hch@lst.de \
--to=hch@lst.de \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-nvdimm@ml01.01.org \
--cc=xfs@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).