Linux-ext4 Archive on lore.kernel.org
 help / color / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	Waiman Long <longman@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Will Deacon <will@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-ext4@vger.kernel.org, cluster-devel@redhat.com
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: [PATCH 10/12] xfs: hold i_rwsem until AIO completes
Date: Tue, 14 Jan 2020 17:12:23 +0100
Message-ID: <20200114161225.309792-11-hch@lst.de> (raw)
In-Reply-To: <20200114161225.309792-1-hch@lst.de>

Switch ext4 from the magic i_dio_count scheme to just hold i_rwsem
until the actual I/O has completed to reduce the locking complexity
and avoid nasty bugs due to missing inode_dio_wait calls.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/scrub/bmap.c    |  1 -
 fs/xfs/xfs_bmap_util.c |  3 ---
 fs/xfs/xfs_file.c      | 47 +++++++++++++-----------------------------
 fs/xfs/xfs_icache.c    |  3 +--
 fs/xfs/xfs_ioctl.c     |  1 -
 fs/xfs/xfs_iops.c      |  5 -----
 fs/xfs/xfs_reflink.c   |  2 --
 7 files changed, 15 insertions(+), 47 deletions(-)

diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index fa6ea6407992..d3e4068d3189 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -45,7 +45,6 @@ xchk_setup_inode_bmap(
 	 */
 	if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
 	    sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) {
-		inode_dio_wait(VFS_I(sc->ip));
 		error = filemap_write_and_wait(VFS_I(sc->ip)->i_mapping);
 		if (error)
 			goto out;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index e62fb5216341..a454f481107e 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -674,9 +674,6 @@ xfs_free_eofblocks(
 		if (error)
 			return error;
 
-		/* wait on dio to ensure i_size has settled */
-		inode_dio_wait(VFS_I(ip));
-
 		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0,
 				&tp);
 		if (error) {
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 0cc843a4a163..d0ee7d2932e4 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -193,9 +193,11 @@ xfs_file_dio_aio_read(
 	} else {
 		xfs_ilock(ip, XFS_IOLOCK_SHARED);
 	}
-	ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0);
-	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
+	ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL,
+			   IOMAP_DIO_RWSEM_SHARED);
+	if (ret != -EIOCBQUEUED)
+		xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 	return ret;
 }
 
@@ -341,15 +343,6 @@ xfs_file_aio_write_checks(
 				xfs_ilock(ip, *iolock);
 				iov_iter_reexpand(from, count);
 			}
-			/*
-			 * We now have an IO submission barrier in place, but
-			 * AIO can do EOF updates during IO completion and hence
-			 * we now need to wait for all of them to drain. Non-AIO
-			 * DIO will have drained before we are given the
-			 * XFS_IOLOCK_EXCL, and so for most cases this wait is a
-			 * no-op.
-			 */
-			inode_dio_wait(inode);
 			drained_dio = true;
 			goto restart;
 		}
@@ -469,13 +462,7 @@ static const struct iomap_dio_ops xfs_dio_write_ops = {
  * needs to do sub-block zeroing and that requires serialisation against other
  * direct IOs to the same block. In this case we need to serialise the
  * submission of the unaligned IOs so that we don't get racing block zeroing in
- * the dio layer.  To avoid the problem with aio, we also need to wait for
- * outstanding IOs to complete so that unwritten extent conversion is completed
- * before we try to map the overlapping block. This is currently implemented by
- * hitting it with a big hammer (i.e. inode_dio_wait()).
- *
- * Returns with locks held indicated by @iolock and errors indicated by
- * negative return values.
+ * the dio layer.
  */
 STATIC ssize_t
 xfs_file_dio_aio_write(
@@ -546,18 +533,21 @@ xfs_file_dio_aio_write(
 	 * xfs_file_aio_write_checks() for other reasons.
 	 */
 	if (unaligned_io) {
-		inode_dio_wait(inode);
-		dio_flags = IOMAP_DIO_SYNCHRONOUS;
-	} else if (iolock == XFS_IOLOCK_EXCL) {
-		xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
-		iolock = XFS_IOLOCK_SHARED;
+		dio_flags = IOMAP_DIO_RWSEM_EXCL | IOMAP_DIO_SYNCHRONOUS;
+	} else {
+		if (iolock == XFS_IOLOCK_EXCL) {
+			xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
+			iolock = XFS_IOLOCK_SHARED;
+		}
+		dio_flags = IOMAP_DIO_RWSEM_SHARED;
 	}
 
 	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
 	ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
 			   &xfs_dio_write_ops, dio_flags);
 out:
-	xfs_iunlock(ip, iolock);
+	if (ret != -EIOCBQUEUED)
+		xfs_iunlock(ip, iolock);
 
 	/*
 	 * No fallback to buffered IO on errors for XFS, direct IO will either
@@ -819,15 +809,6 @@ xfs_file_fallocate(
 	if (error)
 		goto out_unlock;
 
-	/*
-	 * Must wait for all AIO to complete before we continue as AIO can
-	 * change the file size on completion without holding any locks we
-	 * currently hold. We must do this first because AIO can update both
-	 * the on disk and in memory inode sizes, and the operations that follow
-	 * require the in-memory size to be fully up-to-date.
-	 */
-	inode_dio_wait(inode);
-
 	/*
 	 * Now AIO and DIO has drained we flush and (if necessary) invalidate
 	 * the cached range over the first operation we are about to run.
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 8dc2e5414276..9e6f32fd32f5 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1720,8 +1720,7 @@ xfs_prep_free_cowblocks(
 	 */
 	if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) ||
 	    mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
-	    mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
-	    atomic_read(&VFS_I(ip)->i_dio_count))
+	    mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK))
 		return false;
 
 	return true;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 7b35d62ede9f..331453f2c4be 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -548,7 +548,6 @@ xfs_ioc_space(
 	error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
 	if (error)
 		goto out_unlock;
-	inode_dio_wait(inode);
 
 	switch (bf->l_whence) {
 	case 0: /*SEEK_SET*/
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 8afe69ca188b..700edeccc6bf 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -893,11 +893,6 @@ xfs_setattr_size(
 	if (error)
 		return error;
 
-	/*
-	 * Wait for all direct I/O to complete.
-	 */
-	inode_dio_wait(inode);
-
 	/*
 	 * File data changes must be complete before we start the transaction to
 	 * modify the inode.  This needs to be done before joining the inode to
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index de451235c4ee..f775e60ca6f7 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1525,8 +1525,6 @@ xfs_reflink_unshare(
 
 	trace_xfs_reflink_unshare(ip, offset, len);
 
-	inode_dio_wait(inode);
-
 	error = iomap_file_unshare(inode, offset, len,
 			&xfs_buffered_write_iomap_ops);
 	if (error)
-- 
2.24.1


  parent reply index

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-14 16:12 RFC: hold i_rwsem until aio completes Christoph Hellwig
2020-01-14 16:12 ` [PATCH 01/12] mm: fix a comment in sys_swapon Christoph Hellwig
2020-02-10 23:29   ` Andrew Morton
2020-02-12  7:37     ` Christoph Hellwig
2020-01-14 16:12 ` [PATCH 02/12] locking/rwsem: Exit early when held by an anonymous owner Christoph Hellwig
2020-01-14 18:17   ` Waiman Long
2020-01-14 18:25     ` Christoph Hellwig
2020-01-14 18:33       ` Waiman Long
2020-01-14 18:55       ` Waiman Long
2020-01-14 16:12 ` [PATCH 03/12] xfs: fix IOCB_NOWAIT handling in xfs_file_dio_aio_read Christoph Hellwig
2020-01-14 16:12 ` [PATCH 04/12] gfs2: move setting current->backing_dev_info Christoph Hellwig
2020-01-14 16:12 ` [PATCH 05/12] gfs2: fix O_SYNC write handling Christoph Hellwig
2020-02-06 15:31   ` [Cluster-devel] " Andreas Gruenbacher
2020-01-14 16:12 ` [PATCH 06/12] iomap: pass a flags value to iomap_dio_rw Christoph Hellwig
2020-01-14 16:12 ` [PATCH 07/12] iomap: allow holding i_rwsem until aio completion Christoph Hellwig
2020-01-14 16:12 ` [PATCH 08/12] ext4: hold i_rwsem until AIO completes Christoph Hellwig
2020-01-14 21:50   ` Theodore Y. Ts'o
2020-01-15  6:48     ` Christoph Hellwig
2020-01-14 16:12 ` [PATCH 09/12] gfs2: " Christoph Hellwig
2020-01-14 16:12 ` Christoph Hellwig [this message]
2020-01-14 16:12 ` [PATCH 11/12] xfs: don't set IOMAP_DIO_SYNCHRONOUS for unaligned I/O Christoph Hellwig
2020-01-14 16:12 ` [PATCH 12/12] iomap: remove the inode_dio_begin/end calls Christoph Hellwig
2020-01-14 18:47 ` RFC: hold i_rwsem until aio completes Matthew Wilcox
2020-01-15  6:54   ` Christoph Hellwig
2020-01-14 19:27 ` Jason Gunthorpe
2020-01-15  6:56   ` Christoph Hellwig
2020-01-15 13:24     ` Jason Gunthorpe
2020-01-15 14:33       ` Peter Zijlstra
2020-01-15 14:49         ` Jason Gunthorpe
2020-01-15 19:03           ` Waiman Long
2020-01-15 19:07             ` Christoph Hellwig
2020-01-18 22:40         ` Matthew Wilcox
2020-01-15 15:36       ` Christoph Hellwig
2020-01-15 16:26         ` Jason Gunthorpe
2020-01-16 14:00 ` Jan Kara
2020-02-03 17:44   ` Christoph Hellwig
2020-01-18  9:28 ` Dave Chinner
2020-02-03 17:46   ` Christoph Hellwig
2020-02-03 23:02     ` Dave Chinner

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200114161225.309792-11-hch@lst.de \
    --to=hch@lst.de \
    --cc=akpm@linux-foundation.org \
    --cc=cluster-devel@redhat.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=longman@redhat.com \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-ext4 Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-ext4/0 linux-ext4/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-ext4 linux-ext4/ https://lore.kernel.org/linux-ext4 \
		linux-ext4@vger.kernel.org
	public-inbox-index linux-ext4

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-ext4


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git