Linux-XFS Archive on lore.kernel.org
 help / color / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	Waiman Long <longman@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Will Deacon <will@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-ext4@vger.kernel.org, cluster-devel@redhat.com
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: [PATCH 08/12] ext4: hold i_rwsem until AIO completes
Date: Tue, 14 Jan 2020 17:12:21 +0100
Message-ID: <20200114161225.309792-9-hch@lst.de> (raw)
In-Reply-To: <20200114161225.309792-1-hch@lst.de>

Switch ext4 from the magic i_dio_count scheme to just hold i_rwsem
until the actual I/O has completed to reduce the locking complexity
and avoid nasty bugs due to missing inode_dio_wait calls.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/ext4/extents.c     | 12 ------------
 fs/ext4/file.c        | 21 +++++++++++++--------
 fs/ext4/inode.c       | 11 -----------
 fs/ext4/ioctl.c       |  5 -----
 fs/ext4/move_extent.c |  4 ----
 5 files changed, 13 insertions(+), 40 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 0e8708b77da6..b6aa2d249b30 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4777,9 +4777,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 	if (mode & FALLOC_FL_KEEP_SIZE)
 		flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
 
-	/* Wait all existing dio workers, newcomers will block on i_mutex */
-	inode_dio_wait(inode);
-
 	/* Preallocate the range including the unaligned edges */
 	if (partial_begin || partial_end) {
 		ret = ext4_alloc_file_blocks(file,
@@ -4949,9 +4946,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 			goto out;
 	}
 
-	/* Wait all existing dio workers, newcomers will block on i_mutex */
-	inode_dio_wait(inode);
-
 	ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
 	if (ret)
 		goto out;
@@ -5525,9 +5519,6 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 		goto out_mutex;
 	}
 
-	/* Wait for existing dio to complete */
-	inode_dio_wait(inode);
-
 	/*
 	 * Prevent page faults from reinstantiating pages we have released from
 	 * page cache.
@@ -5678,9 +5669,6 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 		goto out_mutex;
 	}
 
-	/* Wait for existing dio to complete */
-	inode_dio_wait(inode);
-
 	/*
 	 * Prevent page faults from reinstantiating pages we have released from
 	 * page cache.
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 08b603d0c638..b3410a3ede27 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -74,9 +74,10 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		return generic_file_read_iter(iocb, to);
 	}
 
-	ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0);
-	inode_unlock_shared(inode);
-
+	ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL,
+			   IOMAP_DIO_RWSEM_SHARED);
+	if (ret != -EIOCBQUEUED)
+		inode_unlock_shared(inode);
 	file_accessed(iocb->ki_filp);
 	return ret;
 }
@@ -405,7 +406,6 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	    !is_sync_kiocb(iocb) && ext4_unaligned_aio(inode, from, offset)) {
 		unaligned_aio = true;
 		dio_flags |= IOMAP_DIO_SYNCHRONOUS;
-		inode_dio_wait(inode);
 	}
 
 	/*
@@ -416,7 +416,10 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (!unaligned_aio && ext4_overwrite_io(inode, offset, count) &&
 	    ext4_should_dioread_nolock(inode)) {
 		overwrite = true;
+		dio_flags |= IOMAP_DIO_RWSEM_SHARED;
 		downgrade_write(&inode->i_rwsem);
+	} else {
+		dio_flags |= IOMAP_DIO_RWSEM_EXCL;
 	}
 
 	if (offset + count > EXT4_I(inode)->i_disksize) {
@@ -444,10 +447,12 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		ret = ext4_handle_inode_extension(inode, offset, ret, count);
 
 out:
-	if (overwrite)
-		inode_unlock_shared(inode);
-	else
-		inode_unlock(inode);
+	if (ret != -EIOCBQUEUED) {
+		if (overwrite)
+			inode_unlock_shared(inode);
+		else
+			inode_unlock(inode);
+	}
 
 	if (ret >= 0 && iov_iter_count(from)) {
 		ssize_t err;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 629a25d999f0..e2dac0727ab0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3965,9 +3965,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 
 	}
 
-	/* Wait all existing dio workers, newcomers will block on i_mutex */
-	inode_dio_wait(inode);
-
 	/*
 	 * Prevent page faults from reinstantiating pages we have released from
 	 * page cache.
@@ -5263,11 +5260,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 				if (error)
 					goto err_out;
 			}
-			/*
-			 * Blocks are going to be removed from the inode. Wait
-			 * for dio in flight.
-			 */
-			inode_dio_wait(inode);
 		}
 
 		down_write(&EXT4_I(inode)->i_mmap_sem);
@@ -5798,9 +5790,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
 	if (is_journal_aborted(journal))
 		return -EROFS;
 
-	/* Wait for all existing dio workers */
-	inode_dio_wait(inode);
-
 	/*
 	 * Before flushing the journal and switching inode's aops, we have
 	 * to flush all dirty data the inode has. There can be outstanding
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index e8870fff8224..99d21d81074f 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -153,10 +153,6 @@ static long swap_inode_boot_loader(struct super_block *sb,
 	if (err)
 		goto err_out;
 
-	/* Wait for all existing dio workers */
-	inode_dio_wait(inode);
-	inode_dio_wait(inode_bl);
-
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode_bl->i_data, 0);
 
@@ -364,7 +360,6 @@ static int ext4_ioctl_setflags(struct inode *inode,
 	 */
 	if (S_ISREG(inode->i_mode) && !IS_IMMUTABLE(inode) &&
 	    (flags & EXT4_IMMUTABLE_FL)) {
-		inode_dio_wait(inode);
 		err = filemap_write_and_wait(inode->i_mapping);
 		if (err)
 			goto flags_out;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 30ce3dc69378..20240808569f 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -602,10 +602,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
 	/* Protect orig and donor inodes against a truncate */
 	lock_two_nondirectories(orig_inode, donor_inode);
 
-	/* Wait for all existing dio workers */
-	inode_dio_wait(orig_inode);
-	inode_dio_wait(donor_inode);
-
 	/* Protect extent tree against block allocations via delalloc */
 	ext4_double_down_write_data_sem(orig_inode, donor_inode);
 	/* Check the filesystem environment whether move_extent can be done */
-- 
2.24.1


  parent reply index

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-14 16:12 RFC: hold i_rwsem until aio completes Christoph Hellwig
2020-01-14 16:12 ` [PATCH 01/12] mm: fix a comment in sys_swapon Christoph Hellwig
2020-02-10 23:29   ` Andrew Morton
2020-02-12  7:37     ` Christoph Hellwig
2020-01-14 16:12 ` [PATCH 02/12] locking/rwsem: Exit early when held by an anonymous owner Christoph Hellwig
2020-01-14 18:17   ` Waiman Long
2020-01-14 18:25     ` Christoph Hellwig
2020-01-14 18:33       ` Waiman Long
2020-01-14 18:55       ` Waiman Long
2020-01-14 16:12 ` [PATCH 03/12] xfs: fix IOCB_NOWAIT handling in xfs_file_dio_aio_read Christoph Hellwig
2020-01-14 16:12 ` [PATCH 04/12] gfs2: move setting current->backing_dev_info Christoph Hellwig
2020-01-14 16:12 ` [PATCH 05/12] gfs2: fix O_SYNC write handling Christoph Hellwig
2020-02-06 15:31   ` [Cluster-devel] " Andreas Gruenbacher
2020-01-14 16:12 ` [PATCH 06/12] iomap: pass a flags value to iomap_dio_rw Christoph Hellwig
2020-01-14 16:12 ` [PATCH 07/12] iomap: allow holding i_rwsem until aio completion Christoph Hellwig
2020-01-14 16:12 ` Christoph Hellwig [this message]
2020-01-14 21:50   ` [PATCH 08/12] ext4: hold i_rwsem until AIO completes Theodore Y. Ts'o
2020-01-15  6:48     ` Christoph Hellwig
2020-01-14 16:12 ` [PATCH 09/12] gfs2: " Christoph Hellwig
2020-01-14 16:12 ` [PATCH 10/12] xfs: " Christoph Hellwig
2020-01-14 16:12 ` [PATCH 11/12] xfs: don't set IOMAP_DIO_SYNCHRONOUS for unaligned I/O Christoph Hellwig
2020-01-14 16:12 ` [PATCH 12/12] iomap: remove the inode_dio_begin/end calls Christoph Hellwig
2020-01-14 18:47 ` RFC: hold i_rwsem until aio completes Matthew Wilcox
2020-01-15  6:54   ` Christoph Hellwig
2020-01-14 19:27 ` Jason Gunthorpe
2020-01-15  6:56   ` Christoph Hellwig
2020-01-15 13:24     ` Jason Gunthorpe
2020-01-15 14:33       ` Peter Zijlstra
2020-01-15 14:49         ` Jason Gunthorpe
2020-01-15 19:03           ` Waiman Long
2020-01-15 19:07             ` Christoph Hellwig
2020-01-18 22:40         ` Matthew Wilcox
2020-01-15 15:36       ` Christoph Hellwig
2020-01-15 16:26         ` Jason Gunthorpe
2020-01-16 14:00 ` Jan Kara
2020-02-03 17:44   ` Christoph Hellwig
2020-01-18  9:28 ` Dave Chinner
2020-02-03 17:46   ` Christoph Hellwig
2020-02-03 23:02     ` Dave Chinner

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200114161225.309792-9-hch@lst.de \
    --to=hch@lst.de \
    --cc=akpm@linux-foundation.org \
    --cc=cluster-devel@redhat.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=longman@redhat.com \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-XFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-xfs/0 linux-xfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-xfs linux-xfs/ https://lore.kernel.org/linux-xfs \
		linux-xfs@vger.kernel.org
	public-inbox-index linux-xfs

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-xfs


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git