From: Christoph Hellwig <hch@lst.de> To: linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org, Waiman Long <longman@redhat.com>, Peter Zijlstra <peterz@infradead.org>, Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@redhat.com>, Will Deacon <will@kernel.org>, Andrew Morton <akpm@linux-foundation.org>, linux-ext4@vger.kernel.org, cluster-devel@redhat.com Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH 08/12] ext4: hold i_rwsem until AIO completes Date: Tue, 14 Jan 2020 17:12:21 +0100 Message-ID: <20200114161225.309792-9-hch@lst.de> (raw) In-Reply-To: <20200114161225.309792-1-hch@lst.de> Switch ext4 from the magic i_dio_count scheme to just hold i_rwsem until the actual I/O has completed to reduce the locking complexity and avoid nasty bugs due to missing inode_dio_wait calls. Signed-off-by: Christoph Hellwig <hch@lst.de> --- fs/ext4/extents.c | 12 ------------ fs/ext4/file.c | 21 +++++++++++++-------- fs/ext4/inode.c | 11 ----------- fs/ext4/ioctl.c | 5 ----- fs/ext4/move_extent.c | 4 ---- 5 files changed, 13 insertions(+), 40 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0e8708b77da6..b6aa2d249b30 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4777,9 +4777,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, if (mode & FALLOC_FL_KEEP_SIZE) flags |= EXT4_GET_BLOCKS_KEEP_SIZE; - /* Wait all existing dio workers, newcomers will block on i_mutex */ - inode_dio_wait(inode); - /* Preallocate the range including the unaligned edges */ if (partial_begin || partial_end) { ret = ext4_alloc_file_blocks(file, @@ -4949,9 +4946,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) goto out; } - /* Wait all existing dio workers, newcomers will block on i_mutex */ - inode_dio_wait(inode); - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags); if (ret) goto out; @@ -5525,9 +5519,6 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) 
goto out_mutex; } - /* Wait for existing dio to complete */ - inode_dio_wait(inode); - /* * Prevent page faults from reinstantiating pages we have released from * page cache. @@ -5678,9 +5669,6 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) goto out_mutex; } - /* Wait for existing dio to complete */ - inode_dio_wait(inode); - /* * Prevent page faults from reinstantiating pages we have released from * page cache. diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 08b603d0c638..b3410a3ede27 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -74,9 +74,10 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) return generic_file_read_iter(iocb, to); } - ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0); - inode_unlock_shared(inode); - + ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, + IOMAP_DIO_RWSEM_SHARED); + if (ret != -EIOCBQUEUED) + inode_unlock_shared(inode); file_accessed(iocb->ki_filp); return ret; } @@ -405,7 +406,6 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) !is_sync_kiocb(iocb) && ext4_unaligned_aio(inode, from, offset)) { unaligned_aio = true; dio_flags |= IOMAP_DIO_SYNCHRONOUS; - inode_dio_wait(inode); } /* @@ -416,7 +416,10 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) if (!unaligned_aio && ext4_overwrite_io(inode, offset, count) && ext4_should_dioread_nolock(inode)) { overwrite = true; + dio_flags |= IOMAP_DIO_RWSEM_SHARED; downgrade_write(&inode->i_rwsem); + } else { + dio_flags |= IOMAP_DIO_RWSEM_EXCL; } if (offset + count > EXT4_I(inode)->i_disksize) { @@ -444,10 +447,12 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = ext4_handle_inode_extension(inode, offset, ret, count); out: - if (overwrite) - inode_unlock_shared(inode); - else - inode_unlock(inode); + if (ret != -EIOCBQUEUED) { + if (overwrite) + inode_unlock_shared(inode); + else + inode_unlock(inode); + } if (ret >= 0 && 
iov_iter_count(from)) { ssize_t err; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 629a25d999f0..e2dac0727ab0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3965,9 +3965,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) } - /* Wait all existing dio workers, newcomers will block on i_mutex */ - inode_dio_wait(inode); - /* * Prevent page faults from reinstantiating pages we have released from * page cache. @@ -5263,11 +5260,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (error) goto err_out; } - /* - * Blocks are going to be removed from the inode. Wait - * for dio in flight. - */ - inode_dio_wait(inode); } down_write(&EXT4_I(inode)->i_mmap_sem); @@ -5798,9 +5790,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) if (is_journal_aborted(journal)) return -EROFS; - /* Wait for all existing dio workers */ - inode_dio_wait(inode); - /* * Before flushing the journal and switching inode's aops, we have * to flush all dirty data the inode has. 
There can be outstanding diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index e8870fff8224..99d21d81074f 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -153,10 +153,6 @@ static long swap_inode_boot_loader(struct super_block *sb, if (err) goto err_out; - /* Wait for all existing dio workers */ - inode_dio_wait(inode); - inode_dio_wait(inode_bl); - truncate_inode_pages(&inode->i_data, 0); truncate_inode_pages(&inode_bl->i_data, 0); @@ -364,7 +360,6 @@ static int ext4_ioctl_setflags(struct inode *inode, */ if (S_ISREG(inode->i_mode) && !IS_IMMUTABLE(inode) && (flags & EXT4_IMMUTABLE_FL)) { - inode_dio_wait(inode); err = filemap_write_and_wait(inode->i_mapping); if (err) goto flags_out; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 30ce3dc69378..20240808569f 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -602,10 +602,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, /* Protect orig and donor inodes against a truncate */ lock_two_nondirectories(orig_inode, donor_inode); - /* Wait for all existing dio workers */ - inode_dio_wait(orig_inode); - inode_dio_wait(donor_inode); - /* Protect extent tree against block allocations via delalloc */ ext4_double_down_write_data_sem(orig_inode, donor_inode); /* Check the filesystem environment whether move_extent can be done */ -- 2.24.1
next prev parent reply index Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top 2020-01-14 16:12 RFC: hold i_rwsem until aio completes Christoph Hellwig 2020-01-14 16:12 ` [PATCH 01/12] mm: fix a comment in sys_swapon Christoph Hellwig 2020-02-10 23:29 ` Andrew Morton 2020-02-12 7:37 ` Christoph Hellwig 2020-01-14 16:12 ` [PATCH 02/12] locking/rwsem: Exit early when held by an anonymous owner Christoph Hellwig 2020-01-14 18:17 ` Waiman Long 2020-01-14 18:25 ` Christoph Hellwig 2020-01-14 18:33 ` Waiman Long 2020-01-14 18:55 ` Waiman Long 2020-01-14 16:12 ` [PATCH 03/12] xfs: fix IOCB_NOWAIT handling in xfs_file_dio_aio_read Christoph Hellwig 2020-01-14 16:12 ` [PATCH 04/12] gfs2: move setting current->backing_dev_info Christoph Hellwig 2020-01-14 16:12 ` [PATCH 05/12] gfs2: fix O_SYNC write handling Christoph Hellwig 2020-02-06 15:31 ` [Cluster-devel] " Andreas Gruenbacher 2020-01-14 16:12 ` [PATCH 06/12] iomap: pass a flags value to iomap_dio_rw Christoph Hellwig 2020-01-14 16:12 ` [PATCH 07/12] iomap: allow holding i_rwsem until aio completion Christoph Hellwig 2020-01-14 16:12 ` Christoph Hellwig [this message] 2020-01-14 21:50 ` [PATCH 08/12] ext4: hold i_rwsem until AIO completes Theodore Y. 
Ts'o 2020-01-15 6:48 ` Christoph Hellwig 2020-01-14 16:12 ` [PATCH 09/12] gfs2: " Christoph Hellwig 2020-01-14 16:12 ` [PATCH 10/12] xfs: " Christoph Hellwig 2020-01-14 16:12 ` [PATCH 11/12] xfs: don't set IOMAP_DIO_SYNCHRONOUS for unaligned I/O Christoph Hellwig 2020-01-14 16:12 ` [PATCH 12/12] iomap: remove the inode_dio_begin/end calls Christoph Hellwig 2020-01-14 18:47 ` RFC: hold i_rwsem until aio completes Matthew Wilcox 2020-01-15 6:54 ` Christoph Hellwig 2020-01-14 19:27 ` Jason Gunthorpe 2020-01-15 6:56 ` Christoph Hellwig 2020-01-15 13:24 ` Jason Gunthorpe 2020-01-15 14:33 ` Peter Zijlstra 2020-01-15 14:49 ` Jason Gunthorpe 2020-01-15 19:03 ` Waiman Long 2020-01-15 19:07 ` Christoph Hellwig 2020-01-18 22:40 ` Matthew Wilcox 2020-01-15 15:36 ` Christoph Hellwig 2020-01-15 16:26 ` Jason Gunthorpe 2020-01-16 14:00 ` Jan Kara 2020-02-03 17:44 ` Christoph Hellwig 2020-01-18 9:28 ` Dave Chinner 2020-02-03 17:46 ` Christoph Hellwig 2020-02-03 23:02 ` Dave Chinner
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox. Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20200114161225.309792-9-hch@lst.de \ --to=hch@lst.de \ --cc=akpm@linux-foundation.org \ --cc=cluster-devel@redhat.com \ --cc=linux-ext4@vger.kernel.org \ --cc=linux-fsdevel@vger.kernel.org \ --cc=linux-kernel@vger.kernel.org \ --cc=linux-mm@kvack.org \ --cc=linux-xfs@vger.kernel.org \ --cc=longman@redhat.com \ --cc=mingo@redhat.com \ --cc=peterz@infradead.org \ --cc=tglx@linutronix.de \ --cc=will@kernel.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link.
Linux-XFS Archive on lore.kernel.org Archives are clonable: git clone --mirror https://lore.kernel.org/linux-xfs/0 linux-xfs/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 linux-xfs linux-xfs/ https://lore.kernel.org/linux-xfs \ linux-xfs@vger.kernel.org public-inbox-index linux-xfs Example config snippet for mirrors Newsgroup available over NNTP: nntp://nntp.lore.kernel.org/org.kernel.vger.linux-xfs AGPL code for this site: git clone https://public-inbox.org/public-inbox.git