linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Goldwyn Rodrigues <rgoldwyn@suse.de>
To: linux-fsdevel@vger.kernel.org
Cc: linux-btrfs@vger.kernel.org, david@fromorbit.com, hch@lst.de,
	johannes.thumshirn@wdc.com, dsterba@suse.com,
	darrick.wong@oracle.com, josef@toxicpanda.com,
	Goldwyn Rodrigues <rgoldwyn@suse.com>
Subject: [PATCH 05/14] btrfs: split btrfs_direct_IO to read and write
Date: Thu, 24 Sep 2020 11:39:12 -0500	[thread overview]
Message-ID: <20200924163922.2547-6-rgoldwyn@suse.de> (raw)
In-Reply-To: <20200924163922.2547-1-rgoldwyn@suse.de>

From: Goldwyn Rodrigues <rgoldwyn@suse.com>

The read and write DIO don't have anything in common except for the
call to iomap_dio_rw. Extract the write call into a new function to get
rid of conditional statements for direct write.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/ctree.h |  5 ++-
 fs/btrfs/file.c  | 98 +++++++++++++++++++++++++++++++++++++++++++-----
 fs/btrfs/inode.c | 91 ++------------------------------------------
 3 files changed, 95 insertions(+), 99 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index cd644c755142..b47a8dcff028 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -28,6 +28,7 @@
 #include <linux/dynamic_debug.h>
 #include <linux/refcount.h>
 #include <linux/crc32c.h>
+#include <linux/iomap.h>
 #include "extent-io-tree.h"
 #include "extent_io.h"
 #include "extent_map.h"
@@ -3048,7 +3049,9 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end);
 void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
 					  u64 end, int uptodate);
 extern const struct dentry_operations btrfs_dentry_operations;
-ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
+extern const struct iomap_ops btrfs_dio_iomap_ops;
+extern const struct iomap_dio_ops btrfs_dio_ops;
+extern const struct iomap_dio_ops btrfs_sync_dops;
 
 /* ioctl.c */
 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 038e0afaf3d0..910e2fd234a9 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1855,21 +1855,68 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
 	return num_written ? num_written : ret;
 }
 
-static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
+static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
+		const struct iov_iter *iter, loff_t offset)
+{
+	const unsigned int blocksize_mask = fs_info->sectorsize - 1;
+
+	if (offset & blocksize_mask)
+		return -EINVAL;
+
+	if (iov_iter_alignment(iter) & blocksize_mask)
+		return -EINVAL;
+
+	return 0;
+}
+
+static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
-	loff_t pos;
-	ssize_t written;
+	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	loff_t pos = iocb->ki_pos;
+	ssize_t written = 0;
+	bool relock = false;
 	ssize_t written_buffered;
 	loff_t endbyte;
 	int err;
 
-	written = btrfs_direct_IO(iocb, from);
+	if (check_direct_IO(fs_info, from, pos))
+		goto buffered;
+
+	/*
+	 * If the write DIO is beyond the EOF, we need update
+	 * the isize, but it is protected by i_mutex. So we can
+	 * not unlock the i_mutex at this case.
+	 */
+	if (pos + iov_iter_count(from) <= inode->i_size) {
+		inode_unlock(inode);
+		relock = true;
+	}
+	down_read(&BTRFS_I(inode)->dio_sem);
+
+	/*
+	 * We have are actually a sync iocb, so we need our fancy endio to know
+	 * if we need to sync.
+	 */
+	if (current->journal_info)
+		written = iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops,
+				&btrfs_sync_dops, is_sync_kiocb(iocb));
+	else
+		written = iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops,
+				&btrfs_dio_ops, is_sync_kiocb(iocb));
+
+	if (written == -ENOTBLK)
+		written = 0;
+
+	up_read(&BTRFS_I(inode)->dio_sem);
+	if (relock)
+		inode_lock(inode);
 
 	if (written < 0 || !iov_iter_count(from))
 		return written;
 
+buffered:
 	pos = iocb->ki_pos;
 	written_buffered = btrfs_buffered_write(iocb, from);
 	if (written_buffered < 0) {
@@ -2043,7 +2090,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 			iocb->ki_flags &= ~IOCB_DSYNC;
 			current->journal_info = BTRFS_DIO_SYNC_STUB;
 		}
-		num_written = __btrfs_direct_write(iocb, from);
+		num_written = btrfs_direct_write(iocb, from);
 
 		/*
 		 * As stated above, we cleared journal_info, so we need to do
@@ -3618,16 +3665,47 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
 	return generic_file_open(inode, filp);
 }
 
+static int check_direct_read(struct btrfs_fs_info *fs_info,
+		const struct iov_iter *iter, loff_t offset)
+{
+	int ret;
+	int i, seg;
+
+	ret = check_direct_IO(fs_info, iter, offset);
+	if (ret < 0)
+		return ret;
+
+	if (!iter_is_iovec(iter))
+		return 0;
+
+	for (seg = 0; seg < iter->nr_segs; seg++)
+		for (i = seg + 1; i < iter->nr_segs; i++)
+			if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
+				return -EINVAL;
+	return 0;
+}
+
+static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	ssize_t ret;
+
+	if (check_direct_read(btrfs_sb(inode->i_sb), to, iocb->ki_pos))
+		return 0;
+
+	inode_lock_shared(inode);
+	ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
+			is_sync_kiocb(iocb));
+	inode_unlock_shared(inode);
+	return ret;
+}
+
 static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	ssize_t ret = 0;
 
 	if (iocb->ki_flags & IOCB_DIRECT) {
-		struct inode *inode = file_inode(iocb->ki_filp);
-
-		inode_lock_shared(inode);
-		ret = btrfs_direct_IO(iocb, to);
-		inode_unlock_shared(inode);
+		ret = btrfs_direct_read(iocb, to);
 		if (ret < 0)
 			return ret;
 	}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3db4697ff1ba..0730131b6590 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7898,39 +7898,6 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
 	return BLK_QC_T_NONE;
 }
 
-static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
-			       const struct iov_iter *iter, loff_t offset)
-{
-	int seg;
-	int i;
-	unsigned int blocksize_mask = fs_info->sectorsize - 1;
-	ssize_t retval = -EINVAL;
-
-	if (offset & blocksize_mask)
-		goto out;
-
-	if (iov_iter_alignment(iter) & blocksize_mask)
-		goto out;
-
-	/* If this is a write we don't need to check anymore */
-	if (iov_iter_rw(iter) != READ || !iter_is_iovec(iter))
-		return 0;
-	/*
-	 * Check to make sure we don't have duplicate iov_base's in this
-	 * iovec, if so return EINVAL, otherwise we'll get csum errors
-	 * when reading back.
-	 */
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		for (i = seg + 1; i < iter->nr_segs; i++) {
-			if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
-				goto out;
-		}
-	}
-	retval = 0;
-out:
-	return retval;
-}
-
 static inline int btrfs_maybe_fsync_end_io(struct kiocb *iocb, ssize_t size,
 					   int error, unsigned flags)
 {
@@ -7955,72 +7922,20 @@ static inline int btrfs_maybe_fsync_end_io(struct kiocb *iocb, ssize_t size,
 	return 0;
 }
 
-static const struct iomap_ops btrfs_dio_iomap_ops = {
+const struct iomap_ops btrfs_dio_iomap_ops = {
 	.iomap_begin            = btrfs_dio_iomap_begin,
 	.iomap_end              = btrfs_dio_iomap_end,
 };
 
-static const struct iomap_dio_ops btrfs_dio_ops = {
+const struct iomap_dio_ops btrfs_dio_ops = {
 	.submit_io		= btrfs_submit_direct,
 };
 
-static const struct iomap_dio_ops btrfs_sync_dops = {
+const struct iomap_dio_ops btrfs_sync_dops = {
 	.submit_io		= btrfs_submit_direct,
 	.end_io			= btrfs_maybe_fsync_end_io,
 };
 
-ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
-	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_mapping->host;
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	struct extent_changeset *data_reserved = NULL;
-	loff_t offset = iocb->ki_pos;
-	size_t count = 0;
-	bool relock = false;
-	ssize_t ret;
-
-	if (check_direct_IO(fs_info, iter, offset))
-		return 0;
-
-	count = iov_iter_count(iter);
-	if (iov_iter_rw(iter) == WRITE) {
-		/*
-		 * If the write DIO is beyond the EOF, we need update
-		 * the isize, but it is protected by i_mutex. So we can
-		 * not unlock the i_mutex at this case.
-		 */
-		if (offset + count <= inode->i_size) {
-			inode_unlock(inode);
-			relock = true;
-		}
-		down_read(&BTRFS_I(inode)->dio_sem);
-	}
-
-	/*
-	 * We have are actually a sync iocb, so we need our fancy endio to know
-	 * if we need to sync.
-	 */
-	if (current->journal_info)
-		ret = iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops,
-				   &btrfs_sync_dops, is_sync_kiocb(iocb));
-	else
-		ret = iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops,
-				   &btrfs_dio_ops, is_sync_kiocb(iocb));
-
-	if (ret == -ENOTBLK)
-		ret = 0;
-
-	if (iov_iter_rw(iter) == WRITE)
-		up_read(&BTRFS_I(inode)->dio_sem);
-
-	if (relock)
-		inode_lock(inode);
-
-	extent_changeset_free(data_reserved);
-	return ret;
-}
-
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			u64 start, u64 len)
 {
-- 
2.26.2


  parent reply	other threads:[~2020-09-24 16:39 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-24 16:39 [PATCH 0/14 v3] BTRFS DIO inode locking/D_SYNC fix Goldwyn Rodrigues
2020-09-24 16:39 ` [PATCH 01/14] fs: remove dio_end_io() Goldwyn Rodrigues
2020-09-29 12:16   ` David Sterba
2020-09-24 16:39 ` [PATCH 02/14] btrfs: remove BTRFS_INODE_READDIO_NEED_LOCK Goldwyn Rodrigues
2020-09-24 16:39 ` [PATCH 03/14] iomap: Allow filesystem to call iomap_dio_complete without i_rwsem Goldwyn Rodrigues
2020-09-26  1:49   ` Darrick J. Wong
2020-09-24 16:39 ` [PATCH 04/14] iomap: Call inode_dio_end() before generic_write_sync() Goldwyn Rodrigues
2020-09-26  1:51   ` Darrick J. Wong
2020-09-28 15:04     ` David Sterba
2020-09-28 16:12       ` Darrick J. Wong
2020-09-24 16:39 ` Goldwyn Rodrigues [this message]
2020-09-24 16:39 ` [PATCH 06/14] btrfs: Move pos increment and pagecache extension to btrfs_buffered_write() Goldwyn Rodrigues
2020-09-24 16:39 ` [PATCH 07/14] btrfs: Move FS error state bit early during write Goldwyn Rodrigues
2020-09-24 16:39 ` [PATCH 08/14] btrfs: Introduce btrfs_write_check() Goldwyn Rodrigues
2020-10-09 14:21   ` Josef Bacik
2020-09-24 16:39 ` [PATCH 09/14] btrfs: Introduce btrfs_inode_lock()/unlock() Goldwyn Rodrigues
2020-09-24 16:39 ` [PATCH 10/14] btrfs: Push inode locking and unlocking into buffered/direct write Goldwyn Rodrigues
2020-09-24 16:39 ` [PATCH 11/14] btrfs: Use inode_lock_shared() for direct writes within EOF Goldwyn Rodrigues
2020-10-09 14:25   ` Josef Bacik
2020-09-24 16:39 ` [PATCH 12/14] btrfs: Remove dio_sem Goldwyn Rodrigues
2020-09-24 16:39 ` [PATCH 13/14] btrfs: Call iomap_dio_complete() without inode_lock Goldwyn Rodrigues
2020-09-24 16:39 ` [PATCH 14/14] btrfs: Revert 09745ff88d93 ("btrfs: dio iomap DSYNC workaround") Goldwyn Rodrigues

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200924163922.2547-6-rgoldwyn@suse.de \
    --to=rgoldwyn@suse.de \
    --cc=darrick.wong@oracle.com \
    --cc=david@fromorbit.com \
    --cc=dsterba@suse.com \
    --cc=hch@lst.de \
    --cc=johannes.thumshirn@wdc.com \
    --cc=josef@toxicpanda.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=rgoldwyn@suse.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).