From: Goldwyn Rodrigues <RGoldwyn@suse.com>
To: "hch@lst.de" <hch@lst.de>,
"linux-xfs@vger.kernel.org" <linux-xfs@vger.kernel.org>,
"linux-fsdevel@vger.kernel.org" <linux-fsdevel@vger.kernel.org>
Subject: Re: [PATCH 07/19] iomap: use a srcmap for a read-modify-write I/O
Date: Tue, 10 Sep 2019 12:48:14 +0000 [thread overview]
Message-ID: <1568119693.12944.16.camel@suse.com> (raw)
In-Reply-To: <20190909182722.16783-8-hch@lst.de>
On Mon, 2019-09-09 at 20:27 +0200, Christoph Hellwig wrote:
> From: Goldwyn Rodrigues <rgoldwyn@suse.com>
>
> The srcmap is used to identify where the read is to be performed
> from.
> It is passed to ->iomap_begin, which can fill it in if we need to
> read
> data for partially written blocks from a different location than the
> write target. The srcmap is only supported for buffered writes so
> far.
>
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> [hch: merged two patches, removed the IOMAP_F_COW flag, use iomap as
> srcmap if not set, adjust length down to srcmap end as well]
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> fs/dax.c | 9 ++++--
> fs/ext2/inode.c | 2 +-
> fs/ext4/inode.c | 2 +-
> fs/gfs2/bmap.c | 3 +-
> fs/iomap/apply.c | 23 +++++++++++----
> fs/iomap/buffered-io.c | 65 +++++++++++++++++++++++-----------------
> --
> fs/iomap/direct-io.c | 2 +-
> fs/iomap/fiemap.c | 4 +--
> fs/iomap/seek.c | 4 +--
> fs/iomap/swapfile.c | 3 +-
> fs/xfs/xfs_iomap.c | 9 ++++--
> include/linux/iomap.h | 5 ++--
> 12 files changed, 79 insertions(+), 52 deletions(-)
>
> diff --git a/fs/dax.c b/fs/dax.c
> index a237141d8787..8016be24bbd9 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1090,7 +1090,7 @@ EXPORT_SYMBOL_GPL(__dax_zero_page_range);
>
> static loff_t
> dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void
> *data,
> - struct iomap *iomap)
> + struct iomap *iomap, struct iomap *srcmap)
> {
> struct block_device *bdev = iomap->bdev;
> struct dax_device *dax_dev = iomap->dax_dev;
> @@ -1248,6 +1248,7 @@ static vm_fault_t dax_iomap_pte_fault(struct
> vm_fault *vmf, pfn_t *pfnp,
> unsigned long vaddr = vmf->address;
> loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
> struct iomap iomap = { 0 };
> + struct iomap srcmap = { 0 };
> unsigned flags = IOMAP_FAULT;
> int error, major = 0;
> bool write = vmf->flags & FAULT_FLAG_WRITE;
> @@ -1292,7 +1293,7 @@ static vm_fault_t dax_iomap_pte_fault(struct
> vm_fault *vmf, pfn_t *pfnp,
> * the file system block size to be equal the page size,
> which means
> * that we never have to deal with more than a single extent
> here.
> */
> - error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags,
> &iomap);
> + error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags,
> &iomap, &srcmap);
> if (iomap_errp)
> *iomap_errp = error;
> if (error) {
> @@ -1472,6 +1473,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct
> vm_fault *vmf, pfn_t *pfnp,
> struct inode *inode = mapping->host;
> vm_fault_t result = VM_FAULT_FALLBACK;
> struct iomap iomap = { 0 };
> + struct iomap srcmap = { 0 };
> pgoff_t max_pgoff;
> void *entry;
> loff_t pos;
> @@ -1546,7 +1548,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct
> vm_fault *vmf, pfn_t *pfnp,
> * to look up our filesystem block.
> */
> pos = (loff_t)xas.xa_index << PAGE_SHIFT;
> - error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags,
> &iomap);
> + error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags,
> &iomap,
> + &srcmap);
> if (error)
> goto unlock_entry;
>
> diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
> index 7004ce581a32..467c13ff6b40 100644
> --- a/fs/ext2/inode.c
> +++ b/fs/ext2/inode.c
> @@ -801,7 +801,7 @@ int ext2_get_block(struct inode *inode, sector_t
> iblock,
>
> #ifdef CONFIG_FS_DAX
> static int ext2_iomap_begin(struct inode *inode, loff_t offset,
> loff_t length,
> - unsigned flags, struct iomap *iomap)
> + unsigned flags, struct iomap *iomap, struct iomap
> *srcmap)
> {
> unsigned int blkbits = inode->i_blkbits;
> unsigned long first_block = offset >> blkbits;
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 420fe3deed39..e2116e8228d2 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3453,7 +3453,7 @@ static bool ext4_inode_datasync_dirty(struct
> inode *inode)
> }
>
> static int ext4_iomap_begin(struct inode *inode, loff_t offset,
> loff_t length,
> - unsigned flags, struct iomap *iomap)
> + unsigned flags, struct iomap *iomap, struct iomap
> *srcmap)
> {
> struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
> unsigned int blkbits = inode->i_blkbits;
> diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
> index 79581b9bdebb..0bf8e8fa82bd 100644
> --- a/fs/gfs2/bmap.c
> +++ b/fs/gfs2/bmap.c
> @@ -1123,7 +1123,8 @@ static int gfs2_iomap_begin_write(struct inode
> *inode, loff_t pos,
> }
>
> static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t
> length,
> - unsigned flags, struct iomap *iomap)
> + unsigned flags, struct iomap *iomap,
> + struct iomap *srcmap)
> {
> struct gfs2_inode *ip = GFS2_I(inode);
> struct metapath mp = { .mp_aheight = 1, };
> diff --git a/fs/iomap/apply.c b/fs/iomap/apply.c
> index 54c02aecf3cd..67efd86675e6 100644
> --- a/fs/iomap/apply.c
> +++ b/fs/iomap/apply.c
> @@ -24,7 +24,9 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t
> length, unsigned flags,
> const struct iomap_ops *ops, void *data,
> iomap_actor_t actor)
> {
> struct iomap iomap = { 0 };
> + struct iomap srcmap = { 0 };
> loff_t written = 0, ret;
> + u64 end;
>
> /*
> * Need to map a range from start position for length bytes.
> This can
> @@ -38,7 +40,7 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t
> length, unsigned flags,
> * expose transient stale data. If the reserve fails, we can
> safely
> * back out at this point as there is nothing to undo.
> */
> - ret = ops->iomap_begin(inode, pos, length, flags, &iomap);
> + ret = ops->iomap_begin(inode, pos, length, flags, &iomap,
> &srcmap);
> if (ret)
> return ret;
> if (WARN_ON(iomap.offset > pos))
> @@ -50,15 +52,26 @@ iomap_apply(struct inode *inode, loff_t pos,
> loff_t length, unsigned flags,
> * Cut down the length to the one actually provided by the
> filesystem,
> * as it might not be able to give us the whole size that we
> requested.
> */
> - if (iomap.offset + iomap.length < pos + length)
> - length = iomap.offset + iomap.length - pos;
> + end = iomap.offset + iomap.length;
> + if (srcmap.type)
> + end = min(end, srcmap.offset + srcmap.length);
> + if (pos + length > end)
> + length = end - pos;
>
Yes, that looks more correct. However, can we be smart and not bother
setting the minimum of end and srcmap.offset + srcmap.length if it is
not required? ie in situations where end coincides with block boundary.
Or if srcmap.length falls short, until the last block boundary of
iomap?
I did think about this scenario. This case is specific to CoW and
thought this is best handled by filesystem's iomap_begin(). If this
goes in, the filesystems would have to "falsify" srcmap length
information to maximize the amount of I/O that goes in one iteration.
--
Goldwyn
next prev parent reply other threads:[~2019-09-10 12:51 UTC|newest]
Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-09-09 18:27 iomap and xfs COW cleanups Christoph Hellwig
2019-09-09 18:27 ` [PATCH 01/19] iomap: better document the IOMAP_F_* flags Christoph Hellwig
2019-09-14 0:42 ` Allison Collins
2019-09-16 18:08 ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 02/19] iomap: remove the unused iomap argument to __iomap_write_end Christoph Hellwig
2019-09-14 0:42 ` Allison Collins
2019-09-16 18:10 ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 03/19] iomap: always use AOP_FLAG_NOFS in iomap_write_begin Christoph Hellwig
2019-09-16 18:11 ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 04/19] iomap: ignore non-shared or non-data blocks in xfs_file_dirty Christoph Hellwig
2019-09-16 18:12 ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 05/19] iomap: move the zeroing case out of iomap_read_page_sync Christoph Hellwig
2019-09-16 18:17 ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 06/19] iomap: use write_begin to read pages to unshare Christoph Hellwig
2019-09-16 18:34 ` Darrick J. Wong
2019-09-30 11:07 ` Christoph Hellwig
2019-10-08 15:12 ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 07/19] iomap: use a srcmap for a read-modify-write I/O Christoph Hellwig
2019-09-10 12:48 ` Goldwyn Rodrigues [this message]
2019-09-10 14:39 ` hch
2019-09-16 17:57 ` Darrick J. Wong
2019-09-16 18:42 ` Darrick J. Wong
2019-09-18 18:15 ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 08/19] xfs: also call xfs_file_iomap_end_delalloc for zeroing operations Christoph Hellwig
2019-09-18 17:09 ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 09/19] xfs: remove xfs_reflink_dirty_extents Christoph Hellwig
2019-09-18 17:17 ` Darrick J. Wong
2019-09-18 17:25 ` Christoph Hellwig
2019-09-18 17:31 ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 10/19] xfs: pass two imaps to xfs_reflink_allocate_cow Christoph Hellwig
2019-09-18 17:26 ` Darrick J. Wong
2019-09-30 11:10 ` Christoph Hellwig
2019-09-09 18:27 ` [PATCH 11/19] xfs: refactor xfs_file_iomap_begin_delay Christoph Hellwig
2019-09-18 17:30 ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 12/19] xfs: fill out the srcmap in iomap_begin Christoph Hellwig
2019-09-18 17:52 ` Darrick J. Wong
2019-10-01 6:26 ` Christoph Hellwig
2019-09-09 18:27 ` [PATCH 13/19] xfs: factor out a helper to calculate the end_fsb Christoph Hellwig
2019-09-14 0:42 ` Allison Collins
2019-09-18 17:55 ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 14/19] xfs: split out a new set of read-only iomap ops Christoph Hellwig
2019-09-18 17:56 ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 15/19] xfs: move xfs_file_iomap_begin_delay around Christoph Hellwig
2019-09-18 17:59 ` Darrick J. Wong
2019-09-30 11:14 ` Christoph Hellwig
2019-09-09 18:27 ` [PATCH 16/19] xfs: split the iomap ops for buffered vs direct writes Christoph Hellwig
2019-09-18 18:00 ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 17/19] xfs: rename the whichfork variable in xfs_buffered_write_iomap_begin Christoph Hellwig
2019-09-14 0:42 ` Allison Collins
2019-09-18 18:00 ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 18/19] xfs: cleanup xfs_iomap_write_unwritten Christoph Hellwig
2019-09-18 18:06 ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 19/19] xfs: improve the IOMAP_NOWAIT check for COW inodes Christoph Hellwig
2019-09-18 18:09 ` Darrick J. Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1568119693.12944.16.camel@suse.com \
--to=rgoldwyn@suse.com \
--cc=hch@lst.de \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).