All of lore.kernel.org
 help / color / mirror / Atom feed
From: Brian Foster <bfoster@redhat.com>
To: Christoph Hellwig <hch@lst.de>
Cc: linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH 10/21] xfs: remove xfs_map_cow
Date: Mon, 4 Jun 2018 08:25:33 -0400	[thread overview]
Message-ID: <20180604122532.GC110455@bfoster.bfoster> (raw)
In-Reply-To: <20180531180759.21631-11-hch@lst.de>

On Thu, May 31, 2018 at 08:07:48PM +0200, Christoph Hellwig wrote:
> We can handle the existing cow mapping case as a special case directly
> in xfs_writepage_map, and share code for allocating delalloc blocks
> with regular I/O in xfs_map_blocks.  This means we need to always
> call xfs_map_blocks for reflink inodes, but we can still skip most of
> the work if it turns out that there is no COW mapping overlapping the
> current block.
> 
> As a subtle detail we need to start caching holes in the wpc to deal
> with the case of COW reservations between EOF.  But we'll need that
> infrastructure later anyway, so this is no big deal.
> 
> Based on a patch from Dave Chinner.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_aops.c | 184 ++++++++++++++++++++++------------------------
>  fs/xfs/xfs_aops.h |   4 +-
>  2 files changed, 90 insertions(+), 98 deletions(-)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 9b5a4b6e268c..d536509ca05b 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -375,70 +375,107 @@ xfs_end_bio(
>  
>  STATIC int
>  xfs_map_blocks(
> +	struct xfs_writepage_ctx *wpc,
>  	struct inode		*inode,
> -	loff_t			offset,
> -	struct xfs_bmbt_irec	*imap,
> -	int			type)
> +	loff_t			offset)
>  {
>  	struct xfs_inode	*ip = XFS_I(inode);
>  	struct xfs_mount	*mp = ip->i_mount;
>  	ssize_t			count = i_blocksize(inode);
> -	xfs_fileoff_t		offset_fsb, end_fsb;
> +	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
> +	struct xfs_bmbt_irec	imap;
> +	int			whichfork = XFS_DATA_FORK;
>  	int			error = 0;
>  	int			nimaps = 1;
>  
>  	if (XFS_FORCED_SHUTDOWN(mp))
>  		return -EIO;
>  
> -	/*
> -	 * Truncate can race with writeback since writeback doesn't take the
> -	 * iolock and truncate decreases the file size before it starts
> -	 * truncating the pages between new_size and old_size.  Therefore, we
> -	 * can end up in the situation where writeback gets a CoW fork mapping
> -	 * but the truncate makes the mapping invalid and we end up in here
> -	 * trying to get a new mapping.  Bail out here so that we simply never
> -	 * get a valid mapping and so we drop the write altogether.  The page
> -	 * truncation will kill the contents anyway.
> -	 */
> -	if (type == XFS_IO_COW && offset > i_size_read(inode))
> -		return 0;
> -
> -	ASSERT(type != XFS_IO_COW);
> -
>  	xfs_ilock(ip, XFS_ILOCK_SHARED);
>  	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
>  	       (ip->i_df.if_flags & XFS_IFEXTENTS));
>  	ASSERT(offset <= mp->m_super->s_maxbytes);
>  
> +	if (xfs_is_reflink_inode(ip) &&
> +	    xfs_reflink_find_cow_mapping(ip, offset, &imap)) {
> +		xfs_iunlock(ip, XFS_ILOCK_SHARED);
> +		/*
> +		 * Truncate can race with writeback since writeback doesn't
> +		 * take the iolock and truncate decreases the file size before
> +		 * it starts truncating the pages between new_size and old_size.
> +		 * Therefore, we can end up in the situation where writeback
> +		 * gets a CoW fork mapping but the truncate makes the mapping
> +		 * invalid and we end up in here trying to get a new mapping.
> +		 * bail out here so that we simply never get a valid mapping
> +		 * and so we drop the write altogether.  The page truncation
> +		 * will kill the contents anyway.
> +		 */
> +		if (offset > i_size_read(inode)) {
> +			whichfork = XFS_IO_HOLE;

Not sure what the point of this assignment is if we're going to return,
but that's not a valid fork either way. Did you mean to assign
wpc->io_type?

> +			return 0;
> +		}
> +		whichfork = XFS_COW_FORK;
> +		wpc->io_type = XFS_IO_COW;
> +		goto allocate_blocks;
> +	}
> +
> +	/*
> +	 * Map valid and no COW extent in the way?  We're done.
> +	 */
> +	if (wpc->imap_valid) {
> +		xfs_iunlock(ip, XFS_ILOCK_SHARED);
> +		return 0;
> +	}
> +
> +	/*
> +	 * If we don't have a valid map, now it's time to get a new one for this
> +	 * offset.  This will convert delayed allocations (including COW ones)
> +	 * into real extents.
> +	 */
>  	if (offset > mp->m_super->s_maxbytes - count)
>  		count = mp->m_super->s_maxbytes - offset;
>  	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
>  	offset_fsb = XFS_B_TO_FSBT(mp, offset);
>  	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
> -				imap, &nimaps, XFS_BMAPI_ENTIRE);
> +				&imap, &nimaps, XFS_BMAPI_ENTIRE);
>  	xfs_iunlock(ip, XFS_ILOCK_SHARED);
> -
>  	if (error)
>  		return error;
>  
> -	if (type == XFS_IO_DELALLOC &&
> -	    (!nimaps || isnullstartblock(imap->br_startblock))) {
> -		error = xfs_iomap_write_allocate(ip, XFS_DATA_FORK, offset,
> -				imap);
> -		if (!error)
> -			trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
> -		return error;
> +	if (!nimaps) {
> +		/*
> +		 * Lookup returns no match? Beyond eof? regardless,
> +		 * return it as a hole so we don't write it
> +		 */
> +		imap.br_startoff = offset_fsb;
> +		imap.br_blockcount = end_fsb - offset_fsb;
> +		imap.br_startblock = HOLESTARTBLOCK;
> +		wpc->io_type = XFS_IO_HOLE;
> +	} else if (imap.br_startblock == HOLESTARTBLOCK) {
> +		/* landed in a hole */
> +		wpc->io_type = XFS_IO_HOLE;
>  	}
>  
> +	if (wpc->io_type == XFS_IO_DELALLOC &&
> +	    (!nimaps || isnullstartblock(imap.br_startblock)))
> +		goto allocate_blocks;
> +
>  #ifdef DEBUG
> -	if (type == XFS_IO_UNWRITTEN) {
> +	if (wpc->io_type == XFS_IO_UNWRITTEN) {
>  		ASSERT(nimaps);
> -		ASSERT(imap->br_startblock != HOLESTARTBLOCK);
> -		ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
> +		ASSERT(imap.br_startblock != HOLESTARTBLOCK);
> +		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
>  	}
>  #endif
> -	if (nimaps)
> -		trace_xfs_map_blocks_found(ip, offset, count, type, imap);
> +	wpc->imap = imap;
> +	trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
> +	return 0;
> +allocate_blocks:
> +	error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap);
> +	if (error)
> +		return error;
> +	wpc->imap = imap;
> +	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
>  	return 0;
>  }
>  
> @@ -759,56 +796,6 @@ xfs_aops_discard_page(
>  	xfs_vm_invalidatepage(page, 0, PAGE_SIZE);
>  }
>  
> -static int
> -xfs_map_cow(
> -	struct xfs_writepage_ctx *wpc,
> -	struct inode		*inode,
> -	loff_t			offset,
> -	unsigned int		*new_type)
> -{
> -	struct xfs_inode	*ip = XFS_I(inode);
> -	struct xfs_bmbt_irec	imap;
> -	bool			is_cow = false;
> -	int			error;
> -
> -	/*
> -	 * If we already have a valid COW mapping keep using it.
> -	 */
> -	if (wpc->io_type == XFS_IO_COW) {
> -		wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset);
> -		if (wpc->imap_valid) {
> -			*new_type = XFS_IO_COW;
> -			return 0;
> -		}
> -	}
> -
> -	/*
> -	 * Else we need to check if there is a COW mapping at this offset.
> -	 */
> -	xfs_ilock(ip, XFS_ILOCK_SHARED);
> -	is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap);
> -	xfs_iunlock(ip, XFS_ILOCK_SHARED);
> -
> -	if (!is_cow)
> -		return 0;
> -
> -	/*
> -	 * And if the COW mapping has a delayed extent here we need to
> -	 * allocate real space for it now.
> -	 */
> -	if (isnullstartblock(imap.br_startblock)) {
> -		error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset,
> -				&imap);
> -		if (error)
> -			return error;
> -	}
> -
> -	wpc->io_type = *new_type = XFS_IO_COW;
> -	wpc->imap_valid = true;
> -	wpc->imap = imap;
> -	return 0;
> -}
> -
>  /*
>   * We implement an immediate ioend submission policy here to avoid needing to
>   * chain multiple ioends and hence nest mempool allocations which can violate
> @@ -873,10 +860,13 @@ xfs_writepage_map(
>  			continue;
>  		}
>  
> -		if (xfs_is_reflink_inode(XFS_I(inode))) {
> -			error = xfs_map_cow(wpc, inode, offset, &new_type);
> -			if (error)
> -				goto out;
> +		/*
> +		 * If we already have a valid COW mapping keep using it.
> +		 */
> +		if (wpc->io_type == XFS_IO_COW &&
> +		    xfs_imap_valid(inode, &wpc->imap, offset)) {
> +			wpc->imap_valid = true;
> +			new_type = XFS_IO_COW;
>  		}

So this lifts the cow I/O type check from xfs_map_cow(). That path would
set imap_valid and new_type and return, which essentially looks like a
"blocks found" case where we go ahead and add the buffer to the ioend.
Now we set the same values, but we have to call xfs_map_blocks() to make
sure we cover the cow blocks overlap case that xfs_map_cow() used to
handle.

Shouldn't passing this check effectively jump straight to to the buffer
add? Otherwise it looks like we unconditionally attempt to allocate
blocks in the cow fork.

Alternatively...

>  
>  		if (wpc->io_type != new_type) {
> @@ -887,22 +877,22 @@ xfs_writepage_map(
>  		if (wpc->imap_valid)
>  			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
>  							 offset);
> -		if (!wpc->imap_valid) {
> -			error = xfs_map_blocks(inode, offset, &wpc->imap,
> -					     wpc->io_type);
> +		if (!wpc->imap_valid || xfs_is_reflink_inode(XFS_I(inode))) {
> +			error = xfs_map_blocks(wpc, inode, offset);

... perhaps this could change to something like:

		if (!wpc->imap_valid ||
		    (xfs_is_reflink_inode(XFS_I(inode)) &&
		     wpc->io_type != XFS_IO_COW))
			error = ...

... so the logic is a bit more clear. E.g., we always need to look up in
the COW fork for reflink inodes unless we're already doing COW I/O. Hm?

Brian

>  			if (error)
>  				goto out;
>  			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
>  							 offset);
>  		}
> -		if (wpc->imap_valid) {
> -			lock_buffer(bh);
> -			if (wpc->io_type != XFS_IO_OVERWRITE)
> -				xfs_map_at_offset(inode, bh, &wpc->imap, offset);
> -			xfs_add_to_ioend(inode, bh, offset, wpc, wbc, &submit_list);
> -			count++;
> -		}
>  
> +		if (!wpc->imap_valid || wpc->io_type == XFS_IO_HOLE)
> +			continue;
> +
> +		lock_buffer(bh);
> +		if (wpc->io_type != XFS_IO_OVERWRITE)
> +			xfs_map_at_offset(inode, bh, &wpc->imap, offset);
> +		xfs_add_to_ioend(inode, bh, offset, wpc, wbc, &submit_list);
> +		count++;
>  	} while (offset += len, ((bh = bh->b_this_page) != head));
>  
>  	ASSERT(wpc->ioend || list_empty(&submit_list));
> diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
> index 69346d460dfa..b2ef5b661761 100644
> --- a/fs/xfs/xfs_aops.h
> +++ b/fs/xfs/xfs_aops.h
> @@ -29,6 +29,7 @@ enum {
>  	XFS_IO_UNWRITTEN,	/* covers allocated but uninitialized data */
>  	XFS_IO_OVERWRITE,	/* covers already allocated extent */
>  	XFS_IO_COW,		/* covers copy-on-write extent */
> +	XFS_IO_HOLE,		/* covers region without any block allocation */
>  };
>  
>  #define XFS_IO_TYPES \
> @@ -36,7 +37,8 @@ enum {
>  	{ XFS_IO_DELALLOC,		"delalloc" }, \
>  	{ XFS_IO_UNWRITTEN,		"unwritten" }, \
>  	{ XFS_IO_OVERWRITE,		"overwrite" }, \
> -	{ XFS_IO_COW,			"CoW" }
> +	{ XFS_IO_COW,			"CoW" }, \
> +	{ XFS_IO_HOLE,			"hole" }
>  
>  /*
>   * Structure for buffered I/O completions.
> -- 
> 2.17.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

  reply	other threads:[~2018-06-04 12:25 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-31 18:07 buffered writes without buffer heads in xfs and iomap v5 Christoph Hellwig
2018-05-31 18:07 ` [PATCH 01/21] fs: factor out a __generic_write_end helper Christoph Hellwig
2018-05-31 18:07 ` [PATCH 02/21] iomap: add initial support for writes without buffer heads Christoph Hellwig
2018-05-31 18:07 ` [PATCH 03/21] xfs: simplify xfs_bmap_punch_delalloc_range Christoph Hellwig
2018-05-31 18:07 ` [PATCH 04/21] xfs: simplify xfs_aops_discard_page Christoph Hellwig
2018-05-31 18:07 ` [PATCH 05/21] xfs: move locking into xfs_bmap_punch_delalloc_range Christoph Hellwig
2018-05-31 18:07 ` [PATCH 06/21] xfs: do not set the page uptodate in xfs_writepage_map Christoph Hellwig
2018-05-31 18:07 ` [PATCH 07/21] xfs: don't clear imap_valid for a non-uptodate buffers Christoph Hellwig
2018-05-31 18:07 ` [PATCH 08/21] xfs: don't use XFS_BMAPI_IGSTATE in xfs_map_blocks Christoph Hellwig
2018-06-04 12:25   ` Brian Foster
2018-05-31 18:07 ` [PATCH 09/21] xfs: remove xfs_reflink_trim_irec_to_next_cow Christoph Hellwig
2018-06-04 12:25   ` Brian Foster
2018-05-31 18:07 ` [PATCH 10/21] xfs: remove xfs_map_cow Christoph Hellwig
2018-06-04 12:25   ` Brian Foster [this message]
2018-06-13 15:30     ` Christoph Hellwig
2018-05-31 18:07 ` [PATCH 11/21] xfs: rename the offset variable in xfs_writepage_map Christoph Hellwig
2018-06-04 12:25   ` Brian Foster
2018-05-31 18:07 ` [PATCH 12/21] xfs: make xfs_writepage_map extent map centric Christoph Hellwig
2018-06-04 12:26   ` Brian Foster
2018-05-31 18:07 ` [PATCH 13/21] xfs: remove the now unused XFS_BMAPI_IGSTATE flag Christoph Hellwig
2018-06-04 12:27   ` Brian Foster
2018-05-31 18:07 ` [PATCH 14/21] xfs: remove xfs_reflink_find_cow_mapping Christoph Hellwig
2018-05-31 18:07 ` [PATCH 15/21] xfs: simplify xfs_map_blocks by using xfs_iext_lookup_extent directly Christoph Hellwig
2018-05-31 18:07 ` [PATCH 16/21] xfs: remove the imap_valid flag Christoph Hellwig
2018-05-31 18:07 ` [PATCH 17/21] xfs: don't look at buffer heads in xfs_add_to_ioend Christoph Hellwig
2018-05-31 18:07 ` [PATCH 18/21] xfs: move all writeback buffer_head manipulation into xfs_map_at_offset Christoph Hellwig
2018-05-31 18:07 ` [PATCH 19/21] xfs: remove xfs_start_page_writeback Christoph Hellwig
2018-05-31 18:07 ` [PATCH 20/21] xfs: refactor the tail of xfs_writepage_map Christoph Hellwig
2018-05-31 18:07 ` [PATCH 21/21] xfs: allow writeback on pages without buffer heads Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180604122532.GC110455@bfoster.bfoster \
    --to=bfoster@redhat.com \
    --cc=hch@lst.de \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.