All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: Brian Foster <bfoster@redhat.com>
Cc: linux-xfs@vger.kernel.org
Subject: Re: [PATCH 9/9] xfs: relog dirty buffers during swapext bmbt owner change
Date: Fri, 25 Aug 2017 09:53:59 -0700	[thread overview]
Message-ID: <20170825165359.GR4796@magnolia> (raw)
In-Reply-To: <20170825150557.43010-10-bfoster@redhat.com>

On Fri, Aug 25, 2017 at 11:05:57AM -0400, Brian Foster wrote:
> The owner change bmbt scan that occurs during extent swap operations
> does not handle ordered buffer failures. Buffers that cannot be
> marked ordered must be physically logged so previously dirty ranges
> of the buffer can be relogged in the transaction.
> 
> Since the bmbt scan may need to process and potentially log a large
> number of blocks, we can't expect to complete this operation in a
> single transaction. Update extent swap to use a permanent
> transaction with enough log reservation to physically log a buffer.
> Update the bmbt scan to physically log any buffers that cannot be
> ordered and to terminate the scan with -EAGAIN. On -EAGAIN, the
> caller rolls the transaction and restarts the scan. Finally, update
> the bmbt scan helper function to skip bmbt blocks that already match
> the expected owner so they are not reprocessed after scan restarts.
> 
> Signed-off-by: Brian Foster <bfoster@redhat.com>

Looks ok, I think...
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

> ---
>  fs/xfs/libxfs/xfs_btree.c | 26 ++++++++++++++-------
>  fs/xfs/xfs_bmap_util.c    | 57 ++++++++++++++++++++++++++++++++++++++---------
>  2 files changed, 65 insertions(+), 18 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
> index d06b04d..c466a23 100644
> --- a/fs/xfs/libxfs/xfs_btree.c
> +++ b/fs/xfs/libxfs/xfs_btree.c
> @@ -4452,10 +4452,15 @@ xfs_btree_block_change_owner(
>  
>  	/* modify the owner */
>  	block = xfs_btree_get_block(cur, level, &bp);
> -	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
> +	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
> +		if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
> +			return 0;
>  		block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
> -	else
> +	} else {
> +		if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner))
> +			return 0;
>  		block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
> +	}
>  
>  	/*
>  	 * If the block is a root block hosted in an inode, we might not have a
> @@ -4464,14 +4469,19 @@ xfs_btree_block_change_owner(
>  	 * block is formatted into the on-disk inode fork. We still change it,
>  	 * though, so everything is consistent in memory.
>  	 */
> -	if (bp) {
> -		if (cur->bc_tp)
> -			xfs_trans_ordered_buf(cur->bc_tp, bp);
> -		else
> -			xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
> -	} else {
> +	if (!bp) {
>  		ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
>  		ASSERT(level == cur->bc_nlevels - 1);
> +		return 0;
> +	}
> +
> +	if (cur->bc_tp) {
> +		if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) {
> +			xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
> +			return -EAGAIN;
> +		}
> +	} else {
> +		xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
>  	}
>  
>  	return 0;
> diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
> index ee8fb9a..3e9b7a4 100644
> --- a/fs/xfs/xfs_bmap_util.c
> +++ b/fs/xfs/xfs_bmap_util.c
> @@ -1929,6 +1929,48 @@ xfs_swap_extent_forks(
>  	return 0;
>  }
>  
> +/*
> + * Fix up the owners of the bmbt blocks to refer to the current inode. The
> + * change owner scan attempts to order all modified buffers in the current
> + * transaction. In the event of ordered buffer failure, the offending buffer is
> + * physically logged as a fallback and the scan returns -EAGAIN. We must roll
> + * the transaction in this case to replenish the fallback log reservation and
> + * restart the scan. This process repeats until the scan completes.
> + */
> +static int
> +xfs_swap_change_owner(
> +	struct xfs_trans	**tpp,
> +	struct xfs_inode	*ip,
> +	struct xfs_inode	*tmpip)
> +{
> +	int			error;
> +	struct xfs_trans	*tp = *tpp;
> +
> +	do {
> +		error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, ip->i_ino,
> +					      NULL);
> +		/* success or fatal error */
> +		if (error != -EAGAIN)
> +			break;
> +
> +		error = xfs_trans_roll(tpp, NULL);
> +		if (error)
> +			break;
> +		tp = *tpp;
> +
> +		/*
> +		 * Redirty both inodes so they can relog and keep the log tail
> +		 * moving forward.
> +		 */
> +		xfs_trans_ijoin(tp, ip, 0);
> +		xfs_trans_ijoin(tp, tmpip, 0);
> +		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
> +		xfs_trans_log_inode(tp, tmpip, XFS_ILOG_CORE);
> +	} while (true);
> +
> +	return error;
> +}
> +
>  int
>  xfs_swap_extents(
>  	struct xfs_inode	*ip,	/* target inode */
> @@ -1943,7 +1985,7 @@ xfs_swap_extents(
>  	int			lock_flags;
>  	struct xfs_ifork	*cowfp;
>  	uint64_t		f;
> -	int			resblks;
> +	int			resblks = 0;
>  
>  	/*
>  	 * Lock the inodes against other IO, page faults and truncate to
> @@ -1991,11 +2033,8 @@ xfs_swap_extents(
>  			  XFS_SWAP_RMAP_SPACE_RES(mp,
>  				XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK),
>  				XFS_DATA_FORK);
> -		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
> -				0, 0, &tp);
> -	} else
> -		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0,
> -				0, 0, &tp);
> +	}
> +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
>  	if (error)
>  		goto out_unlock;
>  
> @@ -2087,14 +2126,12 @@ xfs_swap_extents(
>  	 * inode number of the current inode.
>  	 */
>  	if (src_log_flags & XFS_ILOG_DOWNER) {
> -		error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK,
> -					      ip->i_ino, NULL);
> +		error = xfs_swap_change_owner(&tp, ip, tip);
>  		if (error)
>  			goto out_trans_cancel;
>  	}
>  	if (target_log_flags & XFS_ILOG_DOWNER) {
> -		error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK,
> -					      tip->i_ino, NULL);
> +		error = xfs_swap_change_owner(&tp, tip, ip);
>  		if (error)
>  			goto out_trans_cancel;
>  	}
> -- 
> 2.9.5
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

  reply	other threads:[~2017-08-25 16:54 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-25 15:05 [PATCH 0/9] xfs: refactor ordered buffer logging code Brian Foster
2017-08-25 15:05 ` [PATCH 1/9] xfs: open-code xfs_buf_item_dirty() Brian Foster
2017-08-25 15:26   ` Darrick J. Wong
2017-08-28  9:20   ` Christoph Hellwig
2017-08-25 15:05 ` [PATCH 2/9] xfs: remove unnecessary dirty bli format check for ordered bufs Brian Foster
2017-08-25 15:51   ` Darrick J. Wong
2017-08-28  9:25   ` Christoph Hellwig
2017-08-28 10:51     ` Brian Foster
2017-08-25 15:05 ` [PATCH 3/9] xfs: ordered buffer log items are never formatted Brian Foster
2017-08-25 15:26   ` Darrick J. Wong
2017-08-28  9:26   ` Christoph Hellwig
2017-08-25 15:05 ` [PATCH 4/9] xfs: refactor buffer logging into buffer dirtying helper Brian Foster
2017-08-28  9:28   ` Christoph Hellwig
2017-08-25 15:05 ` [PATCH 5/9] xfs: don't log dirty ranges for ordered buffers Brian Foster
2017-08-25 15:51   ` Darrick J. Wong
2017-08-28  9:29   ` Christoph Hellwig
2017-08-25 15:05 ` [PATCH 6/9] xfs: skip bmbt block ino validation during owner change Brian Foster
2017-08-25 15:35   ` Darrick J. Wong
2017-08-25 18:11     ` Brian Foster
2017-08-28  9:44   ` Christoph Hellwig
2017-08-25 15:05 ` [PATCH 7/9] xfs: move bmbt owner change to last step of extent swap Brian Foster
2017-08-25 15:57   ` Darrick J. Wong
2017-08-28  9:46   ` Christoph Hellwig
2017-08-25 15:05 ` [PATCH 8/9] xfs: disallow marking previously dirty buffers as ordered Brian Foster
2017-08-25 16:50   ` Darrick J. Wong
2017-08-28  9:34   ` Christoph Hellwig
2017-08-25 15:05 ` [PATCH 9/9] xfs: relog dirty buffers during swapext bmbt owner change Brian Foster
2017-08-25 16:53   ` Darrick J. Wong [this message]
2017-08-28  9:51   ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170825165359.GR4796@magnolia \
    --to=darrick.wong@oracle.com \
    --cc=bfoster@redhat.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.