All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dave Chinner <david@fromorbit.com>
To: "Darrick J. Wong" <darrick.wong@oracle.com>
Cc: linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-btrfs@vger.kernel.org, ocfs2-devel@oss.oracle.com,
	sandeen@redhat.com
Subject: Re: [PATCH 02/15] xfs: refactor clonerange preparation into a separate helper
Date: Fri, 5 Oct 2018 15:28:09 +1000	[thread overview]
Message-ID: <20181005052809.GB12041@dastard> (raw)
In-Reply-To: <153870028762.29072.5369530877410002226.stgit@magnolia>

On Thu, Oct 04, 2018 at 05:44:47PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Refactor all the reflink preparation steps into a separate helper that
> we'll use to land all the upcoming fixes for insufficient input checks.

If I've read the patch right, this also changes the location of the
page cache truncation, right?  i.e. it now happens in the
xfs_reflink_remap_prep() function instead of after the remap?  I
think the commit message needs to mention that because it's a fix to
incorrect behaviour....

I've added:

--
This rework also moves the invalidation of the destination range to
the prep function so that it is done before the range is remapped.
This ensures that nobody can access the data in the range being remapped
until the remap is complete.
--

Sound OK?

Otherwise this looks fine.

Reviewed-by: Dave Chinner <dchinner@redhat.com>

-Dave.

> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_reflink.c |   96 +++++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 71 insertions(+), 25 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> index 38f405415b88..80ca9b6793cd 100644
> --- a/fs/xfs/xfs_reflink.c
> +++ b/fs/xfs/xfs_reflink.c
> @@ -1195,11 +1195,33 @@ xfs_iolock_two_inodes_and_break_layout(
>  	return 0;
>  }
>  
> +/* Unlock both inodes after they've been prepped for a range clone. */
> +STATIC void
> +xfs_reflink_remap_unlock(
> +	struct file		*file_in,
> +	struct file		*file_out)
> +{
> +	struct inode		*inode_in = file_inode(file_in);
> +	struct xfs_inode	*src = XFS_I(inode_in);
> +	struct inode		*inode_out = file_inode(file_out);
> +	struct xfs_inode	*dest = XFS_I(inode_out);
> +	bool			same_inode = (inode_in == inode_out);
> +
> +	xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
> +	if (!same_inode)
> +		xfs_iunlock(src, XFS_MMAPLOCK_SHARED);
> +	inode_unlock(inode_out);
> +	if (!same_inode)
> +		inode_unlock_shared(inode_in);
> +}
> +
>  /*
> - * Link a range of blocks from one file to another.
> + * Prepare two files for range cloning.  Upon a successful return both inodes
> + * will have the iolock and mmaplock held, the page cache of the out file
> + * will be truncated, and any leases on the out file will have been broken.
>   */
> -int
> -xfs_reflink_remap_range(
> +STATIC int
> +xfs_reflink_remap_prep(
>  	struct file		*file_in,
>  	loff_t			pos_in,
>  	struct file		*file_out,
> @@ -1211,19 +1233,9 @@ xfs_reflink_remap_range(
>  	struct xfs_inode	*src = XFS_I(inode_in);
>  	struct inode		*inode_out = file_inode(file_out);
>  	struct xfs_inode	*dest = XFS_I(inode_out);
> -	struct xfs_mount	*mp = src->i_mount;
>  	bool			same_inode = (inode_in == inode_out);
> -	xfs_fileoff_t		sfsbno, dfsbno;
> -	xfs_filblks_t		fsblen;
> -	xfs_extlen_t		cowextsize;
>  	ssize_t			ret;
>  
> -	if (!xfs_sb_version_hasreflink(&mp->m_sb))
> -		return -EOPNOTSUPP;
> -
> -	if (XFS_FORCED_SHUTDOWN(mp))
> -		return -EIO;
> -
>  	/* Lock both files against IO */
>  	ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out);
>  	if (ret)
> @@ -1254,8 +1266,6 @@ xfs_reflink_remap_range(
>  	if (ret)
>  		goto out_unlock;
>  
> -	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
> -
>  	/*
>  	 * Clear out post-eof preallocations because we don't have page cache
>  	 * backing the delayed allocations and they'll never get freed on
> @@ -1272,6 +1282,51 @@ xfs_reflink_remap_range(
>  	if (ret)
>  		goto out_unlock;
>  
> +	/* Zap any page cache for the destination file's range. */
> +	truncate_inode_pages_range(&inode_out->i_data, pos_out,
> +				   PAGE_ALIGN(pos_out + len) - 1);
> +	return 0;
> +out_unlock:
> +	xfs_reflink_remap_unlock(file_in, file_out);
> +	return ret;
> +}
> +
> +/*
> + * Link a range of blocks from one file to another.
> + */
> +int
> +xfs_reflink_remap_range(
> +	struct file		*file_in,
> +	loff_t			pos_in,
> +	struct file		*file_out,
> +	loff_t			pos_out,
> +	u64			len,
> +	bool			is_dedupe)
> +{
> +	struct inode		*inode_in = file_inode(file_in);
> +	struct xfs_inode	*src = XFS_I(inode_in);
> +	struct inode		*inode_out = file_inode(file_out);
> +	struct xfs_inode	*dest = XFS_I(inode_out);
> +	struct xfs_mount	*mp = src->i_mount;
> +	xfs_fileoff_t		sfsbno, dfsbno;
> +	xfs_filblks_t		fsblen;
> +	xfs_extlen_t		cowextsize;
> +	ssize_t			ret;
> +
> +	if (!xfs_sb_version_hasreflink(&mp->m_sb))
> +		return -EOPNOTSUPP;
> +
> +	if (XFS_FORCED_SHUTDOWN(mp))
> +		return -EIO;
> +
> +	/* Prepare and then clone file data. */
> +	ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
> +			len, is_dedupe);
> +	if (ret)
> +		return ret;
> +
> +	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
> +
>  	dfsbno = XFS_B_TO_FSBT(mp, pos_out);
>  	sfsbno = XFS_B_TO_FSBT(mp, pos_in);
>  	fsblen = XFS_B_TO_FSB(mp, len);
> @@ -1280,10 +1335,6 @@ xfs_reflink_remap_range(
>  	if (ret)
>  		goto out_unlock;
>  
> -	/* Zap any page cache for the destination file's range. */
> -	truncate_inode_pages_range(&inode_out->i_data, pos_out,
> -				   PAGE_ALIGN(pos_out + len) - 1);
> -
>  	/*
>  	 * Carry the cowextsize hint from src to dest if we're sharing the
>  	 * entire source file to the entire destination file, the source file
> @@ -1300,12 +1351,7 @@ xfs_reflink_remap_range(
>  			is_dedupe);
>  
>  out_unlock:
> -	xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
> -	if (!same_inode)
> -		xfs_iunlock(src, XFS_MMAPLOCK_SHARED);
> -	inode_unlock(inode_out);
> -	if (!same_inode)
> -		inode_unlock_shared(inode_in);
> +	xfs_reflink_remap_unlock(file_in, file_out);
>  	if (ret)
>  		trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
>  	return ret;
> 
> 

-- 
Dave Chinner
david@fromorbit.com

WARNING: multiple messages have this Message-ID (diff)
From: Dave Chinner <david@fromorbit.com>
To: "Darrick J. Wong" <darrick.wong@oracle.com>
Cc: linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-btrfs@vger.kernel.org, ocfs2-devel@oss.oracle.com,
	sandeen@redhat.com
Subject: [Ocfs2-devel] [PATCH 02/15] xfs: refactor clonerange preparation into a separate helper
Date: Fri, 5 Oct 2018 15:28:09 +1000	[thread overview]
Message-ID: <20181005052809.GB12041@dastard> (raw)
In-Reply-To: <153870028762.29072.5369530877410002226.stgit@magnolia>

On Thu, Oct 04, 2018 at 05:44:47PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Refactor all the reflink preparation steps into a separate helper that
> we'll use to land all the upcoming fixes for insufficient input checks.

If I've read the patch right, this also changes the location of the
page cache truncation, right?  i.e. it now happens in the
xfs_reflink_remap_prep() function instead of after the remap?  I
think the commit message needs to mention that because it's a fix to
incorrect behaviour....

I've added:

--
This rework also moves the invalidation of the destination range to
the prep function so that it is done before the range is remapped.
This ensures that nobody can access the data in the range being remapped
until the remap is complete.
--

Sound OK?

Otherwise this looks fine.

Reviewed-by: Dave Chinner <dchinner@redhat.com>

-Dave.

> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_reflink.c |   96 +++++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 71 insertions(+), 25 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> index 38f405415b88..80ca9b6793cd 100644
> --- a/fs/xfs/xfs_reflink.c
> +++ b/fs/xfs/xfs_reflink.c
> @@ -1195,11 +1195,33 @@ xfs_iolock_two_inodes_and_break_layout(
>  	return 0;
>  }
>  
> +/* Unlock both inodes after they've been prepped for a range clone. */
> +STATIC void
> +xfs_reflink_remap_unlock(
> +	struct file		*file_in,
> +	struct file		*file_out)
> +{
> +	struct inode		*inode_in = file_inode(file_in);
> +	struct xfs_inode	*src = XFS_I(inode_in);
> +	struct inode		*inode_out = file_inode(file_out);
> +	struct xfs_inode	*dest = XFS_I(inode_out);
> +	bool			same_inode = (inode_in == inode_out);
> +
> +	xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
> +	if (!same_inode)
> +		xfs_iunlock(src, XFS_MMAPLOCK_SHARED);
> +	inode_unlock(inode_out);
> +	if (!same_inode)
> +		inode_unlock_shared(inode_in);
> +}
> +
>  /*
> - * Link a range of blocks from one file to another.
> + * Prepare two files for range cloning.  Upon a successful return both inodes
> + * will have the iolock and mmaplock held, the page cache of the out file
> + * will be truncated, and any leases on the out file will have been broken.
>   */
> -int
> -xfs_reflink_remap_range(
> +STATIC int
> +xfs_reflink_remap_prep(
>  	struct file		*file_in,
>  	loff_t			pos_in,
>  	struct file		*file_out,
> @@ -1211,19 +1233,9 @@ xfs_reflink_remap_range(
>  	struct xfs_inode	*src = XFS_I(inode_in);
>  	struct inode		*inode_out = file_inode(file_out);
>  	struct xfs_inode	*dest = XFS_I(inode_out);
> -	struct xfs_mount	*mp = src->i_mount;
>  	bool			same_inode = (inode_in == inode_out);
> -	xfs_fileoff_t		sfsbno, dfsbno;
> -	xfs_filblks_t		fsblen;
> -	xfs_extlen_t		cowextsize;
>  	ssize_t			ret;
>  
> -	if (!xfs_sb_version_hasreflink(&mp->m_sb))
> -		return -EOPNOTSUPP;
> -
> -	if (XFS_FORCED_SHUTDOWN(mp))
> -		return -EIO;
> -
>  	/* Lock both files against IO */
>  	ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out);
>  	if (ret)
> @@ -1254,8 +1266,6 @@ xfs_reflink_remap_range(
>  	if (ret)
>  		goto out_unlock;
>  
> -	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
> -
>  	/*
>  	 * Clear out post-eof preallocations because we don't have page cache
>  	 * backing the delayed allocations and they'll never get freed on
> @@ -1272,6 +1282,51 @@ xfs_reflink_remap_range(
>  	if (ret)
>  		goto out_unlock;
>  
> +	/* Zap any page cache for the destination file's range. */
> +	truncate_inode_pages_range(&inode_out->i_data, pos_out,
> +				   PAGE_ALIGN(pos_out + len) - 1);
> +	return 0;
> +out_unlock:
> +	xfs_reflink_remap_unlock(file_in, file_out);
> +	return ret;
> +}
> +
> +/*
> + * Link a range of blocks from one file to another.
> + */
> +int
> +xfs_reflink_remap_range(
> +	struct file		*file_in,
> +	loff_t			pos_in,
> +	struct file		*file_out,
> +	loff_t			pos_out,
> +	u64			len,
> +	bool			is_dedupe)
> +{
> +	struct inode		*inode_in = file_inode(file_in);
> +	struct xfs_inode	*src = XFS_I(inode_in);
> +	struct inode		*inode_out = file_inode(file_out);
> +	struct xfs_inode	*dest = XFS_I(inode_out);
> +	struct xfs_mount	*mp = src->i_mount;
> +	xfs_fileoff_t		sfsbno, dfsbno;
> +	xfs_filblks_t		fsblen;
> +	xfs_extlen_t		cowextsize;
> +	ssize_t			ret;
> +
> +	if (!xfs_sb_version_hasreflink(&mp->m_sb))
> +		return -EOPNOTSUPP;
> +
> +	if (XFS_FORCED_SHUTDOWN(mp))
> +		return -EIO;
> +
> +	/* Prepare and then clone file data. */
> +	ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
> +			len, is_dedupe);
> +	if (ret)
> +		return ret;
> +
> +	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
> +
>  	dfsbno = XFS_B_TO_FSBT(mp, pos_out);
>  	sfsbno = XFS_B_TO_FSBT(mp, pos_in);
>  	fsblen = XFS_B_TO_FSB(mp, len);
> @@ -1280,10 +1335,6 @@ xfs_reflink_remap_range(
>  	if (ret)
>  		goto out_unlock;
>  
> -	/* Zap any page cache for the destination file's range. */
> -	truncate_inode_pages_range(&inode_out->i_data, pos_out,
> -				   PAGE_ALIGN(pos_out + len) - 1);
> -
>  	/*
>  	 * Carry the cowextsize hint from src to dest if we're sharing the
>  	 * entire source file to the entire destination file, the source file
> @@ -1300,12 +1351,7 @@ xfs_reflink_remap_range(
>  			is_dedupe);
>  
>  out_unlock:
> -	xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
> -	if (!same_inode)
> -		xfs_iunlock(src, XFS_MMAPLOCK_SHARED);
> -	inode_unlock(inode_out);
> -	if (!same_inode)
> -		inode_unlock_shared(inode_in);
> +	xfs_reflink_remap_unlock(file_in, file_out);
>  	if (ret)
>  		trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
>  	return ret;
> 
> 

-- 
Dave Chinner
david at fromorbit.com

  reply	other threads:[~2018-10-05  5:28 UTC|newest]

Thread overview: 82+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-05  0:44 [PATCH 00/15] fs: fixes for serious clone/dedupe problems Darrick J. Wong
2018-10-05  0:44 ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  0:44 ` [PATCH 01/15] xfs: add a per-xfs trace_printk macro Darrick J. Wong
2018-10-05  0:44   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  0:44 ` [PATCH 02/15] xfs: refactor clonerange preparation into a separate helper Darrick J. Wong
2018-10-05  0:44   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  5:28   ` Dave Chinner [this message]
2018-10-05  5:28     ` Dave Chinner
2018-10-05 17:06     ` Darrick J. Wong
2018-10-05 17:06       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-06 10:30     ` Christoph Hellwig
2018-10-06 10:30       ` [Ocfs2-devel] " Christoph Hellwig
2018-10-05  7:02   ` Dave Chinner
2018-10-05  7:02     ` [Ocfs2-devel] " Dave Chinner
2018-10-05  9:02     ` Dave Chinner
2018-10-05  9:02       ` [Ocfs2-devel] " Dave Chinner
2018-10-05 17:21       ` Darrick J. Wong
2018-10-05 17:21         ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05 23:42         ` Dave Chinner
2018-10-05 23:42           ` [Ocfs2-devel] " Dave Chinner
2018-10-05  0:44 ` [PATCH 03/15] xfs: zero posteof blocks when cloning above eof Darrick J. Wong
2018-10-05  0:44   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  5:28   ` Dave Chinner
2018-10-05  5:28     ` [Ocfs2-devel] " Dave Chinner
2018-10-06 10:34   ` Christoph Hellwig
2018-10-06 10:34     ` [Ocfs2-devel] " Christoph Hellwig
2018-10-05  0:45 ` [PATCH 04/15] xfs: update ctime and remove suid before cloning files Darrick J. Wong
2018-10-05  0:45   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  5:30   ` Dave Chinner
2018-10-05  5:30     ` [Ocfs2-devel] " Dave Chinner
2018-10-06 10:35   ` Christoph Hellwig
2018-10-06 10:35     ` [Ocfs2-devel] " Christoph Hellwig
2018-10-05  0:45 ` [PATCH 05/15] vfs: check file ranges " Darrick J. Wong
2018-10-05  0:45   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-06 10:38   ` Christoph Hellwig
2018-10-06 10:38     ` [Ocfs2-devel] " Christoph Hellwig
2018-10-05  0:45 ` [PATCH 06/15] vfs: strengthen checking of file range inputs to clone/dedupe range Darrick J. Wong
2018-10-05  0:45   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  6:10   ` Amir Goldstein
2018-10-05 17:36     ` Darrick J. Wong
2018-10-05 17:36       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  0:45 ` [PATCH 07/15] vfs: skip zero-length dedupe requests Darrick J. Wong
2018-10-05  0:45   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  8:39   ` Amir Goldstein
2018-10-06 10:39   ` Christoph Hellwig
2018-10-06 10:39     ` [Ocfs2-devel] " Christoph Hellwig
2018-10-05  0:45 ` [PATCH 08/15] vfs: change clone and dedupe range function pointers to return bytes completed Darrick J. Wong
2018-10-05  0:45   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  8:06   ` Amir Goldstein
2018-10-05 21:47     ` Darrick J. Wong
2018-10-05 21:47       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-06 10:41   ` Christoph Hellwig
2018-10-06 10:41     ` [Ocfs2-devel] " Christoph Hellwig
2018-10-08 18:59     ` Darrick J. Wong
2018-10-08 18:59       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  0:45 ` [PATCH 09/15] vfs: pass operation flags to {clone, dedupe}_file_range implementations Darrick J. Wong
2018-10-05  0:45   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  7:07   ` Amir Goldstein
2018-10-05 17:50     ` Darrick J. Wong
2018-10-05 17:50       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-06 10:44       ` Christoph Hellwig
2018-10-06 10:44         ` [Ocfs2-devel] " Christoph Hellwig
2018-10-05  0:45 ` [PATCH 10/15] vfs: make cloning to source file eof more explicit Darrick J. Wong
2018-10-05  0:45   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  6:47   ` Amir Goldstein
2018-10-05  0:45 ` [PATCH 11/15] vfs: allow short clone and dedupe operations Darrick J. Wong
2018-10-05  0:45   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  0:46 ` [PATCH 12/15] vfs: implement opportunistic short dedupe Darrick J. Wong
2018-10-05  0:46   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  6:40   ` Amir Goldstein
2018-10-05 17:42     ` Darrick J. Wong
2018-10-05 17:42       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  0:46 ` [PATCH 13/15] ocfs2: truncate page cache for clone destination file before remapping Darrick J. Wong
2018-10-05  0:46   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  0:46 ` [PATCH 14/15] ocfs2: support partial clone range and dedupe range Darrick J. Wong
2018-10-05  0:46   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  0:46 ` [PATCH 15/15] xfs: support returning partial reflink results Darrick J. Wong
2018-10-05  0:46   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  1:17 ` [PATCH 00/15] fs: fixes for serious clone/dedupe problems Dave Chinner
2018-10-05  1:17   ` [Ocfs2-devel] " Dave Chinner
2018-10-05  1:24   ` Darrick J. Wong
2018-10-05  1:24     ` [Ocfs2-devel] " Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181005052809.GB12041@dastard \
    --to=david@fromorbit.com \
    --cc=darrick.wong@oracle.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=ocfs2-devel@oss.oracle.com \
    --cc=sandeen@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.