Linux-BTRFS Archive on lore.kernel.org
 help / color / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: david@fromorbit.com, darrick.wong@oracle.com
Cc: sandeen@redhat.com, linux-nfs@vger.kernel.org,
	linux-cifs@vger.kernel.org, Amir Goldstein <amir73il@gmail.com>,
	linux-unionfs@vger.kernel.org, linux-xfs@vger.kernel.org,
	linux-mm@kvack.org, linux-btrfs@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, Christoph Hellwig <hch@lst.de>,
	ocfs2-devel@oss.oracle.com
Subject: [PATCH 03/26] vfs: check file ranges before cloning files
Date: Mon, 15 Oct 2018 20:10:16 -0700
Message-ID: <153965941688.1256.14812185868033156908.stgit@magnolia> (raw)
In-Reply-To: <153965939489.1256.7400115244528045860.stgit@magnolia>

From: Darrick J. Wong <darrick.wong@oracle.com>

Move the file range checks from vfs_clone_file_prep into a separate
generic_remap_checks function so that all the checks are collected in a
central location.  This forms the basis for adding more checks from
generic_write_checks that will make cloning's input checking more
consistent with write input checking.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/ocfs2/refcounttree.c |    2 +
 fs/read_write.c         |   55 +++++++++----------------------------
 fs/xfs/xfs_reflink.c    |    2 +
 include/linux/fs.h      |    9 ++++--
 mm/filemap.c            |   69 +++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 90 insertions(+), 47 deletions(-)


diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 7a5ee145c733..19e03936c5e1 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4850,7 +4850,7 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 	    (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
 		goto out_unlock;
 
-	ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
+	ret = vfs_clone_file_prep(file_in, pos_in, file_out, pos_out,
 			&len, is_dedupe);
 	if (ret <= 0)
 		goto out_unlock;
diff --git a/fs/read_write.c b/fs/read_write.c
index 260797b01851..d6e8e242a15f 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1717,13 +1717,12 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
  * Returns: 0 for "nothing to clone", 1 for "something to clone", or
  * the usual negative error code.
  */
-int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
-			       struct inode *inode_out, loff_t pos_out,
-			       u64 *len, bool is_dedupe)
+int vfs_clone_file_prep(struct file *file_in, loff_t pos_in,
+			struct file *file_out, loff_t pos_out,
+			u64 *len, bool is_dedupe)
 {
-	loff_t bs = inode_out->i_sb->s_blocksize;
-	loff_t blen;
-	loff_t isize;
+	struct inode *inode_in = file_inode(file_in);
+	struct inode *inode_out = file_inode(file_out);
 	bool same_inode = (inode_in == inode_out);
 	int ret;
 
@@ -1740,10 +1739,10 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
 	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
 		return -EINVAL;
 
-	isize = i_size_read(inode_in);
-
 	/* Zero length dedupe exits immediately; reflink goes to EOF. */
 	if (*len == 0) {
+		loff_t isize = i_size_read(inode_in);
+
 		if (is_dedupe || pos_in == isize)
 			return 0;
 		if (pos_in > isize)
@@ -1751,36 +1750,11 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
 		*len = isize - pos_in;
 	}
 
-	/* Ensure offsets don't wrap and the input is inside i_size */
-	if (pos_in + *len < pos_in || pos_out + *len < pos_out ||
-	    pos_in + *len > isize)
-		return -EINVAL;
-
-	/* Don't allow dedupe past EOF in the dest file */
-	if (is_dedupe) {
-		loff_t	disize;
-
-		disize = i_size_read(inode_out);
-		if (pos_out >= disize || pos_out + *len > disize)
-			return -EINVAL;
-	}
-
-	/* If we're linking to EOF, continue to the block boundary. */
-	if (pos_in + *len == isize)
-		blen = ALIGN(isize, bs) - pos_in;
-	else
-		blen = *len;
-
-	/* Only reflink if we're aligned to block boundaries */
-	if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
-	    !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
-		return -EINVAL;
-
-	/* Don't allow overlapped reflink within the same file */
-	if (same_inode) {
-		if (pos_out + blen > pos_in && pos_out < pos_in + blen)
-			return -EINVAL;
-	}
+	/* Check that we don't violate system file offset limits. */
+	ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
+			is_dedupe);
+	if (ret)
+		return ret;
 
 	/* Wait for the completion of any pending IOs on both files */
 	inode_dio_wait(inode_in);
@@ -1813,7 +1787,7 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
 
 	return 1;
 }
-EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
+EXPORT_SYMBOL(vfs_clone_file_prep);
 
 int do_clone_file_range(struct file *file_in, loff_t pos_in,
 			struct file *file_out, loff_t pos_out, u64 len)
@@ -1851,9 +1825,6 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in,
 	if (ret)
 		return ret;
 
-	if (pos_in + len > i_size_read(inode_in))
-		return -EINVAL;
-
 	ret = file_in->f_op->clone_file_range(file_in, pos_in,
 			file_out, pos_out, len);
 	if (!ret) {
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 42ea7bab9144..281d5f53f2ec 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1326,7 +1326,7 @@ xfs_reflink_remap_prep(
 	if (IS_DAX(inode_in) || IS_DAX(inode_out))
 		goto out_unlock;
 
-	ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
+	ret = vfs_clone_file_prep(file_in, pos_in, file_out, pos_out,
 			len, is_dedupe);
 	if (ret <= 0)
 		goto out_unlock;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 897eae8faee1..ba93a6e7dac4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1825,9 +1825,9 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
 		unsigned long, loff_t *, rwf_t);
 extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
 				   loff_t, size_t, unsigned int);
-extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
-				      struct inode *inode_out, loff_t pos_out,
-				      u64 *len, bool is_dedupe);
+extern int vfs_clone_file_prep(struct file *file_in, loff_t pos_in,
+			       struct file *file_out, loff_t pos_out,
+			       u64 *count, bool is_dedupe);
 extern int do_clone_file_range(struct file *file_in, loff_t pos_in,
 			       struct file *file_out, loff_t pos_out, u64 len);
 extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
@@ -2967,6 +2967,9 @@ extern int sb_min_blocksize(struct super_block *, int);
 extern int generic_file_mmap(struct file *, struct vm_area_struct *);
 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
 extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
+extern int generic_remap_checks(struct file *file_in, loff_t pos_in,
+				struct file *file_out, loff_t pos_out,
+				uint64_t *count, bool is_dedupe);
 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
diff --git a/mm/filemap.c b/mm/filemap.c
index 52517f28e6f4..47e6bfd45a91 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2974,6 +2974,75 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 }
 EXPORT_SYMBOL(generic_write_checks);
 
+/*
+ * Performs necessary checks before doing a clone.
+ *
+ * Can adjust amount of bytes to clone.
+ * Returns appropriate error code that caller should return or
+ * zero in case the clone should be allowed.
+ */
+int generic_remap_checks(struct file *file_in, loff_t pos_in,
+			 struct file *file_out, loff_t pos_out,
+			 uint64_t *req_count, bool is_dedupe)
+{
+	struct inode *inode_in = file_in->f_mapping->host;
+	struct inode *inode_out = file_out->f_mapping->host;
+	uint64_t count = *req_count;
+	uint64_t bcount;
+	loff_t size_in, size_out;
+	loff_t bs = inode_out->i_sb->s_blocksize;
+
+	/* The start of both ranges must be aligned to an fs block. */
+	if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs))
+		return -EINVAL;
+
+	/* Ensure offsets don't wrap. */
+	if (pos_in + count < pos_in || pos_out + count < pos_out)
+		return -EINVAL;
+
+	size_in = i_size_read(inode_in);
+	size_out = i_size_read(inode_out);
+
+	/* Dedupe requires both ranges to be within EOF. */
+	if (is_dedupe &&
+	    (pos_in >= size_in || pos_in + count > size_in ||
+	     pos_out >= size_out || pos_out + count > size_out))
+		return -EINVAL;
+
+	/* Ensure the infile range is within the infile. */
+	if (pos_in >= size_in)
+		return -EINVAL;
+	count = min(count, size_in - (uint64_t)pos_in);
+
+	/*
+	 * If the user wanted us to link to the infile's EOF, round up to the
+	 * next block boundary for this check.
+	 *
+	 * Otherwise, make sure the count is also block-aligned, having
+	 * already confirmed the starting offsets' block alignment.
+	 */
+	if (pos_in + count == size_in) {
+		bcount = ALIGN(size_in, bs) - pos_in;
+	} else {
+		if (!IS_ALIGNED(count, bs))
+			return -EINVAL;
+
+		bcount = count;
+	}
+
+	/* Don't allow overlapped cloning within the same file. */
+	if (inode_in == inode_out &&
+	    pos_out + bcount > pos_in &&
+	    pos_out < pos_in + bcount)
+		return -EINVAL;
+
+	/* For now we don't support changing the length. */
+	if (*req_count != count)
+		return -EINVAL;
+
+	return 0;
+}
+
 int pagecache_write_begin(struct file *file, struct address_space *mapping,
 				loff_t pos, unsigned len, unsigned flags,
 				struct page **pagep, void **fsdata)


  parent reply index

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-16  3:09 [PATCH v5 00/26] fs: fixes for serious clone/dedupe problems Darrick J. Wong
2018-10-16  3:10 ` [PATCH 01/26] xfs: add a per-xfs trace_printk macro Darrick J. Wong
2018-10-16  3:10 ` [PATCH 02/26] vfs: vfs_clone_file_prep_inodes should return EINVAL for a clone from beyond EOF Darrick J. Wong
2018-10-16  3:10 ` Darrick J. Wong [this message]
2018-10-16  3:10 ` [PATCH 04/26] vfs: exit early from zero length remap operations Darrick J. Wong
2018-10-17  8:26   ` Christoph Hellwig
2018-10-16  3:10 ` [PATCH 05/26] vfs: strengthen checking of file range inputs to generic_remap_checks Darrick J. Wong
2018-10-16  3:10 ` [PATCH 06/26] vfs: avoid problematic remapping requests into partial EOF block Darrick J. Wong
2018-10-16  3:10 ` [PATCH 07/26] vfs: skip zero-length dedupe requests Darrick J. Wong
2018-10-16  3:10 ` [PATCH 08/26] vfs: rename vfs_clone_file_prep to be more descriptive Darrick J. Wong
2018-10-16  3:10 ` [PATCH 09/26] vfs: rename clone_verify_area to remap_verify_area Darrick J. Wong
2018-10-16  3:11 ` [PATCH 10/26] vfs: combine the clone and dedupe into a single remap_file_range Darrick J. Wong
2018-10-17  8:28   ` Christoph Hellwig
2018-10-16  3:11 ` [PATCH 11/26] vfs: pass remap flags to generic_remap_file_range_prep Darrick J. Wong
2018-10-17  8:28   ` Christoph Hellwig
2018-10-16  3:11 ` [PATCH 12/26] vfs: pass remap flags to generic_remap_checks Darrick J. Wong
2018-10-17  8:29   ` Christoph Hellwig
2018-10-16  3:19 ` [PATCH 13/26] vfs: create generic_remap_file_range_touch to update inode metadata Darrick J. Wong
2018-10-17  8:33   ` Christoph Hellwig
2018-10-16  3:19 ` [PATCH 14/26] vfs: make remap_file_range functions take and return bytes completed Darrick J. Wong
2018-10-16  3:19 ` [PATCH 15/26] vfs: plumb remap flags through the vfs clone functions Darrick J. Wong
2018-10-16  3:19 ` [PATCH 16/26] vfs: plumb remap flags through the vfs dedupe functions Darrick J. Wong
2018-10-16  3:19 ` [PATCH 17/26] vfs: enable remap callers that can handle short operations Darrick J. Wong
2018-10-17  8:36   ` Christoph Hellwig
2018-10-17 16:35     ` Darrick J. Wong
2018-10-16  3:20 ` [PATCH 18/26] vfs: hide file range comparison function Darrick J. Wong
2018-10-16  3:20 ` [PATCH 19/26] vfs: clean up generic_remap_file_range_prep return value Darrick J. Wong
2018-10-17  8:37   ` Christoph Hellwig
2018-10-16  3:20 ` [PATCH 20/26] ocfs2: truncate page cache for clone destination file before remapping Darrick J. Wong
2018-10-16  3:20 ` [PATCH 21/26] ocfs2: fix pagecache truncation prior to reflink Darrick J. Wong
2018-10-16  3:20 ` [PATCH 22/26] ocfs2: support partial clone range and dedupe range Darrick J. Wong
2018-10-16  3:20 ` [PATCH 23/26] ocfs2: remove ocfs2_reflink_remap_range Darrick J. Wong
2018-10-16  3:20 ` [PATCH 24/26] xfs: fix pagecache truncation prior to reflink Darrick J. Wong
2018-10-17  8:37   ` Christoph Hellwig
2018-10-16  3:20 ` [PATCH 25/26] xfs: support returning partial reflink results Darrick J. Wong
2018-10-17  8:40   ` Christoph Hellwig
2018-10-17 16:40     ` Darrick J. Wong
2018-10-16  3:21 ` [PATCH 26/26] xfs: remove redundant remap partial EOF block checks Darrick J. Wong
2018-10-17  8:40   ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=153965941688.1256.14812185868033156908.stgit@magnolia \
    --to=darrick.wong@oracle.com \
    --cc=amir73il@gmail.com \
    --cc=david@fromorbit.com \
    --cc=hch@lst.de \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-cifs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=linux-unionfs@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=ocfs2-devel@oss.oracle.com \
    --cc=sandeen@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-BTRFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-btrfs/0 linux-btrfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-btrfs linux-btrfs/ https://lore.kernel.org/linux-btrfs \
		linux-btrfs@vger.kernel.org
	public-inbox-index linux-btrfs

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-btrfs


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git