All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: david@fromorbit.com, darrick.wong@oracle.com
Cc: sandeen@redhat.com, linux-nfs@vger.kernel.org,
	linux-cifs@vger.kernel.org, Amir Goldstein <amir73il@gmail.com>,
	linux-unionfs@vger.kernel.org, linux-xfs@vger.kernel.org,
	linux-mm@kvack.org, linux-btrfs@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, ocfs2-devel@oss.oracle.com
Subject: [PATCH 16/28] vfs: enable remap callers that can handle short operations
Date: Sun, 21 Oct 2018 09:16:55 -0700	[thread overview]
Message-ID: <154013861584.29026.5158930020192209258.stgit@magnolia> (raw)
In-Reply-To: <154013850285.29026.16168387526580596209.stgit@magnolia>

From: Darrick J. Wong <darrick.wong@oracle.com>

Plumb in a remap flag that enables the filesystem remap handler to
shorten remapping requests for callers that can handle it.  Now
copy_file_range can report partial success (in case we run up against
alignment problems, resource limits, etc.).

We also enable CAN_SHORTEN for fideduperange to maintain existing
userspace-visible behavior where xfs/btrfs shorten the dedupe range to
avoid stale post-eof data exposure.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
---
 Documentation/filesystems/vfs.txt |    4 +++-
 fs/read_write.c                   |   28 ++++++++++++++++++++--------
 include/linux/fs.h                |    5 +++--
 mm/filemap.c                      |   11 +++++++----
 4 files changed, 33 insertions(+), 15 deletions(-)


diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 1bd2919deaca..5f71a252e2e0 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -970,7 +970,9 @@ otherwise noted.
 	negative error code if errors occurred before any bytes were remapped.
 	The remap_flags parameter accepts REMAP_FILE_* flags.  If
 	REMAP_FILE_DEDUP is set then the implementation must only remap if the
-	requested file ranges have identical contents.
+	requested file ranges have identical contents.  If REMAP_FILE_CAN_SHORTEN is
+	set, the caller is ok with the implementation shortening the request
+	length to satisfy alignment or EOF requirements (or any other reason).
 
   fadvise: possibly called by the fadvise64() system call.
 
diff --git a/fs/read_write.c b/fs/read_write.c
index ea30666013b0..c0bcc1a20650 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1593,7 +1593,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 
 		cloned = file_in->f_op->remap_file_range(file_in, pos_in,
 				file_out, pos_out,
-				min_t(loff_t, MAX_RW_COUNT, len), 0);
+				min_t(loff_t, MAX_RW_COUNT, len),
+				REMAP_FILE_CAN_SHORTEN);
 		if (cloned > 0) {
 			ret = cloned;
 			goto done;
@@ -1721,6 +1722,8 @@ static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
  * can't meaningfully compare post-EOF contents.
  *
  * For clone we only link a partial EOF block above the destination file's EOF.
+ *
+ * Shorten the request if possible.
  */
 static int generic_remap_check_len(struct inode *inode_in,
 				   struct inode *inode_out,
@@ -1729,16 +1732,24 @@ static int generic_remap_check_len(struct inode *inode_in,
 				   unsigned int remap_flags)
 {
 	u64 blkmask = i_blocksize(inode_in) - 1;
+	loff_t new_len = *len;
 
 	if ((*len & blkmask) == 0)
 		return 0;
 
-	if (remap_flags & REMAP_FILE_DEDUP)
-		*len &= ~blkmask;
-	else if (pos_out + *len < i_size_read(inode_out))
-		return -EINVAL;
+	if ((remap_flags & REMAP_FILE_DEDUP) ||
+	    pos_out + *len < i_size_read(inode_out))
+		new_len &= ~blkmask;
 
-	return 0;
+	if (new_len == *len)
+		return 0;
+
+	if (remap_flags & REMAP_FILE_CAN_SHORTEN) {
+		*len = new_len;
+		return 0;
+	}
+
+	return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
 }
 
 /*
@@ -2014,7 +2025,8 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 {
 	loff_t ret;
 
-	WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP));
+	WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP |
+				     REMAP_FILE_CAN_SHORTEN));
 
 	ret = mnt_want_write_file(dst_file);
 	if (ret)
@@ -2115,7 +2127,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 
 		deduped = vfs_dedupe_file_range_one(file, off, dst_file,
 						    info->dest_offset, len,
-						    0);
+						    REMAP_FILE_CAN_SHORTEN);
 		if (deduped == -EBADE)
 			info->status = FILE_DEDUPE_RANGE_DIFFERS;
 		else if (deduped < 0)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 544ab5083b48..34c22d695011 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1727,8 +1727,10 @@ struct block_device_operations;
  * See Documentation/filesystems/vfs.txt for more details about this call.
  *
  * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate)
+ * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request
  */
 #define REMAP_FILE_DEDUP		(1 << 0)
+#define REMAP_FILE_CAN_SHORTEN		(1 << 1)
 
 /*
  * These flags signal that the caller is ok with altering various aspects of
@@ -1736,9 +1738,8 @@ struct block_device_operations;
  * implementation; the vfs remap helper functions can take advantage of them.
  * Flags in this category exist to preserve the quirky behavior of the hoisted
  * btrfs clone/dedupe ioctls.
- * There are no flags yet, but subsequent commits will add some.
  */
-#define REMAP_FILE_ADVISORY		(0)
+#define REMAP_FILE_ADVISORY		(REMAP_FILE_CAN_SHORTEN)
 
 struct iov_iter;
 
diff --git a/mm/filemap.c b/mm/filemap.c
index e9091d731f84..1775d4ad3317 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3045,8 +3045,7 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
 		bcount = ALIGN(size_in, bs) - pos_in;
 	} else {
 		if (!IS_ALIGNED(count, bs))
-			return -EINVAL;
-
+			count = ALIGN_DOWN(count, bs);
 		bcount = count;
 	}
 
@@ -3056,10 +3055,14 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
 	    pos_out < pos_in + bcount)
 		return -EINVAL;
 
-	/* For now we don't support changing the length. */
-	if (*req_count != count)
+	/*
+	 * We shortened the request but the caller can't deal with that, so
+	 * bounce the request back to userspace.
+	 */
+	if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN))
 		return -EINVAL;
 
+	*req_count = count;
 	return 0;
 }
 

WARNING: multiple messages have this Message-ID
From: Darrick J. Wong <darrick.wong@oracle.com>
To: david@fromorbit.com, darrick.wong@oracle.com
Cc: sandeen@redhat.com, linux-nfs@vger.kernel.org,
	linux-cifs@vger.kernel.org, Amir Goldstein <amir73il@gmail.com>,
	linux-unionfs@vger.kernel.org, linux-xfs@vger.kernel.org,
	linux-mm@kvack.org, linux-btrfs@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, ocfs2-devel@oss.oracle.com
Subject: [Ocfs2-devel] [PATCH 16/28] vfs: enable remap callers that can handle short operations
Date: Sun, 21 Oct 2018 09:16:55 -0700	[thread overview]
Message-ID: <154013861584.29026.5158930020192209258.stgit@magnolia> (raw)
In-Reply-To: <154013850285.29026.16168387526580596209.stgit@magnolia>

From: Darrick J. Wong <darrick.wong@oracle.com>

Plumb in a remap flag that enables the filesystem remap handler to
shorten remapping requests for callers that can handle it.  Now
copy_file_range can report partial success (in case we run up against
alignment problems, resource limits, etc.).

We also enable CAN_SHORTEN for fideduperange to maintain existing
userspace-visible behavior where xfs/btrfs shorten the dedupe range to
avoid stale post-eof data exposure.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
---
 Documentation/filesystems/vfs.txt |    4 +++-
 fs/read_write.c                   |   28 ++++++++++++++++++++--------
 include/linux/fs.h                |    5 +++--
 mm/filemap.c                      |   11 +++++++----
 4 files changed, 33 insertions(+), 15 deletions(-)


diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 1bd2919deaca..5f71a252e2e0 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -970,7 +970,9 @@ otherwise noted.
 	negative error code if errors occurred before any bytes were remapped.
 	The remap_flags parameter accepts REMAP_FILE_* flags.  If
 	REMAP_FILE_DEDUP is set then the implementation must only remap if the
-	requested file ranges have identical contents.
+	requested file ranges have identical contents.  If REMAP_FILE_CAN_SHORTEN is
+	set, the caller is ok with the implementation shortening the request
+	length to satisfy alignment or EOF requirements (or any other reason).
 
   fadvise: possibly called by the fadvise64() system call.
 
diff --git a/fs/read_write.c b/fs/read_write.c
index ea30666013b0..c0bcc1a20650 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1593,7 +1593,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 
 		cloned = file_in->f_op->remap_file_range(file_in, pos_in,
 				file_out, pos_out,
-				min_t(loff_t, MAX_RW_COUNT, len), 0);
+				min_t(loff_t, MAX_RW_COUNT, len),
+				REMAP_FILE_CAN_SHORTEN);
 		if (cloned > 0) {
 			ret = cloned;
 			goto done;
@@ -1721,6 +1722,8 @@ static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
  * can't meaningfully compare post-EOF contents.
  *
  * For clone we only link a partial EOF block above the destination file's EOF.
+ *
+ * Shorten the request if possible.
  */
 static int generic_remap_check_len(struct inode *inode_in,
 				   struct inode *inode_out,
@@ -1729,16 +1732,24 @@ static int generic_remap_check_len(struct inode *inode_in,
 				   unsigned int remap_flags)
 {
 	u64 blkmask = i_blocksize(inode_in) - 1;
+	loff_t new_len = *len;
 
 	if ((*len & blkmask) == 0)
 		return 0;
 
-	if (remap_flags & REMAP_FILE_DEDUP)
-		*len &= ~blkmask;
-	else if (pos_out + *len < i_size_read(inode_out))
-		return -EINVAL;
+	if ((remap_flags & REMAP_FILE_DEDUP) ||
+	    pos_out + *len < i_size_read(inode_out))
+		new_len &= ~blkmask;
 
-	return 0;
+	if (new_len == *len)
+		return 0;
+
+	if (remap_flags & REMAP_FILE_CAN_SHORTEN) {
+		*len = new_len;
+		return 0;
+	}
+
+	return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
 }
 
 /*
@@ -2014,7 +2025,8 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 {
 	loff_t ret;
 
-	WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP));
+	WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP |
+				     REMAP_FILE_CAN_SHORTEN));
 
 	ret = mnt_want_write_file(dst_file);
 	if (ret)
@@ -2115,7 +2127,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 
 		deduped = vfs_dedupe_file_range_one(file, off, dst_file,
 						    info->dest_offset, len,
-						    0);
+						    REMAP_FILE_CAN_SHORTEN);
 		if (deduped == -EBADE)
 			info->status = FILE_DEDUPE_RANGE_DIFFERS;
 		else if (deduped < 0)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 544ab5083b48..34c22d695011 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1727,8 +1727,10 @@ struct block_device_operations;
  * See Documentation/filesystems/vfs.txt for more details about this call.
  *
  * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate)
+ * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request
  */
 #define REMAP_FILE_DEDUP		(1 << 0)
+#define REMAP_FILE_CAN_SHORTEN		(1 << 1)
 
 /*
  * These flags signal that the caller is ok with altering various aspects of
@@ -1736,9 +1738,8 @@ struct block_device_operations;
  * implementation; the vfs remap helper functions can take advantage of them.
  * Flags in this category exist to preserve the quirky behavior of the hoisted
  * btrfs clone/dedupe ioctls.
- * There are no flags yet, but subsequent commits will add some.
  */
-#define REMAP_FILE_ADVISORY		(0)
+#define REMAP_FILE_ADVISORY		(REMAP_FILE_CAN_SHORTEN)
 
 struct iov_iter;
 
diff --git a/mm/filemap.c b/mm/filemap.c
index e9091d731f84..1775d4ad3317 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3045,8 +3045,7 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
 		bcount = ALIGN(size_in, bs) - pos_in;
 	} else {
 		if (!IS_ALIGNED(count, bs))
-			return -EINVAL;
-
+			count = ALIGN_DOWN(count, bs);
 		bcount = count;
 	}
 
@@ -3056,10 +3055,14 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
 	    pos_out < pos_in + bcount)
 		return -EINVAL;
 
-	/* For now we don't support changing the length. */
-	if (*req_count != count)
+	/*
+	 * We shortened the request but the caller can't deal with that, so
+	 * bounce the request back to userspace.
+	 */
+	if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN))
 		return -EINVAL;
 
+	*req_count = count;
 	return 0;
 }
 

  parent reply	other threads:[~2018-10-21 16:16 UTC|newest]

Thread overview: 83+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-21 16:15 [Ocfs2-devel] [PATCH v6 00/28] fs: fixes for serious clone/dedupe problems Darrick J. Wong
2018-10-21 16:15 ` [Ocfs2-devel] [PATCH 01/28] vfs: vfs_clone_file_prep_inodes should return EINVAL for a clone from beyond EOF Darrick J. Wong
2018-10-21 16:15 ` [Ocfs2-devel] [PATCH 02/28] vfs: check file ranges before cloning files Darrick J. Wong
2018-10-21 16:15 ` [Ocfs2-devel] [PATCH 03/28] vfs: exit early from zero length remap operations Darrick J. Wong
2018-10-21 16:15 ` [Ocfs2-devel] [PATCH 04/28] vfs: strengthen checking of file range inputs to generic_remap_checks Darrick J. Wong
2018-10-21 16:15 ` [Ocfs2-devel] [PATCH 05/28] vfs: avoid problematic remapping requests into partial EOF block Darrick J. Wong
2018-10-21 16:15 ` [Ocfs2-devel] [PATCH 06/28] vfs: skip zero-length dedupe requests Darrick J. Wong
2018-10-21 16:15 ` [Ocfs2-devel] [PATCH 07/28] vfs: rename vfs_clone_file_prep to be more descriptive Darrick J. Wong
2018-10-21 16:15 ` [Ocfs2-devel] [PATCH 08/28] vfs: rename clone_verify_area to remap_verify_area Darrick J. Wong
2018-10-21 16:16 ` [Ocfs2-devel] [PATCH 09/28] vfs: combine the clone and dedupe into a single remap_file_range Darrick J. Wong
2018-10-21 16:16 ` [Ocfs2-devel] [PATCH 10/28] vfs: pass remap flags to generic_remap_file_range_prep Darrick J. Wong
2018-10-21 16:16 ` [Ocfs2-devel] [PATCH 11/28] vfs: pass remap flags to generic_remap_checks Darrick J. Wong
2018-10-21 16:16 ` [Ocfs2-devel] [PATCH 12/28] vfs: remap helper should update destination inode metadata Darrick J. Wong
2018-10-21 16:16 ` [Ocfs2-devel] [PATCH 13/28] vfs: make remap_file_range functions take and return bytes completed Darrick J. Wong
2018-10-21 16:16 ` [Ocfs2-devel] [PATCH 14/28] vfs: plumb remap flags through the vfs clone functions Darrick J. Wong
2018-10-21 16:16 ` [Ocfs2-devel] [PATCH 15/28] vfs: plumb remap flags through the vfs dedupe functions Darrick J. Wong
2018-10-21 16:16 ` Darrick J. Wong [this message]
2018-10-21 16:17 ` [Ocfs2-devel] [PATCH 17/28] vfs: hide file range comparison function Darrick J. Wong
2018-10-21 16:17 ` [Ocfs2-devel] [PATCH 18/28] vfs: clean up generic_remap_file_range_prep return value Darrick J. Wong
2018-10-21 16:17 ` [Ocfs2-devel] [PATCH 19/28] ocfs2: truncate page cache for clone destination file before remapping Darrick J. Wong
2018-10-21 16:17 ` [Ocfs2-devel] [PATCH 20/28] ocfs2: fix pagecache truncation prior to reflink Darrick J. Wong
2018-10-21 16:17 ` [Ocfs2-devel] [PATCH 21/28] ocfs2: support partial clone range and dedupe range Darrick J. Wong
2018-10-21 16:17 ` [Ocfs2-devel] [PATCH 22/28] ocfs2: remove ocfs2_reflink_remap_range Darrick J. Wong
2018-10-21 16:17 ` [Ocfs2-devel] [PATCH 23/28] xfs: fix pagecache truncation prior to reflink Darrick J. Wong
2018-10-21 16:17 ` [Ocfs2-devel] [PATCH 24/28] xfs: clean up xfs_reflink_remap_blocks call site Darrick J. Wong
2018-10-22  2:11   ` Dave Chinner
2018-10-21 16:17 ` [Ocfs2-devel] [PATCH 25/28] xfs: support returning partial reflink results Darrick J. Wong
2018-10-22  2:14   ` Dave Chinner
2018-10-21 16:18 ` [Ocfs2-devel] [PATCH 26/28] xfs: remove redundant remap partial EOF block checks Darrick J. Wong
2018-10-21 16:18 ` [Ocfs2-devel] [PATCH 27/28] xfs: remove xfs_reflink_remap_range Darrick J. Wong
2018-10-22  2:17   ` Dave Chinner
2018-10-21 16:18 ` [Ocfs2-devel] [PATCH 28/28] xfs: remove [cm]time update from reflink calls Darrick J. Wong
2018-10-22  2:18   ` Dave Chinner
2018-10-22  2:21 ` [Ocfs2-devel] [PATCH v6 00/28] fs: fixes for serious clone/dedupe problems Dave Chinner
2018-10-22  4:37   ` Dave Chinner
2018-10-22  4:52     ` Al Viro
2018-10-22  5:08       ` Dave Chinner
2018-10-22  5:42         ` Amir Goldstein
2018-10-22  6:55           ` [Ocfs2-devel] " Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=154013861584.29026.5158930020192209258.stgit@magnolia \
    --to=darrick.wong@oracle.com \
    --cc=amir73il@gmail.com \
    --cc=david@fromorbit.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-cifs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=linux-unionfs@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=ocfs2-devel@oss.oracle.com \
    --cc=sandeen@redhat.com \
    --subject='Re: [PATCH 16/28] vfs: enable remap callers that can handle short operations' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.