All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: david@fromorbit.com, darrick.wong@oracle.com
Cc: sandeen@redhat.com, linux-nfs@vger.kernel.org,
	linux-cifs@vger.kernel.org, linux-unionfs@vger.kernel.org,
	linux-xfs@vger.kernel.org, linux-mm@kvack.org,
	linux-btrfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	ocfs2-devel@oss.oracle.com
Subject: [PATCH 20/25] vfs: implement opportunistic short dedupe
Date: Tue, 09 Oct 2018 17:14:17 -0700	[thread overview]
Message-ID: <153913045787.32295.7018909865132108315.stgit@magnolia> (raw)
In-Reply-To: <153913023835.32295.13962696655740190941.stgit@magnolia>

From: Darrick J. Wong <darrick.wong@oracle.com>

For a given dedupe request, the bytes_deduped field in the control
structure tells userspace if we managed to deduplicate some, but not all
of, the requested regions starting from the file offsets supplied.
However, due to sloppy coding, the current dedupe code returns
FILE_DEDUPE_RANGE_DIFFERS if any part of the range is different.
Fix this so that we can actually support partial request completion.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/read_write.c |   59 +++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 48 insertions(+), 11 deletions(-)


diff --git a/fs/read_write.c b/fs/read_write.c
index 57627202bd50..8be3c3add030 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1737,13 +1737,26 @@ static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
 	return page;
 }
 
+static unsigned int vfs_dedupe_memcmp(const char *s1, const char *s2,
+				      unsigned int len)
+{
+	const char *orig_s1;	/* start of s1; used to compute match length */
+
+	for (orig_s1 = s1; len > 0; s1++, s2++, len--)	/* at most len bytes */
+		if (*s1 != *s2)
+			break;	/* stop at the first differing byte */
+
+	return s1 - orig_s1;	/* number of leading bytes that matched */
+}
+
 /*
  * Compare extents of two files to see if they are the same.
  * Caller must have locked both inodes to prevent write races.
  */
 static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 					 struct inode *dest, loff_t destoff,
-					 loff_t len, bool *is_same)
+					 loff_t *req_len,
+					 unsigned int remap_flags)
 {
 	loff_t src_poff;
 	loff_t dest_poff;
@@ -1751,8 +1764,11 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 	void *dest_addr;
 	struct page *src_page;
 	struct page *dest_page;
-	loff_t cmp_len;
+	loff_t len = *req_len;
+	loff_t same_len = 0;
 	bool same;
+	unsigned int cmp_len;
+	unsigned int cmp_same;
 	int error;
 
 	error = -EINVAL;
@@ -1762,7 +1778,7 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 		dest_poff = destoff & (PAGE_SIZE - 1);
 		cmp_len = min(PAGE_SIZE - src_poff,
 			      PAGE_SIZE - dest_poff);
-		cmp_len = min(cmp_len, len);
+		cmp_len = min_t(loff_t, cmp_len, len);
 		if (cmp_len <= 0)
 			goto out_error;
 
@@ -1784,7 +1800,10 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 		flush_dcache_page(src_page);
 		flush_dcache_page(dest_page);
 
-		if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
+		cmp_same = vfs_dedupe_memcmp(src_addr + src_poff,
+					     dest_addr + dest_poff, cmp_len);
+		same_len += cmp_same;
+		if (cmp_same != cmp_len)
 			same = false;
 
 		kunmap_atomic(dest_addr);
@@ -1802,7 +1821,17 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 		len -= cmp_len;
 	}
 
-	*is_same = same;
+	/*
+	 * If less than the whole range matched, round the matched length
+	 * down to a multiple of the destination's block size.
+	 */
+	if (*req_len != same_len) {
+		if (!(remap_flags & RFR_CAN_SHORTEN))
+			return -EINVAL;
+
+		*req_len = ALIGN_DOWN(same_len, dest->i_sb->s_blocksize);
+	}
+
 	return 0;
 
 out_error:
@@ -1879,13 +1908,11 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 	 * Check that the extents are the same.
 	 */
 	if (is_dedupe) {
-		bool		is_same = false;
-
 		ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
-				inode_out, pos_out, *len, &is_same);
+				inode_out, pos_out, len, remap_flags);
 		if (ret)
 			return ret;
-		if (!is_same)
+		if (*len == 0)
 			return -EBADE;
 	}
 
@@ -1988,7 +2015,7 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 {
 	loff_t ret;
 
-	WARN_ON_ONCE(remap_flags & ~(RFR_IDENTICAL_DATA));
+	WARN_ON_ONCE(remap_flags & ~(RFR_IDENTICAL_DATA | RFR_CAN_SHORTEN));
 
 	ret = mnt_want_write_file(dst_file);
 	if (ret)
@@ -2037,6 +2064,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 	int i;
 	int ret;
 	u16 count = same->dest_count;
+	unsigned int remap_flags = 0;
 	loff_t deduped;
 
 	if (!(file->f_mode & FMODE_READ))
@@ -2073,6 +2101,15 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 		same->info[i].status = FILE_DEDUPE_RANGE_SAME;
 	}
 
+	/*
+	 * We can't allow the dedupe implementation to shorten the request if
+	 * there are multiple dedupe candidates because each candidate might
+	 * shorten the request by a different amount due to EOF and allocation
+	 * block size mismatches.
+	 */
+	if (count == 1)
+		remap_flags |= RFR_CAN_SHORTEN;
+
 	for (i = 0, info = same->info; i < count; i++, info++) {
 		struct fd dst_fd = fdget(info->dest_fd);
 		struct file *dst_file = dst_fd.file;
@@ -2089,7 +2126,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 
 		deduped = vfs_dedupe_file_range_one(file, off, dst_file,
 						    info->dest_offset, len,
-						    0);
+						    remap_flags);
 		if (deduped == -EBADE)
 			info->status = FILE_DEDUPE_RANGE_DIFFERS;
 		else if (deduped < 0)

WARNING: multiple messages have this Message-ID (diff)
From: Darrick J. Wong <darrick.wong@oracle.com>
To: david@fromorbit.com, darrick.wong@oracle.com
Cc: sandeen@redhat.com, linux-nfs@vger.kernel.org,
	linux-cifs@vger.kernel.org, linux-unionfs@vger.kernel.org,
	linux-xfs@vger.kernel.org, linux-mm@kvack.org,
	linux-btrfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	ocfs2-devel@oss.oracle.com
Subject: [Ocfs2-devel] [PATCH 20/25] vfs: implement opportunistic short dedupe
Date: Tue, 09 Oct 2018 17:14:17 -0700	[thread overview]
Message-ID: <153913045787.32295.7018909865132108315.stgit@magnolia> (raw)
In-Reply-To: <153913023835.32295.13962696655740190941.stgit@magnolia>

From: Darrick J. Wong <darrick.wong@oracle.com>

For a given dedupe request, the bytes_deduped field in the control
structure tells userspace if we managed to deduplicate some, but not all
of, the requested regions starting from the file offsets supplied.
However, due to sloppy coding, the current dedupe code returns
FILE_DEDUPE_RANGE_DIFFERS if any part of the range is different.
Fix this so that we can actually support partial request completion.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/read_write.c |   59 +++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 48 insertions(+), 11 deletions(-)


diff --git a/fs/read_write.c b/fs/read_write.c
index 57627202bd50..8be3c3add030 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1737,13 +1737,26 @@ static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
 	return page;
 }
 
+static unsigned int vfs_dedupe_memcmp(const char *s1, const char *s2,
+				      unsigned int len)
+{
+	const char *orig_s1;	/* start of s1; used to compute match length */
+
+	for (orig_s1 = s1; len > 0; s1++, s2++, len--)	/* at most len bytes */
+		if (*s1 != *s2)
+			break;	/* stop at the first differing byte */
+
+	return s1 - orig_s1;	/* number of leading bytes that matched */
+}
+
 /*
  * Compare extents of two files to see if they are the same.
  * Caller must have locked both inodes to prevent write races.
  */
 static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 					 struct inode *dest, loff_t destoff,
-					 loff_t len, bool *is_same)
+					 loff_t *req_len,
+					 unsigned int remap_flags)
 {
 	loff_t src_poff;
 	loff_t dest_poff;
@@ -1751,8 +1764,11 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 	void *dest_addr;
 	struct page *src_page;
 	struct page *dest_page;
-	loff_t cmp_len;
+	loff_t len = *req_len;
+	loff_t same_len = 0;
 	bool same;
+	unsigned int cmp_len;
+	unsigned int cmp_same;
 	int error;
 
 	error = -EINVAL;
@@ -1762,7 +1778,7 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 		dest_poff = destoff & (PAGE_SIZE - 1);
 		cmp_len = min(PAGE_SIZE - src_poff,
 			      PAGE_SIZE - dest_poff);
-		cmp_len = min(cmp_len, len);
+		cmp_len = min_t(loff_t, cmp_len, len);
 		if (cmp_len <= 0)
 			goto out_error;
 
@@ -1784,7 +1800,10 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 		flush_dcache_page(src_page);
 		flush_dcache_page(dest_page);
 
-		if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
+		cmp_same = vfs_dedupe_memcmp(src_addr + src_poff,
+					     dest_addr + dest_poff, cmp_len);
+		same_len += cmp_same;
+		if (cmp_same != cmp_len)
 			same = false;
 
 		kunmap_atomic(dest_addr);
@@ -1802,7 +1821,17 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 		len -= cmp_len;
 	}
 
-	*is_same = same;
+	/*
+	 * If less than the whole range matched, round the matched length
+	 * down to a multiple of the destination's block size.
+	 */
+	if (*req_len != same_len) {
+		if (!(remap_flags & RFR_CAN_SHORTEN))
+			return -EINVAL;
+
+		*req_len = ALIGN_DOWN(same_len, dest->i_sb->s_blocksize);
+	}
+
 	return 0;
 
 out_error:
@@ -1879,13 +1908,11 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 	 * Check that the extents are the same.
 	 */
 	if (is_dedupe) {
-		bool		is_same = false;
-
 		ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
-				inode_out, pos_out, *len, &is_same);
+				inode_out, pos_out, len, remap_flags);
 		if (ret)
 			return ret;
-		if (!is_same)
+		if (*len == 0)
 			return -EBADE;
 	}
 
@@ -1988,7 +2015,7 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 {
 	loff_t ret;
 
-	WARN_ON_ONCE(remap_flags & ~(RFR_IDENTICAL_DATA));
+	WARN_ON_ONCE(remap_flags & ~(RFR_IDENTICAL_DATA | RFR_CAN_SHORTEN));
 
 	ret = mnt_want_write_file(dst_file);
 	if (ret)
@@ -2037,6 +2064,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 	int i;
 	int ret;
 	u16 count = same->dest_count;
+	unsigned int remap_flags = 0;
 	loff_t deduped;
 
 	if (!(file->f_mode & FMODE_READ))
@@ -2073,6 +2101,15 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 		same->info[i].status = FILE_DEDUPE_RANGE_SAME;
 	}
 
+	/*
+	 * We can't allow the dedupe implementation to shorten the request if
+	 * there are multiple dedupe candidates because each candidate might
+	 * shorten the request by a different amount due to EOF and allocation
+	 * block size mismatches.
+	 */
+	if (count == 1)
+		remap_flags |= RFR_CAN_SHORTEN;
+
 	for (i = 0, info = same->info; i < count; i++, info++) {
 		struct fd dst_fd = fdget(info->dest_fd);
 		struct file *dst_file = dst_fd.file;
@@ -2089,7 +2126,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 
 		deduped = vfs_dedupe_file_range_one(file, off, dst_file,
 						    info->dest_offset, len,
-						    0);
+						    remap_flags);
 		if (deduped == -EBADE)
 			info->status = FILE_DEDUPE_RANGE_DIFFERS;
 		else if (deduped < 0)

  parent reply	other threads:[~2018-10-10  0:14 UTC|newest]

Thread overview: 84+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-10  0:10 [PATCH v2 00/25] fs: fixes for serious clone/dedupe problems Darrick J. Wong
2018-10-10  0:10 ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:10 ` [PATCH 01/25] xfs: add a per-xfs trace_printk macro Darrick J. Wong
2018-10-10  0:10   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:36   ` Dave Chinner
2018-10-10  0:36     ` [Ocfs2-devel] " Dave Chinner
2018-10-10 15:00   ` [PATCH v2 " Darrick J. Wong
2018-10-10 15:00     ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:10 ` [PATCH 02/25] xfs: refactor clonerange preparation into a separate helper Darrick J. Wong
2018-10-10  0:10   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:10 ` [PATCH 03/25] xfs: zero posteof blocks when cloning above eof Darrick J. Wong
2018-10-10  0:10   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:11 ` [PATCH 04/25] xfs: update ctime and remove suid before cloning files Darrick J. Wong
2018-10-10  0:11   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:11 ` [PATCH 05/25] vfs: check file ranges " Darrick J. Wong
2018-10-10  0:11   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10 23:06   ` Dave Chinner
2018-10-10 23:06     ` [Ocfs2-devel] " Dave Chinner
2018-10-10 23:13     ` Darrick J. Wong
2018-10-10 23:13       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:11 ` [PATCH 06/25] vfs: strengthen checking of file range inputs to generic_remap_checks Darrick J. Wong
2018-10-10  0:11   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  5:23   ` Amir Goldstein
2018-10-10 17:01     ` Darrick J. Wong
2018-10-10 17:01       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10 17:26       ` Amir Goldstein
2018-10-10  0:11 ` [PATCH 07/25] vfs: skip zero-length dedupe requests Darrick J. Wong
2018-10-10  0:11   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:11 ` [PATCH 08/25] vfs: combine the clone and dedupe into a single remap_file_range Darrick J. Wong
2018-10-10  0:11   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  5:54   ` Amir Goldstein
2018-10-10 15:13     ` Darrick J. Wong
2018-10-10 15:13       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10 15:23       ` Amir Goldstein
2018-10-10  0:11 ` [PATCH 09/25] vfs: rename vfs_clone_file_prep to be more descriptive Darrick J. Wong
2018-10-10  0:11   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:11 ` [PATCH 10/25] vfs: rename clone_verify_area to remap_verify_area Darrick J. Wong
2018-10-10  0:11   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:13 ` [PATCH 11/25] vfs: create generic_remap_file_range_touch to update inode metadata Darrick J. Wong
2018-10-10  0:13   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:13 ` [PATCH 12/25] vfs: pass remap flags to generic_remap_file_range_prep Darrick J. Wong
2018-10-10  0:13   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:13 ` [PATCH 13/25] vfs: pass remap flags to generic_remap_checks Darrick J. Wong
2018-10-10  0:13   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:13 ` [PATCH 14/25] vfs: make remap_file_range functions take and return bytes completed Darrick J. Wong
2018-10-10  0:13   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  6:47   ` Amir Goldstein
2018-10-10 15:50     ` Darrick J. Wong
2018-10-10 15:50       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10 18:28       ` Amir Goldstein
2018-10-10 18:32         ` Darrick J. Wong
2018-10-10 18:32           ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:13 ` [PATCH 15/25] vfs: plumb RFR_* remap flags through the vfs clone functions Darrick J. Wong
2018-10-10  0:13   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  6:22   ` Amir Goldstein
2018-10-10  6:39     ` Amir Goldstein
2018-10-10  0:13 ` [PATCH 16/25] vfs: plumb RFR_* remap flags through the vfs dedupe functions Darrick J. Wong
2018-10-10  0:13   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:13 ` [PATCH 17/25] vfs: make remapping to source file eof more explicit Darrick J. Wong
2018-10-10  0:13   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10 12:29   ` Amir Goldstein
2018-10-10 16:29     ` Darrick J. Wong
2018-10-10 16:29       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10 17:31       ` Amir Goldstein
2018-10-10  0:14 ` [PATCH 18/25] vfs: enable remap callers that can handle short operations Darrick J. Wong
2018-10-10  0:14   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:14 ` [PATCH 19/25] vfs: hide file range comparison function Darrick J. Wong
2018-10-10  0:14   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:14 ` Darrick J. Wong [this message]
2018-10-10  0:14   ` [Ocfs2-devel] [PATCH 20/25] vfs: implement opportunistic short dedupe Darrick J. Wong
2018-10-10  0:14 ` [PATCH 21/25] ocfs2: truncate page cache for clone destination file before remapping Darrick J. Wong
2018-10-10  0:14   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:14 ` [PATCH 22/25] ocfs2: fix pagecache truncation prior to reflink Darrick J. Wong
2018-10-10  0:14   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:14 ` [PATCH 23/25] ocfs2: support partial clone range and dedupe range Darrick J. Wong
2018-10-10  0:14   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:14 ` [PATCH 24/25] xfs: fix pagecache truncation prior to reflink Darrick J. Wong
2018-10-10  0:14   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  0:14 ` [PATCH 25/25] xfs: support returning partial reflink results Darrick J. Wong
2018-10-10  0:14   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-10  1:02 ` [PATCH v2 00/25] fs: fixes for serious clone/dedupe problems Dave Chinner
2018-10-10  1:02   ` [Ocfs2-devel] " Dave Chinner
2018-10-10  1:06   ` Darrick J. Wong
2018-10-10  1:06     ` [Ocfs2-devel] " Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=153913045787.32295.7018909865132108315.stgit@magnolia \
    --to=darrick.wong@oracle.com \
    --cc=david@fromorbit.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-cifs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=linux-unionfs@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=ocfs2-devel@oss.oracle.com \
    --cc=sandeen@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.