All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: david@fromorbit.com, darrick.wong@oracle.com
Cc: linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-btrfs@vger.kernel.org, ocfs2-devel@oss.oracle.com,
	sandeen@redhat.com
Subject: [PATCH 12/15] vfs: implement opportunistic short dedupe
Date: Thu, 04 Oct 2018 17:46:01 -0700	[thread overview]
Message-ID: <153870036143.29072.11970142092673351715.stgit@magnolia> (raw)
In-Reply-To: <153870027422.29072.7433543674436957232.stgit@magnolia>

From: Darrick J. Wong <darrick.wong@oracle.com>

For a given dedupe request, the bytes_deduped field in the control
structure tells userspace whether we managed to deduplicate some, but
not all, of the requested regions starting from the supplied file
offsets.  However, due to sloppy coding, the current dedupe code returns
FILE_DEDUPE_RANGE_DIFFERS if any part of the range differs, even when
the leading part of the range matches.  Fix this by counting how many
leading bytes match and, when the match is short, rounding that length
down to a block boundary, so that we can actually support partial
request completion.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/read_write.c    |   44 +++++++++++++++++++++++++++++++++++---------
 include/linux/fs.h |    2 +-
 2 files changed, 36 insertions(+), 10 deletions(-)


diff --git a/fs/read_write.c b/fs/read_write.c
index 292d68c2f47c..9be9f261edd2 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1781,13 +1781,11 @@ int vfs_clone_file_prep(struct file *file_in, loff_t pos_in,
 	 * Check that the extents are the same.
 	 */
 	if (is_dedupe) {
-		bool		is_same = false;
-
 		ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
-				inode_out, pos_out, *len, &is_same);
+				inode_out, pos_out, len);
 		if (ret)
 			return ret;
-		if (!is_same)
+		if (*len == 0)
 			return -EBADE;
 	}
 
@@ -1872,13 +1870,30 @@ static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
 	return page;
 }
 
+static unsigned int vfs_dedupe_memcmp(const char *s1, const char *s2,
+				      unsigned int cmp_len)
+{
+	const char *orig_s1 = s1;
+	const char *e1 = s1 + cmp_len;
+	const char *e2 = s2 + cmp_len;
+
+	while (s1 < e1 && s2 < e2) {
+		if (*s1 != *s2)
+			break;
+		s1++;
+		s2++;
+	}
+
+	return s1 - orig_s1;
+}
+
 /*
  * Compare extents of two files to see if they are the same.
  * Caller must have locked both inodes to prevent write races.
  */
 int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 				  struct inode *dest, loff_t destoff,
-				  loff_t len, bool *is_same)
+				  loff_t *req_len)
 {
 	loff_t src_poff;
 	loff_t dest_poff;
@@ -1886,8 +1901,11 @@ int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 	void *dest_addr;
 	struct page *src_page;
 	struct page *dest_page;
-	loff_t cmp_len;
+	loff_t len = *req_len;
+	loff_t same_len = 0;
 	bool same;
+	unsigned int cmp_len;
+	unsigned int cmp_same;
 	int error;
 
 	error = -EINVAL;
@@ -1897,7 +1915,7 @@ int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 		dest_poff = destoff & (PAGE_SIZE - 1);
 		cmp_len = min(PAGE_SIZE - src_poff,
 			      PAGE_SIZE - dest_poff);
-		cmp_len = min(cmp_len, len);
+		cmp_len = min_t(loff_t, cmp_len, len);
 		if (cmp_len <= 0)
 			goto out_error;
 
@@ -1919,7 +1937,10 @@ int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 		flush_dcache_page(src_page);
 		flush_dcache_page(dest_page);
 
-		if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
+		cmp_same = vfs_dedupe_memcmp(src_addr + src_poff,
+					     dest_addr + dest_poff, cmp_len);
+		same_len += cmp_same;
+		if (cmp_same != cmp_len)
 			same = false;
 
 		kunmap_atomic(dest_addr);
@@ -1937,7 +1958,12 @@ int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 		len -= cmp_len;
 	}
 
-	*is_same = same;
+	/*
+	 * If less than the whole range matched, we have to back down to the
+	 * nearest block boundary.
+	 */
+	if (*req_len != same_len)
+		*req_len = ALIGN_DOWN(same_len, dest->i_sb->s_blocksize);
 	return 0;
 
 out_error:
diff --git a/include/linux/fs.h b/include/linux/fs.h
index eb35363478e5..490128b84d10 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1840,7 +1840,7 @@ extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
 		struct file *file_out, loff_t pos_out, u64 len);
 extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 					 struct inode *dest, loff_t destoff,
-					 loff_t len, bool *is_same);
+					 loff_t *len);
 extern int vfs_dedupe_file_range(struct file *file,
 				 struct file_dedupe_range *same);
 extern s64 vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,


WARNING: multiple messages have this Message-ID (diff)
From: Darrick J. Wong <darrick.wong@oracle.com>
To: david@fromorbit.com, darrick.wong@oracle.com
Cc: linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-btrfs@vger.kernel.org, ocfs2-devel@oss.oracle.com,
	sandeen@redhat.com
Subject: [Ocfs2-devel] [PATCH 12/15] vfs: implement opportunistic short dedupe
Date: Thu, 04 Oct 2018 17:46:01 -0700	[thread overview]
Message-ID: <153870036143.29072.11970142092673351715.stgit@magnolia> (raw)
In-Reply-To: <153870027422.29072.7433543674436957232.stgit@magnolia>

From: Darrick J. Wong <darrick.wong@oracle.com>

For a given dedupe request, the bytes_deduped field in the control
structure tells userspace whether we managed to deduplicate some, but
not all, of the requested regions starting from the supplied file
offsets.  However, due to sloppy coding, the current dedupe code returns
FILE_DEDUPE_RANGE_DIFFERS if any part of the range differs, even when
the leading part of the range matches.  Fix this by counting how many
leading bytes match and, when the match is short, rounding that length
down to a block boundary, so that we can actually support partial
request completion.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/read_write.c    |   44 +++++++++++++++++++++++++++++++++++---------
 include/linux/fs.h |    2 +-
 2 files changed, 36 insertions(+), 10 deletions(-)


diff --git a/fs/read_write.c b/fs/read_write.c
index 292d68c2f47c..9be9f261edd2 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1781,13 +1781,11 @@ int vfs_clone_file_prep(struct file *file_in, loff_t pos_in,
 	 * Check that the extents are the same.
 	 */
 	if (is_dedupe) {
-		bool		is_same = false;
-
 		ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
-				inode_out, pos_out, *len, &is_same);
+				inode_out, pos_out, len);
 		if (ret)
 			return ret;
-		if (!is_same)
+		if (*len == 0)
 			return -EBADE;
 	}
 
@@ -1872,13 +1870,30 @@ static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
 	return page;
 }
 
+static unsigned int vfs_dedupe_memcmp(const char *s1, const char *s2,
+				      unsigned int cmp_len)
+{
+	const char *orig_s1 = s1;
+	const char *e1 = s1 + cmp_len;
+	const char *e2 = s2 + cmp_len;
+
+	while (s1 < e1 && s2 < e2) {
+		if (*s1 != *s2)
+			break;
+		s1++;
+		s2++;
+	}
+
+	return s1 - orig_s1;
+}
+
 /*
  * Compare extents of two files to see if they are the same.
  * Caller must have locked both inodes to prevent write races.
  */
 int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 				  struct inode *dest, loff_t destoff,
-				  loff_t len, bool *is_same)
+				  loff_t *req_len)
 {
 	loff_t src_poff;
 	loff_t dest_poff;
@@ -1886,8 +1901,11 @@ int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 	void *dest_addr;
 	struct page *src_page;
 	struct page *dest_page;
-	loff_t cmp_len;
+	loff_t len = *req_len;
+	loff_t same_len = 0;
 	bool same;
+	unsigned int cmp_len;
+	unsigned int cmp_same;
 	int error;
 
 	error = -EINVAL;
@@ -1897,7 +1915,7 @@ int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 		dest_poff = destoff & (PAGE_SIZE - 1);
 		cmp_len = min(PAGE_SIZE - src_poff,
 			      PAGE_SIZE - dest_poff);
-		cmp_len = min(cmp_len, len);
+		cmp_len = min_t(loff_t, cmp_len, len);
 		if (cmp_len <= 0)
 			goto out_error;
 
@@ -1919,7 +1937,10 @@ int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 		flush_dcache_page(src_page);
 		flush_dcache_page(dest_page);
 
-		if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
+		cmp_same = vfs_dedupe_memcmp(src_addr + src_poff,
+					     dest_addr + dest_poff, cmp_len);
+		same_len += cmp_same;
+		if (cmp_same != cmp_len)
 			same = false;
 
 		kunmap_atomic(dest_addr);
@@ -1937,7 +1958,12 @@ int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 		len -= cmp_len;
 	}
 
-	*is_same = same;
+	/*
+	 * If less than the whole range matched, we have to back down to the
+	 * nearest block boundary.
+	 */
+	if (*req_len != same_len)
+		*req_len = ALIGN_DOWN(same_len, dest->i_sb->s_blocksize);
 	return 0;
 
 out_error:
diff --git a/include/linux/fs.h b/include/linux/fs.h
index eb35363478e5..490128b84d10 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1840,7 +1840,7 @@ extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
 		struct file *file_out, loff_t pos_out, u64 len);
 extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 					 struct inode *dest, loff_t destoff,
-					 loff_t len, bool *is_same);
+					 loff_t *len);
 extern int vfs_dedupe_file_range(struct file *file,
 				 struct file_dedupe_range *same);
 extern s64 vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,

  parent reply	other threads:[~2018-10-05  0:46 UTC|newest]

Thread overview: 82+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-05  0:44 [PATCH 00/15] fs: fixes for serious clone/dedupe problems Darrick J. Wong
2018-10-05  0:44 ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  0:44 ` [PATCH 01/15] xfs: add a per-xfs trace_printk macro Darrick J. Wong
2018-10-05  0:44   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  0:44 ` [PATCH 02/15] xfs: refactor clonerange preparation into a separate helper Darrick J. Wong
2018-10-05  0:44   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  5:28   ` Dave Chinner
2018-10-05  5:28     ` [Ocfs2-devel] " Dave Chinner
2018-10-05 17:06     ` Darrick J. Wong
2018-10-05 17:06       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-06 10:30     ` Christoph Hellwig
2018-10-06 10:30       ` [Ocfs2-devel] " Christoph Hellwig
2018-10-05  7:02   ` Dave Chinner
2018-10-05  7:02     ` [Ocfs2-devel] " Dave Chinner
2018-10-05  9:02     ` Dave Chinner
2018-10-05  9:02       ` [Ocfs2-devel] " Dave Chinner
2018-10-05 17:21       ` Darrick J. Wong
2018-10-05 17:21         ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05 23:42         ` Dave Chinner
2018-10-05 23:42           ` [Ocfs2-devel] " Dave Chinner
2018-10-05  0:44 ` [PATCH 03/15] xfs: zero posteof blocks when cloning above eof Darrick J. Wong
2018-10-05  0:44   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  5:28   ` Dave Chinner
2018-10-05  5:28     ` [Ocfs2-devel] " Dave Chinner
2018-10-06 10:34   ` Christoph Hellwig
2018-10-06 10:34     ` [Ocfs2-devel] " Christoph Hellwig
2018-10-05  0:45 ` [PATCH 04/15] xfs: update ctime and remove suid before cloning files Darrick J. Wong
2018-10-05  0:45   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  5:30   ` Dave Chinner
2018-10-05  5:30     ` [Ocfs2-devel] " Dave Chinner
2018-10-06 10:35   ` Christoph Hellwig
2018-10-06 10:35     ` [Ocfs2-devel] " Christoph Hellwig
2018-10-05  0:45 ` [PATCH 05/15] vfs: check file ranges " Darrick J. Wong
2018-10-05  0:45   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-06 10:38   ` Christoph Hellwig
2018-10-06 10:38     ` [Ocfs2-devel] " Christoph Hellwig
2018-10-05  0:45 ` [PATCH 06/15] vfs: strengthen checking of file range inputs to clone/dedupe range Darrick J. Wong
2018-10-05  0:45   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  6:10   ` Amir Goldstein
2018-10-05 17:36     ` Darrick J. Wong
2018-10-05 17:36       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  0:45 ` [PATCH 07/15] vfs: skip zero-length dedupe requests Darrick J. Wong
2018-10-05  0:45   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  8:39   ` Amir Goldstein
2018-10-06 10:39   ` Christoph Hellwig
2018-10-06 10:39     ` [Ocfs2-devel] " Christoph Hellwig
2018-10-05  0:45 ` [PATCH 08/15] vfs: change clone and dedupe range function pointers to return bytes completed Darrick J. Wong
2018-10-05  0:45   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  8:06   ` Amir Goldstein
2018-10-05 21:47     ` Darrick J. Wong
2018-10-05 21:47       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-06 10:41   ` Christoph Hellwig
2018-10-06 10:41     ` [Ocfs2-devel] " Christoph Hellwig
2018-10-08 18:59     ` Darrick J. Wong
2018-10-08 18:59       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  0:45 ` [PATCH 09/15] vfs: pass operation flags to {clone, dedupe}_file_range implementations Darrick J. Wong
2018-10-05  0:45   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  7:07   ` Amir Goldstein
2018-10-05 17:50     ` Darrick J. Wong
2018-10-05 17:50       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-06 10:44       ` Christoph Hellwig
2018-10-06 10:44         ` [Ocfs2-devel] " Christoph Hellwig
2018-10-05  0:45 ` [PATCH 10/15] vfs: make cloning to source file eof more explicit Darrick J. Wong
2018-10-05  0:45   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  6:47   ` Amir Goldstein
2018-10-05  0:45 ` [PATCH 11/15] vfs: allow short clone and dedupe operations Darrick J. Wong
2018-10-05  0:45   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  0:46 ` Darrick J. Wong [this message]
2018-10-05  0:46   ` [Ocfs2-devel] [PATCH 12/15] vfs: implement opportunistic short dedupe Darrick J. Wong
2018-10-05  6:40   ` Amir Goldstein
2018-10-05 17:42     ` Darrick J. Wong
2018-10-05 17:42       ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  0:46 ` [PATCH 13/15] ocfs2: truncate page cache for clone destination file before remapping Darrick J. Wong
2018-10-05  0:46   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  0:46 ` [PATCH 14/15] ocfs2: support partial clone range and dedupe range Darrick J. Wong
2018-10-05  0:46   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  0:46 ` [PATCH 15/15] xfs: support returning partial reflink results Darrick J. Wong
2018-10-05  0:46   ` [Ocfs2-devel] " Darrick J. Wong
2018-10-05  1:17 ` [PATCH 00/15] fs: fixes for serious clone/dedupe problems Dave Chinner
2018-10-05  1:17   ` [Ocfs2-devel] " Dave Chinner
2018-10-05  1:24   ` Darrick J. Wong
2018-10-05  1:24     ` [Ocfs2-devel] " Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=153870036143.29072.11970142092673351715.stgit@magnolia \
    --to=darrick.wong@oracle.com \
    --cc=david@fromorbit.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=ocfs2-devel@oss.oracle.com \
    --cc=sandeen@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.