All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: david@fromorbit.com, darrick.wong@oracle.com
Cc: linux-fsdevel@vger.kernel.org, xfs@oss.sgi.com
Subject: [PATCH 52/58] xfs: emulate the btrfs dedupe extent same ioctl
Date: Tue, 06 Oct 2015 22:00:51 -0700	[thread overview]
Message-ID: <20151007050051.30457.45420.stgit@birch.djwong.org> (raw)
In-Reply-To: <20151007045443.30457.47038.stgit@birch.djwong.org>

Emulate the BTRFS_IOC_EXTENT_SAME ioctl.  This operation is similar
to clone_range, but the kernel must confirm that the contents of the
two extents are identical before performing the reflink.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_fs.h |   30 ++++++++++++
 fs/xfs/xfs_ioctl.c     |  124 ++++++++++++++++++++++++++++++++++++++++++++++--
 fs/xfs/xfs_ioctl32.c   |    1 
 fs/xfs/xfs_reflink.c   |  120 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_reflink.h   |    6 ++
 5 files changed, 275 insertions(+), 6 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 2c8cd04..c63afd4 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -570,8 +570,38 @@ struct xfs_clone_args {
 	__u64 dest_offset;
 };
 
+/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
+#define XFS_EXTENT_DATA_SAME	0
+#define XFS_EXTENT_DATA_DIFFERS	1
+
+/* from struct btrfs_ioctl_file_extent_same_info */
+struct xfs_extent_data_info {
+	__s64 fd;		/* in - destination file descriptor */
+	__u64 logical_offset;	/* in - start of extent in destination */
+	__u64 bytes_deduped;	/* out - total # of bytes we were able
+				 * to dedupe from this file */
+	/* status of this dedupe operation:
+	 * 0 (XFS_EXTENT_DATA_SAME) if dedupe succeeds
+	 * < 0 for error
+	 * == XFS_EXTENT_DATA_DIFFERS if data differs
+	 */
+	__s32 status;		/* out - see above description */
+	__u32 reserved;
+};
+
+/* from struct btrfs_ioctl_file_extent_same_args */
+struct xfs_extent_data {
+	__u64 logical_offset;	/* in - start of extent in source */
+	__u64 length;		/* in - length of extent (handler caps at 16MB) */
+	__u16 dest_count;	/* in - total elements in info array */
+	__u16 reserved1;
+	__u32 reserved2;
+	struct xfs_extent_data_info info[0];	/* in/out - dest_count entries */
+};
+
 #define XFS_IOC_CLONE		 _IOW (0x94, 9, int)
 #define XFS_IOC_CLONE_RANGE	 _IOW (0x94, 13, struct xfs_clone_args)
+#define XFS_IOC_FILE_EXTENT_SAME _IOWR(0x94, 54, struct xfs_extent_data)
 
 #ifndef HAVE_BBMACROS
 /*
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index ce4812e..50ea19e 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1541,7 +1541,8 @@ xfs_ioctl_reflink(
 	loff_t		pos_in,
 	struct file	*file_out,
 	loff_t		pos_out,
-	size_t		len)
+	size_t		len,
+	bool		is_dedupe)
 {
 	struct inode	*inode_in;
 	struct inode	*inode_out;
@@ -1550,6 +1551,7 @@ xfs_ioctl_reflink(
 	loff_t		isize;
 	int		same_inode;
 	loff_t		blen;
+	unsigned int	flags;
 
 	if (len == 0)
 		return 0;
@@ -1629,8 +1631,12 @@ xfs_ioctl_reflink(
 	if (ret)
 		goto out_unlock;
 
+	flags = 0;
+	if (is_dedupe)
+		flags |= XFS_REFLINK_DEDUPE;
+
 	ret = xfs_reflink(XFS_I(inode_in), pos_in, XFS_I(inode_out),
-			pos_out, len);
+			pos_out, len, flags);
 	if (ret < 0)
 		goto out_unlock;
 
@@ -1652,6 +1658,112 @@ out_unlock:
 	return ret;
 }
 
+#define XFS_MAX_DEDUPE_LEN	(16 * 1024 * 1024)
+
+/*
+ * Dedupe one source range of @file into every destination listed in
+ * argp->info[], recording a status per destination.  Returns 0 or -errno.
+ */
+static long
+xfs_ioctl_file_extent_same(
+	struct file			*file,
+	struct xfs_extent_data __user	*argp)
+{
+	struct xfs_extent_data		*same;
+	struct xfs_extent_data_info	*info;
+	struct inode			*src;
+	u64				off;
+	u64				len;
+	int				i;
+	int				ret;
+	unsigned long			size;
+	bool				is_admin;
+	u16				count;
+
+	is_admin = capable(CAP_SYS_ADMIN);
+	src = file_inode(file);
+	if (!(file->f_mode & FMODE_READ))
+		return -EINVAL;
+
+	if (get_user(count, &argp->dest_count))
+		return -EFAULT;
+
+	size = offsetof(struct xfs_extent_data __user,
+			info[count]);
+
+	/* Kernel copy of the header and all info slots; freed at out_free. */
+	same = memdup_user(argp, size);
+	if (IS_ERR(same))
+		return PTR_ERR(same);
+
+	off = same->logical_offset;
+	len = same->length;
+
+	/*
+	 * Limit the total length we will dedupe for each operation.
+	 * This is intended to bound the total time spent in this
+	 * ioctl to something sane.
+	 */
+	if (len > XFS_MAX_DEDUPE_LEN)
+		len = XFS_MAX_DEDUPE_LEN;
+
+	ret = -EISDIR;
+	if (S_ISDIR(src->i_mode))
+		goto out_free;
+
+	ret = -EACCES;
+	if (!S_ISREG(src->i_mode))
+		goto out_free;
+
+	/* pre-format output fields to sane values */
+	for (i = 0; i < count; i++) {
+		same->info[i].bytes_deduped = 0ULL;
+		same->info[i].status = 0;
+	}
+
+	for (i = 0, info = same->info; i < count; i++, info++) {
+		struct inode *dst;
+		struct fd dst_file = fdget(info->fd);
+
+		if (!dst_file.file) {
+			info->status = -EBADF;
+			continue;
+		}
+		dst = file_inode(dst_file.file);
+
+		trace_xfs_ioctl_file_extent_same(file_inode(file), off, len,
+				dst, info->logical_offset);
+
+		if (!(is_admin || (dst_file.file->f_mode & FMODE_WRITE))) {
+			info->status = -EINVAL;
+		} else if (file->f_path.mnt != dst_file.file->f_path.mnt) {
+			info->status = -EXDEV;
+		} else if (S_ISDIR(dst->i_mode)) {
+			info->status = -EISDIR;
+		} else if (!S_ISREG(dst->i_mode)) {
+			info->status = -EACCES;
+		} else {
+			info->status = xfs_ioctl_reflink(file, off,
+							 dst_file.file,
+							 info->logical_offset,
+							 len, true);
+			if (info->status == -EBADE)
+				info->status = XFS_EXTENT_DATA_DIFFERS;
+			else if (info->status == 0)
+				info->bytes_deduped = len;
+		}
+		fdput(dst_file);
+	}
+
+	ret = copy_to_user(argp, same, size);
+	if (ret)
+		ret = -EFAULT;
+
+out_free:
+	kfree(same);
+	return ret;
+}
+
 /*
  * Note: some of the ioctl's return positive numbers as a
  * byte count indicating success, such as readlink_by_handle.
@@ -1959,7 +2071,7 @@ xfs_file_ioctl(
 
 		trace_xfs_ioctl_clone(file_inode(src.file), file_inode(filp));
 
-		error = xfs_ioctl_reflink(src.file, 0, filp, 0, ~0ULL);
+		error = xfs_ioctl_reflink(src.file, 0, filp, 0, ~0ULL, false);
 		fdput(src);
 		if (error > 0)
 			error = 0;
@@ -1984,7 +2096,8 @@ xfs_file_ioctl(
 				file_inode(filp), args.dest_offset);
 
 		error = xfs_ioctl_reflink(src.file, args.src_offset, filp,
-					  args.dest_offset, args.src_length);
+					  args.dest_offset, args.src_length,
+					  false);
 		fdput(src);
 		if (error > 0)
 			error = 0;
@@ -1992,6 +2105,9 @@ xfs_file_ioctl(
 		return error;
 	}
 
+	case XFS_IOC_FILE_EXTENT_SAME:
+		return xfs_ioctl_file_extent_same(filp, arg);
+
 	default:
 		return -ENOTTY;
 	}
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 76d8729..575c292 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -560,6 +560,7 @@ xfs_file_compat_ioctl(
 	case XFS_IOC_ERROR_CLEARALL:
 	case XFS_IOC_CLONE:
 	case XFS_IOC_CLONE_RANGE:
+	case XFS_IOC_FILE_EXTENT_SAME:
 		return xfs_file_ioctl(filp, cmd, p);
 #ifndef BROKEN_X86_ALIGNMENT
 	/* These are handled fine if no alignment issues */
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index ac81b02..dee3556 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1386,6 +1386,103 @@ advloop:
 }
 #undef IMAPNEXT
 
+/*
+ * Read a page's worth of file data into the page cache.
+ */
+STATIC struct page *
+xfs_get_page(
+	struct inode	*inode,		/* file whose page cache is read */
+	xfs_off_t	offset)		/* byte offset within the file */
+{
+	struct page	*pg;
+
+	/*
+	 * Failure is reported two ways: the error pointer handed back by
+	 * read_mapping_page(), or NULL if the page is not up to date.
+	 */
+	pg = read_mapping_page(inode->i_mapping,
+			offset >> PAGE_CACHE_SHIFT, NULL);
+	if (IS_ERR(pg) || PageUptodate(pg))
+		return pg;
+
+	/* Release the page before reporting that it is not up to date. */
+	page_cache_release(pg);
+	return NULL;
+}
+
+/*
+ * Compare @len bytes of two files; sets *is_same, returns 0 or negative errno.
+ */
+STATIC int
+xfs_compare_extents(
+	struct inode	*src,		/* first inode */
+	xfs_off_t	srcoff,		/* offset of first inode */
+	struct inode	*dest,		/* second inode */
+	xfs_off_t	destoff,	/* offset of second inode */
+	xfs_off_t	len,		/* length of data to compare */
+	bool		*is_same)	/* out: true if the contents match */
+{
+	xfs_off_t	src_poff, dest_poff;
+	void		*src_addr, *dest_addr;
+	struct page	*src_page, *dest_page;
+	xfs_off_t	cmp_len;
+	bool		same = true;
+	int		error = -EINVAL; /* reported if a page is not uptodate */
+
+	while (len) {
+		src_poff = srcoff & (PAGE_CACHE_SIZE - 1);
+		dest_poff = destoff & (PAGE_CACHE_SIZE - 1);
+		cmp_len = min(PAGE_CACHE_SIZE - src_poff,
+			      PAGE_CACHE_SIZE - dest_poff);
+		cmp_len = min(cmp_len, len);
+		ASSERT(cmp_len > 0);
+
+		trace_xfs_reflink_compare_extents(XFS_I(src), srcoff, cmp_len,
+				XFS_I(dest), destoff);
+
+		/* xfs_get_page() fails with either NULL or an ERR_PTR. */
+		src_page = xfs_get_page(src, srcoff);
+		if (IS_ERR(src_page))
+			error = PTR_ERR(src_page);
+		if (IS_ERR_OR_NULL(src_page))
+			goto out_error;
+		dest_page = xfs_get_page(dest, destoff);
+		if (IS_ERR(dest_page))
+			error = PTR_ERR(dest_page);
+		if (IS_ERR_OR_NULL(dest_page)) {
+			page_cache_release(src_page);
+			goto out_error;
+		}
+		src_addr = kmap_atomic(src_page);
+		dest_addr = kmap_atomic(dest_page);
+		flush_dcache_page(src_page);
+		flush_dcache_page(dest_page);
+
+		if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
+			same = false;
+
+		/* Atomic kmaps nest, so unmap in reverse order of mapping. */
+		kunmap_atomic(dest_addr);
+		kunmap_atomic(src_addr);
+		page_cache_release(src_page);
+		page_cache_release(dest_page);
+
+		if (!same)
+			break;
+
+		srcoff += cmp_len;
+		destoff += cmp_len;
+		len -= cmp_len;
+	}
+
+	*is_same = same;
+	return 0;
+
+out_error:
+	trace_xfs_reflink_compare_extents_error(XFS_I(dest), error, _RET_IP_);
+	return error;
+}
+
 /**
  * xfs_reflink() - link a range of blocks from one inode to another
  *
@@ -1394,6 +1491,7 @@ advloop:
  * @dest: Inode to clone to
  * @destoff: Offset within @inode to start clone
  * @len: Original length, passed by user, of range to clone
+ * @flags: Flags to modify reflink's behavior
  */
 int
 xfs_reflink(
@@ -1401,12 +1499,14 @@ xfs_reflink(
 	xfs_off_t		srcoff,
 	struct xfs_inode	*dest,
 	xfs_off_t		destoff,
-	xfs_off_t		len)
+	xfs_off_t		len,
+	unsigned int		flags)
 {
 	struct xfs_mount	*mp = src->i_mount;
 	xfs_fileoff_t		sfsbno, dfsbno;
 	xfs_filblks_t		fsblen;
 	int			error;
+	bool			is_same;
 
 	if (!xfs_sb_version_hasreflink(&mp->m_sb))
 		return -EOPNOTSUPP;
@@ -1418,6 +1518,9 @@ xfs_reflink(
 	if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
 		return -EINVAL;
 
+	if (flags & ~XFS_REFLINK_ALL)
+		return -EINVAL;
+
 	trace_xfs_reflink_range(src, srcoff, len, dest, destoff);
 
 	/* Lock both files against IO */
@@ -1429,6 +1532,21 @@ xfs_reflink(
 		xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
 	}
 
+	/*
+	 * Check that the extents are the same.
+	 */
+	if (flags & XFS_REFLINK_DEDUPE) {
+		is_same = false;
+		error = xfs_compare_extents(VFS_I(src), srcoff, VFS_I(dest),
+				destoff, len, &is_same);
+		if (error)
+			goto out_error;
+		if (!is_same) {
+			error = -EBADE;
+			goto out_error;
+		}
+	}
+
 	error = set_inode_reflink_flag(src, dest);
 	if (error)
 		goto out_error;
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index b633824..c60a9bd 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -44,7 +44,11 @@ extern int xfs_reflink_finish_fork_buf(struct xfs_inode *ip, struct xfs_buf *bp,
 		xfs_fileoff_t fileoff, struct xfs_trans *tp, int write_error,
 		xfs_fsblock_t old_fsbno);
 
+#define XFS_REFLINK_DEDUPE	1	/* only reflink if contents match */
+#define XFS_REFLINK_ALL		(XFS_REFLINK_DEDUPE)
+
 extern int xfs_reflink(struct xfs_inode *src, xfs_off_t srcoff,
-		struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len);
+		struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len,
+		unsigned int flags);
 
 #endif /* __XFS_REFLINK_H */


WARNING: multiple messages have this Message-ID (diff)
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: david@fromorbit.com, darrick.wong@oracle.com
Cc: linux-fsdevel@vger.kernel.org, xfs@oss.sgi.com
Subject: [PATCH 52/58] xfs: emulate the btrfs dedupe extent same ioctl
Date: Tue, 06 Oct 2015 22:00:51 -0700	[thread overview]
Message-ID: <20151007050051.30457.45420.stgit@birch.djwong.org> (raw)
In-Reply-To: <20151007045443.30457.47038.stgit@birch.djwong.org>

Emulate the BTRFS_IOC_EXTENT_SAME ioctl.  This operation is similar
to clone_range, but the kernel must confirm that the contents of the
two extents are identical before performing the reflink.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_fs.h |   30 ++++++++++++
 fs/xfs/xfs_ioctl.c     |  124 ++++++++++++++++++++++++++++++++++++++++++++++--
 fs/xfs/xfs_ioctl32.c   |    1 
 fs/xfs/xfs_reflink.c   |  120 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_reflink.h   |    6 ++
 5 files changed, 275 insertions(+), 6 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 2c8cd04..c63afd4 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -570,8 +570,38 @@ struct xfs_clone_args {
 	__u64 dest_offset;
 };
 
+/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
+#define XFS_EXTENT_DATA_SAME	0
+#define XFS_EXTENT_DATA_DIFFERS	1
+
+/* from struct btrfs_ioctl_file_extent_same_info */
+struct xfs_extent_data_info {
+	__s64 fd;		/* in - destination file descriptor */
+	__u64 logical_offset;	/* in - start of extent in destination */
+	__u64 bytes_deduped;	/* out - total # of bytes we were able
+				 * to dedupe from this file */
+	/* status of this dedupe operation:
+	 * 0 (XFS_EXTENT_DATA_SAME) if dedupe succeeds
+	 * < 0 for error
+	 * == XFS_EXTENT_DATA_DIFFERS if data differs
+	 */
+	__s32 status;		/* out - see above description */
+	__u32 reserved;
+};
+
+/* from struct btrfs_ioctl_file_extent_same_args */
+struct xfs_extent_data {
+	__u64 logical_offset;	/* in - start of extent in source */
+	__u64 length;		/* in - length of extent (handler caps at 16MB) */
+	__u16 dest_count;	/* in - total elements in info array */
+	__u16 reserved1;
+	__u32 reserved2;
+	struct xfs_extent_data_info info[0];	/* in/out - dest_count entries */
+};
+
 #define XFS_IOC_CLONE		 _IOW (0x94, 9, int)
 #define XFS_IOC_CLONE_RANGE	 _IOW (0x94, 13, struct xfs_clone_args)
+#define XFS_IOC_FILE_EXTENT_SAME _IOWR(0x94, 54, struct xfs_extent_data)
 
 #ifndef HAVE_BBMACROS
 /*
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index ce4812e..50ea19e 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1541,7 +1541,8 @@ xfs_ioctl_reflink(
 	loff_t		pos_in,
 	struct file	*file_out,
 	loff_t		pos_out,
-	size_t		len)
+	size_t		len,
+	bool		is_dedupe)
 {
 	struct inode	*inode_in;
 	struct inode	*inode_out;
@@ -1550,6 +1551,7 @@ xfs_ioctl_reflink(
 	loff_t		isize;
 	int		same_inode;
 	loff_t		blen;
+	unsigned int	flags;
 
 	if (len == 0)
 		return 0;
@@ -1629,8 +1631,12 @@ xfs_ioctl_reflink(
 	if (ret)
 		goto out_unlock;
 
+	flags = 0;
+	if (is_dedupe)
+		flags |= XFS_REFLINK_DEDUPE;
+
 	ret = xfs_reflink(XFS_I(inode_in), pos_in, XFS_I(inode_out),
-			pos_out, len);
+			pos_out, len, flags);
 	if (ret < 0)
 		goto out_unlock;
 
@@ -1652,6 +1658,112 @@ out_unlock:
 	return ret;
 }
 
+#define XFS_MAX_DEDUPE_LEN	(16 * 1024 * 1024)
+
+/*
+ * Dedupe one source range of @file into every destination listed in
+ * argp->info[], recording a status per destination.  Returns 0 or -errno.
+ */
+static long
+xfs_ioctl_file_extent_same(
+	struct file			*file,
+	struct xfs_extent_data __user	*argp)
+{
+	struct xfs_extent_data		*same;
+	struct xfs_extent_data_info	*info;
+	struct inode			*src;
+	u64				off;
+	u64				len;
+	int				i;
+	int				ret;
+	unsigned long			size;
+	bool				is_admin;
+	u16				count;
+
+	is_admin = capable(CAP_SYS_ADMIN);
+	src = file_inode(file);
+	if (!(file->f_mode & FMODE_READ))
+		return -EINVAL;
+
+	if (get_user(count, &argp->dest_count))
+		return -EFAULT;
+
+	size = offsetof(struct xfs_extent_data __user,
+			info[count]);
+
+	/* Kernel copy of the header and all info slots; freed at out_free. */
+	same = memdup_user(argp, size);
+	if (IS_ERR(same))
+		return PTR_ERR(same);
+
+	off = same->logical_offset;
+	len = same->length;
+
+	/*
+	 * Limit the total length we will dedupe for each operation.
+	 * This is intended to bound the total time spent in this
+	 * ioctl to something sane.
+	 */
+	if (len > XFS_MAX_DEDUPE_LEN)
+		len = XFS_MAX_DEDUPE_LEN;
+
+	ret = -EISDIR;
+	if (S_ISDIR(src->i_mode))
+		goto out_free;
+
+	ret = -EACCES;
+	if (!S_ISREG(src->i_mode))
+		goto out_free;
+
+	/* pre-format output fields to sane values */
+	for (i = 0; i < count; i++) {
+		same->info[i].bytes_deduped = 0ULL;
+		same->info[i].status = 0;
+	}
+
+	for (i = 0, info = same->info; i < count; i++, info++) {
+		struct inode *dst;
+		struct fd dst_file = fdget(info->fd);
+
+		if (!dst_file.file) {
+			info->status = -EBADF;
+			continue;
+		}
+		dst = file_inode(dst_file.file);
+
+		trace_xfs_ioctl_file_extent_same(file_inode(file), off, len,
+				dst, info->logical_offset);
+
+		if (!(is_admin || (dst_file.file->f_mode & FMODE_WRITE))) {
+			info->status = -EINVAL;
+		} else if (file->f_path.mnt != dst_file.file->f_path.mnt) {
+			info->status = -EXDEV;
+		} else if (S_ISDIR(dst->i_mode)) {
+			info->status = -EISDIR;
+		} else if (!S_ISREG(dst->i_mode)) {
+			info->status = -EACCES;
+		} else {
+			info->status = xfs_ioctl_reflink(file, off,
+							 dst_file.file,
+							 info->logical_offset,
+							 len, true);
+			if (info->status == -EBADE)
+				info->status = XFS_EXTENT_DATA_DIFFERS;
+			else if (info->status == 0)
+				info->bytes_deduped = len;
+		}
+		fdput(dst_file);
+	}
+
+	ret = copy_to_user(argp, same, size);
+	if (ret)
+		ret = -EFAULT;
+
+out_free:
+	kfree(same);
+	return ret;
+}
+
 /*
  * Note: some of the ioctl's return positive numbers as a
  * byte count indicating success, such as readlink_by_handle.
@@ -1959,7 +2071,7 @@ xfs_file_ioctl(
 
 		trace_xfs_ioctl_clone(file_inode(src.file), file_inode(filp));
 
-		error = xfs_ioctl_reflink(src.file, 0, filp, 0, ~0ULL);
+		error = xfs_ioctl_reflink(src.file, 0, filp, 0, ~0ULL, false);
 		fdput(src);
 		if (error > 0)
 			error = 0;
@@ -1984,7 +2096,8 @@ xfs_file_ioctl(
 				file_inode(filp), args.dest_offset);
 
 		error = xfs_ioctl_reflink(src.file, args.src_offset, filp,
-					  args.dest_offset, args.src_length);
+					  args.dest_offset, args.src_length,
+					  false);
 		fdput(src);
 		if (error > 0)
 			error = 0;
@@ -1992,6 +2105,9 @@ xfs_file_ioctl(
 		return error;
 	}
 
+	case XFS_IOC_FILE_EXTENT_SAME:
+		return xfs_ioctl_file_extent_same(filp, arg);
+
 	default:
 		return -ENOTTY;
 	}
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 76d8729..575c292 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -560,6 +560,7 @@ xfs_file_compat_ioctl(
 	case XFS_IOC_ERROR_CLEARALL:
 	case XFS_IOC_CLONE:
 	case XFS_IOC_CLONE_RANGE:
+	case XFS_IOC_FILE_EXTENT_SAME:
 		return xfs_file_ioctl(filp, cmd, p);
 #ifndef BROKEN_X86_ALIGNMENT
 	/* These are handled fine if no alignment issues */
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index ac81b02..dee3556 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1386,6 +1386,103 @@ advloop:
 }
 #undef IMAPNEXT
 
+/*
+ * Read a page's worth of file data into the page cache.
+ */
+STATIC struct page *
+xfs_get_page(
+	struct inode	*inode,		/* file whose page cache is read */
+	xfs_off_t	offset)		/* byte offset within the file */
+{
+	struct page	*pg;
+
+	/*
+	 * Failure is reported two ways: the error pointer handed back by
+	 * read_mapping_page(), or NULL if the page is not up to date.
+	 */
+	pg = read_mapping_page(inode->i_mapping,
+			offset >> PAGE_CACHE_SHIFT, NULL);
+	if (IS_ERR(pg) || PageUptodate(pg))
+		return pg;
+
+	/* Release the page before reporting that it is not up to date. */
+	page_cache_release(pg);
+	return NULL;
+}
+
+/*
+ * Compare @len bytes of two files; sets *is_same, returns 0 or negative errno.
+ */
+STATIC int
+xfs_compare_extents(
+	struct inode	*src,		/* first inode */
+	xfs_off_t	srcoff,		/* offset of first inode */
+	struct inode	*dest,		/* second inode */
+	xfs_off_t	destoff,	/* offset of second inode */
+	xfs_off_t	len,		/* length of data to compare */
+	bool		*is_same)	/* out: true if the contents match */
+{
+	xfs_off_t	src_poff, dest_poff;
+	void		*src_addr, *dest_addr;
+	struct page	*src_page, *dest_page;
+	xfs_off_t	cmp_len;
+	bool		same = true;
+	int		error = -EINVAL; /* reported if a page is not uptodate */
+
+	while (len) {
+		src_poff = srcoff & (PAGE_CACHE_SIZE - 1);
+		dest_poff = destoff & (PAGE_CACHE_SIZE - 1);
+		cmp_len = min(PAGE_CACHE_SIZE - src_poff,
+			      PAGE_CACHE_SIZE - dest_poff);
+		cmp_len = min(cmp_len, len);
+		ASSERT(cmp_len > 0);
+
+		trace_xfs_reflink_compare_extents(XFS_I(src), srcoff, cmp_len,
+				XFS_I(dest), destoff);
+
+		/* xfs_get_page() fails with either NULL or an ERR_PTR. */
+		src_page = xfs_get_page(src, srcoff);
+		if (IS_ERR(src_page))
+			error = PTR_ERR(src_page);
+		if (IS_ERR_OR_NULL(src_page))
+			goto out_error;
+		dest_page = xfs_get_page(dest, destoff);
+		if (IS_ERR(dest_page))
+			error = PTR_ERR(dest_page);
+		if (IS_ERR_OR_NULL(dest_page)) {
+			page_cache_release(src_page);
+			goto out_error;
+		}
+		src_addr = kmap_atomic(src_page);
+		dest_addr = kmap_atomic(dest_page);
+		flush_dcache_page(src_page);
+		flush_dcache_page(dest_page);
+
+		if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
+			same = false;
+
+		/* Atomic kmaps nest, so unmap in reverse order of mapping. */
+		kunmap_atomic(dest_addr);
+		kunmap_atomic(src_addr);
+		page_cache_release(src_page);
+		page_cache_release(dest_page);
+
+		if (!same)
+			break;
+
+		srcoff += cmp_len;
+		destoff += cmp_len;
+		len -= cmp_len;
+	}
+
+	*is_same = same;
+	return 0;
+
+out_error:
+	trace_xfs_reflink_compare_extents_error(XFS_I(dest), error, _RET_IP_);
+	return error;
+}
+
 /**
  * xfs_reflink() - link a range of blocks from one inode to another
  *
@@ -1394,6 +1491,7 @@ advloop:
  * @dest: Inode to clone to
  * @destoff: Offset within @inode to start clone
  * @len: Original length, passed by user, of range to clone
+ * @flags: Flags to modify reflink's behavior
  */
 int
 xfs_reflink(
@@ -1401,12 +1499,14 @@ xfs_reflink(
 	xfs_off_t		srcoff,
 	struct xfs_inode	*dest,
 	xfs_off_t		destoff,
-	xfs_off_t		len)
+	xfs_off_t		len,
+	unsigned int		flags)
 {
 	struct xfs_mount	*mp = src->i_mount;
 	xfs_fileoff_t		sfsbno, dfsbno;
 	xfs_filblks_t		fsblen;
 	int			error;
+	bool			is_same;
 
 	if (!xfs_sb_version_hasreflink(&mp->m_sb))
 		return -EOPNOTSUPP;
@@ -1418,6 +1518,9 @@ xfs_reflink(
 	if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
 		return -EINVAL;
 
+	if (flags & ~XFS_REFLINK_ALL)
+		return -EINVAL;
+
 	trace_xfs_reflink_range(src, srcoff, len, dest, destoff);
 
 	/* Lock both files against IO */
@@ -1429,6 +1532,21 @@ xfs_reflink(
 		xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
 	}
 
+	/*
+	 * Check that the extents are the same.
+	 */
+	if (flags & XFS_REFLINK_DEDUPE) {
+		is_same = false;
+		error = xfs_compare_extents(VFS_I(src), srcoff, VFS_I(dest),
+				destoff, len, &is_same);
+		if (error)
+			goto out_error;
+		if (!is_same) {
+			error = -EBADE;
+			goto out_error;
+		}
+	}
+
 	error = set_inode_reflink_flag(src, dest);
 	if (error)
 		goto out_error;
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index b633824..c60a9bd 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -44,7 +44,11 @@ extern int xfs_reflink_finish_fork_buf(struct xfs_inode *ip, struct xfs_buf *bp,
 		xfs_fileoff_t fileoff, struct xfs_trans *tp, int write_error,
 		xfs_fsblock_t old_fsbno);
 
+#define XFS_REFLINK_DEDUPE	1	/* only reflink if contents match */
+#define XFS_REFLINK_ALL		(XFS_REFLINK_DEDUPE)
+
 extern int xfs_reflink(struct xfs_inode *src, xfs_off_t srcoff,
-		struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len);
+		struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len,
+		unsigned int flags);
 
 #endif /* __XFS_REFLINK_H */

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

  parent reply	other threads:[~2015-10-07  5:00 UTC|newest]

Thread overview: 131+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-10-07  4:54 [RFCv3 00/58] xfs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
2015-10-07  4:54 ` Darrick J. Wong
2015-10-07  4:54 ` [PATCH 01/58] libxfs: make xfs_alloc_fix_freelist non-static Darrick J. Wong
2015-10-07  4:54   ` Darrick J. Wong
2015-10-07  4:54 ` [PATCH 02/58] xfs: fix log ticket type printing Darrick J. Wong
2015-10-07  4:54   ` Darrick J. Wong
2015-10-07  4:55 ` [PATCH 03/58] xfs: introduce rmap btree definitions Darrick J. Wong
2015-10-07  4:55   ` Darrick J. Wong
2015-10-07  4:55 ` [PATCH 04/58] xfs: add rmap btree stats infrastructure Darrick J. Wong
2015-10-07  4:55 ` [PATCH 05/58] xfs: rmap btree add more reserved blocks Darrick J. Wong
2015-10-07  4:55   ` Darrick J. Wong
2015-10-07  4:55 ` [PATCH 06/58] xfs: add owner field to extent allocation and freeing Darrick J. Wong
2015-10-07  4:55   ` Darrick J. Wong
2015-10-07  4:55 ` [PATCH 07/58] xfs: add extended " Darrick J. Wong
2015-10-07  4:55   ` Darrick J. Wong
2015-10-07  4:55 ` [PATCH 08/58] xfs: introduce rmap extent operation stubs Darrick J. Wong
2015-10-07  4:55   ` Darrick J. Wong
2015-10-07  4:55 ` [PATCH 09/58] xfs: extend rmap extent operation stubs to take full owner info Darrick J. Wong
2015-10-07  4:55   ` Darrick J. Wong
2015-10-07  4:55 ` [PATCH 10/58] xfs: define the on-disk rmap btree format Darrick J. Wong
2015-10-07  4:55   ` Darrick J. Wong
2015-10-07  4:55 ` [PATCH 11/58] xfs: enhance " Darrick J. Wong
2015-10-07  4:55   ` Darrick J. Wong
2015-10-07  4:56 ` [PATCH 12/58] xfs: add rmap btree growfs support Darrick J. Wong
2015-10-07  4:56   ` Darrick J. Wong
2015-10-07  4:56 ` [PATCH 13/58] xfs: enhance " Darrick J. Wong
2015-10-07  4:56   ` Darrick J. Wong
2015-10-07  4:56 ` [PATCH 14/58] xfs: rmap btree transaction reservations Darrick J. Wong
2015-10-07  4:56   ` Darrick J. Wong
2015-10-07  4:56 ` [PATCH 15/58] xfs: rmap btree requires more reserved free space Darrick J. Wong
2015-10-07  4:56   ` Darrick J. Wong
2015-10-07  4:56 ` [PATCH 16/58] libxfs: fix min freelist length calculation Darrick J. Wong
2015-10-07  4:56   ` Darrick J. Wong
2015-10-07  4:56 ` [PATCH 17/58] xfs: add rmap btree operations Darrick J. Wong
2015-10-07  4:57 ` [PATCH 18/58] xfs: enhance " Darrick J. Wong
2015-10-07  4:57 ` [PATCH 19/58] xfs: add an extent to the rmap btree Darrick J. Wong
2015-10-07  4:57   ` Darrick J. Wong
2015-10-07  4:57 ` [PATCH 20/58] xfs: add tracepoints for the rmap-mirrors-bmbt functions Darrick J. Wong
2015-10-07  4:57   ` Darrick J. Wong
2015-10-07  4:57 ` [PATCH 21/58] xfs: teach rmap_alloc how to deal with our larger rmap btree Darrick J. Wong
2015-10-07  4:57   ` Darrick J. Wong
2015-10-07  4:57 ` [PATCH 22/58] xfs: remove an extent from the " Darrick J. Wong
2015-10-07  4:57   ` Darrick J. Wong
2015-10-07  4:57 ` [PATCH 23/58] xfs: enhanced " Darrick J. Wong
2015-10-07  4:57   ` Darrick J. Wong
2015-10-07  4:57 ` [PATCH 24/58] xfs: add rmap btree insert and delete helpers Darrick J. Wong
2015-10-07  4:57   ` Darrick J. Wong
2015-10-07  4:57 ` [PATCH 25/58] xfs: bmap btree changes should update rmap btree Darrick J. Wong
2015-10-07  4:57   ` Darrick J. Wong
2015-10-21 21:39   ` Darrick J. Wong
2015-10-21 21:39     ` Darrick J. Wong
2015-10-07  4:57 ` [PATCH 26/58] xfs: add rmap btree geometry feature flag Darrick J. Wong
2015-10-07  4:57   ` Darrick J. Wong
2015-10-07  4:58 ` [PATCH 27/58] xfs: add rmap btree block detection to log recovery Darrick J. Wong
2015-10-07  4:58   ` Darrick J. Wong
2015-10-07  4:58 ` [PATCH 28/58] xfs: enable the rmap btree functionality Darrick J. Wong
2015-10-07  4:58   ` Darrick J. Wong
2015-10-07  4:58 ` [PATCH 29/58] xfs: disable XFS_IOC_SWAPEXT when rmap btree is enabled Darrick J. Wong
2015-10-07  4:58   ` Darrick J. Wong
2015-10-07  4:58 ` [PATCH 30/58] xfs: implement " Darrick J. Wong
2015-10-07  4:58   ` Darrick J. Wong
2015-10-07  4:58 ` [PATCH 31/58] libxfs: refactor short btree block verification Darrick J. Wong
2015-10-07  4:58   ` Darrick J. Wong
2015-10-07  4:58 ` [PATCH 32/58] xfs: don't update rmapbt when fixing agfl Darrick J. Wong
2015-10-07  4:58   ` Darrick J. Wong
2015-10-07  4:58 ` [PATCH 33/58] xfs: introduce refcount btree definitions Darrick J. Wong
2015-10-07  4:58   ` Darrick J. Wong
2015-10-07  4:58 ` [PATCH 34/58] xfs: add refcount btree stats infrastructure Darrick J. Wong
2015-10-07  4:58   ` Darrick J. Wong
2015-10-07  4:58 ` [PATCH 35/58] xfs: refcount btree add more reserved blocks Darrick J. Wong
2015-10-07  4:58   ` Darrick J. Wong
2015-10-07  4:59 ` [PATCH 36/58] xfs: define the on-disk refcount btree format Darrick J. Wong
2015-10-07  4:59   ` Darrick J. Wong
2015-10-07  4:59 ` [PATCH 37/58] xfs: define tracepoints for refcount/reflink activities Darrick J. Wong
2015-10-07  4:59   ` Darrick J. Wong
2015-10-07  4:59 ` [PATCH 38/58] xfs: add refcount btree support to growfs Darrick J. Wong
2015-10-07  4:59   ` Darrick J. Wong
2015-10-07  4:59 ` [PATCH 39/58] xfs: add refcount btree operations Darrick J. Wong
2015-10-07  4:59   ` Darrick J. Wong
2015-10-07  4:59 ` [PATCH 40/58] libxfs: adjust refcount of an extent of blocks in refcount btree Darrick J. Wong
2015-10-07  4:59   ` Darrick J. Wong
2015-10-27 19:05   ` Darrick J. Wong
2015-10-27 19:05     ` Darrick J. Wong
2015-10-30 20:56     ` Darrick J. Wong
2015-10-30 20:56       ` Darrick J. Wong
2015-10-07  4:59 ` [PATCH 41/58] libxfs: adjust refcount when unmapping file blocks Darrick J. Wong
2015-10-07  4:59   ` Darrick J. Wong
2015-10-07  4:59 ` [PATCH 42/58] xfs: add refcount btree block detection to log recovery Darrick J. Wong
2015-10-07  4:59   ` Darrick J. Wong
2015-10-07  4:59 ` [PATCH 43/58] xfs: map an inode's offset to an exact physical block Darrick J. Wong
2015-10-07  4:59   ` Darrick J. Wong
2015-10-07  4:59 ` [PATCH 44/58] xfs: add reflink feature flag to geometry Darrick J. Wong
2015-10-07  4:59   ` Darrick J. Wong
2015-10-07  5:00 ` [PATCH 45/58] xfs: create a separate workqueue for copy-on-write activities Darrick J. Wong
2015-10-07  5:00   ` Darrick J. Wong
2015-10-07  5:00 ` [PATCH 46/58] xfs: implement copy-on-write for reflinked blocks Darrick J. Wong
2015-10-07  5:00   ` Darrick J. Wong
2015-10-07  5:00 ` [PATCH 47/58] xfs: handle directio " Darrick J. Wong
2015-10-07  5:00   ` Darrick J. Wong
2015-10-07  5:00 ` [PATCH 48/58] xfs: copy-on-write reflinked blocks when zeroing ranges of blocks Darrick J. Wong
2015-10-07  5:00   ` Darrick J. Wong
2015-10-21 21:17   ` Darrick J. Wong
2015-10-21 21:17     ` Darrick J. Wong
2015-10-07  5:00 ` [PATCH 49/58] xfs: clear inode reflink flag when freeing blocks Darrick J. Wong
2015-10-07  5:00   ` Darrick J. Wong
2015-10-07  5:00 ` [PATCH 50/58] xfs: reflink extents from one file to another Darrick J. Wong
2015-10-07  5:00   ` Darrick J. Wong
2015-10-07  5:12   ` kbuild test robot
2015-10-07  5:12     ` kbuild test robot
2015-10-07  5:00 ` [PATCH 51/58] xfs: add clone file and clone range ioctls Darrick J. Wong
2015-10-07  5:00   ` Darrick J. Wong
2015-10-07  5:13   ` kbuild test robot
2015-10-07  5:13     ` kbuild test robot
2015-10-07  6:46   ` kbuild test robot
2015-10-07  6:46     ` kbuild test robot
2015-10-07  7:35   ` kbuild test robot
2015-10-07  7:35     ` kbuild test robot
2015-10-07  5:00 ` Darrick J. Wong [this message]
2015-10-07  5:00   ` [PATCH 52/58] xfs: emulate the btrfs dedupe extent same ioctl Darrick J. Wong
2015-10-07  5:00 ` [PATCH 53/58] xfs: teach fiemap about reflink'd extents Darrick J. Wong
2015-10-07  5:00   ` Darrick J. Wong
2015-10-07  5:01 ` [PATCH 54/58] xfs: swap inode reflink flags when swapping inode extents Darrick J. Wong
2015-10-07  5:01   ` Darrick J. Wong
2015-10-07  5:01 ` [PATCH 55/58] vfs: add a FALLOC_FL_UNSHARE mode to fallocate to unshare a range of blocks Darrick J. Wong
2015-10-07  5:01   ` Darrick J. Wong
2015-10-07  5:01 ` [PATCH 56/58] xfs: unshare a range of blocks via fallocate Darrick J. Wong
2015-10-07  5:01   ` Darrick J. Wong
2015-10-07  5:01 ` [PATCH 57/58] xfs: support XFS_XFLAG_REFLINK (and FS_NOCOW_FL) on reflink filesystems Darrick J. Wong
2015-10-07  5:01   ` Darrick J. Wong
2015-10-07  5:01 ` [PATCH 58/58] xfs: recognize the reflink feature bit Darrick J. Wong
2015-10-07  5:01   ` Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20151007050051.30457.45420.stgit@birch.djwong.org \
    --to=darrick.wong@oracle.com \
    --cc=david@fromorbit.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.