All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peng Tao <tao.peng@primarydata.com>
To: linux-nfs@vger.kernel.org
Cc: Trond Myklebust <trond.myklebust@primarydata.com>,
	Anna Schumaker <anna.schumaker@netapp.com>,
	Christoph Hellwig <hch@infradead.org>, Zach Brown <zab@zabbo.net>,
	Darren Hart <dvhart@linux.intel.com>,
	bfields@fieldses.org, Jeff Layton <jeff.layton@primarydata.com>,
	Peng Tao <tao.peng@primarydata.com>,
	linux-btrfs@vger.kernel.org, linux-fsdevel@vger.kernel.org
Subject: [PATCH RFC 01/11] vfs: pull btrfs clone API to vfs layer
Date: Tue, 25 Aug 2015 23:33:39 +0800	[thread overview]
Message-ID: <1440516829-116041-2-git-send-email-tao.peng@primarydata.com> (raw)
In-Reply-To: <1440516829-116041-1-git-send-email-tao.peng@primarydata.com>

Now that a few file systems are adding clone functionality, namely
btrfs, NFS (later in the series) and XFS
(http://oss.sgi.com/archives/xfs/2015-06/msg00407.html), it makes sense
to pull the ioctl to common code.

Add vfs_file_clone_range() helper and .clone_range file operation interface
to allow underlying filesystems to clone between regular files.

The change in do_vfs_ioctl() is deferred to the next patch, where btrfs
.clone_range is added, just so that we don't break the btrfs CLONE ioctl
with this patch.

Cc: linux-btrfs@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Peng Tao <tao.peng@primarydata.com>
---
 fs/ioctl.c              | 24 ++++++++++++++++++++++++
 fs/read_write.c         | 45 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h      |  4 ++++
 include/uapi/linux/fs.h |  9 +++++++++
 4 files changed, 82 insertions(+)

diff --git a/fs/ioctl.c b/fs/ioctl.c
index 5d01d26..726c5d7 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -215,6 +215,30 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
 	return error;
 }
 
+static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
+			     u64 off, u64 olen, u64 destoff)
+{
+	struct fd src_file = fdget(srcfd);
+	int ret;
+
+	if (!src_file.file)
+		return -EBADF;
+	ret = vfs_file_clone_range(src_file.file, dst_file, off, olen, destoff);
+
+	fdput(src_file);
+	return ret;
+}
+
+static long ioctl_file_clone_range(struct file *file, void __user *argp)
+{
+	struct file_clone_range args;
+
+	if (copy_from_user(&args, argp, sizeof(args)))
+		return -EFAULT;
+	return ioctl_file_clone(file, args.src_fd, args.src_offset,
+				args.src_length, args.dest_offset);
+}
+
 #ifdef CONFIG_BLOCK
 
 static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
diff --git a/fs/read_write.c b/fs/read_write.c
index 819ef3f..beaad2c 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -16,6 +16,7 @@
 #include <linux/pagemap.h>
 #include <linux/splice.h>
 #include <linux/compat.h>
+#include <linux/mount.h>
 #include "internal.h"
 
 #include <asm/uaccess.h>
@@ -1327,3 +1328,47 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
 	return do_sendfile(out_fd, in_fd, NULL, count, 0);
 }
 #endif
+
+int vfs_file_clone_range(struct file *src_file, struct file *dst_file,
+			 loff_t off, size_t len, loff_t dstoff)
+{
+	struct inode *src_ino;
+	struct inode *dst_ino;
+	ssize_t ret;
+
+	if (!(src_file->f_mode & FMODE_READ) ||
+	    !(dst_file->f_mode & FMODE_WRITE) ||
+	    (dst_file->f_flags & O_APPEND) ||
+	    !src_file->f_op || !src_file->f_op->clone_range)
+		return -EINVAL;
+
+	src_ino = file_inode(src_file);
+	dst_ino = file_inode(dst_file);
+
+        if (S_ISDIR(src_ino->i_mode) || S_ISDIR(dst_ino->i_mode))
+                return -EISDIR;
+
+	/* sanity check on offsets and length */
+	if (off + len < off || dstoff + len < dstoff ||
+	    off + len > i_size_read(src_ino))
+		return -EINVAL;
+
+	if (src_ino->i_sb != dst_ino->i_sb ||
+	    src_file->f_path.mnt != dst_file->f_path.mnt)
+		return -EXDEV;
+
+	ret = mnt_want_write_file(dst_file);
+	if (ret)
+		return ret;
+
+	ret = src_file->f_op->clone_range(src_file, dst_file, off, len, dstoff);
+	if (!ret) {
+		fsnotify_access(src_file);
+		fsnotify_modify(dst_file);
+	}
+
+	mnt_drop_write_file(dst_file);
+
+	return ret;
+}
+EXPORT_SYMBOL(vfs_file_clone_range);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cc008c3..612d7f4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1628,6 +1628,8 @@ struct file_operations {
 	long (*fallocate)(struct file *file, int mode, loff_t offset,
 			  loff_t len);
 	void (*show_fdinfo)(struct seq_file *m, struct file *f);
+	int (*clone_range)(struct file *src_file, struct file *dst_file,
+			   loff_t off, size_t len, loff_t dstoff);
 #ifndef CONFIG_MMU
 	unsigned (*mmap_capabilities)(struct file *);
 #endif
@@ -2678,6 +2680,8 @@ int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
 int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
 #define dax_mkwrite(vma, vmf, gb, iod)		dax_fault(vma, vmf, gb, iod)
 #define __dax_mkwrite(vma, vmf, gb, iod)	__dax_fault(vma, vmf, gb, iod)
+int vfs_file_clone_range(struct file *src_file, struct file *dst_file,
+			 loff_t off, size_t len, loff_t dstoff);
 
 #ifdef CONFIG_BLOCK
 typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 9b964a5..ac7f1c5 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -39,6 +39,13 @@
 #define RENAME_EXCHANGE		(1 << 1)	/* Exchange source and dest */
 #define RENAME_WHITEOUT		(1 << 2)	/* Whiteout source */
 
+struct file_clone_range {
+	__s64 src_fd;
+	__u64 src_offset;
+	__u64 src_length;
+	__u64 dest_offset;
+};
+
 struct fstrim_range {
 	__u64 start;
 	__u64 len;
@@ -159,6 +166,8 @@ struct inodes_stat_t {
 #define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
 #define FITHAW		_IOWR('X', 120, int)	/* Thaw */
 #define FITRIM		_IOWR('X', 121, struct fstrim_range)	/* Trim */
+#define FICLONE		_IOW(0x94, 9, int)	/* Clone */
+#define FICLONERANGE	_IOW(0x94, 13, struct file_clone_range)	/* Clone range */
 
 #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
 #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
-- 
1.8.3.1


WARNING: multiple messages have this Message-ID (diff)
From: Peng Tao <tao.peng-7I+n7zu2hftEKMMhf/gKZA@public.gmane.org>
To: linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Cc: Trond Myklebust
	<trond.myklebust-7I+n7zu2hftEKMMhf/gKZA@public.gmane.org>,
	Anna Schumaker
	<anna.schumaker-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>,
	Christoph Hellwig <hch-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>,
	Zach Brown <zab-ugsP4Wv/S6ZeoWH0uzbU5w@public.gmane.org>,
	Darren Hart <dvhart-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>,
	bfields-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org,
	Jeff Layton <jeff.layton-7I+n7zu2hftEKMMhf/gKZA@public.gmane.org>,
	Peng Tao <tao.peng-7I+n7zu2hftEKMMhf/gKZA@public.gmane.org>,
	linux-btrfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: [PATCH RFC 01/11] vfs: pull btrfs clone API to vfs layer
Date: Tue, 25 Aug 2015 23:33:39 +0800	[thread overview]
Message-ID: <1440516829-116041-2-git-send-email-tao.peng@primarydata.com> (raw)
In-Reply-To: <1440516829-116041-1-git-send-email-tao.peng-7I+n7zu2hftEKMMhf/gKZA@public.gmane.org>

Now that a few file systems are adding clone functionality, namely
btrfs, NFS (later in the series) and XFS
(http://oss.sgi.com/archives/xfs/2015-06/msg00407.html), it makes sense
to pull the ioctl to common code.

Add vfs_file_clone_range() helper and .clone_range file operation interface
to allow underlying filesystems to clone between regular files.

The change in do_vfs_ioctl() is deferred to the next patch, where btrfs
.clone_range is added, just so that we don't break the btrfs CLONE ioctl
with this patch.

Cc: linux-btrfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Signed-off-by: Peng Tao <tao.peng-7I+n7zu2hftEKMMhf/gKZA@public.gmane.org>
---
 fs/ioctl.c              | 24 ++++++++++++++++++++++++
 fs/read_write.c         | 45 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h      |  4 ++++
 include/uapi/linux/fs.h |  9 +++++++++
 4 files changed, 82 insertions(+)

diff --git a/fs/ioctl.c b/fs/ioctl.c
index 5d01d26..726c5d7 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -215,6 +215,30 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
 	return error;
 }
 
+static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
+			     u64 off, u64 olen, u64 destoff)
+{
+	struct fd src_file = fdget(srcfd);
+	int ret;
+
+	if (!src_file.file)
+		return -EBADF;
+	ret = vfs_file_clone_range(src_file.file, dst_file, off, olen, destoff);
+
+	fdput(src_file);
+	return ret;
+}
+
+static long ioctl_file_clone_range(struct file *file, void __user *argp)
+{
+	struct file_clone_range args;
+
+	if (copy_from_user(&args, argp, sizeof(args)))
+		return -EFAULT;
+	return ioctl_file_clone(file, args.src_fd, args.src_offset,
+				args.src_length, args.dest_offset);
+}
+
 #ifdef CONFIG_BLOCK
 
 static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
diff --git a/fs/read_write.c b/fs/read_write.c
index 819ef3f..beaad2c 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -16,6 +16,7 @@
 #include <linux/pagemap.h>
 #include <linux/splice.h>
 #include <linux/compat.h>
+#include <linux/mount.h>
 #include "internal.h"
 
 #include <asm/uaccess.h>
@@ -1327,3 +1328,47 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
 	return do_sendfile(out_fd, in_fd, NULL, count, 0);
 }
 #endif
+
+int vfs_file_clone_range(struct file *src_file, struct file *dst_file,
+			 loff_t off, size_t len, loff_t dstoff)
+{
+	struct inode *src_ino;
+	struct inode *dst_ino;
+	ssize_t ret;
+
+	if (!(src_file->f_mode & FMODE_READ) ||
+	    !(dst_file->f_mode & FMODE_WRITE) ||
+	    (dst_file->f_flags & O_APPEND) ||
+	    !src_file->f_op || !src_file->f_op->clone_range)
+		return -EINVAL;
+
+	src_ino = file_inode(src_file);
+	dst_ino = file_inode(dst_file);
+
+        if (S_ISDIR(src_ino->i_mode) || S_ISDIR(dst_ino->i_mode))
+                return -EISDIR;
+
+	/* sanity check on offsets and length */
+	if (off + len < off || dstoff + len < dstoff ||
+	    off + len > i_size_read(src_ino))
+		return -EINVAL;
+
+	if (src_ino->i_sb != dst_ino->i_sb ||
+	    src_file->f_path.mnt != dst_file->f_path.mnt)
+		return -EXDEV;
+
+	ret = mnt_want_write_file(dst_file);
+	if (ret)
+		return ret;
+
+	ret = src_file->f_op->clone_range(src_file, dst_file, off, len, dstoff);
+	if (!ret) {
+		fsnotify_access(src_file);
+		fsnotify_modify(dst_file);
+	}
+
+	mnt_drop_write_file(dst_file);
+
+	return ret;
+}
+EXPORT_SYMBOL(vfs_file_clone_range);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cc008c3..612d7f4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1628,6 +1628,8 @@ struct file_operations {
 	long (*fallocate)(struct file *file, int mode, loff_t offset,
 			  loff_t len);
 	void (*show_fdinfo)(struct seq_file *m, struct file *f);
+	int (*clone_range)(struct file *src_file, struct file *dst_file,
+			   loff_t off, size_t len, loff_t dstoff);
 #ifndef CONFIG_MMU
 	unsigned (*mmap_capabilities)(struct file *);
 #endif
@@ -2678,6 +2680,8 @@ int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
 int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
 #define dax_mkwrite(vma, vmf, gb, iod)		dax_fault(vma, vmf, gb, iod)
 #define __dax_mkwrite(vma, vmf, gb, iod)	__dax_fault(vma, vmf, gb, iod)
+int vfs_file_clone_range(struct file *src_file, struct file *dst_file,
+			 loff_t off, size_t len, loff_t dstoff);
 
 #ifdef CONFIG_BLOCK
 typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 9b964a5..ac7f1c5 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -39,6 +39,13 @@
 #define RENAME_EXCHANGE		(1 << 1)	/* Exchange source and dest */
 #define RENAME_WHITEOUT		(1 << 2)	/* Whiteout source */
 
+struct file_clone_range {
+	__s64 src_fd;
+	__u64 src_offset;
+	__u64 src_length;
+	__u64 dest_offset;
+};
+
 struct fstrim_range {
 	__u64 start;
 	__u64 len;
@@ -159,6 +166,8 @@ struct inodes_stat_t {
 #define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
 #define FITHAW		_IOWR('X', 120, int)	/* Thaw */
 #define FITRIM		_IOWR('X', 121, struct fstrim_range)	/* Trim */
+#define FICLONE		_IOW(0x94, 9, int)	/* Clone */
+#define FICLONERANGE	_IOW(0x94, 13, struct file_clone_range)	/* Clone range */
 
 #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
 #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  reply	other threads:[~2015-08-25 15:34 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-25 15:33 [PATCH RFC 00/11] NFS/NFSD: add NFSv42 COPY operation support Peng Tao
2015-08-25 15:33 ` Peng Tao [this message]
2015-08-25 15:33   ` [PATCH RFC 01/11] vfs: pull btrfs clone API to vfs layer Peng Tao
2015-08-26  1:40   ` Peng Tao
2015-08-26  1:50   ` Dave Chinner
2015-08-26  1:59     ` Dave Chinner
2015-08-26  4:09   ` Darrick J. Wong
2015-08-26  8:03     ` Peng Tao
2015-08-26  8:03       ` Peng Tao
2015-08-25 15:33 ` [PATCH RFC 02/11] vfs/btrfs: add .clone_range file operation Peng Tao
2015-08-26  8:01   ` David Sterba
2015-08-26  8:01     ` David Sterba
2015-08-26  8:31     ` Peng Tao
2015-08-26  8:31       ` Peng Tao
2015-08-26 13:00       ` David Sterba
2015-08-26 13:00         ` David Sterba
2015-08-26 13:07         ` Christoph Hellwig
2015-08-25 15:33 ` [PATCH RFC 03/11] nfs42: decode_layoutstats does not need res parameter Peng Tao
2015-08-25 16:04   ` Anna Schumaker
2015-08-25 15:33 ` [PATCH RFC 04/11] nfs42: remove unused declaration Peng Tao
2015-08-25 16:05   ` Anna Schumaker
2015-08-25 16:08     ` Peng Tao
2015-08-25 15:33 ` [PATCH RFC 05/11] nfs42: add CLONE xdr functions Peng Tao
2015-08-25 15:33 ` [PATCH RFC 06/11] nfs42: add CLONE proc functions Peng Tao
2015-08-25 15:33 ` [PATCH RFC 07/11] nfs42: add .copy_range file operation Peng Tao
2015-08-25 15:33 ` [PATCH RFC 08/11] nfs: get clone_blksize when probing fsinfo Peng Tao
2015-08-25 15:33 ` [PATCH RFC 09/11] nfs42: respect clone_blksize Peng Tao
2015-08-25 15:33 ` [PATCH RFC 10/11] nfsd: Pass filehandle to nfs4_preprocess_stateid_op() Peng Tao
2015-08-25 15:33 ` [PATCH RFC 11/11] NFSD: Implement the CLONE call Peng Tao
2015-08-25 22:09   ` J. Bruce Fields
2015-08-25 22:11   ` J. Bruce Fields
2015-08-26  1:33     ` Peng Tao
2015-08-25 15:42 ` [PATCH RFC 00/11] NFS/NFSD: add NFSv42 COPY operation support Peng Tao
2015-08-25 16:01   ` Anna Schumaker

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1440516829-116041-2-git-send-email-tao.peng@primarydata.com \
    --to=tao.peng@primarydata.com \
    --cc=anna.schumaker@netapp.com \
    --cc=bfields@fieldses.org \
    --cc=dvhart@linux.intel.com \
    --cc=hch@infradead.org \
    --cc=jeff.layton@primarydata.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=trond.myklebust@primarydata.com \
    --cc=zab@zabbo.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.