From: Goldwyn Rodrigues <rgoldwyn@suse.com> While performing copy_file_range(), if superblocks of file_in and file_out don't match, instead of returning -EXDEV, perform splice for a faster copy. Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> --- fs/read_write.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 7d9dfb62ba7d..2c9e7a5ea806 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1546,6 +1546,8 @@ ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, size_t len, unsigned int flags, unsigned int splice_flags) { + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); ssize_t ret = 0; if (flags != 0) @@ -1554,6 +1556,9 @@ ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, if (len == 0) return 0; + if (inode_in->i_sb != inode_out->i_sb) + goto splice; + /* * Try cloning first, this is supported by more file systems, and * more efficient if both clone and copy are supported (e.g. NFS). @@ -1571,7 +1576,7 @@ ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, if (ret != -EOPNOTSUPP) return ret; } - +splice: ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, len > MAX_RW_COUNT ? MAX_RW_COUNT : len, splice_flags); return ret; @@ -1608,10 +1613,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, (file_out->f_flags & O_APPEND)) return -EBADF; - /* this could be relaxed once a method supports cross-fs copies */ - if (inode_in->i_sb != inode_out->i_sb) - return -EXDEV; - file_start_write(file_out); ret = do_copy_file_range(file_in, pos_in, -- 2.16.3
On 05/08/2018 11:24 PM, Goldwyn Rodrigues wrote:
> While performing copy_file_range(), if superblocks of file_in and
> file_out don't match, instead of returning -EXDEV, perform
> splice for a faster copy.
We have a userspace emulation in glibc which used to be quite faithful,
including the EXDEV error (which is not strictly necessary to produce).
Should we change glibc to perform a userspace copy if the system call
returns EXDEV due to an older kernel?
Thanks,
Florian
On Wed, May 9, 2018 at 12:24 AM, Goldwyn Rodrigues <rgoldwyn@suse.de> wrote: > From: Goldwyn Rodrigues <rgoldwyn@suse.com> > > While performing copy_file_range(), if superblocks of file_in and > file_out don't match, instead of returning -EXDEV, perform > splice for a faster copy. > > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> Reviewed-by: Amir Goldstein <amir73il@gmail.com> > --- > fs/read_write.c | 11 ++++++----- > 1 file changed, 6 insertions(+), 5 deletions(-) > > diff --git a/fs/read_write.c b/fs/read_write.c > index 7d9dfb62ba7d..2c9e7a5ea806 100644 > --- a/fs/read_write.c > +++ b/fs/read_write.c > @@ -1546,6 +1546,8 @@ ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, > size_t len, unsigned int flags, > unsigned int splice_flags) > { > + struct inode *inode_in = file_inode(file_in); > + struct inode *inode_out = file_inode(file_out); > ssize_t ret = 0; > > if (flags != 0) > @@ -1554,6 +1556,9 @@ ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, > if (len == 0) > return 0; > > + if (inode_in->i_sb != inode_out->i_sb) > + goto splice; > + > /* > * Try cloning first, this is supported by more file systems, and > * more efficient if both clone and copy are supported (e.g. NFS). > @@ -1571,7 +1576,7 @@ ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, > if (ret != -EOPNOTSUPP) > return ret; > } > - > +splice: > ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, > len > MAX_RW_COUNT ? 
MAX_RW_COUNT : len, splice_flags); > return ret; > @@ -1608,10 +1613,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, > (file_out->f_flags & O_APPEND)) > return -EBADF; > > - /* this could be relaxed once a method supports cross-fs copies */ > - if (inode_in->i_sb != inode_out->i_sb) > - return -EXDEV; > - > file_start_write(file_out); > > ret = do_copy_file_range(file_in, pos_in, > -- > 2.16.3 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-unionfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On 05/08/2018 04:57 PM, Florian Weimer wrote:
> On 05/08/2018 11:24 PM, Goldwyn Rodrigues wrote:
>> While performing copy_file_range(), if superblocks of file_in and
>> file_out don't match, instead of returning -EXDEV, perform
>> splice for a faster copy.
>
> We have a userspace emulation in glibc which used to be quite faithful,
> including the EXDEV error (which is not strictly necessary to produce).
>
> Should we change glibc to perform a userspace copy if the system call
> returns EXDEV due to an older kernel?
>
I don't see any purpose. The user would have to perform a copy anyway
if it receives -EXDEV.
--
Goldwyn
If copy_file_range performs a copy using splice, it converts holes to zeros. This effort primarily changes this behavior to create holes when it is possible. Even if copy_file_range() or clone_file_range() does not work for different mounted filesystems, we should be able to splice files if they do not belong to the same super_block. Changes since v1: - Fixed bug when hole/data offset is farther than len - [Amir] Refactor flags parameter Changes since v0: - [Amir] Carved out do_copy_file_range() which can be used by overlayfs -- Goldwyn
From: Goldwyn Rodrigues <rgoldwyn@suse.com> Preparatory patch to carve out do_copy_file_range() from vfs_copy_file_range Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> --- fs/read_write.c | 60 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index c4eabbfc90df..525f2a67e15a 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1541,6 +1541,38 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, } #endif +static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags) +{ + ssize_t ret = 0; + + if (len == 0) + return 0; + + /* + * Try cloning first, this is supported by more file systems, and + * more efficient if both clone and copy are supported (e.g. NFS). + */ + if (file_in->f_op->clone_file_range) { + ret = file_in->f_op->clone_file_range(file_in, pos_in, + file_out, pos_out, len); + if (ret == 0) + return len; + } + + if (file_out->f_op->copy_file_range) { + ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, + pos_out, len, flags); + if (ret != -EOPNOTSUPP) + return ret; + } + + ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, + len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); + return ret; +} + /* * copy_file_range() differs from regular file read and write in that it * specifically allows return partial success. When it does so is up to @@ -1579,35 +1611,11 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (inode_in->i_sb != inode_out->i_sb) return -EXDEV; - if (len == 0) - return 0; - file_start_write(file_out); - /* - * Try cloning first, this is supported by more file systems, and - * more efficient if both clone and copy are supported (e.g. NFS). 
- */ - if (file_in->f_op->clone_file_range) { - ret = file_in->f_op->clone_file_range(file_in, pos_in, - file_out, pos_out, len); - if (ret == 0) { - ret = len; - goto done; - } - } - - if (file_out->f_op->copy_file_range) { - ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, - pos_out, len, flags); - if (ret != -EOPNOTSUPP) - goto done; - } - - ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, - len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); + ret = do_copy_file_range(file_in, pos_in, + file_out, pos_out, len, flags); -done: if (ret > 0) { fsnotify_access(file_in); add_rchar(current, ret); -- 2.16.3
From: Goldwyn Rodrigues <rgoldwyn@suse.com> While performing copy_file_range(), if superblocks of file_in and file_out don't match, instead of returning -EXDEV, perform splice for a faster copy. Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> Reviewed-by: Amir Goldstein <amir73il@gmail.com> --- fs/read_write.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 525f2a67e15a..1b8fc9eada69 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1545,11 +1545,16 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); ssize_t ret = 0; if (len == 0) return 0; + if (inode_in->i_sb != inode_out->i_sb) + goto splice; + /* * Try cloning first, this is supported by more file systems, and * more efficient if both clone and copy are supported (e.g. NFS). @@ -1567,7 +1572,7 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, if (ret != -EOPNOTSUPP) return ret; } - +splice: ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); return ret; @@ -1607,10 +1612,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, (file_out->f_flags & O_APPEND)) return -EBADF; - /* this could be relaxed once a method supports cross-fs copies */ - if (inode_in->i_sb != inode_out->i_sb) - return -EXDEV; - file_start_write(file_out); ret = do_copy_file_range(file_in, pos_in, -- 2.16.3
From: Goldwyn Rodrigues <rgoldwyn@suse.com> copy_file_range calls do_splice_direct() if fs->clone_file_range or fs->copy_file_range() is not available. However, do_splice_direct() converts holes to zeros. Detect holes in the file_in range, and create them in the corresponding file_out range. If there is already data present at the offset in file_out, attempt to punch a hole there. If the operation is not supported, fall back to performing splice on the whole range. Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> Reviewed-by: Amir Goldstein <amir73il@gmail.com> --- fs/read_write.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 4 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 1b8fc9eada69..e765fec656af 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -20,6 +20,7 @@ #include <linux/compat.h> #include <linux/mount.h> #include <linux/fs.h> +#include <linux/falloc.h> #include "internal.h" #include <linux/uaccess.h> @@ -1547,7 +1548,8 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); - ssize_t ret = 0; + ssize_t ret = 0, total = 0; + loff_t size, end; if (len == 0) return 0; @@ -1572,10 +1574,62 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, if (ret != -EOPNOTSUPP) return ret; } + splice: - ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, - len > MAX_RW_COUNT ? 
MAX_RW_COUNT : len, 0); - return ret; + while (total < len) { + end = vfs_llseek(file_in, pos_in, SEEK_HOLE); + + /* Starting position is already in a hole */ + if (end == pos_in) + goto hole; + size = end - pos_in; +do_splice: + if (size > len - total) + size = len - total; + ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, + size, 0); + if (ret < 0) + goto out; + total += ret; + if (total == len) + break; +hole: + end = vfs_llseek(file_in, pos_in, SEEK_DATA); + if (end < 0) { + ret = end; + goto out; + } + size = end - pos_in; + if (size > len - total) + size = len - total; + /* Data on offset, punch holes */ + if (i_size_read(file_out->f_inode) > pos_out) { + ret = vfs_fallocate(file_out, + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + pos_out, size); + if (ret < 0) { + /* + * The filesystem does not support punching + * holes. Perform splice on the remaining range. + */ + if (ret == -EOPNOTSUPP) { + size = len - total; + goto do_splice; + } + goto out; + } + } + if (ret < 0) { + ret = end; + goto out; + } + pos_out += size; + pos_in = end; + total += size; + } + +out: + return total ? total : ret; } /* -- 2.16.3
From: Goldwyn Rodrigues <rgoldwyn@suse.com> This will preserve the holes by copying the chunks of data. If available it will use clone(). Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> Reviewed-by: Amir Goldstein <amir73il@gmail.com> --- fs/overlayfs/copy_up.c | 28 ++++++++-------------------- fs/read_write.c | 10 ++++++---- include/linux/fs.h | 3 +++ 3 files changed, 17 insertions(+), 24 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 8bede0742619..1f89380873ce 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -138,8 +138,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) { struct file *old_file; struct file *new_file; - loff_t old_pos = 0; - loff_t new_pos = 0; + loff_t pos = 0; int error = 0; if (len == 0) @@ -155,38 +154,27 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) goto out_fput; } - /* Try to use clone_file_range to clone up within the same fs */ - error = vfs_clone_file_range(old_file, 0, new_file, 0, len); - if (!error) - goto out; - /* Couldn't clone, so now we try to copy the data */ - error = 0; - - /* FIXME: copy up sparse files efficiently */ - while (len) { + while (pos < len) { size_t this_len = OVL_COPY_UP_CHUNK_SIZE; long bytes; - if (len < this_len) - this_len = len; + if (len - pos < this_len) + this_len = len - pos; if (signal_pending_state(TASK_KILLABLE, current)) { error = -EINTR; break; } - bytes = do_splice_direct(old_file, &old_pos, - new_file, &new_pos, - this_len, SPLICE_F_MOVE); + bytes = do_copy_file_range(old_file, pos, + new_file, pos, + this_len, 0, SPLICE_F_MOVE); if (bytes <= 0) { error = bytes; break; } - WARN_ON(old_pos != new_pos); - - len -= bytes; + pos += bytes; } -out: if (!error) error = vfs_fsync(new_file, 0); fput(new_file); diff --git a/fs/read_write.c b/fs/read_write.c index e765fec656af..50d7ef77410f 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1542,9 +1542,10 @@ 
COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, } #endif -static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, +ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - size_t len, unsigned int flags) + size_t len, unsigned int flags, + unsigned int splice_flags) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); @@ -1587,7 +1588,7 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, if (size > len - total) size = len - total; ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, - size, 0); + size, splice_flags); if (ret < 0) goto out; total += ret; @@ -1631,6 +1632,7 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, out: return total ? total : ret; } +EXPORT_SYMBOL(do_copy_file_range); /* * copy_file_range() differs from regular file read and write in that it @@ -1669,7 +1671,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, file_start_write(file_out); ret = do_copy_file_range(file_in, pos_in, - file_out, pos_out, len, flags); + file_out, pos_out, len, flags, 0); if (ret > 0) { fsnotify_access(file_in); diff --git a/include/linux/fs.h b/include/linux/fs.h index 760d8da1b6c7..d5349b17fa10 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1799,6 +1799,9 @@ extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_readv(struct file *, const struct iovec __user *, unsigned long, loff_t *, rwf_t); +extern ssize_t do_copy_file_range(struct file *, loff_t , struct file *, + loff_t , size_t, unsigned int, + unsigned int); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, -- 2.16.3
On Thu, May 10, 2018 at 4:58 AM, Goldwyn Rodrigues <rgoldwyn@suse.de> wrote: > From: Goldwyn Rodrigues <rgoldwyn@suse.com> > > copy_file_range calls do_splice_direct() if fs->clone_file_range > or fs->copy_file_range() is not available. However, do_splice_direct() > converts holes to zeros. Detect holes in the file_in range, and > create them in the corresponding file_out range. > > If there is already data present at the offset in file_out, attempt > to punch a hole there. If the operation is not supported, fall > back to performing splice on the whole range. > > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> > Reviewed-by: Amir Goldstein <amir73il@gmail.com> > --- > fs/read_write.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++---- > 1 file changed, 58 insertions(+), 4 deletions(-) > > diff --git a/fs/read_write.c b/fs/read_write.c > index 1b8fc9eada69..e765fec656af 100644 > --- a/fs/read_write.c > +++ b/fs/read_write.c > @@ -20,6 +20,7 @@ > #include <linux/compat.h> > #include <linux/mount.h> > #include <linux/fs.h> > +#include <linux/falloc.h> > #include "internal.h" > > #include <linux/uaccess.h> > @@ -1547,7 +1548,8 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, > { > struct inode *inode_in = file_inode(file_in); > struct inode *inode_out = file_inode(file_out); > - ssize_t ret = 0; > + ssize_t ret = 0, total = 0; > + loff_t size, end; > > if (len == 0) > return 0; > @@ -1572,10 +1574,62 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, > if (ret != -EOPNOTSUPP) > return ret; > } > + > splice: > - ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, > - len > MAX_RW_COUNT ? 
MAX_RW_COUNT : len, 0); > - return ret; > + while (total < len) { > + end = vfs_llseek(file_in, pos_in, SEEK_HOLE); > + > + /* Starting position is already in a hole */ > + if (end == pos_in) > + goto hole; > + size = end - pos_in; > +do_splice: > + if (size > len - total) > + size = len - total; > + ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, > + size, 0); I wonder, can do_splice_direct() return short copy (< size)? If so, code below will try to punch a zero length hole. Best put some protection here, don't you think? > + if (ret < 0) > + goto out; > + total += ret; > + if (total == len) > + break; > +hole: > + end = vfs_llseek(file_in, pos_in, SEEK_DATA); > + if (end < 0) { > + ret = end; > + goto out; > + } > + size = end - pos_in; > + if (size > len - total) > + size = len - total; > + /* Data on offset, punch holes */ > + if (i_size_read(file_out->f_inode) > pos_out) { > + ret = vfs_fallocate(file_out, > + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, > + pos_out, size); I'm afraid you have more re-factoring to do vfs_fallocate() does file_start_write() - you probably need do_fallocate(). > + if (ret < 0) { > + /* > + * The filesystem does not support punching > + * holes. Perform splice on the remaining range. > + */ > + if (ret == -EOPNOTSUPP) { > + size = len - total; > + goto do_splice; > + } > + goto out; > + } > + } > + if (ret < 0) { > + ret = end; > + goto out; > + } > + pos_out += size; > + pos_in = end; > + total += size; > + } > + > +out: > + return total ? total : ret; > } > > /* > -- > 2.16.3 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-unionfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, May 10, 2018 at 4:58 AM, Goldwyn Rodrigues <rgoldwyn@suse.de> wrote:
> If copy_file_range performs a copy using splice, it converts holes
> to zeros. This effort primarily changes this behavior to create
> holes when it is possible.
>
> Even if copy_file_range() or clone_file_range() does not work for different
> mounted filesystems, We should be able to splice files if they do not
> belong the same super_block.
>
> Changes since v1:
> - Fixed bug when hole/data offset is farther than len
Please specify how you tested this work.
Did you run the xfstests generic/copy tests that Anna wrote?
If the tests caught your bugs - good, if they didn't, please
write more tests to cover the issues you found.
One thing I do not see in the existing tests is coverage for the
trivial case of "only holes" (i.e. touch x; truncate -s 10 x).
Thanks,
Amir.
On Thu, May 10, 2018 at 7:42 AM, Amir Goldstein <amir73il@gmail.com> wrote:
> On Thu, May 10, 2018 at 4:58 AM, Goldwyn Rodrigues <rgoldwyn@suse.de> wrote:
>> From: Goldwyn Rodrigues <rgoldwyn@suse.com>
>>
>> copy_file_range calls do_splice_direct() if fs->clone_file_range
>> or fs->copy_file_range() is not available. However, do_splice_direct()
>> converts holes to zeros. Detect holes in the file_in range, and
>> create them in the corresponding file_out range.
>>
>> If there is already data present at the offset in file_out, attempt
>> to punch a hole there. If the operation is not supported, fall
>> back to performing splice on the whole range.
>>
>> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
>> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
>> ---
>> fs/read_write.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
>> 1 file changed, 58 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/read_write.c b/fs/read_write.c
>> index 1b8fc9eada69..e765fec656af 100644
>> --- a/fs/read_write.c
>> +++ b/fs/read_write.c
>> @@ -20,6 +20,7 @@
>> #include <linux/compat.h>
>> #include <linux/mount.h>
>> #include <linux/fs.h>
>> +#include <linux/falloc.h>
>> #include "internal.h"
>>
>> #include <linux/uaccess.h>
>> @@ -1547,7 +1548,8 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
>> {
>> struct inode *inode_in = file_inode(file_in);
>> struct inode *inode_out = file_inode(file_out);
>> - ssize_t ret = 0;
>> + ssize_t ret = 0, total = 0;
>> + loff_t size, end;
>>
>> if (len == 0)
>> return 0;
>> @@ -1572,10 +1574,62 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
>> if (ret != -EOPNOTSUPP)
>> return ret;
>> }
>> +
>> splice:
>> - ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
>> - len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
>> - return ret;
>> + while (total < len) {
>> + end = vfs_llseek(file_in, pos_in, SEEK_HOLE);
>> +
>> + /* Starting position is already in a hole */
>> + if (end == pos_in)
>> + goto hole;
>> + size = end - pos_in;
>> +do_splice:
>> + if (size > len - total)
>> + size = len - total;
>> + ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
>> + size, 0);
>
> I wonder, can do_splice_direct() return short copy (< size)?
> If so, code below will try to punch a zero length hole.
> Best put some protection here, don't you think?
>
>> + if (ret < 0)
>> + goto out;
>> + total += ret;
>> + if (total == len)
>> + break;
>> +hole:
>> + end = vfs_llseek(file_in, pos_in, SEEK_DATA);
>> + if (end < 0) {
>> + ret = end;
>> + goto out;
>> + }
>> + size = end - pos_in;
>> + if (size > len - total)
>> + size = len - total;
>> + /* Data on offset, punch holes */
>> + if (i_size_read(file_out->f_inode) > pos_out) {
>> + ret = vfs_fallocate(file_out,
>> + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
>> + pos_out, size);
>
> I'm afraid you have more re-factoring to do vfs_fallocate() does
> file_start_write() -
> you probably need do_fallocate().
>
>
I was trying to look for a pattern of what goes in vfs_ helpers and their
corresponding do_ helpers and I can't say I found a single pattern.
What stood out for me is that do_clone_file_range() is a wrapper
around vfs_clone_file_range() while do_truncate() is a helper
of vfs_truncate(). I did not survey all of those helpers, but I have
a feeling that the latter is the more common pattern and I know
who to blame for the former...
Anyway, this anomaly explains why overlayfs calls
vfs_clone_file_range() and it cannot call vfs_fallocate()
from the copy up loop context.
I advise you to turn on LOCKDEP while testing to be warned
about this sort of thing.
Thanks,
Amir.
On 05/10/2018 12:47 AM, Amir Goldstein wrote: > On Thu, May 10, 2018 at 4:58 AM, Goldwyn Rodrigues <rgoldwyn@suse.de> wrote: >> If copy_file_range performs a copy using splice, it converts holes >> to zeros. This effort primarily changes this behavior to create >> holes when it is possible. >> >> Even if copy_file_range() or clone_file_range() does not work for different >> mounted filesystems, We should be able to splice files if they do not >> belong the same super_block. >> >> Changes since v1: >> - Fixed bug when hole/data offset is farther than len > > Please specify how you tested this work. > Did you run the xfstests generic/copy tests that Anna wrote? > If the tests caught your bugs - good, if they didn't, please > write more tests to cover the issues you found. > One thing I do not see in the existing tests is coverage for the > trivial case of "only holes" (i.e. touch x; truncate -s 10 x). Yeah, that's probably an oversight on my part. I hadn't even thought about sparse files as I was writing those tests. Please add tests for holes! Anna > > Thanks, > Amir. >
From: Goldwyn Rodrigues <rgoldwyn@suse.com> While performing copy_file_range(), if superblocks of file_in and file_out don't match, instead of returning -EXDEV, perform splice for a faster copy. Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> Reviewed-by: Amir Goldstein <amir73il@gmail.com> --- fs/read_write.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 525f2a67e15a..1b8fc9eada69 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1545,11 +1545,16 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); ssize_t ret = 0; if (len == 0) return 0; + if (inode_in->i_sb != inode_out->i_sb) + goto splice; + /* * Try cloning first, this is supported by more file systems, and * more efficient if both clone and copy are supported (e.g. NFS). @@ -1567,7 +1572,7 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, if (ret != -EOPNOTSUPP) return ret; } - +splice: ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); return ret; @@ -1607,10 +1612,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, (file_out->f_flags & O_APPEND)) return -EBADF; - /* this could be relaxed once a method supports cross-fs copies */ - if (inode_in->i_sb != inode_out->i_sb) - return -EXDEV; - file_start_write(file_out); ret = do_copy_file_range(file_in, pos_in, -- 2.16.3
From: Goldwyn Rodrigues <rgoldwyn@suse.com> While performing copy_file_range(), if superblocks of file_in and file_out don't match, instead of returning -EXDEV, perform splice for a faster copy. Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> Reviewed-by: Amir Goldstein <amir73il@gmail.com> --- fs/read_write.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 525f2a67e15a..1b8fc9eada69 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1545,11 +1545,16 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); ssize_t ret = 0; if (len == 0) return 0; + if (inode_in->i_sb != inode_out->i_sb) + goto splice; + /* * Try cloning first, this is supported by more file systems, and * more efficient if both clone and copy are supported (e.g. NFS). @@ -1567,7 +1572,7 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, if (ret != -EOPNOTSUPP) return ret; } - +splice: ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); return ret; @@ -1607,10 +1612,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, (file_out->f_flags & O_APPEND)) return -EBADF; - /* this could be relaxed once a method supports cross-fs copies */ - if (inode_in->i_sb != inode_out->i_sb) - return -EXDEV; - file_start_write(file_out); ret = do_copy_file_range(file_in, pos_in, -- 2.16.3