If copy_file_range performs a copy using splice, it converts holes to zeros. This effort primarily changes this behavior to create holes when it is possible. Even if copy_file_range() or clone_file_range() does not work for different mounted filesystems, we should be able to splice files if they do not belong to the same super_block. Changes since v0: - [Amir] Carved out do_copy_file_range() which can be used by overlayfs -- Goldwyn
From: Goldwyn Rodrigues <rgoldwyn@suse.com> Preparatory patch to carve out do_copy_file_range() from vfs_copy_file_range Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> --- fs/read_write.c | 67 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index c4eabbfc90df..7d9dfb62ba7d 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1541,6 +1541,42 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, } #endif +ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags, + unsigned int splice_flags) +{ + ssize_t ret = 0; + + if (flags != 0) + return -EINVAL; + + if (len == 0) + return 0; + + /* + * Try cloning first, this is supported by more file systems, and + * more efficient if both clone and copy are supported (e.g. NFS). + */ + if (file_in->f_op->clone_file_range) { + ret = file_in->f_op->clone_file_range(file_in, pos_in, + file_out, pos_out, len); + if (ret == 0) + return len; + } + + if (file_out->f_op->copy_file_range) { + ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, + pos_out, len, flags); + if (ret != -EOPNOTSUPP) + return ret; + } + + ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, + len > MAX_RW_COUNT ? MAX_RW_COUNT : len, splice_flags); + return ret; +} + /* * copy_file_range() differs from regular file read and write in that it * specifically allows return partial success. 
When it does so is up to @@ -1554,9 +1590,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, struct inode *inode_out = file_inode(file_out); ssize_t ret; - if (flags != 0) - return -EINVAL; - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) return -EISDIR; if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) @@ -1579,35 +1612,11 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (inode_in->i_sb != inode_out->i_sb) return -EXDEV; - if (len == 0) - return 0; - file_start_write(file_out); - /* - * Try cloning first, this is supported by more file systems, and - * more efficient if both clone and copy are supported (e.g. NFS). - */ - if (file_in->f_op->clone_file_range) { - ret = file_in->f_op->clone_file_range(file_in, pos_in, - file_out, pos_out, len); - if (ret == 0) { - ret = len; - goto done; - } - } - - if (file_out->f_op->copy_file_range) { - ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, - pos_out, len, flags); - if (ret != -EOPNOTSUPP) - goto done; - } - - ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, - len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); + ret = do_copy_file_range(file_in, pos_in, + file_out, pos_out, len, flags, 0); -done: if (ret > 0) { fsnotify_access(file_in); add_rchar(current, ret); -- 2.16.3
From: Goldwyn Rodrigues <rgoldwyn@suse.com> While performing copy_file_range(), if superblocks of file_in and file_out don't match, instead of returning -EXDEV, perform splice for a faster copy. Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> --- fs/read_write.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 7d9dfb62ba7d..2c9e7a5ea806 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1546,6 +1546,8 @@ ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, size_t len, unsigned int flags, unsigned int splice_flags) { + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); ssize_t ret = 0; if (flags != 0) @@ -1554,6 +1556,9 @@ ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, if (len == 0) return 0; + if (inode_in->i_sb != inode_out->i_sb) + goto splice; + /* * Try cloning first, this is supported by more file systems, and * more efficient if both clone and copy are supported (e.g. NFS). @@ -1571,7 +1576,7 @@ ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, if (ret != -EOPNOTSUPP) return ret; } - +splice: ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, len > MAX_RW_COUNT ? MAX_RW_COUNT : len, splice_flags); return ret; @@ -1608,10 +1613,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, (file_out->f_flags & O_APPEND)) return -EBADF; - /* this could be relaxed once a method supports cross-fs copies */ - if (inode_in->i_sb != inode_out->i_sb) - return -EXDEV; - file_start_write(file_out); ret = do_copy_file_range(file_in, pos_in, -- 2.16.3
From: Goldwyn Rodrigues <rgoldwyn@suse.com> copy_file_range calls do_splice_direct() if fs->clone_file_range or fs->copy_file_range() is not available. However, do_splice_direct() converts holes to zeros. Detect holes in the file_in range, and create them in the corresponding file_out range. If there is already data present at the offset in file_out, attempt to punch a hole there. If the operation is not supported, fall back to performing splice on the whole range. Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> --- fs/read_write.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 5 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 2c9e7a5ea806..5df9d6e8ebee 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -20,6 +20,7 @@ #include <linux/compat.h> #include <linux/mount.h> #include <linux/fs.h> +#include <linux/falloc.h> #include "internal.h" #include <linux/uaccess.h> @@ -1541,14 +1542,15 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, } #endif -ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, +static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags, unsigned int splice_flags) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); - ssize_t ret = 0; + ssize_t ret = 0, total = 0; + loff_t size, end; if (flags != 0) return -EINVAL; @@ -1576,10 +1578,58 @@ ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, if (ret != -EOPNOTSUPP) return ret; } + splice: - ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, - len > MAX_RW_COUNT ? 
MAX_RW_COUNT : len, splice_flags); - return ret; + while (total < len) { + end = vfs_llseek(file_in, pos_in, SEEK_HOLE); + + /* Starting position is already in a hole */ + if (end == pos_in) + goto hole; + size = end - pos_in; +do_splice: + ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, + size, 0); + if (ret < 0) + goto out; + total += ret; + if (total == len) + break; +hole: + end = vfs_llseek(file_in, pos_in, SEEK_DATA); + if (end < 0) { + ret = end; + goto out; + } + size = end - pos_in; + /* Data on offset, punch holes */ + if (i_size_read(file_out->f_inode) > pos_out) { + ret = vfs_fallocate(file_out, + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + pos_out, size); + if (ret < 0) { + /* + * The filesystem does not support punching + * holes. Perform splice on the remaining range. + */ + if (ret == -EOPNOTSUPP) { + size = len - total; + goto do_splice; + } + goto out; + } + } + if (ret < 0) { + ret = end; + goto out; + } + pos_out += size; + pos_in = end; + total += size; + } + +out: + return total ? total : ret; } /* -- 2.16.3
From: Goldwyn Rodrigues <rgoldwyn@suse.com> This will preserve the holes and will clone(), if available. Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> --- fs/overlayfs/copy_up.c | 28 ++++++++-------------------- fs/read_write.c | 3 ++- include/linux/fs.h | 3 +++ 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 8bede0742619..1f89380873ce 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -138,8 +138,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) { struct file *old_file; struct file *new_file; - loff_t old_pos = 0; - loff_t new_pos = 0; + loff_t pos = 0; int error = 0; if (len == 0) @@ -155,38 +154,27 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) goto out_fput; } - /* Try to use clone_file_range to clone up within the same fs */ - error = vfs_clone_file_range(old_file, 0, new_file, 0, len); - if (!error) - goto out; - /* Couldn't clone, so now we try to copy the data */ - error = 0; - - /* FIXME: copy up sparse files efficiently */ - while (len) { + while (pos < len) { size_t this_len = OVL_COPY_UP_CHUNK_SIZE; long bytes; - if (len < this_len) - this_len = len; + if (len - pos < this_len) + this_len = len - pos; if (signal_pending_state(TASK_KILLABLE, current)) { error = -EINTR; break; } - bytes = do_splice_direct(old_file, &old_pos, - new_file, &new_pos, - this_len, SPLICE_F_MOVE); + bytes = do_copy_file_range(old_file, pos, + new_file, pos, + this_len, 0, SPLICE_F_MOVE); if (bytes <= 0) { error = bytes; break; } - WARN_ON(old_pos != new_pos); - - len -= bytes; + pos += bytes; } -out: if (!error) error = vfs_fsync(new_file, 0); fput(new_file); diff --git a/fs/read_write.c b/fs/read_write.c index 5df9d6e8ebee..57b5b74c982a 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1542,7 +1542,7 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, } #endif -static ssize_t do_copy_file_range(struct file 
*file_in, loff_t pos_in, +ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags, unsigned int splice_flags) @@ -1631,6 +1631,7 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, out: return total ? total : ret; } +EXPORT_SYMBOL(do_copy_file_range); /* * copy_file_range() differs from regular file read and write in that it diff --git a/include/linux/fs.h b/include/linux/fs.h index 760d8da1b6c7..d5349b17fa10 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1799,6 +1799,9 @@ extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_readv(struct file *, const struct iovec __user *, unsigned long, loff_t *, rwf_t); +extern ssize_t do_copy_file_range(struct file *, loff_t , struct file *, + loff_t , size_t, unsigned int, + unsigned int); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, -- 2.16.3
On 05/08/2018 11:24 PM, Goldwyn Rodrigues wrote:
> While performing copy_file_range(), if superblocks of file_in and
> file_out don't match, instead of returning -EXDEV, perform
> splice for a faster copy.
We have a userspace emulation in glibc which used to be quite faithful,
including the EXDEV error (which is not strictly necessary to produce).
Should we change glibc to perform a userspace copy if the system call
returns EXDEV due to an older kernel?
Thanks,
Florian
On 05/08/2018 11:24 PM, Goldwyn Rodrigues wrote:
> copy_file_range calls do_splice_direct() if fs->clone_file_range
> or fs->copy_file_range() is not available. However, do_splice_direct()
> converts holes to zeros. Detect holes in the file_in range, and
> create them in the corresponding file_out range.
Should this be restricted to calls to copy_file_range with a
(yet-to-be-defined) flag?
I was under the impression that the hole-filling nature of
copy_file_range was not a bug.
Thanks,
Florian
On Tue, May 08, 2018 at 11:59:45PM +0200, Florian Weimer wrote:
> On 05/08/2018 11:24 PM, Goldwyn Rodrigues wrote:
> >copy_file_range calls do_splice_direct() if fs->clone_file_range
> >or fs->copy_file_range() is not available. However, do_splice_direct()
> >converts holes to zeros. Detect holes in the file_in range, and
> >create them in the corresponding file_out range.
>
> Should this be restricted to calls to copy_file_range with a
> (yet-to-be-defined) flag?
>
> I was under the impression that the hole-filling nature of
> copy_file_range was not a bug.
It's not a bug, but it's currently undefined behaviour. i.e. some
filesystem implementations will hole fill while others won't, and
you cannot predict ahead of time what will happen.
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
On Wed, May 9, 2018 at 12:24 AM, Goldwyn Rodrigues <rgoldwyn@suse.de> wrote: > From: Goldwyn Rodrigues <rgoldwyn@suse.com> > > While performing copy_file_range(), if superblocks of file_in and > file_out don't match, instead of returning -EXDEV, perform > splice for a faster copy. > > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> Reviewed-by: Amir Goldstein <amir73il@gmail.com> > --- > fs/read_write.c | 11 ++++++----- > 1 file changed, 6 insertions(+), 5 deletions(-) > > diff --git a/fs/read_write.c b/fs/read_write.c > index 7d9dfb62ba7d..2c9e7a5ea806 100644 > --- a/fs/read_write.c > +++ b/fs/read_write.c > @@ -1546,6 +1546,8 @@ ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, > size_t len, unsigned int flags, > unsigned int splice_flags) > { > + struct inode *inode_in = file_inode(file_in); > + struct inode *inode_out = file_inode(file_out); > ssize_t ret = 0; > > if (flags != 0) > @@ -1554,6 +1556,9 @@ ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, > if (len == 0) > return 0; > > + if (inode_in->i_sb != inode_out->i_sb) > + goto splice; > + > /* > * Try cloning first, this is supported by more file systems, and > * more efficient if both clone and copy are supported (e.g. NFS). > @@ -1571,7 +1576,7 @@ ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, > if (ret != -EOPNOTSUPP) > return ret; > } > - > +splice: > ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, > len > MAX_RW_COUNT ? 
MAX_RW_COUNT : len, splice_flags); > return ret; > @@ -1608,10 +1613,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, > (file_out->f_flags & O_APPEND)) > return -EBADF; > > - /* this could be relaxed once a method supports cross-fs copies */ > - if (inode_in->i_sb != inode_out->i_sb) > - return -EXDEV; > - > file_start_write(file_out); > > ret = do_copy_file_range(file_in, pos_in, > -- > 2.16.3 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-unionfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, May 9, 2018 at 12:24 AM, Goldwyn Rodrigues <rgoldwyn@suse.de> wrote: > From: Goldwyn Rodrigues <rgoldwyn@suse.com> > > copy_file_range calls do_splice_direct() if fs->clone_file_range > or fs->copy_file_range() is not available. However, do_splice_direct() > converts holes to zeros. Detect holes in the file_in range, and > create them in the corresponding file_out range. > > If there is already data present at the offset in file_out, attempt > to punch a hole there. If the operation is not supported, fall > back to performing splice on the whole range. > > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> Reviewed-by: Amir Goldstein <amir73il@gmail.com> but.. > --- > fs/read_write.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++----- > 1 file changed, 55 insertions(+), 5 deletions(-) > > diff --git a/fs/read_write.c b/fs/read_write.c > index 2c9e7a5ea806..5df9d6e8ebee 100644 > --- a/fs/read_write.c > +++ b/fs/read_write.c > @@ -20,6 +20,7 @@ > #include <linux/compat.h> > #include <linux/mount.h> > #include <linux/fs.h> > +#include <linux/falloc.h> > #include "internal.h" > > #include <linux/uaccess.h> > @@ -1541,14 +1542,15 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, > } > #endif > > -ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, > +static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, Either make it static to begin with or non static and exported to begin with. Thanks, Amir.
On Wed, May 9, 2018 at 12:24 AM, Goldwyn Rodrigues <rgoldwyn@suse.de> wrote:
> From: Goldwyn Rodrigues <rgoldwyn@suse.com>
>
> This will preserve the holes and will clone(), if available.
>
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Only please mention in commit message that it changes behavior
slightly for a very large file (clone in chunks).
I see no problem with this change.
And please test with xfstest overlay/001, which copies up a large
sparse file. Test time should drop from ~30s to 0s.
If you like I can test that one for you.
I believe there are also generic copy_file_range tests in xfstests.
Thanks,
Amir.
On 05/08/2018 04:57 PM, Florian Weimer wrote:
> On 05/08/2018 11:24 PM, Goldwyn Rodrigues wrote:
>> While performing copy_file_range(), if superblocks of file_in and
>> file_out don't match, instead of returning -EXDEV, perform
>> splice for a faster copy.
>
> We have a userspace emulation in glibc which used to be quite faithful,
> including the EXDEV error (which is not strictly necessary to produce).
>
> Should we change glibc to perform a userspace copy if the system call
> returns EXDEV due to an older kernel?
>
I don't see any purpose. The user would anyway have to perform a copy
if it receives -EXDEV.
--
Goldwyn
On 05/09/2018 12:50 AM, Amir Goldstein wrote: > On Wed, May 9, 2018 at 12:24 AM, Goldwyn Rodrigues <rgoldwyn@suse.de> wrote: >> From: Goldwyn Rodrigues <rgoldwyn@suse.com> >> >> This will preserve the holes and will clone(), if available. >> >> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> > Reviewed-by: Amir Goldstein <amir73il@gmail.com> > > Only please mention in commit message that it changes behavior > slightly for a very large file (clone in chunks). Change behavior? Only it will have holes. It will still respect length. Actually, I found a bug when it would not respect length if offset is farther than length which I have fixed. > I see no problem with this change. > > And please test with xfstest overlay/001 which copies up a large > sparse file. test time should drop from ~30s to 0s. Yup, it passes in 1s on my VM :) > If you like I can test that one for you. > I believe there are also generic copy_file_range tests in xfstests. > Thanks for the review -- Goldwyn
From: Goldwyn Rodrigues <rgoldwyn@suse.com> While performing copy_file_range(), if superblocks of file_in and file_out don't match, instead of returning -EXDEV, perform splice for a faster copy. Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> Reviewed-by: Amir Goldstein <amir73il@gmail.com> --- fs/read_write.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 525f2a67e15a..1b8fc9eada69 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1545,11 +1545,16 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); ssize_t ret = 0; if (len == 0) return 0; + if (inode_in->i_sb != inode_out->i_sb) + goto splice; + /* * Try cloning first, this is supported by more file systems, and * more efficient if both clone and copy are supported (e.g. NFS). @@ -1567,7 +1572,7 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, if (ret != -EOPNOTSUPP) return ret; } - +splice: ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); return ret; @@ -1607,10 +1612,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, (file_out->f_flags & O_APPEND)) return -EBADF; - /* this could be relaxed once a method supports cross-fs copies */ - if (inode_in->i_sb != inode_out->i_sb) - return -EXDEV; - file_start_write(file_out); ret = do_copy_file_range(file_in, pos_in, -- 2.16.3
On Wed, May 9, 2018 at 10:13 PM, Goldwyn Rodrigues <rgoldwyn@suse.de> wrote:
>
>
> On 05/09/2018 12:50 AM, Amir Goldstein wrote:
>> On Wed, May 9, 2018 at 12:24 AM, Goldwyn Rodrigues <rgoldwyn@suse.de> wrote:
>>> From: Goldwyn Rodrigues <rgoldwyn@suse.com>
>>>
>>> This will preserve the holes and will clone(), if available.
>>>
>>> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
>> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
>>
>> Only please mention in commit message that it changes behavior
>> slightly for a very large file (clone in chunks).
>
> Change behavior? Only it will have holes. It will still respect length.
> Actually, I found a bug when it would not respect length if offset is
> farther than length which I have fixed.
What I meant is the change of behavior for when underlying fs supports
clone.
Your patch changes the behavior for a very large file from single call
to vfs_clone_file_range() on entire length to several calls in a loop.
Never mind. It's too insignificant for anyone to care.
If overlayfs ever supports NFS as upper layer, we may want to rethink
this.
Thanks,
Amir.
From: Goldwyn Rodrigues <rgoldwyn@suse.com> While performing copy_file_range(), if superblocks of file_in and file_out don't match, instead of returning -EXDEV, perform splice for a faster copy. Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> Reviewed-by: Amir Goldstein <amir73il@gmail.com> --- fs/read_write.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 525f2a67e15a..1b8fc9eada69 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1545,11 +1545,16 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); ssize_t ret = 0; if (len == 0) return 0; + if (inode_in->i_sb != inode_out->i_sb) + goto splice; + /* * Try cloning first, this is supported by more file systems, and * more efficient if both clone and copy are supported (e.g. NFS). @@ -1567,7 +1572,7 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, if (ret != -EOPNOTSUPP) return ret; } - +splice: ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); return ret; @@ -1607,10 +1612,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, (file_out->f_flags & O_APPEND)) return -EBADF; - /* this could be relaxed once a method supports cross-fs copies */ - if (inode_in->i_sb != inode_out->i_sb) - return -EXDEV; - file_start_write(file_out); ret = do_copy_file_range(file_in, pos_in, -- 2.16.3
From: Goldwyn Rodrigues <rgoldwyn@suse.com> While performing copy_file_range(), if superblocks of file_in and file_out don't match, instead of returning -EXDEV, perform splice for a faster copy. Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> Reviewed-by: Amir Goldstein <amir73il@gmail.com> --- fs/read_write.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 525f2a67e15a..1b8fc9eada69 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1545,11 +1545,16 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); ssize_t ret = 0; if (len == 0) return 0; + if (inode_in->i_sb != inode_out->i_sb) + goto splice; + /* * Try cloning first, this is supported by more file systems, and * more efficient if both clone and copy are supported (e.g. NFS). @@ -1567,7 +1572,7 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, if (ret != -EOPNOTSUPP) return ret; } - +splice: ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); return ret; @@ -1607,10 +1612,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, (file_out->f_flags & O_APPEND)) return -EBADF; - /* this could be relaxed once a method supports cross-fs copies */ - if (inode_in->i_sb != inode_out->i_sb) - return -EXDEV; - file_start_write(file_out); ret = do_copy_file_range(file_in, pos_in, -- 2.16.3