* [PATCH v2] ovl: improving copy-up efficiency for big sparse file
@ 2019-10-30 12:44 Chengguang Xu
  2019-10-30 15:50 ` Amir Goldstein
  0 siblings, 1 reply; 12+ messages in thread
From: Chengguang Xu @ 2019-10-30 12:44 UTC (permalink / raw)
  To: miklos, amir73il; +Cc: linux-unionfs, Chengguang Xu

Current copy-up is not efficient for big sparse files:
it is not only slow but also wastes extra disk space
when the target lower file has huge holes inside.
This patch tries to recognize file holes and skip them
during copy-up.

In detail, this optimization checks for holes at
copy-up chunk granularity, so it may not recognize
every kind of hole in a file. However, it is easy to
implement and should be enough for most use cases.

Additionally, this optimization relies on the lseek(2)
SEEK_DATA implementation, so filesystems which do not
support this feature will behave as before on copy-up.

Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
---

Hi Miklos, Amir

This is v2 of the hole copy-up improvement, which
addresses Amir's concerns from the previous email.

Could you have a look at this patch?

There is a checkpatch warning, but it is a false
positive, so you can ignore it.

I've tested the patch with the cases under the overlay dir
(including overlay/066) in fstests for the xfs/ext4 fstypes,
and it passed all of them except the following.

overlay/045     [not run] fsck.overlay utility required, skipped this test
overlay/046     [not run] fsck.overlay utility required, skipped this test
overlay/056     [not run] fsck.overlay utility required, skipped this test

The above three cases are fsck-related,
so I think they are not relevant to copy-up.

overlay/061     - output mismatch (see /home/cgxu/git/xfstests-dev/results//overlay/061.out.bad)
    --- tests/overlay/061.out   2019-05-28 09:54:42.320874925 +0800
    +++ /home/cgxu/git/xfstests-dev/results//overlay/061.out.bad        2019-10-30 16:11:50.490848367 +0800
    @@ -1,4 +1,4 @@
     QA output created by 061
    -00000000:  61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61  aaaaaaaaaaaaaaaa
    +00000000:  54 68 69 73 20 69 73 20 6f 6c 64 20 6e 65 77 73  This.is.old.news
     After mount cycle:
     00000000:  61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61  aaaaaaaaaaaaaaaa
    ...
    (Run 'diff -u /home/cgxu/git/xfstests-dev/tests/overlay/061.out /home/cgxu/git/xfstests-dev/results//overlay/061.out.bad'  to see the entire diff)

overlay/061 failed both with and without my patch, and the
test results were exactly the same. Was something already
broken in this test?


v1->v2:
- Set file size when the hole is at the end of the file.
- Add a code comment for the hole copy-up improvement.
- Check SEEK_DATA support before doing hole skip.
- Fall back to the original copy-up when seeking data fails (error case).

 fs/overlayfs/copy_up.c | 78 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 64 insertions(+), 14 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index b801c6353100..7d8a34c480f4 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -116,13 +116,30 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
 	return error;
 }
 
-static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
+static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat)
+{
+	struct iattr attr = {
+		.ia_valid = ATTR_SIZE,
+		.ia_size = stat->size,
+	};
+
+	return notify_change(upperdentry, &attr, NULL);
+}
+
+static int ovl_copy_up_data(struct path *old, struct path *new,
+			    struct kstat *stat)
 {
 	struct file *old_file;
 	struct file *new_file;
+	loff_t len = stat->size;
 	loff_t old_pos = 0;
 	loff_t new_pos = 0;
 	loff_t cloned;
+	loff_t old_next_data_pos;
+	loff_t hole_len;
+	bool seek_support = false;
+	bool skip_hole = true;
+	bool set_size = false;
 	int error = 0;
 
 	if (len == 0)
@@ -144,7 +161,12 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 		goto out;
 	/* Couldn't clone, so now we try to copy the data */
 
-	/* FIXME: copy up sparse files efficiently */
+	/* Check if lower fs supports seek operation */
+	if (old_file->f_mode & FMODE_LSEEK &&
+	    old_file->f_op->llseek) {
+		seek_support = true;
+	}
+
 	while (len) {
 		size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
 		long bytes;
@@ -157,6 +179,38 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 			break;
 		}
 
+		/*
+		 * Fill zero for hole will cost unnecessary disk space
+		 * and meanwhile slow down the copy-up speed, so we do
+		 * an optimization for hole during copy-up, it relies
+		 * on SEEK_DATA implementation and the hole check is
+		 * aligned to OVL_COPY_UP_CHUNK_SIZE. In other word,
+		 * we do not try to recognize all kind of holes here,
+		 * we just skip big enough hole for simplicity to
+		 * implement. If lower fs does not support SEEK_DATA
+		 * operation, the copy-up will behave as before.
+		 */
+
+		if (seek_support && skip_hole) {
+			old_next_data_pos = vfs_llseek(old_file,
+						old_pos, SEEK_DATA);
+			if (old_next_data_pos >= old_pos +
+						OVL_COPY_UP_CHUNK_SIZE) {
+				hole_len = (old_next_data_pos - old_pos) /
+						OVL_COPY_UP_CHUNK_SIZE *
+						OVL_COPY_UP_CHUNK_SIZE;
+				old_pos += hole_len;
+				new_pos += hole_len;
+				len -= hole_len;
+				continue;
+			} else if (old_next_data_pos == -ENXIO) {
+				set_size = true;
+				break;
+			} else if (old_next_data_pos < 0) {
+				skip_hole = false;
+			}
+		}
+
 		bytes = do_splice_direct(old_file, &old_pos,
 					 new_file, &new_pos,
 					 this_len, SPLICE_F_MOVE);
@@ -168,6 +222,12 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 
 		len -= bytes;
 	}
+
+	if (!error && set_size) {
+		inode_lock(new->dentry->d_inode);
+		error = ovl_set_size(new->dentry, stat);
+		inode_unlock(new->dentry->d_inode);
+	}
 out:
 	if (!error)
 		error = vfs_fsync(new_file, 0);
@@ -177,16 +237,6 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 	return error;
 }
 
-static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat)
-{
-	struct iattr attr = {
-		.ia_valid = ATTR_SIZE,
-		.ia_size = stat->size,
-	};
-
-	return notify_change(upperdentry, &attr, NULL);
-}
-
 static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
 {
 	struct iattr attr = {
@@ -453,7 +503,7 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
 		upperpath.dentry = temp;
 
 		ovl_path_lowerdata(c->dentry, &datapath);
-		err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
+		err = ovl_copy_up_data(&datapath, &upperpath, &c->stat);
 		if (err)
 			return err;
 	}
@@ -757,7 +807,7 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
 			goto out;
 	}
 
-	err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
+	err = ovl_copy_up_data(&datapath, &upperpath, &c->stat);
 	if (err)
 		goto out_free;
 
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] ovl: improving copy-up efficiency for big sparse file
  2019-10-30 12:44 [PATCH v2] ovl: improving copy-up efficiency for big sparse file Chengguang Xu
@ 2019-10-30 15:50 ` Amir Goldstein
  2019-10-31  5:33   ` Chengguang Xu
  0 siblings, 1 reply; 12+ messages in thread
From: Amir Goldstein @ 2019-10-30 15:50 UTC (permalink / raw)
  To: Chengguang Xu; +Cc: Miklos Szeredi, overlayfs

On Wed, Oct 30, 2019 at 2:45 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
>
> Current copy-up is not efficient for big sparse file,
> It's not only slow but also wasting more disk space
> when the target lower file has huge hole inside.
> This patch tries to recognize file hole and skip it
> during copy-up.
>
> In detail, this optimization checks the hole according
> to copy-up chunk size so it may not recognize all kind
> of holes in the file. However, it is easy to implement
> and will be enough for most of the use case.
>
> Additionally, this optimization relies on lseek(2)
> SEEK_DATA implementation, so for some specific
> filesystems which do not support this feature
> will behave as before on copy-up.
>
> Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
> ---
>
> Hi Miklos, Amir
>
> This is v2 version of hole copy-up improvement which
> addressed amir's concerns in previous email.
>
> Could you have a look at this patch?
>
> There is a checkpatch warning but that is
> false-positive warning, so you can ignore it.
>
> I've tested the patch with the cases under overlay dir
> (include overlay/066) in fstest for xfs/ext4 fstype,
> and passed most of the cases except below.
>
> overlay/045     [not run] fsck.overlay utility required, skipped this test
> overlay/046     [not run] fsck.overlay utility required, skipped this test
> overlay/056     [not run] fsck.overlay utility required, skipped this test

Those are not failures.
You need to install fsck.overlay from
https://github.com/hisilicon/overlayfs-progs
for these tests to run.

>
> Above three cases are fsck related cases,
> I think they are not important for copy-up.
>
> overlay/061     - output mismatch (see /home/cgxu/git/xfstests-dev/results//overlay/061.out.bad)
>     --- tests/overlay/061.out   2019-05-28 09:54:42.320874925 +0800
>     +++ /home/cgxu/git/xfstests-dev/results//overlay/061.out.bad        2019-10-30 16:11:50.490848367 +0800
>     @@ -1,4 +1,4 @@
>      QA output created by 061
>     -00000000:  61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61  aaaaaaaaaaaaaaaa
>     +00000000:  54 68 69 73 20 69 73 20 6f 6c 64 20 6e 65 77 73  This.is.old.news
>      After mount cycle:
>      00000000:  61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61  aaaaaaaaaaaaaaaa
>     ...
>     (Run 'diff -u /home/cgxu/git/xfstests-dev/tests/overlay/061.out /home/cgxu/git/xfstests-dev/results//overlay/061.out.bad'  to see the entire diff)
>
> overlay/061 was failed with/without my patch and test results
> were just same. have something already broken for the test?

Yes, overlayfs does not comply with this "posix" test.
This is why it was removed from the auto and quick groups.

>
>
> v1->v2:
> - Set file size when the hole is in the end of the file.
> - Add a code comment for hole copy-up improvement.
> - Check SEEK_DATA support before doing hole skip.
> - Back to original copy-up when seek data fails(in error case).
>
>  fs/overlayfs/copy_up.c | 78 ++++++++++++++++++++++++++++++++++--------
>  1 file changed, 64 insertions(+), 14 deletions(-)
>
> diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
> index b801c6353100..7d8a34c480f4 100644
> --- a/fs/overlayfs/copy_up.c
> +++ b/fs/overlayfs/copy_up.c
> @@ -116,13 +116,30 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
>         return error;
>  }
>
> -static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
> +static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat)
> +{
> +       struct iattr attr = {
> +               .ia_valid = ATTR_SIZE,
> +               .ia_size = stat->size,
> +       };
> +
> +       return notify_change(upperdentry, &attr, NULL);
> +}
> +
> +static int ovl_copy_up_data(struct path *old, struct path *new,
> +                           struct kstat *stat)
>  {
>         struct file *old_file;
>         struct file *new_file;
> +       loff_t len = stat->size;
>         loff_t old_pos = 0;
>         loff_t new_pos = 0;
>         loff_t cloned;
> +       loff_t old_next_data_pos;
> +       loff_t hole_len;
> +       bool seek_support = false;
> +       bool skip_hole = true;
> +       bool set_size = false;
>         int error = 0;
>
>         if (len == 0)
> @@ -144,7 +161,12 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
>                 goto out;
>         /* Couldn't clone, so now we try to copy the data */
>
> -       /* FIXME: copy up sparse files efficiently */
> +       /* Check if lower fs supports seek operation */
> +       if (old_file->f_mode & FMODE_LSEEK &&
> +           old_file->f_op->llseek) {
> +               seek_support = true;
> +       }
> +
>         while (len) {
>                 size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
>                 long bytes;
> @@ -157,6 +179,38 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
>                         break;
>                 }
>
> +               /*
> +                * Fill zero for hole will cost unnecessary disk space
> +                * and meanwhile slow down the copy-up speed, so we do
> +                * an optimization for hole during copy-up, it relies
> +                * on SEEK_DATA implementation and the hole check is
> +                * aligned to OVL_COPY_UP_CHUNK_SIZE. In other word,
> +                * we do not try to recognize all kind of holes here,
> +                * we just skip big enough hole for simplicity to
> +                * implement. If lower fs does not support SEEK_DATA
> +                * operation, the copy-up will behave as before.
> +                */
> +
> +               if (seek_support && skip_hole) {
> +                       old_next_data_pos = vfs_llseek(old_file,
> +                                               old_pos, SEEK_DATA);
> +                       if (old_next_data_pos >= old_pos +
> +                                               OVL_COPY_UP_CHUNK_SIZE) {
> +                               hole_len = (old_next_data_pos - old_pos) /
> +                                               OVL_COPY_UP_CHUNK_SIZE *
> +                                               OVL_COPY_UP_CHUNK_SIZE;

Use round_down() helper

> +                               old_pos += hole_len;
> +                               new_pos += hole_len;
> +                               len -= hole_len;
> +                               continue;
> +                       } else if (old_next_data_pos == -ENXIO) {
> +                               set_size = true;
> +                               break;
> +                       } else if (old_next_data_pos < 0) {
> +                               skip_hole = false;

Why do you need to use 2 booleans?
You can initialize skip_hole = true only when the
lower fs has seek support.

>
> +                       }
> +               }
> +
>                 bytes = do_splice_direct(old_file, &old_pos,
>                                          new_file, &new_pos,
>                                          this_len, SPLICE_F_MOVE);
> @@ -168,6 +222,12 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
>
>                 len -= bytes;
>         }
> +
> +       if (!error && set_size) {
> +               inode_lock(new->dentry->d_inode);
> +               error = ovl_set_size(new->dentry, stat);
> +               inode_unlock(new->dentry->d_inode);
> +       }

I see no reason to repeat this code here.
Two options:
1. always set_size at the end of ovl_copy_up_inode()
    what's the harm in that?
2. set boolean c->set_size here and check it at the end
    of ovl_copy_up_inode() instead of checking c->metacopy


Thanks,
Amir.

* Re: [PATCH v2] ovl: improving copy-up efficiency for big sparse file
  2019-10-30 15:50 ` Amir Goldstein
@ 2019-10-31  5:33   ` Chengguang Xu
  2019-10-31  6:53     ` Amir Goldstein
  0 siblings, 1 reply; 12+ messages in thread
From: Chengguang Xu @ 2019-10-31  5:33 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Miklos Szeredi, overlayfs

 ---- 在 星期三, 2019-10-30 23:50:13 Amir Goldstein <amir73il@gmail.com> 撰写 ----
 > On Wed, Oct 30, 2019 at 2:45 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
 > >
 > > Current copy-up is not efficient for big sparse file,
 > > It's not only slow but also wasting more disk space
 > > when the target lower file has huge hole inside.
 > > This patch tries to recognize file hole and skip it
 > > during copy-up.
 > >
 > > In detail, this optimization checks the hole according
 > > to copy-up chunk size so it may not recognize all kind
 > > of holes in the file. However, it is easy to implement
 > > and will be enough for most of the use case.
 > >
 > > Additionally, this optimization relies on lseek(2)
 > > SEEK_DATA implementation, so for some specific
 > > filesystems which do not support this feature
 > > will behave as before on copy-up.
 > >
 > > Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
 > > ---
 > >
 > > Hi Miklos, Amir
 > >
 > > This is v2 version of hole copy-up improvement which
 > > addressed amir's concerns in previous email.
 > >
 > > Could you have a look at this patch?
 > >
 > > There is a checkpatch warning but that is
 > > false-positive warning, so you can ignore it.
 > >
 > > I've tested the patch with the cases under overlay dir
 > > (include overlay/066) in fstest for xfs/ext4 fstype,
 > > and passed most of the cases except below.
 > >
 > > overlay/045     [not run] fsck.overlay utility required, skipped this test
 > > overlay/046     [not run] fsck.overlay utility required, skipped this test
 > > overlay/056     [not run] fsck.overlay utility required, skipped this test
 > 
 > Those are not failures.
 > You need to install fsck.overlay from
 > https://github.com/hisilicon/overlayfs-progs
 > for these tests to run.

I'll do that.

 > 
 > >
 > > Above three cases are fsck related cases,
 > > I think they are not important for copy-up.
 > >
 > > overlay/061     - output mismatch (see /home/cgxu/git/xfstests-dev/results//overlay/061.out.bad)
 > >     --- tests/overlay/061.out   2019-05-28 09:54:42.320874925 +0800
 > >     +++ /home/cgxu/git/xfstests-dev/results//overlay/061.out.bad        2019-10-30 16:11:50.490848367 +0800
 > >     @@ -1,4 +1,4 @@
 > >      QA output created by 061
 > >     -00000000:  61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61  aaaaaaaaaaaaaaaa
 > >     +00000000:  54 68 69 73 20 69 73 20 6f 6c 64 20 6e 65 77 73  This.is.old.news
 > >      After mount cycle:
 > >      00000000:  61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61  aaaaaaaaaaaaaaaa
 > >     ...
 > >     (Run 'diff -u /home/cgxu/git/xfstests-dev/tests/overlay/061.out /home/cgxu/git/xfstests-dev/results//overlay/061.out.bad'  to see the entire diff)
 > >
 > > overlay/061 was failed with/without my patch and test results
 > > were just same. have something already broken for the test?
 > 
 > Yes, overlayfs does not comply with this "posix"' test.
 > This is why it was removed from the auto and quick groups.

So I'm curious, what is the purpose of this test?

 > 
 > >
 > >
 > > v1->v2:
 > > - Set file size when the hole is in the end of the file.
 > > - Add a code comment for hole copy-up improvement.
 > > - Check SEEK_DATA support before doing hole skip.
 > > - Back to original copy-up when seek data fails(in error case).
 > >
 > >  fs/overlayfs/copy_up.c | 78 ++++++++++++++++++++++++++++++++++--------
 > >  1 file changed, 64 insertions(+), 14 deletions(-)
 > >
 > > diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
 > > index b801c6353100..7d8a34c480f4 100644
 > > --- a/fs/overlayfs/copy_up.c
 > > +++ b/fs/overlayfs/copy_up.c
 > > @@ -116,13 +116,30 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
 > >         return error;
 > >  }
 > >
 > > -static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 > > +static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat)
 > > +{
 > > +       struct iattr attr = {
 > > +               .ia_valid = ATTR_SIZE,
 > > +               .ia_size = stat->size,
 > > +       };
 > > +
 > > +       return notify_change(upperdentry, &attr, NULL);
 > > +}
 > > +
 > > +static int ovl_copy_up_data(struct path *old, struct path *new,
 > > +                           struct kstat *stat)
 > >  {
 > >         struct file *old_file;
 > >         struct file *new_file;
 > > +       loff_t len = stat->size;
 > >         loff_t old_pos = 0;
 > >         loff_t new_pos = 0;
 > >         loff_t cloned;
 > > +       loff_t old_next_data_pos;
 > > +       loff_t hole_len;
 > > +       bool seek_support = false;
 > > +       bool skip_hole = true;
 > > +       bool set_size = false;
 > >         int error = 0;
 > >
 > >         if (len == 0)
 > > @@ -144,7 +161,12 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 > >                 goto out;
 > >         /* Couldn't clone, so now we try to copy the data */
 > >
 > > -       /* FIXME: copy up sparse files efficiently */
 > > +       /* Check if lower fs supports seek operation */
 > > +       if (old_file->f_mode & FMODE_LSEEK &&
 > > +           old_file->f_op->llseek) {
 > > +               seek_support = true;
 > > +       }
 > > +
 > >         while (len) {
 > >                 size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
 > >                 long bytes;
 > > @@ -157,6 +179,38 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 > >                         break;
 > >                 }
 > >
 > > +               /*
 > > +                * Fill zero for hole will cost unnecessary disk space
 > > +                * and meanwhile slow down the copy-up speed, so we do
 > > +                * an optimization for hole during copy-up, it relies
 > > +                * on SEEK_DATA implementation and the hole check is
 > > +                * aligned to OVL_COPY_UP_CHUNK_SIZE. In other word,
 > > +                * we do not try to recognize all kind of holes here,
 > > +                * we just skip big enough hole for simplicity to
 > > +                * implement. If lower fs does not support SEEK_DATA
 > > +                * operation, the copy-up will behave as before.
 > > +                */
 > > +
 > > +               if (seek_support && skip_hole) {
 > > +                       old_next_data_pos = vfs_llseek(old_file,
 > > +                                               old_pos, SEEK_DATA);
 > > +                       if (old_next_data_pos >= old_pos +
 > > +                                               OVL_COPY_UP_CHUNK_SIZE) {
 > > +                               hole_len = (old_next_data_pos - old_pos) /
 > > +                                               OVL_COPY_UP_CHUNK_SIZE *
 > > +                                               OVL_COPY_UP_CHUNK_SIZE;
 > 
 > Use round_down() helper

I'll change the hole detection logic a bit, so that it works
more effectively for big contiguous holes.


 > 
 > > +                               old_pos += hole_len;
 > > +                               new_pos += hole_len;
 > > +                               len -= hole_len;
 > > +                               continue;
 > > +                       } else if (old_next_data_pos == -ENXIO) {
 > > +                               set_size = true;
 > > +                               break;
 > > +                       } else if (old_next_data_pos < 0) {
 > > +                               skip_hole = false;
 > 
 > Why do you need to use 2 booleans?
 > You can initialize skip_hole = true only in case of lower
 > has seek support.
 > 
 > >
 > > +                       }
 > > +               }
 > > +
 > >                 bytes = do_splice_direct(old_file, &old_pos,
 > >                                          new_file, &new_pos,
 > >                                          this_len, SPLICE_F_MOVE);
 > > @@ -168,6 +222,12 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 > >
 > >                 len -= bytes;
 > >         }
 > > +
 > > +       if (!error && set_size) {
 > > +               inode_lock(new->dentry->d_inode);
 > > +               error = ovl_set_size(new->dentry, stat);
 > > +               inode_unlock(new->dentry->d_inode);
 > > +       }
 > 
 > I see no reason to repeat this code here.
 > Two options:
 > 1. always set_size at the end of ovl_copy_up_inode()
 >     what's the harm in that?

I think at least it's not suitable for directories.


 > 2. set boolean c->set_size here and check it at the end
 >     of ovl_copy_up_inode() instead of checking c->metacopy
 > 

I don't understand why 'c->set_size' can replace 'c->metacopy'.

Thanks,
Chengguang.

* Re: [PATCH v2] ovl: improving copy-up efficiency for big sparse file
  2019-10-31  5:33   ` Chengguang Xu
@ 2019-10-31  6:53     ` Amir Goldstein
  2019-10-31  8:25       ` Chengguang Xu
                         ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Amir Goldstein @ 2019-10-31  6:53 UTC (permalink / raw)
  To: Chengguang Xu; +Cc: Miklos Szeredi, overlayfs, Vivek Goyal

>  > Yes, overlayfs does not comply with this "posix"' test.
>  > This is why it was removed from the auto and quick groups.
>
> So I'm curious what is the purpose for the test?
>

This is a POSIX compliance test.
It is meant to "remind" us that this behavior is not POSIX compliant
and that we should fix it one day...
It is a bit controversial to have a test like this in xfstests
without a roadmap for when it is going to be fixed, but it's there.

>  >
>  > >
>  > >
>  > > v1->v2:
>  > > - Set file size when the hole is in the end of the file.
>  > > - Add a code comment for hole copy-up improvement.
>  > > - Check SEEK_DATA support before doing hole skip.
>  > > - Back to original copy-up when seek data fails(in error case).
>  > >
>  > >  fs/overlayfs/copy_up.c | 78 ++++++++++++++++++++++++++++++++++--------
>  > >  1 file changed, 64 insertions(+), 14 deletions(-)
>  > >
>  > > diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
>  > > index b801c6353100..7d8a34c480f4 100644
>  > > --- a/fs/overlayfs/copy_up.c
>  > > +++ b/fs/overlayfs/copy_up.c
>  > > @@ -116,13 +116,30 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
>  > >         return error;
>  > >  }
>  > >
>  > > -static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
>  > > +static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat)
>  > > +{
>  > > +       struct iattr attr = {
>  > > +               .ia_valid = ATTR_SIZE,
>  > > +               .ia_size = stat->size,
>  > > +       };
>  > > +
>  > > +       return notify_change(upperdentry, &attr, NULL);
>  > > +}
>  > > +
>  > > +static int ovl_copy_up_data(struct path *old, struct path *new,
>  > > +                           struct kstat *stat)
>  > >  {
>  > >         struct file *old_file;
>  > >         struct file *new_file;
>  > > +       loff_t len = stat->size;
>  > >         loff_t old_pos = 0;
>  > >         loff_t new_pos = 0;
>  > >         loff_t cloned;
>  > > +       loff_t old_next_data_pos;
>  > > +       loff_t hole_len;
>  > > +       bool seek_support = false;
>  > > +       bool skip_hole = true;
>  > > +       bool set_size = false;
>  > >         int error = 0;
>  > >
>  > >         if (len == 0)
>  > > @@ -144,7 +161,12 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
>  > >                 goto out;
>  > >         /* Couldn't clone, so now we try to copy the data */
>  > >
>  > > -       /* FIXME: copy up sparse files efficiently */
>  > > +       /* Check if lower fs supports seek operation */
>  > > +       if (old_file->f_mode & FMODE_LSEEK &&
>  > > +           old_file->f_op->llseek) {
>  > > +               seek_support = true;
>  > > +       }
>  > > +
>  > >         while (len) {
>  > >                 size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
>  > >                 long bytes;
>  > > @@ -157,6 +179,38 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
>  > >                         break;
>  > >                 }
>  > >
>  > > +               /*
>  > > +                * Fill zero for hole will cost unnecessary disk space
>  > > +                * and meanwhile slow down the copy-up speed, so we do
>  > > +                * an optimization for hole during copy-up, it relies
>  > > +                * on SEEK_DATA implementation and the hole check is
>  > > +                * aligned to OVL_COPY_UP_CHUNK_SIZE. In other word,
>  > > +                * we do not try to recognize all kind of holes here,
>  > > +                * we just skip big enough hole for simplicity to
>  > > +                * implement. If lower fs does not support SEEK_DATA
>  > > +                * operation, the copy-up will behave as before.
>  > > +                */
>  > > +
>  > > +               if (seek_support && skip_hole) {
>  > > +                       old_next_data_pos = vfs_llseek(old_file,
>  > > +                                               old_pos, SEEK_DATA);
>  > > +                       if (old_next_data_pos >= old_pos +
>  > > +                                               OVL_COPY_UP_CHUNK_SIZE) {
>  > > +                               hole_len = (old_next_data_pos - old_pos) /
>  > > +                                               OVL_COPY_UP_CHUNK_SIZE *
>  > > +                                               OVL_COPY_UP_CHUNK_SIZE;
>  >
>  > Use round_down() helper
>
> I'll change the logic of hole detection a bit, so that it could work
> more effectively for big continuous hole.

Not sure what you mean.
I meant that there is a helper in the kernel you should use
instead of the expression "/ N * N".

>
>
>  >
>  > > +                               old_pos += hole_len;
>  > > +                               new_pos += hole_len;
>  > > +                               len -= hole_len;
>  > > +                               continue;
>  > > +                       } else if (old_next_data_pos == -ENXIO) {
>  > > +                               set_size = true;
>  > > +                               break;
>  > > +                       } else if (old_next_data_pos < 0) {
>  > > +                               skip_hole = false;
>  >
>  > Why do you need to use 2 booleans?
>  > You can initialize skip_hole = true only in case of lower
>  > has seek support.
>  >
>  > >
>  > > +                       }
>  > > +               }
>  > > +
>  > >                 bytes = do_splice_direct(old_file, &old_pos,
>  > >                                          new_file, &new_pos,
>  > >                                          this_len, SPLICE_F_MOVE);
>  > > @@ -168,6 +222,12 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
>  > >
>  > >                 len -= bytes;
>  > >         }
>  > > +
>  > > +       if (!error && set_size) {
>  > > +               inode_lock(new->dentry->d_inode);
>  > > +               error = ovl_set_size(new->dentry, stat);
>  > > +               inode_unlock(new->dentry->d_inode);
>  > > +       }
>  >
>  > I see no reason to repeat this code here.
>  > Two options:
>  > 1. always set_size at the end of ovl_copy_up_inode()
>  >     what's the harm in that?
>
> I think at least it's not suitable for directory.
>
>
>  > 2. set boolean c->set_size here and check it at the end
>  >     of ovl_copy_up_inode() instead of checking c->metacopy
>  >
>
> I don't understand why 'c->set_size' can replace 'c->metacopy',
>

I did not explain myself well.

This should be enough IMO:

@@ -483,7 +483,7 @@ static int ovl_copy_up_inode(struct
ovl_copy_up_ctx *c, struct dentry *temp)
        }

        inode_lock(temp->d_inode);
-       if (c->metacopy)
+       if (S_ISREG(c->stat.mode))
                err = ovl_set_size(temp, &c->stat);
        if (!err)
                err = ovl_set_attr(temp, &c->stat);

There is no special reason IMO to try to spare an unneeded ovl_set_size
if it simplifies the code a bit.

As a matter of fact, I think overlayfs currently does a metacopy
copy-up even for files of size 0.
This causes unneeded code to run during lookup and later
when clearing the metacopy on a "data" copy-up.
Not sure how common this case is,
but that's for another patch:

@@ -717,7 +717,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
        return err;
 }

-static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode,
+static bool ovl_need_meta_copy_up(struct dentry *dentry, struct kstat *stat,
                                  int flags)
 {
        struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
@@ -725,7 +725,7 @@ static bool ovl_need_meta_copy_up(struct dentry
*dentry, umode_t mode,
        if (!ofs->config.metacopy)
                return false;

-       if (!S_ISREG(mode))
+       if (!S_ISREG(stat->mode) || !stat->size)
                return false;

        if (flags && ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)))
@@ -805,7 +805,7 @@ static int ovl_copy_up_one(struct dentry *parent,
struct dentry *dentry,
        if (err)
                return err;

-       ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);
+       ctx.metacopy = ovl_need_meta_copy_up(dentry, &ctx.stat, flags);

        if (parent) {
                ovl_path_upper(parent, &parentpath);

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] ovl: improving copy-up efficiency for big sparse file
  2019-10-31  6:53     ` Amir Goldstein
@ 2019-10-31  8:25       ` Chengguang Xu
  2019-10-31  9:06         ` Amir Goldstein
  2019-10-31 13:20       ` Vivek Goyal
  2019-11-03 12:43       ` Chengguang Xu
  2 siblings, 1 reply; 12+ messages in thread
From: Chengguang Xu @ 2019-10-31  8:25 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Miklos Szeredi, overlayfs, Vivek Goyal




 ---- On Thu, 2019-10-31 14:53:15 Amir Goldstein <amir73il@gmail.com> wrote ----
 > >  > Yes, overlayfs does not comply with this "posix"' test.
 > >  > This is why it was removed from the auto and quick groups.
 > >
 > > So I'm curious what is the purpose for the test?
 > >
 > 
 > This is a POSIX compliance test.
 > It is meant to "remind" us that this behavior is not POSIX compliant
 > and that we should fix it one day...
 > A bit controversial to have a test like this without a roadmap
 > when it is going to be fixed in xfstests, but it's there.
 > 
 > >  >
 > >  > >
 > >  > >
 > >  > > v1->v2:
 > >  > > - Set file size when the hole is in the end of the file.
 > >  > > - Add a code comment for hole copy-up improvement.
 > >  > > - Check SEEK_DATA support before doing hole skip.
 > >  > > - Back to original copy-up when seek data fails(in error case).
 > >  > >
 > >  > >  fs/overlayfs/copy_up.c | 78 ++++++++++++++++++++++++++++++++++--------
 > >  > >  1 file changed, 64 insertions(+), 14 deletions(-)
 > >  > >
 > >  > > diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
 > >  > > index b801c6353100..7d8a34c480f4 100644
 > >  > > --- a/fs/overlayfs/copy_up.c
 > >  > > +++ b/fs/overlayfs/copy_up.c
 > >  > > @@ -116,13 +116,30 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
 > >  > >         return error;
 > >  > >  }
 > >  > >
 > >  > > -static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 > >  > > +static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat)
 > >  > > +{
 > >  > > +       struct iattr attr = {
 > >  > > +               .ia_valid = ATTR_SIZE,
 > >  > > +               .ia_size = stat->size,
 > >  > > +       };
 > >  > > +
 > >  > > +       return notify_change(upperdentry, &attr, NULL);
 > >  > > +}
 > >  > > +
 > >  > > +static int ovl_copy_up_data(struct path *old, struct path *new,
 > >  > > +                           struct kstat *stat)
 > >  > >  {
 > >  > >         struct file *old_file;
 > >  > >         struct file *new_file;
 > >  > > +       loff_t len = stat->size;
 > >  > >         loff_t old_pos = 0;
 > >  > >         loff_t new_pos = 0;
 > >  > >         loff_t cloned;
 > >  > > +       loff_t old_next_data_pos;
 > >  > > +       loff_t hole_len;
 > >  > > +       bool seek_support = false;
 > >  > > +       bool skip_hole = true;
 > >  > > +       bool set_size = false;
 > >  > >         int error = 0;
 > >  > >
 > >  > >         if (len == 0)
 > >  > > @@ -144,7 +161,12 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 > >  > >                 goto out;
 > >  > >         /* Couldn't clone, so now we try to copy the data */
 > >  > >
 > >  > > -       /* FIXME: copy up sparse files efficiently */
 > >  > > +       /* Check if lower fs supports seek operation */
 > >  > > +       if (old_file->f_mode & FMODE_LSEEK &&
 > >  > > +           old_file->f_op->llseek) {
 > >  > > +               seek_support = true;
 > >  > > +       }
 > >  > > +
 > >  > >         while (len) {
 > >  > >                 size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
 > >  > >                 long bytes;
 > >  > > @@ -157,6 +179,38 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 > >  > >                         break;
 > >  > >                 }
 > >  > >
 > >  > > +               /*
 > >  > > +                * Fill zero for hole will cost unnecessary disk space
 > >  > > +                * and meanwhile slow down the copy-up speed, so we do
 > >  > > +                * an optimization for hole during copy-up, it relies
 > >  > > +                * on SEEK_DATA implementation and the hole check is
 > >  > > +                * aligned to OVL_COPY_UP_CHUNK_SIZE. In other word,
 > >  > > +                * we do not try to recognize all kind of holes here,
 > >  > > +                * we just skip big enough hole for simplicity to
 > >  > > +                * implement. If lower fs does not support SEEK_DATA
 > >  > > +                * operation, the copy-up will behave as before.
 > >  > > +                */
 > >  > > +
 > >  > > +               if (seek_support && skip_hole) {
 > >  > > +                       old_next_data_pos = vfs_llseek(old_file,
 > >  > > +                                               old_pos, SEEK_DATA);
 > >  > > +                       if (old_next_data_pos >= old_pos +
 > >  > > +                                               OVL_COPY_UP_CHUNK_SIZE) {
 > >  > > +                               hole_len = (old_next_data_pos - old_pos) /
 > >  > > +                                               OVL_COPY_UP_CHUNK_SIZE *
 > >  > > +                                               OVL_COPY_UP_CHUNK_SIZE;
 > >  >
 > >  > Use round_down() helper
 > >
 > > I'll change the logic of hole detection a bit, so that it could work
 > > more effectively for big continuous hole.
 > 
 > Not sure what you mean.
 > I meant there is a helper in the kernel you should use
 > instead of the expression "/ N * N"

I'm going to change it as below, so we no longer need to round down
to CHUNK_SIZE.


+                * The detailed logic of hole detection is as below:
+                * when the next data position is larger than the current
+                * position, we skip that hole; otherwise we copy data
+                * in chunks of OVL_COPY_UP_CHUNK_SIZE. This may not
+                * recognize all kinds of holes, and sometimes it only
+                * skips part of a hole, but it is enough for most use
+                * cases.
+                */
+
+               if (skip_hole) {
+                       old_next_data_pos = vfs_llseek(old_file,
+                                               old_pos, SEEK_DATA);
+                       if (old_next_data_pos > old_pos) {
+                               hole_len = old_next_data_pos - old_pos;
+                               old_pos += hole_len;
+                               new_pos += hole_len;
+                               len -= hole_len;
+                               continue;



 > 
 > >
 > >
 > >  >
 > >  > > +                               old_pos += hole_len;
 > >  > > +                               new_pos += hole_len;
 > >  > > +                               len -= hole_len;
 > >  > > +                               continue;
 > >  > > +                       } else if (old_next_data_pos == -ENXIO) {
 > >  > > +                               set_size = true;
 > >  > > +                               break;
 > >  > > +                       } else if (old_next_data_pos < 0) {
 > >  > > +                               skip_hole = false;
 > >  >
 > >  > Why do you need to use 2 booleans?
 > >  > You can initialize skip_hole = true only in case of lower
 > >  > has seek support.
 > >  >
 > >  > >
 > >  > > +                       }
 > >  > > +               }
 > >  > > +
 > >  > >                 bytes = do_splice_direct(old_file, &old_pos,
 > >  > >                                          new_file, &new_pos,
 > >  > >                                          this_len, SPLICE_F_MOVE);
 > >  > > @@ -168,6 +222,12 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 > >  > >
 > >  > >                 len -= bytes;
 > >  > >         }
 > >  > > +
 > >  > > +       if (!error && set_size) {
 > >  > > +               inode_lock(new->dentry->d_inode);
 > >  > > +               error = ovl_set_size(new->dentry, stat);
 > >  > > +               inode_unlock(new->dentry->d_inode);
 > >  > > +       }
 > >  >
 > >  > I see no reason to repeat this code here.
 > >  > Two options:
 > >  > 1. always set_size at the end of ovl_copy_up_inode()
 > >  >     what's the harm in that?
 > >
 > > I think at least it's not suitable for directory.
 > >
 > >
 > >  > 2. set boolean c->set_size here and check it at the end
 > >  >     of ovl_copy_up_inode() instead of checking c->metacopy
 > >  >
 > >
 > > I don't understand why 'c->set_size' can replace 'c->metacopy',
 > >
 > 
 > I did not explain myself well.
 > 
 > This should be enough IMO:
 > 
 > @@ -483,7 +483,7 @@ static int ovl_copy_up_inode(struct
 > ovl_copy_up_ctx *c, struct dentry *temp)
 >         }
 > 
 >         inode_lock(temp->d_inode);
 > -       if (c->metacopy)
 > +       if (S_ISREG(c->stat.mode))
 >                 err = ovl_set_size(temp, &c->stat);
 >         if (!err)
 >                 err = ovl_set_attr(temp, &c->stat);
 > 
 > There is no special reason IMO to try to spare an unneeded ovl_set_size
 > if it simplifies the code a bit.

We can try this, but I'm afraid someone could complain that we do an
unnecessary ovl_set_size() in the case of a full copy-up, or of a file
whose data extends to its end.


 > 
 > As a matter of fact, I think overlayfs currently does a metacopy
 > copy up even for files of size 0.
 > This will cost unneeded code to run during lookup and later
 > for clearing the metacopy on "data" copy up.
 > Not sure how much this case is common,
 > but that's for another patch:
 > 
 > @@ -717,7 +717,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 >         return err;
 >  }
 > 
 > -static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode,
 > +static bool ovl_need_meta_copy_up(struct dentry *dentry, struct kstat *stat,
 >                                   int flags)
 >  {
 >         struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 > @@ -725,7 +725,7 @@ static bool ovl_need_meta_copy_up(struct dentry
 > *dentry, umode_t mode,
 >         if (!ofs->config.metacopy)
 >                 return false;
 > 
 > -       if (!S_ISREG(mode))
 > +       if (!S_ISREG(stat->mode) || !stat->size)
 >                 return false;
 > 
 >         if (flags && ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)))
 > @@ -805,7 +805,7 @@ static int ovl_copy_up_one(struct dentry *parent,
 > struct dentry *dentry,
 >         if (err)
 >                 return err;
 > 
 > -       ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);
 > +       ctx.metacopy = ovl_need_meta_copy_up(dentry, &ctx.stat, flags);
 > 
 >         if (parent) {
 >                 ovl_path_upper(parent, &parentpath);
 > 
 
Makes sense to me.

Thanks,
Chengguang


* Re: [PATCH v2] ovl: improving copy-up efficiency for big sparse file
  2019-10-31  8:25       ` Chengguang Xu
@ 2019-10-31  9:06         ` Amir Goldstein
  2019-10-31  9:19           ` Chengguang Xu
  0 siblings, 1 reply; 12+ messages in thread
From: Amir Goldstein @ 2019-10-31  9:06 UTC (permalink / raw)
  To: Chengguang Xu; +Cc: Miklos Szeredi, overlayfs, Vivek Goyal

>  > I did not explain myself well.
>  >
>  > This should be enough IMO:
>  >
>  > @@ -483,7 +483,7 @@ static int ovl_copy_up_inode(struct
>  > ovl_copy_up_ctx *c, struct dentry *temp)
>  >         }
>  >
>  >         inode_lock(temp->d_inode);
>  > -       if (c->metacopy)
>  > +       if (S_ISREG(c->stat.mode))
>  >                 err = ovl_set_size(temp, &c->stat);
>  >         if (!err)
>  >                 err = ovl_set_attr(temp, &c->stat);
>  >
>  > There is no special reason IMO to try to spare an unneeded ovl_set_size
>  > if it simplifies the code a bit.
>
> We can try this but I'm afraid that someone could complain
> we do unnecessary ovl_set_size() in the case of full copy-up
> or data-end file's copy-up.
>
>

There is no one to complain.
The cost of ovl_set_size() is insignificant compared to the cost of
copying data (unless I am missing something).
Please post a version as above and if Miklos finds it a problem,
we can add a boolean c->should_set_size to the copy up context, initialize
it: c->should_set_size = (S_ISREG(c->stat.mode) && c->stat.size)
and set it to false in case all data was copied.
I think that won't be necessary though.

Thanks,
Amir.


* Re: [PATCH v2] ovl: improving copy-up efficiency for big sparse file
  2019-10-31  9:06         ` Amir Goldstein
@ 2019-10-31  9:19           ` Chengguang Xu
  2019-10-31  9:41             ` Amir Goldstein
  0 siblings, 1 reply; 12+ messages in thread
From: Chengguang Xu @ 2019-10-31  9:19 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Miklos Szeredi, overlayfs, Vivek Goyal

 ---- On Thu, 2019-10-31 17:06:24 Amir Goldstein <amir73il@gmail.com> wrote ----
 > >  > I did not explain myself well.
 > >  >
 > >  > This should be enough IMO:
 > >  >
 > >  > @@ -483,7 +483,7 @@ static int ovl_copy_up_inode(struct
 > >  > ovl_copy_up_ctx *c, struct dentry *temp)
 > >  >         }
 > >  >
 > >  >         inode_lock(temp->d_inode);
 > >  > -       if (c->metacopy)
 > >  > +       if (S_ISREG(c->stat.mode))
 > >  >                 err = ovl_set_size(temp, &c->stat);
 > >  >         if (!err)
 > >  >                 err = ovl_set_attr(temp, &c->stat);
 > >  >
 > >  > There is no special reason IMO to try to spare an unneeded ovl_set_size
 > >  > if it simplifies the code a bit.
 > >
 > > We can try this but I'm afraid that someone could complain
 > > we do unnecessary ovl_set_size() in the case of full copy-up
 > > or data-end file's copy-up.
 > >
 > >
 > 
 > There is no one to complain.
 > The cost of ovl_set_size() is insignificant compared to the cost of
 > copying data (unless I am missing something).
 > Please post a version as above and if Miklos finds it a problem,
 > we can add a boolean c->should_set_size to the copy up context, initialize
 > it: c->should_set_size = (S_ISREG(c->stat.mode) && c->stat.size)
 > and set it to false in case all data was copied.
 > I think that won't be necessary though.
 > 

I forgot to mention that there are two callers of ovl_copy_up_data(),
and ovl_copy_up_meta_inode_data() doesn't even have logic to set the size.
So do you still think setting the size in ovl_copy_up_inode() is simpler
than setting it inside ovl_copy_up_data()?

Thanks,
Chengguang


* Re: [PATCH v2] ovl: improving copy-up efficiency for big sparse file
  2019-10-31  9:19           ` Chengguang Xu
@ 2019-10-31  9:41             ` Amir Goldstein
  0 siblings, 0 replies; 12+ messages in thread
From: Amir Goldstein @ 2019-10-31  9:41 UTC (permalink / raw)
  To: Chengguang Xu; +Cc: Miklos Szeredi, overlayfs, Vivek Goyal

On Thu, Oct 31, 2019 at 11:20 AM Chengguang Xu <cgxu519@mykernel.net> wrote:
>
>  ---- On Thu, 2019-10-31 17:06:24 Amir Goldstein <amir73il@gmail.com> wrote ----
>  > >  > I did not explain myself well.
>  > >  >
>  > >  > This should be enough IMO:
>  > >  >
>  > >  > @@ -483,7 +483,7 @@ static int ovl_copy_up_inode(struct
>  > >  > ovl_copy_up_ctx *c, struct dentry *temp)
>  > >  >         }
>  > >  >
>  > >  >         inode_lock(temp->d_inode);
>  > >  > -       if (c->metacopy)
>  > >  > +       if (S_ISREG(c->stat.mode))
>  > >  >                 err = ovl_set_size(temp, &c->stat);
>  > >  >         if (!err)
>  > >  >                 err = ovl_set_attr(temp, &c->stat);
>  > >  >
>  > >  > There is no special reason IMO to try to spare an unneeded ovl_set_size
>  > >  > if it simplifies the code a bit.
>  > >
>  > > We can try this but I'm afraid that someone could complain
>  > > we do unnecessary ovl_set_size() in the case of full copy-up
>  > > or data-end file's copy-up.
>  > >
>  > >
>  >
>  > There is no one to complain.
>  > The cost of ovl_set_size() is insignificant compared to the cost of
>  > copying data (unless I am missing something).
>  > Please post a version as above and if Miklos finds it a problem,
>  > we can add a boolean c->should_set_size to the copy up context, initialize
>  > it: c->should_set_size = (S_ISREG(c->stat.mode) && c->stat.size)
>  > and set it to false in case all data was copied.
>  > I think that won't be necessary though.
>  >
>
> I forgot to mention that there are two callers of  ovl_copy_up_data()
> and ovl_copy_up_meta_inode_data() even don't have logic to set size.

Because set_size was already done during meta copy up and
"data copy up" does not need to change the size again.

> So do you still think set size in ovl_copy_up_inode() is simpler than
> set size inside ovl_copy_up_data()?
>

Yes. One line change is simpler.

Thanks,
Amir.


* Re: [PATCH v2] ovl: improving copy-up efficiency for big sparse file
  2019-10-31  6:53     ` Amir Goldstein
  2019-10-31  8:25       ` Chengguang Xu
@ 2019-10-31 13:20       ` Vivek Goyal
  2019-10-31 13:44         ` Miklos Szeredi
  2019-11-03 12:43       ` Chengguang Xu
  2 siblings, 1 reply; 12+ messages in thread
From: Vivek Goyal @ 2019-10-31 13:20 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Chengguang Xu, Miklos Szeredi, overlayfs

On Thu, Oct 31, 2019 at 08:53:15AM +0200, Amir Goldstein wrote:
[..]
> >  > > +
> >  > > +       if (!error && set_size) {
> >  > > +               inode_lock(new->dentry->d_inode);
> >  > > +               error = ovl_set_size(new->dentry, stat);
> >  > > +               inode_unlock(new->dentry->d_inode);
> >  > > +       }
> >  >
> >  > I see no reason to repeat this code here.
> >  > Two options:
> >  > 1. always set_size at the end of ovl_copy_up_inode()
> >  >     what's the harm in that?
> >
> > I think at least it's not suitable for directory.
> >
> >
> >  > 2. set boolean c->set_size here and check it at the end
> >  >     of ovl_copy_up_inode() instead of checking c->metacopy
> >  >
> >
> > I don't understand why 'c->set_size' can replace 'c->metacopy',
> >
> 
> I did not explain myself well.
> 
> This should be enough IMO:
> 
> @@ -483,7 +483,7 @@ static int ovl_copy_up_inode(struct
> ovl_copy_up_ctx *c, struct dentry *temp)
>         }
> 
>         inode_lock(temp->d_inode);
> -       if (c->metacopy)
> +       if (S_ISREG(c->stat.mode))
>                 err = ovl_set_size(temp, &c->stat);

Hi Amir,

Why do we need this change? c->metacopy is set only for regular files.

ovl_need_meta_copy_up() {
        if (!S_ISREG(mode))
                return false;
}

Even if there is a reason, this change should be part of a separate patch.
What connection does it have to skipping holes during copy-up?

Thanks
Vivek


* Re: [PATCH v2] ovl: improving copy-up efficiency for big sparse file
  2019-10-31 13:20       ` Vivek Goyal
@ 2019-10-31 13:44         ` Miklos Szeredi
  0 siblings, 0 replies; 12+ messages in thread
From: Miklos Szeredi @ 2019-10-31 13:44 UTC (permalink / raw)
  To: Vivek Goyal; +Cc: Amir Goldstein, Chengguang Xu, overlayfs

On Thu, Oct 31, 2019 at 2:20 PM Vivek Goyal <vgoyal@redhat.com> wrote:
>
> On Thu, Oct 31, 2019 at 08:53:15AM +0200, Amir Goldstein wrote:

> >
> > @@ -483,7 +483,7 @@ static int ovl_copy_up_inode(struct
> > ovl_copy_up_ctx *c, struct dentry *temp)
> >         }
> >
> >         inode_lock(temp->d_inode);
> > -       if (c->metacopy)
> > +       if (S_ISREG(c->stat.mode))
> >                 err = ovl_set_size(temp, &c->stat);
>
> Hi Amir,
>
> Why do we need this change. c->metacopy is set only for regular files.
>
> ovl_need_meta_copy_up() {
>         if (!S_ISREG(mode))
>                 return false;
> }
>
> Even if there is a reason, this change should be part of a separate patch.
> What connection does it have to skip holes while copying up.

That's a fix only for the case where the copied-up file ends in a hole,
which could not happen before this patch. But it's harmless, and makes
sense generally (i.e. the file is truncated to its final size *before*
the data is copied up).

Thanks,
Miklos


* Re: [PATCH v2] ovl: improving copy-up efficiency for big sparse file
  2019-10-31  6:53     ` Amir Goldstein
  2019-10-31  8:25       ` Chengguang Xu
  2019-10-31 13:20       ` Vivek Goyal
@ 2019-11-03 12:43       ` Chengguang Xu
  2019-11-03 14:46         ` Amir Goldstein
  2 siblings, 1 reply; 12+ messages in thread
From: Chengguang Xu @ 2019-11-03 12:43 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Miklos Szeredi, overlayfs, Vivek Goyal

 ---- On Thu, 2019-10-31 14:53:15 Amir Goldstein <amir73il@gmail.com> wrote ----
 > >  > Yes, overlayfs does not comply with this "posix"' test.
 > >  > This is why it was removed from the auto and quick groups.
 > >
 > > So I'm curious what is the purpose for the test?
 > >
 > 
 > This is a POSIX compliance test.
 > It is meant to "remind" us that this behavior is not POSIX compliant
 > and that we should fix it one day...
 > A bit controversial to have a test like this without a roadmap
 > when it is going to be fixed in xfstests, but it's there.

I haven't checked the details carefully, but it seems feasible if we copy up the lower file during mmap regardless of ro/rw mode.
Is it acceptable to slightly change the copy-up assumption to fulfill POSIX compliance? Or should we just wait for a better solution?

Thanks,
Chengguang


* Re: [PATCH v2] ovl: improving copy-up efficiency for big sparse file
  2019-11-03 12:43       ` Chengguang Xu
@ 2019-11-03 14:46         ` Amir Goldstein
  0 siblings, 0 replies; 12+ messages in thread
From: Amir Goldstein @ 2019-11-03 14:46 UTC (permalink / raw)
  To: Chengguang Xu; +Cc: Miklos Szeredi, overlayfs, Vivek Goyal

On Sun, Nov 3, 2019 at 2:43 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
>
>  ---- On Thu, 2019-10-31 14:53:15 Amir Goldstein <amir73il@gmail.com> wrote ----
>  > >  > Yes, overlayfs does not comply with this "posix"' test.
>  > >  > This is why it was removed from the auto and quick groups.
>  > >
>  > > So I'm curious what is the purpose for the test?
>  > >
>  >
>  > This is a POSIX compliance test.
>  > It is meant to "remind" us that this behavior is not POSIX compliant
>  > and that we should fix it one day...
>  > A bit controversial to have a test like this without a roadmap
>  > when it is going to be fixed in xfstests, but it's there.
>
> I haven't checked the details carefully, but it seems feasible if we copy up the lower file during mmap regardless of ro/rw mode.
> Is it acceptable to slightly change the copy-up assumption to fulfill POSIX compliance? Or should we just wait for a better solution?
>

That was attempted in the past.
It's complicated ;-)

Cheers,
Amir.


end of thread, other threads:[~2019-11-03 14:46 UTC | newest]

Thread overview: 12+ messages
2019-10-30 12:44 [PATCH v2] ovl: improving copy-up efficiency for big sparse file Chengguang Xu
2019-10-30 15:50 ` Amir Goldstein
2019-10-31  5:33   ` Chengguang Xu
2019-10-31  6:53     ` Amir Goldstein
2019-10-31  8:25       ` Chengguang Xu
2019-10-31  9:06         ` Amir Goldstein
2019-10-31  9:19           ` Chengguang Xu
2019-10-31  9:41             ` Amir Goldstein
2019-10-31 13:20       ` Vivek Goyal
2019-10-31 13:44         ` Miklos Szeredi
2019-11-03 12:43       ` Chengguang Xu
2019-11-03 14:46         ` Amir Goldstein
