All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] btrfs: Avoid trucating page or punching hole in a already existed hole.
@ 2014-05-30  7:16 Qu Wenruo
  2014-08-06 18:58 ` Filipe David Manana
  2014-08-27  8:19 ` Liu Bo
  0 siblings, 2 replies; 7+ messages in thread
From: Qu Wenruo @ 2014-05-30  7:16 UTC (permalink / raw)
  To: linux-btrfs

btrfs_punch_hole() will truncate unaligned pages or punch hole on a
already existed hole.
This will cause unneeded zero page or holes splitting the original huge
hole.

This patch will skip already existed holes before any page truncating or
hole punching.

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
 fs/btrfs/file.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 98 insertions(+), 14 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index ae6af07..93915d1 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2168,6 +2168,37 @@ out:
 	return 0;
 }
 
+/*
+ * Find a hole extent on given inode and change start/len to the end of hole
+ * extent.(hole/vacuum extent whose em->start <= start &&
+ *	   em->start + em->len > start)
+ * When a hole extent is found, return 1 and modify start/len.
+ */
+static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
+{
+	struct extent_map *em;
+	int ret = 0;
+
+	em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0);
+	if (IS_ERR_OR_NULL(em)) {
+		if (!em)
+			ret = -ENOMEM;
+		else
+			ret = PTR_ERR(em);
+		return ret;
+	}
+
+	/* Hole or vacuum extent(only exists in no-hole mode) */
+	if (em->block_start == EXTENT_MAP_HOLE) {
+		ret = 1;
+		*len = em->start + em->len > *start + *len ?
+		       0 : *start + *len - em->start - em->len;
+		*start = em->start + em->len;
+	}
+	free_extent_map(em);
+	return ret;
+}
+
 static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -2175,17 +2206,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 	struct btrfs_path *path;
 	struct btrfs_block_rsv *rsv;
 	struct btrfs_trans_handle *trans;
-	u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
-	u64 lockend = round_down(offset + len,
-				 BTRFS_I(inode)->root->sectorsize) - 1;
-	u64 cur_offset = lockstart;
+	u64 lockstart;
+	u64 lockend;
+	u64 tail_start;
+	u64 tail_len;
+	u64 orig_start = offset;
+	u64 cur_offset;
 	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
 	u64 drop_end;
 	int ret = 0;
 	int err = 0;
 	int rsv_count;
-	bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
-			  ((offset + len - 1) >> PAGE_CACHE_SHIFT));
+	bool same_page;
 	bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
 	u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
 
@@ -2194,6 +2226,21 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 		return ret;
 
 	mutex_lock(&inode->i_mutex);
+	ret = find_first_non_hole(inode, &offset, &len);
+	if (ret < 0)
+		goto out_only_mutex;
+	if (ret && !len) {
+		/* Already in a large hole */
+		ret = 0;
+		goto out_only_mutex;
+	}
+
+	lockstart = round_up(offset , BTRFS_I(inode)->root->sectorsize);
+	lockend = round_down(offset + len,
+			     BTRFS_I(inode)->root->sectorsize) - 1;
+	same_page = ((offset >> PAGE_CACHE_SHIFT) ==
+		    ((offset + len - 1) >> PAGE_CACHE_SHIFT));
+
 	/*
 	 * We needn't truncate any page which is beyond the end of the file
 	 * because we are sure there is no data there.
@@ -2205,8 +2252,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 	if (same_page && len < PAGE_CACHE_SIZE) {
 		if (offset < ino_size)
 			ret = btrfs_truncate_page(inode, offset, len, 0);
-		mutex_unlock(&inode->i_mutex);
-		return ret;
+		goto out_only_mutex;
 	}
 
 	/* zero back part of the first page */
@@ -2218,12 +2264,39 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 		}
 	}
 
-	/* zero the front end of the last page */
-	if (offset + len < ino_size) {
-		ret = btrfs_truncate_page(inode, offset + len, 0, 1);
-		if (ret) {
-			mutex_unlock(&inode->i_mutex);
-			return ret;
+	/* Check the aligned pages after the first unaligned page,
+	 * if offset != orig_start, which means the first unaligned page
+	 * including serveral following pages are already in holes,
+	 * the extra check can be skipped */
+	if (offset == orig_start) {
+		/* after truncate page, check hole again */
+		len = offset + len - lockstart;
+		offset = lockstart;
+		ret = find_first_non_hole(inode, &offset, &len);
+		if (ret < 0)
+			goto out_only_mutex;
+		if (ret && !len) {
+			ret = 0;
+			goto out_only_mutex;
+		}
+		lockstart = offset;
+	}
+
+	/* Check the tail unaligned part is in a hole */
+	tail_start = lockend + 1;
+	tail_len = offset + len - tail_start;
+	if (tail_len) {
+		ret = find_first_non_hole(inode, &tail_start, &tail_len);
+		if (unlikely(ret < 0))
+			goto out_only_mutex;
+		if (!ret) {
+			/* zero the front end of the last page */
+			if (tail_start + tail_len < ino_size) {
+				ret = btrfs_truncate_page(inode,
+						tail_start + tail_len, 0, 1);
+				if (ret)
+					goto out_only_mutex;
+				}
 		}
 	}
 
@@ -2299,6 +2372,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 	BUG_ON(ret);
 	trans->block_rsv = rsv;
 
+	cur_offset = lockstart;
+	len = lockend - cur_offset;
 	while (cur_offset < lockend) {
 		ret = __btrfs_drop_extents(trans, root, inode, path,
 					   cur_offset, lockend + 1,
@@ -2339,6 +2414,14 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 					      rsv, min_size);
 		BUG_ON(ret);	/* shouldn't happen */
 		trans->block_rsv = rsv;
+
+		ret = find_first_non_hole(inode, &cur_offset, &len);
+		if (unlikely(ret < 0))
+			break;
+		if (ret && !len) {
+			ret = 0;
+			break;
+		}
 	}
 
 	if (ret) {
@@ -2372,6 +2455,7 @@ out_free:
 out:
 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
 			     &cached_state, GFP_NOFS);
+out_only_mutex:
 	mutex_unlock(&inode->i_mutex);
 	if (ret && !err)
 		err = ret;
-- 
1.9.3


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] btrfs: Avoid trucating page or punching hole in a already existed hole.
  2014-05-30  7:16 [PATCH] btrfs: Avoid trucating page or punching hole in a already existed hole Qu Wenruo
@ 2014-08-06 18:58 ` Filipe David Manana
  2014-08-07  0:41   ` Qu Wenruo
  2014-08-27  8:19 ` Liu Bo
  1 sibling, 1 reply; 7+ messages in thread
From: Filipe David Manana @ 2014-08-06 18:58 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: linux-btrfs

On Fri, May 30, 2014 at 8:16 AM, Qu Wenruo <quwenruo@cn.fujitsu.com> wrote:
> btrfs_punch_hole() will truncate unaligned pages or punch hole on a
> already existed hole.
> This will cause unneeded zero page or holes splitting the original huge
> hole.
>
> This patch will skip already existed holes before any page truncating or
> hole punching.
>
> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>

Hi Qu,

FYI, this change seems to introduce some regressions when using the
NO_HOLES feature, and it's easy to reproduce with xfstests, where at
least 3 tests fail in a deterministic way:

$ cat /home/fdmanana/git/hub/xfstests/local.config
export TEST_DEV=/dev/sdb
export TEST_DIR=/home/fdmanana/btrfs-tests/dev
export SCRATCH_MNT="/home/fdmanana/btrfs-tests/scratch_1"
export FSTYP=btrfs

$ cd /home/fdmanana/git/hub/xfstests
$ mkfs.btrfs -f -O no-holes /dev/sdb
Performing full device TRIM (100.00GiB) ...
Turning ON incompat feature 'extref': increased hardlink limit per file to 65536
fs created label (null) on /dev/sdb
nodesize 16384 leafsize 16384 sectorsize 4096 size 100.00GiB
Btrfs v3.14.1-96-gcc7fd5a-dirty

$ ./check generic/075  generic/091  generic/112
FSTYP         -- btrfs
PLATFORM      -- Linux/x86_64 debian-vm3 3.16.0-rc6-fdm-btrfs-next-38+

generic/075 87s ... [failed, exit status 1] - output mismatch (see
/home/fdmanana/git/hub/xfstests/results//generic/075.out.bad)
    --- tests/generic/075.out 2014-08-06 20:30:02.986012249 +0100
    +++ /home/fdmanana/git/hub/xfstests/results//generic/075.out.bad
2014-08-06 20:42:23.386012249 +0100
    @@ -4,15 +4,5 @@
     -----------------------------------------------
     fsx.0 : -d -N numops -S 0
     -----------------------------------------------
    -
    ------------------------------------------------
    -fsx.1 : -d -N numops -S 0 -x
    ------------------------------------------------
    ...
    (Run 'diff -u tests/generic/075.out
/home/fdmanana/git/hub/xfstests/results//generic/075.out.bad'  to see
the entire diff)
generic/091 56s ... [failed, exit status 1] - output mismatch (see
/home/fdmanana/git/hub/xfstests/results//generic/091.out.bad)
    --- tests/generic/091.out 2014-02-21 19:11:09.460443001 +0000
    +++ /home/fdmanana/git/hub/xfstests/results//generic/091.out.bad
2014-08-06 20:42:25.262012249 +0100
    @@ -1,7 +1,601 @@
     QA output created by 091
     fsx -N 10000 -l 500000 -r PSIZE -t BSIZE -w BSIZE -Z -R -W
    -fsx -N 10000 -o 8192 -l 500000 -r PSIZE -t BSIZE -w BSIZE -Z -R -W
    -fsx -N 10000 -o 32768 -l 500000 -r PSIZE -t BSIZE -w BSIZE -Z -R -W
    -fsx -N 10000 -o 8192 -l 500000 -r PSIZE -t BSIZE -w BSIZE -Z -R -W
    -fsx -N 10000 -o 32768 -l 500000 -r PSIZE -t BSIZE -w BSIZE -Z -R -W
    -fsx -N 10000 -o 128000 -l 500000 -r PSIZE -t BSIZE -w BSIZE -Z -W
    ...
    (Run 'diff -u tests/generic/091.out
/home/fdmanana/git/hub/xfstests/results//generic/091.out.bad'  to see
the entire diff)
generic/112 93s ... [failed, exit status 1] - output mismatch (see
/home/fdmanana/git/hub/xfstests/results//generic/112.out.bad)
    --- tests/generic/112.out 2014-02-21 19:11:09.460443001 +0000
    +++ /home/fdmanana/git/hub/xfstests/results//generic/112.out.bad
2014-08-06 20:42:28.930012249 +0100
    @@ -4,15 +4,5 @@
     -----------------------------------------------
     fsx.0 : -A -d -N numops -S 0
     -----------------------------------------------
    -
    ------------------------------------------------
    -fsx.1 : -A -d -N numops -S 0 -x
    ------------------------------------------------
    ...
    (Run 'diff -u tests/generic/112.out
/home/fdmanana/git/hub/xfstests/results//generic/112.out.bad'  to see
the entire diff)
Ran: generic/075 generic/091 generic/112
Failures: generic/075 generic/091 generic/112
Failed 3 of 3 tests


Without NO_HOLES, those tests pass. With NO_HOLES and without this
patch, the tests pass too.
Do you think you can take a look at this?

(A couple nitpick comments below too)

Thanks Qu


> ---
>  fs/btrfs/file.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 98 insertions(+), 14 deletions(-)
>
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index ae6af07..93915d1 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -2168,6 +2168,37 @@ out:
>         return 0;
>  }
>
> +/*
> + * Find a hole extent on given inode and change start/len to the end of hole
> + * extent.(hole/vacuum extent whose em->start <= start &&
> + *        em->start + em->len > start)
> + * When a hole extent is found, return 1 and modify start/len.
> + */
> +static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
> +{
> +       struct extent_map *em;
> +       int ret = 0;
> +
> +       em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0);
> +       if (IS_ERR_OR_NULL(em)) {
> +               if (!em)
> +                       ret = -ENOMEM;
> +               else
> +                       ret = PTR_ERR(em);
> +               return ret;
> +       }
> +
> +       /* Hole or vacuum extent(only exists in no-hole mode) */
> +       if (em->block_start == EXTENT_MAP_HOLE) {
> +               ret = 1;
> +               *len = em->start + em->len > *start + *len ?
> +                      0 : *start + *len - em->start - em->len;
> +               *start = em->start + em->len;
> +       }
> +       free_extent_map(em);
> +       return ret;
> +}
> +
>  static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>  {
>         struct btrfs_root *root = BTRFS_I(inode)->root;
> @@ -2175,17 +2206,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>         struct btrfs_path *path;
>         struct btrfs_block_rsv *rsv;
>         struct btrfs_trans_handle *trans;
> -       u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
> -       u64 lockend = round_down(offset + len,
> -                                BTRFS_I(inode)->root->sectorsize) - 1;
> -       u64 cur_offset = lockstart;
> +       u64 lockstart;
> +       u64 lockend;
> +       u64 tail_start;
> +       u64 tail_len;
> +       u64 orig_start = offset;
> +       u64 cur_offset;
>         u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
>         u64 drop_end;
>         int ret = 0;
>         int err = 0;
>         int rsv_count;
> -       bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
> -                         ((offset + len - 1) >> PAGE_CACHE_SHIFT));
> +       bool same_page;
>         bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
>         u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
>
> @@ -2194,6 +2226,21 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>                 return ret;
>
>         mutex_lock(&inode->i_mutex);
> +       ret = find_first_non_hole(inode, &offset, &len);
> +       if (ret < 0)
> +               goto out_only_mutex;
> +       if (ret && !len) {
> +               /* Already in a large hole */
> +               ret = 0;
> +               goto out_only_mutex;
> +       }
> +
> +       lockstart = round_up(offset , BTRFS_I(inode)->root->sectorsize);

Nitpick, this should have caused checkpatch.pl to emit a warning
(space after the comma).

> +       lockend = round_down(offset + len,
> +                            BTRFS_I(inode)->root->sectorsize) - 1;
> +       same_page = ((offset >> PAGE_CACHE_SHIFT) ==
> +                   ((offset + len - 1) >> PAGE_CACHE_SHIFT));
> +
>         /*
>          * We needn't truncate any page which is beyond the end of the file
>          * because we are sure there is no data there.
> @@ -2205,8 +2252,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>         if (same_page && len < PAGE_CACHE_SIZE) {
>                 if (offset < ino_size)
>                         ret = btrfs_truncate_page(inode, offset, len, 0);
> -               mutex_unlock(&inode->i_mutex);
> -               return ret;
> +               goto out_only_mutex;
>         }
>
>         /* zero back part of the first page */
> @@ -2218,12 +2264,39 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>                 }
>         }
>
> -       /* zero the front end of the last page */
> -       if (offset + len < ino_size) {
> -               ret = btrfs_truncate_page(inode, offset + len, 0, 1);
> -               if (ret) {
> -                       mutex_unlock(&inode->i_mutex);
> -                       return ret;
> +       /* Check the aligned pages after the first unaligned page,
> +        * if offset != orig_start, which means the first unaligned page
> +        * including serveral following pages are already in holes,
> +        * the extra check can be skipped */
> +       if (offset == orig_start) {
> +               /* after truncate page, check hole again */
> +               len = offset + len - lockstart;
> +               offset = lockstart;
> +               ret = find_first_non_hole(inode, &offset, &len);
> +               if (ret < 0)
> +                       goto out_only_mutex;
> +               if (ret && !len) {
> +                       ret = 0;
> +                       goto out_only_mutex;
> +               }
> +               lockstart = offset;
> +       }
> +
> +       /* Check the tail unaligned part is in a hole */
> +       tail_start = lockend + 1;
> +       tail_len = offset + len - tail_start;
> +       if (tail_len) {
> +               ret = find_first_non_hole(inode, &tail_start, &tail_len);
> +               if (unlikely(ret < 0))
> +                       goto out_only_mutex;
> +               if (!ret) {
> +                       /* zero the front end of the last page */
> +                       if (tail_start + tail_len < ino_size) {
> +                               ret = btrfs_truncate_page(inode,
> +                                               tail_start + tail_len, 0, 1);
> +                               if (ret)
> +                                       goto out_only_mutex;
> +                               }

Nitpick, this last } is not aligned (same indentation level) with its
matching {.

>                 }
>         }
>
> @@ -2299,6 +2372,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>         BUG_ON(ret);
>         trans->block_rsv = rsv;
>
> +       cur_offset = lockstart;
> +       len = lockend - cur_offset;
>         while (cur_offset < lockend) {
>                 ret = __btrfs_drop_extents(trans, root, inode, path,
>                                            cur_offset, lockend + 1,
> @@ -2339,6 +2414,14 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>                                               rsv, min_size);
>                 BUG_ON(ret);    /* shouldn't happen */
>                 trans->block_rsv = rsv;
> +
> +               ret = find_first_non_hole(inode, &cur_offset, &len);
> +               if (unlikely(ret < 0))
> +                       break;
> +               if (ret && !len) {
> +                       ret = 0;
> +                       break;
> +               }
>         }
>
>         if (ret) {
> @@ -2372,6 +2455,7 @@ out_free:
>  out:
>         unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
>                              &cached_state, GFP_NOFS);
> +out_only_mutex:
>         mutex_unlock(&inode->i_mutex);
>         if (ret && !err)
>                 err = ret;
> --
> 1.9.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Filipe David Manana,

"Reasonable men adapt themselves to the world.
 Unreasonable men adapt the world to themselves.
 That's why all progress depends on unreasonable men."

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] btrfs: Avoid trucating page or punching hole in a already existed hole.
  2014-08-06 18:58 ` Filipe David Manana
@ 2014-08-07  0:41   ` Qu Wenruo
  0 siblings, 0 replies; 7+ messages in thread
From: Qu Wenruo @ 2014-08-07  0:41 UTC (permalink / raw)
  To: fdmanana; +Cc: linux-btrfs

Hi Filipe,

Thanks for the test result, I'll investigate it soon.

Also I'll fix the code style problem too.

Thanks,
Qu
-------- Original Message --------
Subject: Re: [PATCH] btrfs: Avoid trucating page or punching hole in a 
already existed hole.
From: Filipe David Manana <fdmanana@gmail.com>
To: Qu Wenruo <quwenruo@cn.fujitsu.com>
Date: 2014年08月07日 02:58
> On Fri, May 30, 2014 at 8:16 AM, Qu Wenruo <quwenruo@cn.fujitsu.com> wrote:
>> btrfs_punch_hole() will truncate unaligned pages or punch hole on a
>> already existed hole.
>> This will cause unneeded zero page or holes splitting the original huge
>> hole.
>>
>> This patch will skip already existed holes before any page truncating or
>> hole punching.
>>
>> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
> Hi Qu,
>
> FYI, this change seems to introduce some regressions when using the
> NO_HOLES feature, and it's easy to reproduce with xfstests, where at
> least 3 tests fail in a deterministic way:
>
> $ cat /home/fdmanana/git/hub/xfstests/local.config
> export TEST_DEV=/dev/sdb
> export TEST_DIR=/home/fdmanana/btrfs-tests/dev
> export SCRATCH_MNT="/home/fdmanana/btrfs-tests/scratch_1"
> export FSTYP=btrfs
>
> $ cd /home/fdmanana/git/hub/xfstests
> $ mkfs.btrfs -f -O no-holes /dev/sdb
> Performing full device TRIM (100.00GiB) ...
> Turning ON incompat feature 'extref': increased hardlink limit per file to 65536
> fs created label (null) on /dev/sdb
> nodesize 16384 leafsize 16384 sectorsize 4096 size 100.00GiB
> Btrfs v3.14.1-96-gcc7fd5a-dirty
>
> $ ./check generic/075  generic/091  generic/112
> FSTYP         -- btrfs
> PLATFORM      -- Linux/x86_64 debian-vm3 3.16.0-rc6-fdm-btrfs-next-38+
>
> generic/075 87s ... [failed, exit status 1] - output mismatch (see
> /home/fdmanana/git/hub/xfstests/results//generic/075.out.bad)
>      --- tests/generic/075.out 2014-08-06 20:30:02.986012249 +0100
>      +++ /home/fdmanana/git/hub/xfstests/results//generic/075.out.bad
> 2014-08-06 20:42:23.386012249 +0100
>      @@ -4,15 +4,5 @@
>       -----------------------------------------------
>       fsx.0 : -d -N numops -S 0
>       -----------------------------------------------
>      -
>      ------------------------------------------------
>      -fsx.1 : -d -N numops -S 0 -x
>      ------------------------------------------------
>      ...
>      (Run 'diff -u tests/generic/075.out
> /home/fdmanana/git/hub/xfstests/results//generic/075.out.bad'  to see
> the entire diff)
> generic/091 56s ... [failed, exit status 1] - output mismatch (see
> /home/fdmanana/git/hub/xfstests/results//generic/091.out.bad)
>      --- tests/generic/091.out 2014-02-21 19:11:09.460443001 +0000
>      +++ /home/fdmanana/git/hub/xfstests/results//generic/091.out.bad
> 2014-08-06 20:42:25.262012249 +0100
>      @@ -1,7 +1,601 @@
>       QA output created by 091
>       fsx -N 10000 -l 500000 -r PSIZE -t BSIZE -w BSIZE -Z -R -W
>      -fsx -N 10000 -o 8192 -l 500000 -r PSIZE -t BSIZE -w BSIZE -Z -R -W
>      -fsx -N 10000 -o 32768 -l 500000 -r PSIZE -t BSIZE -w BSIZE -Z -R -W
>      -fsx -N 10000 -o 8192 -l 500000 -r PSIZE -t BSIZE -w BSIZE -Z -R -W
>      -fsx -N 10000 -o 32768 -l 500000 -r PSIZE -t BSIZE -w BSIZE -Z -R -W
>      -fsx -N 10000 -o 128000 -l 500000 -r PSIZE -t BSIZE -w BSIZE -Z -W
>      ...
>      (Run 'diff -u tests/generic/091.out
> /home/fdmanana/git/hub/xfstests/results//generic/091.out.bad'  to see
> the entire diff)
> generic/112 93s ... [failed, exit status 1] - output mismatch (see
> /home/fdmanana/git/hub/xfstests/results//generic/112.out.bad)
>      --- tests/generic/112.out 2014-02-21 19:11:09.460443001 +0000
>      +++ /home/fdmanana/git/hub/xfstests/results//generic/112.out.bad
> 2014-08-06 20:42:28.930012249 +0100
>      @@ -4,15 +4,5 @@
>       -----------------------------------------------
>       fsx.0 : -A -d -N numops -S 0
>       -----------------------------------------------
>      -
>      ------------------------------------------------
>      -fsx.1 : -A -d -N numops -S 0 -x
>      ------------------------------------------------
>      ...
>      (Run 'diff -u tests/generic/112.out
> /home/fdmanana/git/hub/xfstests/results//generic/112.out.bad'  to see
> the entire diff)
> Ran: generic/075 generic/091 generic/112
> Failures: generic/075 generic/091 generic/112
> Failed 3 of 3 tests
>
>
> Without NO_HOLES, those tests pass. With NO_HOLES and without this
> patch, the tests pass too.
> Do you think you can take a look at this?
>
> (A couple nitpick comments below too)
>
> Thanks Qu
>
>
>> ---
>>   fs/btrfs/file.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++-------
>>   1 file changed, 98 insertions(+), 14 deletions(-)
>>
>> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
>> index ae6af07..93915d1 100644
>> --- a/fs/btrfs/file.c
>> +++ b/fs/btrfs/file.c
>> @@ -2168,6 +2168,37 @@ out:
>>          return 0;
>>   }
>>
>> +/*
>> + * Find a hole extent on given inode and change start/len to the end of hole
>> + * extent.(hole/vacuum extent whose em->start <= start &&
>> + *        em->start + em->len > start)
>> + * When a hole extent is found, return 1 and modify start/len.
>> + */
>> +static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
>> +{
>> +       struct extent_map *em;
>> +       int ret = 0;
>> +
>> +       em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0);
>> +       if (IS_ERR_OR_NULL(em)) {
>> +               if (!em)
>> +                       ret = -ENOMEM;
>> +               else
>> +                       ret = PTR_ERR(em);
>> +               return ret;
>> +       }
>> +
>> +       /* Hole or vacuum extent(only exists in no-hole mode) */
>> +       if (em->block_start == EXTENT_MAP_HOLE) {
>> +               ret = 1;
>> +               *len = em->start + em->len > *start + *len ?
>> +                      0 : *start + *len - em->start - em->len;
>> +               *start = em->start + em->len;
>> +       }
>> +       free_extent_map(em);
>> +       return ret;
>> +}
>> +
>>   static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>>   {
>>          struct btrfs_root *root = BTRFS_I(inode)->root;
>> @@ -2175,17 +2206,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>>          struct btrfs_path *path;
>>          struct btrfs_block_rsv *rsv;
>>          struct btrfs_trans_handle *trans;
>> -       u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
>> -       u64 lockend = round_down(offset + len,
>> -                                BTRFS_I(inode)->root->sectorsize) - 1;
>> -       u64 cur_offset = lockstart;
>> +       u64 lockstart;
>> +       u64 lockend;
>> +       u64 tail_start;
>> +       u64 tail_len;
>> +       u64 orig_start = offset;
>> +       u64 cur_offset;
>>          u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
>>          u64 drop_end;
>>          int ret = 0;
>>          int err = 0;
>>          int rsv_count;
>> -       bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
>> -                         ((offset + len - 1) >> PAGE_CACHE_SHIFT));
>> +       bool same_page;
>>          bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
>>          u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
>>
>> @@ -2194,6 +2226,21 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>>                  return ret;
>>
>>          mutex_lock(&inode->i_mutex);
>> +       ret = find_first_non_hole(inode, &offset, &len);
>> +       if (ret < 0)
>> +               goto out_only_mutex;
>> +       if (ret && !len) {
>> +               /* Already in a large hole */
>> +               ret = 0;
>> +               goto out_only_mutex;
>> +       }
>> +
>> +       lockstart = round_up(offset , BTRFS_I(inode)->root->sectorsize);
> Nitpick, this should have caused checkpatch.pl to emit a warning
> (space after the comma).
>
>> +       lockend = round_down(offset + len,
>> +                            BTRFS_I(inode)->root->sectorsize) - 1;
>> +       same_page = ((offset >> PAGE_CACHE_SHIFT) ==
>> +                   ((offset + len - 1) >> PAGE_CACHE_SHIFT));
>> +
>>          /*
>>           * We needn't truncate any page which is beyond the end of the file
>>           * because we are sure there is no data there.
>> @@ -2205,8 +2252,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>>          if (same_page && len < PAGE_CACHE_SIZE) {
>>                  if (offset < ino_size)
>>                          ret = btrfs_truncate_page(inode, offset, len, 0);
>> -               mutex_unlock(&inode->i_mutex);
>> -               return ret;
>> +               goto out_only_mutex;
>>          }
>>
>>          /* zero back part of the first page */
>> @@ -2218,12 +2264,39 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>>                  }
>>          }
>>
>> -       /* zero the front end of the last page */
>> -       if (offset + len < ino_size) {
>> -               ret = btrfs_truncate_page(inode, offset + len, 0, 1);
>> -               if (ret) {
>> -                       mutex_unlock(&inode->i_mutex);
>> -                       return ret;
>> +       /* Check the aligned pages after the first unaligned page,
>> +        * if offset != orig_start, which means the first unaligned page
>> +        * including serveral following pages are already in holes,
>> +        * the extra check can be skipped */
>> +       if (offset == orig_start) {
>> +               /* after truncate page, check hole again */
>> +               len = offset + len - lockstart;
>> +               offset = lockstart;
>> +               ret = find_first_non_hole(inode, &offset, &len);
>> +               if (ret < 0)
>> +                       goto out_only_mutex;
>> +               if (ret && !len) {
>> +                       ret = 0;
>> +                       goto out_only_mutex;
>> +               }
>> +               lockstart = offset;
>> +       }
>> +
>> +       /* Check the tail unaligned part is in a hole */
>> +       tail_start = lockend + 1;
>> +       tail_len = offset + len - tail_start;
>> +       if (tail_len) {
>> +               ret = find_first_non_hole(inode, &tail_start, &tail_len);
>> +               if (unlikely(ret < 0))
>> +                       goto out_only_mutex;
>> +               if (!ret) {
>> +                       /* zero the front end of the last page */
>> +                       if (tail_start + tail_len < ino_size) {
>> +                               ret = btrfs_truncate_page(inode,
>> +                                               tail_start + tail_len, 0, 1);
>> +                               if (ret)
>> +                                       goto out_only_mutex;
>> +                               }
> Nitpick, this last } is not aligned (same indentation level) with its
> matching {.
>
>>                  }
>>          }
>>
>> @@ -2299,6 +2372,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>>          BUG_ON(ret);
>>          trans->block_rsv = rsv;
>>
>> +       cur_offset = lockstart;
>> +       len = lockend - cur_offset;
>>          while (cur_offset < lockend) {
>>                  ret = __btrfs_drop_extents(trans, root, inode, path,
>>                                             cur_offset, lockend + 1,
>> @@ -2339,6 +2414,14 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>>                                                rsv, min_size);
>>                  BUG_ON(ret);    /* shouldn't happen */
>>                  trans->block_rsv = rsv;
>> +
>> +               ret = find_first_non_hole(inode, &cur_offset, &len);
>> +               if (unlikely(ret < 0))
>> +                       break;
>> +               if (ret && !len) {
>> +                       ret = 0;
>> +                       break;
>> +               }
>>          }
>>
>>          if (ret) {
>> @@ -2372,6 +2455,7 @@ out_free:
>>   out:
>>          unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
>>                               &cached_state, GFP_NOFS);
>> +out_only_mutex:
>>          mutex_unlock(&inode->i_mutex);
>>          if (ret && !err)
>>                  err = ret;
>> --
>> 1.9.3
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
>


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] btrfs: Avoid trucating page or punching hole in a already existed hole.
  2014-05-30  7:16 [PATCH] btrfs: Avoid trucating page or punching hole in a already existed hole Qu Wenruo
  2014-08-06 18:58 ` Filipe David Manana
@ 2014-08-27  8:19 ` Liu Bo
  2014-08-27  8:41   ` Filipe David Manana
  1 sibling, 1 reply; 7+ messages in thread
From: Liu Bo @ 2014-08-27  8:19 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: linux-btrfs

On Fri, May 30, 2014 at 03:16:10PM +0800, Qu Wenruo wrote:
> btrfs_punch_hole() will truncate unaligned pages or punch hole on a
> already existed hole.
> This will cause unneeded zero page or holes splitting the original huge
> hole.
> 
> This patch will skip already existed holes before any page truncating or
> hole punching.
> 
> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
> ---
>  fs/btrfs/file.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 98 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index ae6af07..93915d1 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -2168,6 +2168,37 @@ out:
>  	return 0;
>  }
>  
> +/*
> + * Find a hole extent on given inode and change start/len to the end of hole
> + * extent.(hole/vacuum extent whose em->start <= start &&
> + *	   em->start + em->len > start)
> + * When a hole extent is found, return 1 and modify start/len.
> + */
> +static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
> +{
> +	struct extent_map *em;
> +	int ret = 0;
> +
> +	em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0);
> +	if (IS_ERR_OR_NULL(em)) {
> +		if (!em)
> +			ret = -ENOMEM;
> +		else
> +			ret = PTR_ERR(em);
> +		return ret;
> +	}
> +
> +	/* Hole or vacuum extent(only exists in no-hole mode) */
> +	if (em->block_start == EXTENT_MAP_HOLE) {
> +		ret = 1;
> +		*len = em->start + em->len > *start + *len ?
> +		       0 : *start + *len - em->start - em->len;
> +		*start = em->start + em->len;
> +	}
> +	free_extent_map(em);
> +	return ret;
> +}
> +
>  static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>  {
>  	struct btrfs_root *root = BTRFS_I(inode)->root;
> @@ -2175,17 +2206,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>  	struct btrfs_path *path;
>  	struct btrfs_block_rsv *rsv;
>  	struct btrfs_trans_handle *trans;
> -	u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
> -	u64 lockend = round_down(offset + len,
> -				 BTRFS_I(inode)->root->sectorsize) - 1;
> -	u64 cur_offset = lockstart;
> +	u64 lockstart;
> +	u64 lockend;
> +	u64 tail_start;
> +	u64 tail_len;
> +	u64 orig_start = offset;
> +	u64 cur_offset;
>  	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
>  	u64 drop_end;
>  	int ret = 0;
>  	int err = 0;
>  	int rsv_count;
> -	bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
> -			  ((offset + len - 1) >> PAGE_CACHE_SHIFT));
> +	bool same_page;
>  	bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
>  	u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
>  
> @@ -2194,6 +2226,21 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>  		return ret;
>  
>  	mutex_lock(&inode->i_mutex);
> +	ret = find_first_non_hole(inode, &offset, &len);
> +	if (ret < 0)
> +		goto out_only_mutex;
> +	if (ret && !len) {
> +		/* Already in a large hole */
> +		ret = 0;
> +		goto out_only_mutex;
> +	}
> +
> +	lockstart = round_up(offset , BTRFS_I(inode)->root->sectorsize);
> +	lockend = round_down(offset + len,
> +			     BTRFS_I(inode)->root->sectorsize) - 1;

Why do we round_up lockstart but round_down lockend?

For [0,4095], then lockstart is 4096 and lockend is (u64)-1, any thoughts?

thanks,
-liubo

> +	same_page = ((offset >> PAGE_CACHE_SHIFT) ==
> +		    ((offset + len - 1) >> PAGE_CACHE_SHIFT));
> +
>  	/*
>  	 * We needn't truncate any page which is beyond the end of the file
>  	 * because we are sure there is no data there.
> @@ -2205,8 +2252,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>  	if (same_page && len < PAGE_CACHE_SIZE) {
>  		if (offset < ino_size)
>  			ret = btrfs_truncate_page(inode, offset, len, 0);
> -		mutex_unlock(&inode->i_mutex);
> -		return ret;
> +		goto out_only_mutex;
>  	}
>  
>  	/* zero back part of the first page */
> @@ -2218,12 +2264,39 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>  		}
>  	}
>  
> -	/* zero the front end of the last page */
> -	if (offset + len < ino_size) {
> -		ret = btrfs_truncate_page(inode, offset + len, 0, 1);
> -		if (ret) {
> -			mutex_unlock(&inode->i_mutex);
> -			return ret;
> +	/* Check the aligned pages after the first unaligned page,
> +	 * if offset != orig_start, which means the first unaligned page
> +	 * including serveral following pages are already in holes,
> +	 * the extra check can be skipped */
> +	if (offset == orig_start) {
> +		/* after truncate page, check hole again */
> +		len = offset + len - lockstart;
> +		offset = lockstart;
> +		ret = find_first_non_hole(inode, &offset, &len);
> +		if (ret < 0)
> +			goto out_only_mutex;
> +		if (ret && !len) {
> +			ret = 0;
> +			goto out_only_mutex;
> +		}
> +		lockstart = offset;
> +	}
> +
> +	/* Check the tail unaligned part is in a hole */
> +	tail_start = lockend + 1;
> +	tail_len = offset + len - tail_start;
> +	if (tail_len) {
> +		ret = find_first_non_hole(inode, &tail_start, &tail_len);
> +		if (unlikely(ret < 0))
> +			goto out_only_mutex;
> +		if (!ret) {
> +			/* zero the front end of the last page */
> +			if (tail_start + tail_len < ino_size) {
> +				ret = btrfs_truncate_page(inode,
> +						tail_start + tail_len, 0, 1);
> +				if (ret)
> +					goto out_only_mutex;
> +				}
>  		}
>  	}
>  
> @@ -2299,6 +2372,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>  	BUG_ON(ret);
>  	trans->block_rsv = rsv;
>  
> +	cur_offset = lockstart;
> +	len = lockend - cur_offset;
>  	while (cur_offset < lockend) {
>  		ret = __btrfs_drop_extents(trans, root, inode, path,
>  					   cur_offset, lockend + 1,
> @@ -2339,6 +2414,14 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>  					      rsv, min_size);
>  		BUG_ON(ret);	/* shouldn't happen */
>  		trans->block_rsv = rsv;
> +
> +		ret = find_first_non_hole(inode, &cur_offset, &len);
> +		if (unlikely(ret < 0))
> +			break;
> +		if (ret && !len) {
> +			ret = 0;
> +			break;
> +		}
>  	}
>  
>  	if (ret) {
> @@ -2372,6 +2455,7 @@ out_free:
>  out:
>  	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
>  			     &cached_state, GFP_NOFS);
> +out_only_mutex:
>  	mutex_unlock(&inode->i_mutex);
>  	if (ret && !err)
>  		err = ret;
> -- 
> 1.9.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] btrfs: Avoid trucating page or punching hole in a already existed hole.
  2014-08-27  8:19 ` Liu Bo
@ 2014-08-27  8:41   ` Filipe David Manana
  2014-08-27 10:34     ` Liu Bo
  0 siblings, 1 reply; 7+ messages in thread
From: Filipe David Manana @ 2014-08-27  8:41 UTC (permalink / raw)
  To: Liu Bo; +Cc: Qu Wenruo, linux-btrfs

On Wed, Aug 27, 2014 at 9:19 AM, Liu Bo <bo.li.liu@oracle.com> wrote:
> On Fri, May 30, 2014 at 03:16:10PM +0800, Qu Wenruo wrote:
>> btrfs_punch_hole() will truncate unaligned pages or punch hole on a
>> already existed hole.
>> This will cause unneeded zero page or holes splitting the original huge
>> hole.
>>
>> This patch will skip already existed holes before any page truncating or
>> hole punching.
>>
>> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
>> ---
>>  fs/btrfs/file.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++-------
>>  1 file changed, 98 insertions(+), 14 deletions(-)
>>
>> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
>> index ae6af07..93915d1 100644
>> --- a/fs/btrfs/file.c
>> +++ b/fs/btrfs/file.c
>> @@ -2168,6 +2168,37 @@ out:
>>       return 0;
>>  }
>>
>> +/*
>> + * Find a hole extent on given inode and change start/len to the end of hole
>> + * extent.(hole/vacuum extent whose em->start <= start &&
>> + *      em->start + em->len > start)
>> + * When a hole extent is found, return 1 and modify start/len.
>> + */
>> +static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
>> +{
>> +     struct extent_map *em;
>> +     int ret = 0;
>> +
>> +     em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0);
>> +     if (IS_ERR_OR_NULL(em)) {
>> +             if (!em)
>> +                     ret = -ENOMEM;
>> +             else
>> +                     ret = PTR_ERR(em);
>> +             return ret;
>> +     }
>> +
>> +     /* Hole or vacuum extent(only exists in no-hole mode) */
>> +     if (em->block_start == EXTENT_MAP_HOLE) {
>> +             ret = 1;
>> +             *len = em->start + em->len > *start + *len ?
>> +                    0 : *start + *len - em->start - em->len;
>> +             *start = em->start + em->len;
>> +     }
>> +     free_extent_map(em);
>> +     return ret;
>> +}
>> +
>>  static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>>  {
>>       struct btrfs_root *root = BTRFS_I(inode)->root;
>> @@ -2175,17 +2206,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>>       struct btrfs_path *path;
>>       struct btrfs_block_rsv *rsv;
>>       struct btrfs_trans_handle *trans;
>> -     u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
>> -     u64 lockend = round_down(offset + len,
>> -                              BTRFS_I(inode)->root->sectorsize) - 1;
>> -     u64 cur_offset = lockstart;
>> +     u64 lockstart;
>> +     u64 lockend;
>> +     u64 tail_start;
>> +     u64 tail_len;
>> +     u64 orig_start = offset;
>> +     u64 cur_offset;
>>       u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
>>       u64 drop_end;
>>       int ret = 0;
>>       int err = 0;
>>       int rsv_count;
>> -     bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
>> -                       ((offset + len - 1) >> PAGE_CACHE_SHIFT));
>> +     bool same_page;
>>       bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
>>       u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
>>
>> @@ -2194,6 +2226,21 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>>               return ret;
>>
>>       mutex_lock(&inode->i_mutex);
>> +     ret = find_first_non_hole(inode, &offset, &len);
>> +     if (ret < 0)
>> +             goto out_only_mutex;
>> +     if (ret && !len) {
>> +             /* Already in a large hole */
>> +             ret = 0;
>> +             goto out_only_mutex;
>> +     }
>> +
>> +     lockstart = round_up(offset , BTRFS_I(inode)->root->sectorsize);
>> +     lockend = round_down(offset + len,
>> +                          BTRFS_I(inode)->root->sectorsize) - 1;
>
> Why do we round_up lockstart but round_down lockend?
>
> For [0,4095], then lockstart is 4096 and lockend is (u64)-1, any thoughts?

Seems odd, but is it a problem for that case?
The same_page check below makes us return without locking the range in
the iotree using the computed values for lockstart and lockend.

>
> thanks,
> -liubo
>
>> +     same_page = ((offset >> PAGE_CACHE_SHIFT) ==
>> +                 ((offset + len - 1) >> PAGE_CACHE_SHIFT));
>> +
>>       /*
>>        * We needn't truncate any page which is beyond the end of the file
>>        * because we are sure there is no data there.
>> @@ -2205,8 +2252,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>>       if (same_page && len < PAGE_CACHE_SIZE) {
>>               if (offset < ino_size)
>>                       ret = btrfs_truncate_page(inode, offset, len, 0);
>> -             mutex_unlock(&inode->i_mutex);
>> -             return ret;
>> +             goto out_only_mutex;
>>       }
>>
>>       /* zero back part of the first page */
>> @@ -2218,12 +2264,39 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>>               }
>>       }
>>
>> -     /* zero the front end of the last page */
>> -     if (offset + len < ino_size) {
>> -             ret = btrfs_truncate_page(inode, offset + len, 0, 1);
>> -             if (ret) {
>> -                     mutex_unlock(&inode->i_mutex);
>> -                     return ret;
>> +     /* Check the aligned pages after the first unaligned page,
>> +      * if offset != orig_start, which means the first unaligned page
>> +      * including serveral following pages are already in holes,
>> +      * the extra check can be skipped */
>> +     if (offset == orig_start) {
>> +             /* after truncate page, check hole again */
>> +             len = offset + len - lockstart;
>> +             offset = lockstart;
>> +             ret = find_first_non_hole(inode, &offset, &len);
>> +             if (ret < 0)
>> +                     goto out_only_mutex;
>> +             if (ret && !len) {
>> +                     ret = 0;
>> +                     goto out_only_mutex;
>> +             }
>> +             lockstart = offset;
>> +     }
>> +
>> +     /* Check the tail unaligned part is in a hole */
>> +     tail_start = lockend + 1;
>> +     tail_len = offset + len - tail_start;
>> +     if (tail_len) {
>> +             ret = find_first_non_hole(inode, &tail_start, &tail_len);
>> +             if (unlikely(ret < 0))
>> +                     goto out_only_mutex;
>> +             if (!ret) {
>> +                     /* zero the front end of the last page */
>> +                     if (tail_start + tail_len < ino_size) {
>> +                             ret = btrfs_truncate_page(inode,
>> +                                             tail_start + tail_len, 0, 1);
>> +                             if (ret)
>> +                                     goto out_only_mutex;
>> +                             }
>>               }
>>       }
>>
>> @@ -2299,6 +2372,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>>       BUG_ON(ret);
>>       trans->block_rsv = rsv;
>>
>> +     cur_offset = lockstart;
>> +     len = lockend - cur_offset;
>>       while (cur_offset < lockend) {
>>               ret = __btrfs_drop_extents(trans, root, inode, path,
>>                                          cur_offset, lockend + 1,
>> @@ -2339,6 +2414,14 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>>                                             rsv, min_size);
>>               BUG_ON(ret);    /* shouldn't happen */
>>               trans->block_rsv = rsv;
>> +
>> +             ret = find_first_non_hole(inode, &cur_offset, &len);
>> +             if (unlikely(ret < 0))
>> +                     break;
>> +             if (ret && !len) {
>> +                     ret = 0;
>> +                     break;
>> +             }
>>       }
>>
>>       if (ret) {
>> @@ -2372,6 +2455,7 @@ out_free:
>>  out:
>>       unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
>>                            &cached_state, GFP_NOFS);
>> +out_only_mutex:
>>       mutex_unlock(&inode->i_mutex);
>>       if (ret && !err)
>>               err = ret;
>> --
>> 1.9.3
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Filipe David Manana,

"Reasonable men adapt themselves to the world.
 Unreasonable men adapt the world to themselves.
 That's why all progress depends on unreasonable men."

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] btrfs: Avoid trucating page or punching hole in a already existed hole.
  2014-08-27  8:41   ` Filipe David Manana
@ 2014-08-27 10:34     ` Liu Bo
  2014-09-01  1:06       ` Qu Wenruo
  0 siblings, 1 reply; 7+ messages in thread
From: Liu Bo @ 2014-08-27 10:34 UTC (permalink / raw)
  To: Filipe David Manana; +Cc: Qu Wenruo, linux-btrfs

On Wed, Aug 27, 2014 at 09:41:14AM +0100, Filipe David Manana wrote:
> On Wed, Aug 27, 2014 at 9:19 AM, Liu Bo <bo.li.liu@oracle.com> wrote:
> > On Fri, May 30, 2014 at 03:16:10PM +0800, Qu Wenruo wrote:
> >> btrfs_punch_hole() will truncate unaligned pages or punch hole on a
> >> already existed hole.
> >> This will cause unneeded zero page or holes splitting the original huge
> >> hole.
> >>
> >> This patch will skip already existed holes before any page truncating or
> >> hole punching.
> >>
> >> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
> >> ---
> >>  fs/btrfs/file.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++-------
> >>  1 file changed, 98 insertions(+), 14 deletions(-)
> >>
> >> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> >> index ae6af07..93915d1 100644
> >> --- a/fs/btrfs/file.c
> >> +++ b/fs/btrfs/file.c
> >> @@ -2168,6 +2168,37 @@ out:
> >>       return 0;
> >>  }
> >>
> >> +/*
> >> + * Find a hole extent on given inode and change start/len to the end of hole
> >> + * extent.(hole/vacuum extent whose em->start <= start &&
> >> + *      em->start + em->len > start)
> >> + * When a hole extent is found, return 1 and modify start/len.
> >> + */
> >> +static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
> >> +{
> >> +     struct extent_map *em;
> >> +     int ret = 0;
> >> +
> >> +     em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0);
> >> +     if (IS_ERR_OR_NULL(em)) {
> >> +             if (!em)
> >> +                     ret = -ENOMEM;
> >> +             else
> >> +                     ret = PTR_ERR(em);
> >> +             return ret;
> >> +     }
> >> +
> >> +     /* Hole or vacuum extent(only exists in no-hole mode) */
> >> +     if (em->block_start == EXTENT_MAP_HOLE) {
> >> +             ret = 1;
> >> +             *len = em->start + em->len > *start + *len ?
> >> +                    0 : *start + *len - em->start - em->len;
> >> +             *start = em->start + em->len;
> >> +     }
> >> +     free_extent_map(em);
> >> +     return ret;
> >> +}
> >> +
> >>  static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
> >>  {
> >>       struct btrfs_root *root = BTRFS_I(inode)->root;
> >> @@ -2175,17 +2206,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
> >>       struct btrfs_path *path;
> >>       struct btrfs_block_rsv *rsv;
> >>       struct btrfs_trans_handle *trans;
> >> -     u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
> >> -     u64 lockend = round_down(offset + len,
> >> -                              BTRFS_I(inode)->root->sectorsize) - 1;
> >> -     u64 cur_offset = lockstart;
> >> +     u64 lockstart;
> >> +     u64 lockend;
> >> +     u64 tail_start;
> >> +     u64 tail_len;
> >> +     u64 orig_start = offset;
> >> +     u64 cur_offset;
> >>       u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
> >>       u64 drop_end;
> >>       int ret = 0;
> >>       int err = 0;
> >>       int rsv_count;
> >> -     bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
> >> -                       ((offset + len - 1) >> PAGE_CACHE_SHIFT));
> >> +     bool same_page;
> >>       bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
> >>       u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
> >>
> >> @@ -2194,6 +2226,21 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
> >>               return ret;
> >>
> >>       mutex_lock(&inode->i_mutex);
> >> +     ret = find_first_non_hole(inode, &offset, &len);
> >> +     if (ret < 0)
> >> +             goto out_only_mutex;
> >> +     if (ret && !len) {
> >> +             /* Already in a large hole */
> >> +             ret = 0;
> >> +             goto out_only_mutex;
> >> +     }
> >> +
> >> +     lockstart = round_up(offset , BTRFS_I(inode)->root->sectorsize);
> >> +     lockend = round_down(offset + len,
> >> +                          BTRFS_I(inode)->root->sectorsize) - 1;
> >
> > Why do we round_up lockstart but round_down lockend?
> >
> > For [0,4095], then lockstart is 4096 and lockend is (u64)-1, any thoughts?
> 
> Seems odd, but is it a problem for that case?
> The same_page check below makes us return without locking the range in
> the iotree using the computed values for lockstart and lockend.

No problems so far luckily, but it's odd.

thanks,
-liubo

> 
> >
> > thanks,
> > -liubo
> >
> >> +     same_page = ((offset >> PAGE_CACHE_SHIFT) ==
> >> +                 ((offset + len - 1) >> PAGE_CACHE_SHIFT));
> >> +
> >>       /*
> >>        * We needn't truncate any page which is beyond the end of the file
> >>        * because we are sure there is no data there.
> >> @@ -2205,8 +2252,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
> >>       if (same_page && len < PAGE_CACHE_SIZE) {
> >>               if (offset < ino_size)
> >>                       ret = btrfs_truncate_page(inode, offset, len, 0);
> >> -             mutex_unlock(&inode->i_mutex);
> >> -             return ret;
> >> +             goto out_only_mutex;
> >>       }
> >>
> >>       /* zero back part of the first page */
> >> @@ -2218,12 +2264,39 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
> >>               }
> >>       }
> >>
> >> -     /* zero the front end of the last page */
> >> -     if (offset + len < ino_size) {
> >> -             ret = btrfs_truncate_page(inode, offset + len, 0, 1);
> >> -             if (ret) {
> >> -                     mutex_unlock(&inode->i_mutex);
> >> -                     return ret;
> >> +     /* Check the aligned pages after the first unaligned page,
> >> +      * if offset != orig_start, which means the first unaligned page
> >> +      * including serveral following pages are already in holes,
> >> +      * the extra check can be skipped */
> >> +     if (offset == orig_start) {
> >> +             /* after truncate page, check hole again */
> >> +             len = offset + len - lockstart;
> >> +             offset = lockstart;
> >> +             ret = find_first_non_hole(inode, &offset, &len);
> >> +             if (ret < 0)
> >> +                     goto out_only_mutex;
> >> +             if (ret && !len) {
> >> +                     ret = 0;
> >> +                     goto out_only_mutex;
> >> +             }
> >> +             lockstart = offset;
> >> +     }
> >> +
> >> +     /* Check the tail unaligned part is in a hole */
> >> +     tail_start = lockend + 1;
> >> +     tail_len = offset + len - tail_start;
> >> +     if (tail_len) {
> >> +             ret = find_first_non_hole(inode, &tail_start, &tail_len);
> >> +             if (unlikely(ret < 0))
> >> +                     goto out_only_mutex;
> >> +             if (!ret) {
> >> +                     /* zero the front end of the last page */
> >> +                     if (tail_start + tail_len < ino_size) {
> >> +                             ret = btrfs_truncate_page(inode,
> >> +                                             tail_start + tail_len, 0, 1);
> >> +                             if (ret)
> >> +                                     goto out_only_mutex;
> >> +                             }
> >>               }
> >>       }
> >>
> >> @@ -2299,6 +2372,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
> >>       BUG_ON(ret);
> >>       trans->block_rsv = rsv;
> >>
> >> +     cur_offset = lockstart;
> >> +     len = lockend - cur_offset;
> >>       while (cur_offset < lockend) {
> >>               ret = __btrfs_drop_extents(trans, root, inode, path,
> >>                                          cur_offset, lockend + 1,
> >> @@ -2339,6 +2414,14 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
> >>                                             rsv, min_size);
> >>               BUG_ON(ret);    /* shouldn't happen */
> >>               trans->block_rsv = rsv;
> >> +
> >> +             ret = find_first_non_hole(inode, &cur_offset, &len);
> >> +             if (unlikely(ret < 0))
> >> +                     break;
> >> +             if (ret && !len) {
> >> +                     ret = 0;
> >> +                     break;
> >> +             }
> >>       }
> >>
> >>       if (ret) {
> >> @@ -2372,6 +2455,7 @@ out_free:
> >>  out:
> >>       unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
> >>                            &cached_state, GFP_NOFS);
> >> +out_only_mutex:
> >>       mutex_unlock(&inode->i_mutex);
> >>       if (ret && !err)
> >>               err = ret;
> >> --
> >> 1.9.3
> >>
> >> --
> >> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> >> the body of a message to majordomo@vger.kernel.org
> >> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
> 
> -- 
> Filipe David Manana,
> 
> "Reasonable men adapt themselves to the world.
>  Unreasonable men adapt the world to themselves.
>  That's why all progress depends on unreasonable men."

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] btrfs: Avoid trucating page or punching hole in a already existed hole.
  2014-08-27 10:34     ` Liu Bo
@ 2014-09-01  1:06       ` Qu Wenruo
  0 siblings, 0 replies; 7+ messages in thread
From: Qu Wenruo @ 2014-09-01  1:06 UTC (permalink / raw)
  To: bo.li.liu, Filipe David Manana; +Cc: linux-btrfs


-------- Original Message --------
Subject: Re: [PATCH] btrfs: Avoid trucating page or punching hole in a 
already existed hole.
From: Liu Bo <bo.li.liu@oracle.com>
To: Filipe David Manana <fdmanana@gmail.com>
Date: 2014年08月27日 18:34
> [snip]
>>> Why do we round_up lockstart but round_down lockend?
>>>
>>> For [0,4095], then lockstart is 4096 and lockend is (u64)-1, any thoughts?
>> Seems odd, but is it a problem for that case?
>> The same_page check below makes us return without locking the range in
>> the iotree using the computed values for lockstart and lockend.
> No problems so far luckily, but it's odd.
>
> thanks,
> -liubo
Sorry for the late replay. Off for serval days....

IMO, round up the start and round down the end is the correct way.

As shown in the following case(and is the most common case).
0        4K        8K        12K        16K
|    Data    |    Data    |    Data    |    Data    |
     |-------------Hole range--------|
     |-Zero--|--Hole extent--|-Zero--|

As the graph shows, the hole extent is the range aligned to page size and
*inside the hoped hole range*.

So, I round up start(offset) and round down end(offset + len) to get the
page aligned range.

Also, as mentioned by Filipe, for range in same page, it will be handled
specially without hitting the normal routine.

Thanks,
Qu

>>> thanks,
>>> -liubo
>>>
>>>> +     same_page = ((offset >> PAGE_CACHE_SHIFT) ==
>>>> +                 ((offset + len - 1) >> PAGE_CACHE_SHIFT));
>>>> +
>>>>        /*
>>>>         * We needn't truncate any page which is beyond the end of the file
>>>>         * because we are sure there is no data there.
>>>> @@ -2205,8 +2252,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>>>>        if (same_page && len < PAGE_CACHE_SIZE) {
>>>>                if (offset < ino_size)
>>>>                        ret = btrfs_truncate_page(inode, offset, len, 0);
>>>> -             mutex_unlock(&inode->i_mutex);
>>>> -             return ret;
>>>> +             goto out_only_mutex;
>>>>        }
>>>>
>>>>        /* zero back part of the first page */
>>>> @@ -2218,12 +2264,39 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>>>>                }
>>>>        }
>>>>
>>>> -     /* zero the front end of the last page */
>>>> -     if (offset + len < ino_size) {
>>>> -             ret = btrfs_truncate_page(inode, offset + len, 0, 1);
>>>> -             if (ret) {
>>>> -                     mutex_unlock(&inode->i_mutex);
>>>> -                     return ret;
>>>> +     /* Check the aligned pages after the first unaligned page,
>>>> +      * if offset != orig_start, which means the first unaligned page
>>>> +      * including serveral following pages are already in holes,
>>>> +      * the extra check can be skipped */
>>>> +     if (offset == orig_start) {
>>>> +             /* after truncate page, check hole again */
>>>> +             len = offset + len - lockstart;
>>>> +             offset = lockstart;
>>>> +             ret = find_first_non_hole(inode, &offset, &len);
>>>> +             if (ret < 0)
>>>> +                     goto out_only_mutex;
>>>> +             if (ret && !len) {
>>>> +                     ret = 0;
>>>> +                     goto out_only_mutex;
>>>> +             }
>>>> +             lockstart = offset;
>>>> +     }
>>>> +
>>>> +     /* Check the tail unaligned part is in a hole */
>>>> +     tail_start = lockend + 1;
>>>> +     tail_len = offset + len - tail_start;
>>>> +     if (tail_len) {
>>>> +             ret = find_first_non_hole(inode, &tail_start, &tail_len);
>>>> +             if (unlikely(ret < 0))
>>>> +                     goto out_only_mutex;
>>>> +             if (!ret) {
>>>> +                     /* zero the front end of the last page */
>>>> +                     if (tail_start + tail_len < ino_size) {
>>>> +                             ret = btrfs_truncate_page(inode,
>>>> +                                             tail_start + tail_len, 0, 1);
>>>> +                             if (ret)
>>>> +                                     goto out_only_mutex;
>>>> +                             }
>>>>                }
>>>>        }
>>>>
>>>> @@ -2299,6 +2372,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>>>>        BUG_ON(ret);
>>>>        trans->block_rsv = rsv;
>>>>
>>>> +     cur_offset = lockstart;
>>>> +     len = lockend - cur_offset;
>>>>        while (cur_offset < lockend) {
>>>>                ret = __btrfs_drop_extents(trans, root, inode, path,
>>>>                                           cur_offset, lockend + 1,
>>>> @@ -2339,6 +2414,14 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>>>>                                              rsv, min_size);
>>>>                BUG_ON(ret);    /* shouldn't happen */
>>>>                trans->block_rsv = rsv;
>>>> +
>>>> +             ret = find_first_non_hole(inode, &cur_offset, &len);
>>>> +             if (unlikely(ret < 0))
>>>> +                     break;
>>>> +             if (ret && !len) {
>>>> +                     ret = 0;
>>>> +                     break;
>>>> +             }
>>>>        }
>>>>
>>>>        if (ret) {
>>>> @@ -2372,6 +2455,7 @@ out_free:
>>>>   out:
>>>>        unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
>>>>                             &cached_state, GFP_NOFS);
>>>> +out_only_mutex:
>>>>        mutex_unlock(&inode->i_mutex);
>>>>        if (ret && !err)
>>>>                err = ret;
>>>> --
>>>> 1.9.3
>>>>
>>>> --
>>>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>>>> the body of a message to majordomo@vger.kernel.org
>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
>>
>> -- 
>> Filipe David Manana,
>>
>> "Reasonable men adapt themselves to the world.
>>   Unreasonable men adapt the world to themselves.
>>   That's why all progress depends on unreasonable men."


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2014-09-01  1:06 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-05-30  7:16 [PATCH] btrfs: Avoid trucating page or punching hole in a already existed hole Qu Wenruo
2014-08-06 18:58 ` Filipe David Manana
2014-08-07  0:41   ` Qu Wenruo
2014-08-27  8:19 ` Liu Bo
2014-08-27  8:41   ` Filipe David Manana
2014-08-27 10:34     ` Liu Bo
2014-09-01  1:06       ` Qu Wenruo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.