All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] ext4: Do not zeroout uninitialized extents beyond i_size
@ 2010-04-09 17:22 Dmitry Monakhov
  2010-04-14 21:28 ` tytso
  2010-04-28  4:40 ` Aneesh Kumar K. V
  0 siblings, 2 replies; 11+ messages in thread
From: Dmitry Monakhov @ 2010-04-09 17:22 UTC (permalink / raw)
  To: linux-ext4; +Cc: aneesh.kumar, tytso, Dmitry Monakhov

Zerrout trick allow us to optimize cases where it is more reasonable
to explicitly zeroout extent and mark it as initialized instead of
splitting to several small ones.
But this optimization is not acceptable is extent is beyond i_size
Because it is not possible to have initialized blocks after i_size.
Fsck treat this as incorrect inode size.

BUG# 15742

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
 fs/ext4/extents.c |   49 ++++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 8bdee27..bdf94f3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2631,11 +2631,15 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	struct ext4_extent *ex2 = NULL;
 	struct ext4_extent *ex3 = NULL;
 	struct ext4_extent_header *eh;
-	ext4_lblk_t ee_block;
+	ext4_lblk_t ee_block, eof_block;
 	unsigned int allocated, ee_len, depth;
 	ext4_fsblk_t newblock;
 	int err = 0;
 	int ret = 0;
+	int may_zeroout;
+	ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
+		"block %llu, max_blocks %u ",
+		inode->i_ino, (unsigned long long)iblock, max_blocks);
 
 	depth = ext_depth(inode);
 	eh = path[depth].p_hdr;
@@ -2644,16 +2648,25 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	ee_len = ext4_ext_get_actual_len(ex);
 	allocated = ee_len - (iblock - ee_block);
 	newblock = iblock - ee_block + ext_pblock(ex);
+	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+		inode->i_sb->s_blocksize_bits;
 	ex2 = ex;
 	orig_ex.ee_block = ex->ee_block;
 	orig_ex.ee_len   = cpu_to_le16(ee_len);
 	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
 
+	/*
+	 * It is safe to convert extent to initialized via explicit
+	 * zeroout only if extent is fully insde i_size or new_size.
+	 */
+	may_zeroout = ee_block + ee_len <= iblock + max_blocks ||
+		ee_block + ee_len <= eof_block;
+
 	err = ext4_ext_get_access(handle, inode, path + depth);
 	if (err)
 		goto out;
 	/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
-	if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
+	if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) {
 		err =  ext4_ext_zeroout(inode, &orig_ex);
 		if (err)
 			goto fix_extent_len;
@@ -2684,7 +2697,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	if (allocated > max_blocks) {
 		unsigned int newdepth;
 		/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
-		if (allocated <= EXT4_EXT_ZERO_LEN) {
+		if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) {
 			/*
 			 * iblock == ee_block is handled by the zerouout
 			 * at the beginning.
@@ -2760,7 +2773,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		ex3->ee_len = cpu_to_le16(allocated - max_blocks);
 		ext4_ext_mark_uninitialized(ex3);
 		err = ext4_ext_insert_extent(handle, inode, path, ex3, 0);
-		if (err == -ENOSPC) {
+		if (err == -ENOSPC && may_zeroout) {
 			err =  ext4_ext_zeroout(inode, &orig_ex);
 			if (err)
 				goto fix_extent_len;
@@ -2784,8 +2797,11 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		 * update the extent length after successful insert of the
 		 * split extent
 		 */
-		orig_ex.ee_len = cpu_to_le16(ee_len -
-						ext4_ext_get_actual_len(ex3));
+		ee_len -= ext4_ext_get_actual_len(ex3);
+		orig_ex.ee_len = cpu_to_le16(ee_len);
+		may_zeroout = ee_block + ee_len <= iblock + max_blocks ||
+			ee_block + ee_len <= eof_block;
+
 		depth = newdepth;
 		ext4_ext_drop_refs(path);
 		path = ext4_ext_find_extent(inode, iblock, path);
@@ -2809,7 +2825,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		 * otherwise give the extent a chance to merge to left
 		 */
 		if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
-							iblock != ee_block) {
+			iblock != ee_block && may_zeroout) {
 			err =  ext4_ext_zeroout(inode, &orig_ex);
 			if (err)
 				goto fix_extent_len;
@@ -2878,7 +2894,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	goto out;
 insert:
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, 0);
-	if (err == -ENOSPC) {
+	if (err == -ENOSPC && may_zeroout) {
 		err =  ext4_ext_zeroout(inode, &orig_ex);
 		if (err)
 			goto fix_extent_len;
@@ -2938,14 +2954,16 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	struct ext4_extent *ex2 = NULL;
 	struct ext4_extent *ex3 = NULL;
 	struct ext4_extent_header *eh;
-	ext4_lblk_t ee_block;
+	ext4_lblk_t ee_block, eof_block;
 	unsigned int allocated, ee_len, depth;
 	ext4_fsblk_t newblock;
 	int err = 0;
+	int may_zeroout;
 
 	ext_debug("ext4_split_unwritten_extents: inode %lu,"
 		  "iblock %llu, max_blocks %u\n", inode->i_ino,
 		  (unsigned long long)iblock, max_blocks);
+
 	depth = ext_depth(inode);
 	eh = path[depth].p_hdr;
 	ex = path[depth].p_ext;
@@ -2953,10 +2971,19 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	ee_len = ext4_ext_get_actual_len(ex);
 	allocated = ee_len - (iblock - ee_block);
 	newblock = iblock - ee_block + ext_pblock(ex);
+	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+		inode->i_sb->s_blocksize_bits;
+
 	ex2 = ex;
 	orig_ex.ee_block = ex->ee_block;
 	orig_ex.ee_len   = cpu_to_le16(ee_len);
 	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
+	/*
+	 * It is safe to convert extent to initialized via explicit
+	 * zeroout only if extent is fully insde i_size or new_size.
+	 */
+	may_zeroout = ee_block + ee_len <= iblock + max_blocks ||
+		ee_block + ee_len <= eof_block;
 
 	/*
  	 * If the uninitialized extent begins at the same logical
@@ -2992,7 +3019,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 		ex3->ee_len = cpu_to_le16(allocated - max_blocks);
 		ext4_ext_mark_uninitialized(ex3);
 		err = ext4_ext_insert_extent(handle, inode, path, ex3, flags);
-		if (err == -ENOSPC) {
+		if (err == -ENOSPC && may_zeroout) {
 			err =  ext4_ext_zeroout(inode, &orig_ex);
 			if (err)
 				goto fix_extent_len;
@@ -3063,7 +3090,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	goto out;
 insert:
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
-	if (err == -ENOSPC) {
+	if (err == -ENOSPC && may_zeroout) {
 		err =  ext4_ext_zeroout(inode, &orig_ex);
 		if (err)
 			goto fix_extent_len;
-- 
1.6.6.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] ext4: Do not zeroout uninitialized extents beyond i_size
  2010-04-09 17:22 [PATCH] ext4: Do not zeroout uninitialized extents beyond i_size Dmitry Monakhov
@ 2010-04-14 21:28 ` tytso
  2010-04-15  9:20   ` Dmitry Monakhov
  2010-04-28  4:40 ` Aneesh Kumar K. V
  1 sibling, 1 reply; 11+ messages in thread
From: tytso @ 2010-04-14 21:28 UTC (permalink / raw)
  To: Dmitry Monakhov; +Cc: linux-ext4, aneesh.kumar

This is a slightly revised version of the patch where I've made to
keep ext4_ext_convert_to_initialized() and
ext4_split_unwritten_extents() more in sync with each other.  (If you
use "meld" on the these two functions, it's really clear they are very
similar to each other, and really need to be combined --- my plan is
to smash the two functions together and call the result
ext4_prepare_uninit_extent().)

I think it should be functionality equivalent to Dmitriy's original
patch; aside from mostly syntatic/cosmetic changes and spelling fixes,
the only substantive change I made was simplifying the may_zeroout
calculation by changing how eof_block is calculated.  I've run it
through xfstests, and it's passing.

					- Ted

ext4: Do not zeroout uninitialized extents beyond i_size

From: Dmitry Monakhov <dmonakhov@openvz.org>

The extents code will sometimes zero out blocks and mark them as
initialized instead of splitting an extent into several smaller ones.
This optimization however, causes problems if the extent is beyond
i_size because fsck will complain if there are uninitialized blocks
after i_size as this can not be distinguished from an inode that has
an incorrect i_size field.

https://bugzilla.kernel.org/show_bug.cgi?id=15742

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c |   66 ++++++++++++++++++++++++++++++++++++++++-------------
 1 files changed, 50 insertions(+), 16 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 228eeaf..7c0438e 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2631,11 +2631,21 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	struct ext4_extent *ex2 = NULL;
 	struct ext4_extent *ex3 = NULL;
 	struct ext4_extent_header *eh;
-	ext4_lblk_t ee_block;
+	ext4_lblk_t ee_block, eof_block;
 	unsigned int allocated, ee_len, depth;
 	ext4_fsblk_t newblock;
 	int err = 0;
 	int ret = 0;
+	int may_zeroout;
+
+	ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
+		"block %llu, max_blocks %u\n", inode->i_ino,
+		(unsigned long long)iblock, max_blocks);
+
+	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+		inode->i_sb->s_blocksize_bits;
+	if (eof_block < iblock + max_blocks)
+		eof_block = iblock + max_blocks;
 
 	depth = ext_depth(inode);
 	eh = path[depth].p_hdr;
@@ -2644,16 +2654,23 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	ee_len = ext4_ext_get_actual_len(ex);
 	allocated = ee_len - (iblock - ee_block);
 	newblock = iblock - ee_block + ext_pblock(ex);
+
 	ex2 = ex;
 	orig_ex.ee_block = ex->ee_block;
 	orig_ex.ee_len   = cpu_to_le16(ee_len);
 	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
 
+	/*
+	 * It is safe to convert extent to initialized via explicit
+	 * zeroout only if extent is fully insde i_size or new_size.
+	 */
+	may_zeroout = ee_block + ee_len <= eof_block;
+
 	err = ext4_ext_get_access(handle, inode, path + depth);
 	if (err)
 		goto out;
 	/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
-	if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
+	if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) {
 		err =  ext4_ext_zeroout(inode, &orig_ex);
 		if (err)
 			goto fix_extent_len;
@@ -2684,7 +2701,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	if (allocated > max_blocks) {
 		unsigned int newdepth;
 		/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
-		if (allocated <= EXT4_EXT_ZERO_LEN) {
+		if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) {
 			/*
 			 * iblock == ee_block is handled by the zerouout
 			 * at the beginning.
@@ -2760,7 +2777,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		ex3->ee_len = cpu_to_le16(allocated - max_blocks);
 		ext4_ext_mark_uninitialized(ex3);
 		err = ext4_ext_insert_extent(handle, inode, path, ex3, 0);
-		if (err == -ENOSPC) {
+		if (err == -ENOSPC && may_zeroout) {
 			err =  ext4_ext_zeroout(inode, &orig_ex);
 			if (err)
 				goto fix_extent_len;
@@ -2784,8 +2801,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		 * update the extent length after successful insert of the
 		 * split extent
 		 */
-		orig_ex.ee_len = cpu_to_le16(ee_len -
-						ext4_ext_get_actual_len(ex3));
+		ee_len -= ext4_ext_get_actual_len(ex3);
+		orig_ex.ee_len = cpu_to_le16(ee_len);
+		may_zeroout = ee_block + ee_len <= eof_block;
+
 		depth = newdepth;
 		ext4_ext_drop_refs(path);
 		path = ext4_ext_find_extent(inode, iblock, path);
@@ -2809,7 +2828,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		 * otherwise give the extent a chance to merge to left
 		 */
 		if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
-							iblock != ee_block) {
+			iblock != ee_block && may_zeroout) {
 			err =  ext4_ext_zeroout(inode, &orig_ex);
 			if (err)
 				goto fix_extent_len;
@@ -2878,7 +2897,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	goto out;
 insert:
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, 0);
-	if (err == -ENOSPC) {
+	if (err == -ENOSPC && may_zeroout) {
 		err =  ext4_ext_zeroout(inode, &orig_ex);
 		if (err)
 			goto fix_extent_len;
@@ -2938,14 +2957,21 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	struct ext4_extent *ex2 = NULL;
 	struct ext4_extent *ex3 = NULL;
 	struct ext4_extent_header *eh;
-	ext4_lblk_t ee_block;
+	ext4_lblk_t ee_block, eof_block;
 	unsigned int allocated, ee_len, depth;
 	ext4_fsblk_t newblock;
 	int err = 0;
+	int may_zeroout;
+
+	ext_debug("ext4_split_unwritten_extents: inode %lu, logical"
+		"block %llu, max_blocks %u\n", inode->i_ino,
+		(unsigned long long)iblock, max_blocks);
+
+	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+		inode->i_sb->s_blocksize_bits;
+	if (eof_block < iblock + max_blocks)
+		eof_block = iblock + max_blocks;
 
-	ext_debug("ext4_split_unwritten_extents: inode %lu,"
-		  "iblock %llu, max_blocks %u\n", inode->i_ino,
-		  (unsigned long long)iblock, max_blocks);
 	depth = ext_depth(inode);
 	eh = path[depth].p_hdr;
 	ex = path[depth].p_ext;
@@ -2953,12 +2979,19 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	ee_len = ext4_ext_get_actual_len(ex);
 	allocated = ee_len - (iblock - ee_block);
 	newblock = iblock - ee_block + ext_pblock(ex);
+
 	ex2 = ex;
 	orig_ex.ee_block = ex->ee_block;
 	orig_ex.ee_len   = cpu_to_le16(ee_len);
 	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
 
 	/*
+	 * It is safe to convert extent to initialized via explicit
+	 * zeroout only if extent is fully insde i_size or new_size.
+	 */
+	may_zeroout = ee_block + ee_len <= eof_block;
+
+	/*
  	 * If the uninitialized extent begins at the same logical
  	 * block where the write begins, and the write completely
  	 * covers the extent, then we don't need to split it.
@@ -2992,7 +3025,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 		ex3->ee_len = cpu_to_le16(allocated - max_blocks);
 		ext4_ext_mark_uninitialized(ex3);
 		err = ext4_ext_insert_extent(handle, inode, path, ex3, flags);
-		if (err == -ENOSPC) {
+		if (err == -ENOSPC && may_zeroout) {
 			err =  ext4_ext_zeroout(inode, &orig_ex);
 			if (err)
 				goto fix_extent_len;
@@ -3016,8 +3049,9 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 		 * update the extent length after successful insert of the
 		 * split extent
 		 */
-		orig_ex.ee_len = cpu_to_le16(ee_len -
-						ext4_ext_get_actual_len(ex3));
+		ee_len -= ext4_ext_get_actual_len(ex3);
+		orig_ex.ee_len = cpu_to_le16(ee_len);
+
 		depth = newdepth;
 		ext4_ext_drop_refs(path);
 		path = ext4_ext_find_extent(inode, iblock, path);
@@ -3063,7 +3097,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	goto out;
 insert:
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
-	if (err == -ENOSPC) {
+	if (err == -ENOSPC && may_zeroout) {
 		err =  ext4_ext_zeroout(inode, &orig_ex);
 		if (err)
 			goto fix_extent_len;

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] ext4: Do not zeroout uninitialized extents beyond i_size
  2010-04-14 21:28 ` tytso
@ 2010-04-15  9:20   ` Dmitry Monakhov
  2010-04-21 17:45     ` tytso
  0 siblings, 1 reply; 11+ messages in thread
From: Dmitry Monakhov @ 2010-04-15  9:20 UTC (permalink / raw)
  To: tytso; +Cc: linux-ext4, aneesh.kumar

[-- Attachment #1: Type: text/plain, Size: 9107 bytes --]

tytso@mit.edu writes:

> This is a slightly revised version of the patch where I've made to
> keep ext4_ext_convert_to_initialized() and
> ext4_split_unwritten_extents() more in sync with each other.  (If you
> use "meld" on the these two functions, it's really clear they are very
Yepp. Indeed function are almost identical, and sadly i've missed 
may_zeroout condition update in second one :(. see later in the text
> similar to each other, and really need to be combined --- my plan is
> to smash the two functions together and call the result
> ext4_prepare_uninit_extent().)
>
> I think it should be functionality equivalent to Dmitriy's original
> patch; aside from mostly syntatic/cosmetic changes and spelling fixes,
> the only substantive change I made was simplifying the may_zeroout
> calculation by changing how eof_block is calculated.  I've run it
> through xfstests, and it's passing.
Other useful test is to run fsstress with boosted write/fallocate
probability on small(1Gb)  filesystem
xfstests/ltp/fsstress -p10 -z -f mkdir=1 -f creat=1 -f write=10 \
                      -f resrvsp=10 -n999999999 &
sleep 100
killall -9 fsstress
#umount/e2fsck
>
> 					- Ted
>
> ext4: Do not zeroout uninitialized extents beyond i_size
>
> From: Dmitry Monakhov <dmonakhov@openvz.org>
>
> The extents code will sometimes zero out blocks and mark them as
> initialized instead of splitting an extent into several smaller ones.
> This optimization however, causes problems if the extent is beyond
> i_size because fsck will complain if there are uninitialized blocks
> after i_size as this can not be distinguished from an inode that has
> an incorrect i_size field.
>
> https://bugzilla.kernel.org/show_bug.cgi?id=15742
>
> Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
> ---
>  fs/ext4/extents.c |   66 ++++++++++++++++++++++++++++++++++++++++-------------
>  1 files changed, 50 insertions(+), 16 deletions(-)
>
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 228eeaf..7c0438e 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -2631,11 +2631,21 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  	struct ext4_extent *ex2 = NULL;
>  	struct ext4_extent *ex3 = NULL;
>  	struct ext4_extent_header *eh;
> -	ext4_lblk_t ee_block;
> +	ext4_lblk_t ee_block, eof_block;
>  	unsigned int allocated, ee_len, depth;
>  	ext4_fsblk_t newblock;
>  	int err = 0;
>  	int ret = 0;
> +	int may_zeroout;
> +
> +	ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
> +		"block %llu, max_blocks %u\n", inode->i_ino,
> +		(unsigned long long)iblock, max_blocks);
> +
> +	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
> +		inode->i_sb->s_blocksize_bits;
> +	if (eof_block < iblock + max_blocks)
> +		eof_block = iblock + max_blocks;
>  
>  	depth = ext_depth(inode);
>  	eh = path[depth].p_hdr;
> @@ -2644,16 +2654,23 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  	ee_len = ext4_ext_get_actual_len(ex);
>  	allocated = ee_len - (iblock - ee_block);
>  	newblock = iblock - ee_block + ext_pblock(ex);
> +
>  	ex2 = ex;
>  	orig_ex.ee_block = ex->ee_block;
>  	orig_ex.ee_len   = cpu_to_le16(ee_len);
>  	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
>  
> +	/*
> +	 * It is safe to convert extent to initialized via explicit
> +	 * zeroout only if extent is fully insde i_size or new_size.
> +	 */
> +	may_zeroout = ee_block + ee_len <= eof_block;
> +
>  	err = ext4_ext_get_access(handle, inode, path + depth);
>  	if (err)
>  		goto out;
>  	/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
> -	if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
> +	if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) {
>  		err =  ext4_ext_zeroout(inode, &orig_ex);
>  		if (err)
>  			goto fix_extent_len;
> @@ -2684,7 +2701,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  	if (allocated > max_blocks) {
>  		unsigned int newdepth;
>  		/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
> -		if (allocated <= EXT4_EXT_ZERO_LEN) {
> +		if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) {
>  			/*
>  			 * iblock == ee_block is handled by the zerouout
>  			 * at the beginning.
> @@ -2760,7 +2777,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  		ex3->ee_len = cpu_to_le16(allocated - max_blocks);
>  		ext4_ext_mark_uninitialized(ex3);
>  		err = ext4_ext_insert_extent(handle, inode, path, ex3, 0);
> -		if (err == -ENOSPC) {
> +		if (err == -ENOSPC && may_zeroout) {
>  			err =  ext4_ext_zeroout(inode, &orig_ex);
>  			if (err)
>  				goto fix_extent_len;
> @@ -2784,8 +2801,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  		 * update the extent length after successful insert of the
>  		 * split extent
>  		 */
> -		orig_ex.ee_len = cpu_to_le16(ee_len -
> -						ext4_ext_get_actual_len(ex3));
> +		ee_len -= ext4_ext_get_actual_len(ex3);
> +		orig_ex.ee_len = cpu_to_le16(ee_len);
> +		may_zeroout = ee_block + ee_len <= eof_block;
> +
>  		depth = newdepth;
>  		ext4_ext_drop_refs(path);
>  		path = ext4_ext_find_extent(inode, iblock, path);
> @@ -2809,7 +2828,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  		 * otherwise give the extent a chance to merge to left
>  		 */
>  		if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
> -							iblock != ee_block) {
> +			iblock != ee_block && may_zeroout) {
>  			err =  ext4_ext_zeroout(inode, &orig_ex);
>  			if (err)
>  				goto fix_extent_len;
> @@ -2878,7 +2897,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  	goto out;
>  insert:
>  	err = ext4_ext_insert_extent(handle, inode, path, &newex, 0);
> -	if (err == -ENOSPC) {
> +	if (err == -ENOSPC && may_zeroout) {
>  		err =  ext4_ext_zeroout(inode, &orig_ex);
>  		if (err)
>  			goto fix_extent_len;
> @@ -2938,14 +2957,21 @@ static int ext4_split_unwritten_extents(handle_t *handle,
>  	struct ext4_extent *ex2 = NULL;
>  	struct ext4_extent *ex3 = NULL;
>  	struct ext4_extent_header *eh;
> -	ext4_lblk_t ee_block;
> +	ext4_lblk_t ee_block, eof_block;
>  	unsigned int allocated, ee_len, depth;
>  	ext4_fsblk_t newblock;
>  	int err = 0;
> +	int may_zeroout;
> +
> +	ext_debug("ext4_split_unwritten_extents: inode %lu, logical"
> +		"block %llu, max_blocks %u\n", inode->i_ino,
> +		(unsigned long long)iblock, max_blocks);
> +
> +	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
> +		inode->i_sb->s_blocksize_bits;
> +	if (eof_block < iblock + max_blocks)
> +		eof_block = iblock + max_blocks;
>  
> -	ext_debug("ext4_split_unwritten_extents: inode %lu,"
> -		  "iblock %llu, max_blocks %u\n", inode->i_ino,
> -		  (unsigned long long)iblock, max_blocks);
>  	depth = ext_depth(inode);
>  	eh = path[depth].p_hdr;
>  	ex = path[depth].p_ext;
> @@ -2953,12 +2979,19 @@ static int ext4_split_unwritten_extents(handle_t *handle,
>  	ee_len = ext4_ext_get_actual_len(ex);
>  	allocated = ee_len - (iblock - ee_block);
>  	newblock = iblock - ee_block + ext_pblock(ex);
> +
>  	ex2 = ex;
>  	orig_ex.ee_block = ex->ee_block;
>  	orig_ex.ee_len   = cpu_to_le16(ee_len);
>  	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
>  
>  	/*
> +	 * It is safe to convert extent to initialized via explicit
> +	 * zeroout only if extent is fully insde i_size or new_size.
> +	 */
> +	may_zeroout = ee_block + ee_len <= eof_block;
> +
> +	/*
>   	 * If the uninitialized extent begins at the same logical
>   	 * block where the write begins, and the write completely
>   	 * covers the extent, then we don't need to split it.
> @@ -2992,7 +3025,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
>  		ex3->ee_len = cpu_to_le16(allocated - max_blocks);
>  		ext4_ext_mark_uninitialized(ex3);
>  		err = ext4_ext_insert_extent(handle, inode, path, ex3, flags);
> -		if (err == -ENOSPC) {
> +		if (err == -ENOSPC && may_zeroout) {
>  			err =  ext4_ext_zeroout(inode, &orig_ex);
>  			if (err)
>  				goto fix_extent_len;
> @@ -3016,8 +3049,9 @@ static int ext4_split_unwritten_extents(handle_t *handle,
>  		 * update the extent length after successful insert of the
>  		 * split extent
>  		 */
> -		orig_ex.ee_len = cpu_to_le16(ee_len -
> -						ext4_ext_get_actual_len(ex3));
> +		ee_len -= ext4_ext_get_actual_len(ex3);
> +		orig_ex.ee_len = cpu_to_le16(ee_len);
> +
We have to update may_zeroout since orig extent was changed.
+               may_zeroout = ee_block + ee_len <= eof_block;  
I've attached incremental patch for you.
In fact, this not result in any serious bugs but increase our changes to
use zeroout optimization and helps us to overcome ENOSPC if possible.
>  		depth = newdepth;
>  		ext4_ext_drop_refs(path);
>  		path = ext4_ext_find_extent(inode, iblock, path);
> @@ -3063,7 +3097,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
>  	goto out;
>  insert:
>  	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
> -	if (err == -ENOSPC) {
> +	if (err == -ENOSPC && may_zeroout) {
>  		err =  ext4_ext_zeroout(inode, &orig_ex);
>  		if (err)
>  			goto fix_extent_len;


[-- Attachment #2: ext4-Do-not-zeroout-uninitialized-extents-beyond-i_s-2 --]
[-- Type: text/plain, Size: 401 bytes --]

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2c8f73d..1938418 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3051,6 +3051,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 		 */
 		ee_len -= ext4_ext_get_actual_len(ex3);
 		orig_ex.ee_len = cpu_to_le16(ee_len);
+		may_zeroout = ee_block + ee_len <= eof_block;
 
 		depth = newdepth;
 		ext4_ext_drop_refs(path);

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] ext4: Do not zeroout uninitialized extents beyond i_size
  2010-04-15  9:20   ` Dmitry Monakhov
@ 2010-04-21 17:45     ` tytso
  0 siblings, 0 replies; 11+ messages in thread
From: tytso @ 2010-04-21 17:45 UTC (permalink / raw)
  To: Dmitry Monakhov; +Cc: linux-ext4, aneesh.kumar

On Thu, Apr 15, 2010 at 01:20:35PM +0400, Dmitry Monakhov wrote:
> I've attached incremental patch for you.
> In fact, this not result in any serious bugs but increase our changes to
> use zeroout optimization and helps us to overcome ENOSPC if possible.

I've integrated your incremental patch into what I have in the ext4
patch queue, thanks.  (It's been there for a while actually but due to
the LF collaboration summit I haven't had a chance to ack things that
I've integrated, update the patchwork site, etc.)

     		 	    	      	    - Ted

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] ext4: Do not zeroout uninitialized extents beyond i_size
  2010-04-09 17:22 [PATCH] ext4: Do not zeroout uninitialized extents beyond i_size Dmitry Monakhov
  2010-04-14 21:28 ` tytso
@ 2010-04-28  4:40 ` Aneesh Kumar K. V
  2010-04-28  7:38   ` Dmitry Monakhov
  1 sibling, 1 reply; 11+ messages in thread
From: Aneesh Kumar K. V @ 2010-04-28  4:40 UTC (permalink / raw)
  To: Dmitry Monakhov, linux-ext4; +Cc: tytso, Dmitry Monakhov

On Fri,  9 Apr 2010 21:22:28 +0400, Dmitry Monakhov <dmonakhov@openvz.org> wrote:
> Zerrout trick allow us to optimize cases where it is more reasonable
> to explicitly zeroout extent and mark it as initialized instead of
> splitting to several small ones.
> But this optimization is not acceptable is extent is beyond i_size
> Because it is not possible to have initialized blocks after i_size.
> Fsck treat this as incorrect inode size.
> 

With commit c8d46e41bc744c8fa0092112af3942fcd46c8b18 if we set
EXT4_EOFBLOCKS_FL we should be able to have blocks beyond i_size.
May be the zero out path should set the flag instead of doing all these
changes. Zero-out is already complex with all the ENOSPC related
consideration. I guess we should try to keep it simple.

-aneesh

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] ext4: Do not zeroout uninitialized extents beyond i_size
  2010-04-28  4:40 ` Aneesh Kumar K. V
@ 2010-04-28  7:38   ` Dmitry Monakhov
  2010-05-27 17:19     ` Aneesh Kumar K. V
  0 siblings, 1 reply; 11+ messages in thread
From: Dmitry Monakhov @ 2010-04-28  7:38 UTC (permalink / raw)
  To: Aneesh Kumar K. V; +Cc: linux-ext4, tytso

"Aneesh Kumar K. V" <aneesh.kumar@linux.vnet.ibm.com> writes:

> On Fri,  9 Apr 2010 21:22:28 +0400, Dmitry Monakhov <dmonakhov@openvz.org> wrote:
>> Zerrout trick allow us to optimize cases where it is more reasonable
>> to explicitly zeroout extent and mark it as initialized instead of
>> splitting to several small ones.
>> But this optimization is not acceptable is extent is beyond i_size
>> Because it is not possible to have initialized blocks after i_size.
>> Fsck treat this as incorrect inode size.
>> 
>
> With commit c8d46e41bc744c8fa0092112af3942fcd46c8b18 if we set
> EXT4_EOFBLOCKS_FL we should be able to have blocks beyond i_size.
> May be the zero out path should set the flag instead of doing all these
> changes. Zero-out is already complex with all the ENOSPC related
> consideration. I guess we should try to keep it simple.
For initialized extent beyond i_size? I've check fsck and seems that
is truly possible. So this optimization allow us to avoid some 
bad EIO situations. But we have to rework ext_get_blocks( ,create == 1)
to clear EXT4_EOFBLOCKS_FL if last block of latest_extent is requested.
I'll handle this.
>
> -aneesh
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] ext4: Do not zeroout uninitialized extents beyond i_size
  2010-04-28  7:38   ` Dmitry Monakhov
@ 2010-05-27 17:19     ` Aneesh Kumar K. V
  2010-06-03  8:32       ` Dmitry Monakhov
  0 siblings, 1 reply; 11+ messages in thread
From: Aneesh Kumar K. V @ 2010-05-27 17:19 UTC (permalink / raw)
  To: Dmitry Monakhov; +Cc: linux-ext4, tytso

On Wed, 28 Apr 2010 11:38:58 +0400, Dmitry Monakhov <dmonakhov@openvz.org> wrote:
> "Aneesh Kumar K. V" <aneesh.kumar@linux.vnet.ibm.com> writes:
> 
> > On Fri,  9 Apr 2010 21:22:28 +0400, Dmitry Monakhov <dmonakhov@openvz.org> wrote:
> >> Zerrout trick allow us to optimize cases where it is more reasonable
> >> to explicitly zeroout extent and mark it as initialized instead of
> >> splitting to several small ones.
> >> But this optimization is not acceptable is extent is beyond i_size
> >> Because it is not possible to have initialized blocks after i_size.
> >> Fsck treat this as incorrect inode size.
> >> 
> >
> > With commit c8d46e41bc744c8fa0092112af3942fcd46c8b18 if we set
> > EXT4_EOFBLOCKS_FL we should be able to have blocks beyond i_size.
> > May be the zero out path should set the flag instead of doing all these
> > changes. Zero-out is already complex with all the ENOSPC related
> > consideration. I guess we should try to keep it simple.
> For initialized extent beyond i_size? I've check fsck and seems that
> is truly possible. So this optimization allow us to avoid some 
> bad EIO situations. But we have to rework ext_get_blocks( ,create == 1)
> to clear EXT4_EOFBLOCKS_FL if last block of latest_extent is requested.
> I'll handle this.
> >


I thought this patch is going to reworked to use EOFBLOCKS_FL. But i see
Ted sent a pull request with the this patch. Did I miss something ?

-aneesh

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] ext4: Do not zeroout uninitialized extents beyond i_size
  2010-05-27 17:19     ` Aneesh Kumar K. V
@ 2010-06-03  8:32       ` Dmitry Monakhov
  2010-06-08 21:46         ` tytso
  0 siblings, 1 reply; 11+ messages in thread
From: Dmitry Monakhov @ 2010-06-03  8:32 UTC (permalink / raw)
  To: Aneesh Kumar K. V; +Cc: linux-ext4, tytso

"Aneesh Kumar K. V" <aneesh.kumar@linux.vnet.ibm.com> writes:

> On Wed, 28 Apr 2010 11:38:58 +0400, Dmitry Monakhov <dmonakhov@openvz.org> wrote:
>> "Aneesh Kumar K. V" <aneesh.kumar@linux.vnet.ibm.com> writes:
>> 
>> > On Fri,  9 Apr 2010 21:22:28 +0400, Dmitry Monakhov <dmonakhov@openvz.org> wrote:
>> >> Zerrout trick allow us to optimize cases where it is more reasonable
>> >> to explicitly zeroout extent and mark it as initialized instead of
>> >> splitting to several small ones.
>> >> But this optimization is not acceptable is extent is beyond i_size
>> >> Because it is not possible to have initialized blocks after i_size.
>> >> Fsck treat this as incorrect inode size.
>> >> 
>> >
>> > With commit c8d46e41bc744c8fa0092112af3942fcd46c8b18 if we set
>> > EXT4_EOFBLOCKS_FL we should be able to have blocks beyond i_size.
>> > May be the zero out path should set the flag instead of doing all these
>> > changes. Zero-out is already complex with all the ENOSPC related
>> > consideration. I guess we should try to keep it simple.
>> For initialized extent beyond i_size? I've check fsck and seems that
>> is truly possible. So this optimization allow us to avoid some 
>> bad EIO situations. But we have to rework ext_get_blocks( ,create == 1)
>> to clear EXT4_EOFBLOCKS_FL if last block of latest_extent is requested.
>> I'll handle this.
>> >
>
>
> I thought this patch is going to reworked to use EOFBLOCKS_FL. But i see
> Ted sent a pull request with the this patch. Did I miss something ?
Sorry for a long replay.
As far as i can see EXT4_EOFBLOCKS_FL flag is now allowed for
uninitialized extents only in e2fslib. So we have to change e2fslib
first and then revert the kernel zeroout restriction logic.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] ext4: Do not zeroout uninitialized extents beyond i_size
  2010-06-03  8:32       ` Dmitry Monakhov
@ 2010-06-08 21:46         ` tytso
  0 siblings, 0 replies; 11+ messages in thread
From: tytso @ 2010-06-08 21:46 UTC (permalink / raw)
  To: Dmitry Monakhov; +Cc: Aneesh Kumar K. V, linux-ext4

On Thu, Jun 03, 2010 at 12:32:09PM +0400, Dmitry Monakhov wrote:
> >
> > I thought this patch is going to reworked to use EOFBLOCKS_FL. But i see
> > Ted sent a pull request with the this patch. Did I miss something ?

I added it to the patch queue a week before you sent your comment that
we should rework this with EOFBLOCKS_FL, and I forgot to pull the
patch back.

> As far as i can see EXT4_EOFBLOCKS_FL flag is now allowed for
> uninitialized extents only in e2fslib. So we have to change e2fslib
> first and then revert the kernel zeroout restriction logic.

I think I'm missing something.  What change do you think is needed in
e2fsprogs?

						- Ted




^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] ext4: Do not zeroout uninitialized extents beyond i_size
  2010-04-09 17:14 Dmitry Monakhov
@ 2010-04-09 17:18 ` Dmitry Monakhov
  0 siblings, 0 replies; 11+ messages in thread
From: Dmitry Monakhov @ 2010-04-09 17:18 UTC (permalink / raw)
  To: linux-ext4; +Cc: aneesh.kumar, tytso

Dmitry Monakhov <dmonakhov@openvz.org> writes:

> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 8bdee27..54a9b80 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -1,3 +1,4 @@
> +
>  /*
>   * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
>   * Written by Alex Tomas <alex@clusterfs.com>
> @@ -2540,9 +2541,11 @@ static void bi_complete(struct bio *bio, int error)
>  {
>  	complete((struct completion *)bio->bi_private);
>  }
> -
> +#define ext4_ext_zeroout(inode,ext_nn) \
> +	__ext4_ext_zeroout(inode, ext_nn, iblock, max_blocks)
>  /* FIXME!! we need to try to merge to left or right after zero-out  */
> -static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
> +static int __ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex,
> +			ext4_lblk_t iblock, unsigned int max_blocks)
Oops, sorry, wrong patch attached :).
>  {
>  	int ret;
>  	struct bio *bio;
> @@ -2560,6 +2563,12 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
>  	/* convert ee_pblock to 512 byte sectors */
>  	ee_pblock = ee_pblock << (blkbits - 9);
>  
> +	printk("%s inode:%lu ext:[%d:%d] iblock:%d max:%d i_sz:%lld expand:%d\n",
> +		__FUNCTION__, inode->i_ino,
> +		le32_to_cpu(ex->ee_block), ee_len,
> +		iblock, max_blocks, inode->i_size,
> +		((inode->i_size +blocksize -1) >> blkbits) < iblock + max_blocks);
> +
>  	while (ee_len > 0) {
>  
>  		if (ee_len > BIO_MAX_PAGES)
> @@ -2631,11 +2640,15 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  	struct ext4_extent *ex2 = NULL;
>  	struct ext4_extent *ex3 = NULL;
>  	struct ext4_extent_header *eh;
> -	ext4_lblk_t ee_block;
> +	ext4_lblk_t ee_block, eof_block;
>  	unsigned int allocated, ee_len, depth;
>  	ext4_fsblk_t newblock;
>  	int err = 0;
>  	int ret = 0;
> +	int may_zeroout;
> +	ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
> +		"block %llu, max_blocks %u ",
> +		inode->i_ino, (unsigned long long)iblock, max_blocks);
>  
>  	depth = ext_depth(inode);
>  	eh = path[depth].p_hdr;
> @@ -2644,16 +2657,25 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  	ee_len = ext4_ext_get_actual_len(ex);
>  	allocated = ee_len - (iblock - ee_block);
>  	newblock = iblock - ee_block + ext_pblock(ex);
> +	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
> +		inode->i_sb->s_blocksize_bits;
>  	ex2 = ex;
>  	orig_ex.ee_block = ex->ee_block;
>  	orig_ex.ee_len   = cpu_to_le16(ee_len);
>  	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
>  
> +	/*
> +	 * It is safe to convert extent to initialized via explicit
> +	 * zeroout only if extent is fully insde i_size or new_size.
> +	 */
> +	may_zeroout = ee_block + ee_len <= iblock + max_blocks ||
> +		ee_block + ee_len <= eof_block;
> +
>  	err = ext4_ext_get_access(handle, inode, path + depth);
>  	if (err)
>  		goto out;
>  	/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
> -	if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
> +	if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) {
>  		err =  ext4_ext_zeroout(inode, &orig_ex);
>  		if (err)
>  			goto fix_extent_len;
> @@ -2684,7 +2706,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  	if (allocated > max_blocks) {
>  		unsigned int newdepth;
>  		/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
> -		if (allocated <= EXT4_EXT_ZERO_LEN) {
> +		if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) {
>  			/*
>  			 * iblock == ee_block is handled by the zerouout
>  			 * at the beginning.
> @@ -2760,7 +2782,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  		ex3->ee_len = cpu_to_le16(allocated - max_blocks);
>  		ext4_ext_mark_uninitialized(ex3);
>  		err = ext4_ext_insert_extent(handle, inode, path, ex3, 0);
> -		if (err == -ENOSPC) {
> +		if (err == -ENOSPC && may_zeroout) {
>  			err =  ext4_ext_zeroout(inode, &orig_ex);
>  			if (err)
>  				goto fix_extent_len;
> @@ -2784,8 +2806,11 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  		 * update the extent length after successful insert of the
>  		 * split extent
>  		 */
> -		orig_ex.ee_len = cpu_to_le16(ee_len -
> -						ext4_ext_get_actual_len(ex3));
> +		ee_len -= ext4_ext_get_actual_len(ex3);
> +		orig_ex.ee_len = cpu_to_le16(ee_len);
> +		may_zeroout = ee_block + ee_len <= iblock + max_blocks ||
> +			ee_block + ee_len <= eof_block;
> +
>  		depth = newdepth;
>  		ext4_ext_drop_refs(path);
>  		path = ext4_ext_find_extent(inode, iblock, path);
> @@ -2809,7 +2834,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  		 * otherwise give the extent a chance to merge to left
>  		 */
>  		if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
> -							iblock != ee_block) {
> +			iblock != ee_block && may_zeroout) {
>  			err =  ext4_ext_zeroout(inode, &orig_ex);
>  			if (err)
>  				goto fix_extent_len;
> @@ -2878,7 +2903,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  	goto out;
>  insert:
>  	err = ext4_ext_insert_extent(handle, inode, path, &newex, 0);
> -	if (err == -ENOSPC) {
> +	if (err == -ENOSPC && may_zeroout) {
>  		err =  ext4_ext_zeroout(inode, &orig_ex);
>  		if (err)
>  			goto fix_extent_len;
> @@ -2938,14 +2963,16 @@ static int ext4_split_unwritten_extents(handle_t *handle,
>  	struct ext4_extent *ex2 = NULL;
>  	struct ext4_extent *ex3 = NULL;
>  	struct ext4_extent_header *eh;
> -	ext4_lblk_t ee_block;
> +	ext4_lblk_t ee_block, eof_block;
>  	unsigned int allocated, ee_len, depth;
>  	ext4_fsblk_t newblock;
>  	int err = 0;
> +	int may_zeroout;
>  
>  	ext_debug("ext4_split_unwritten_extents: inode %lu,"
>  		  "iblock %llu, max_blocks %u\n", inode->i_ino,
>  		  (unsigned long long)iblock, max_blocks);
> +
>  	depth = ext_depth(inode);
>  	eh = path[depth].p_hdr;
>  	ex = path[depth].p_ext;
> @@ -2953,10 +2980,19 @@ static int ext4_split_unwritten_extents(handle_t *handle,
>  	ee_len = ext4_ext_get_actual_len(ex);
>  	allocated = ee_len - (iblock - ee_block);
>  	newblock = iblock - ee_block + ext_pblock(ex);
> +	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
> +		inode->i_sb->s_blocksize_bits;
> +
>  	ex2 = ex;
>  	orig_ex.ee_block = ex->ee_block;
>  	orig_ex.ee_len   = cpu_to_le16(ee_len);
>  	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
> +	/*
> +	 * It is safe to convert extent to initialized via explicit
> +	 * zeroout only if extent is fully insde i_size or new_size.
> +	 */
> +	may_zeroout = ee_block + ee_len <= iblock + max_blocks ||
> +		ee_block + ee_len <= eof_block;
>  
>  	/*
>   	 * If the uninitialized extent begins at the same logical
> @@ -2992,7 +3028,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
>  		ex3->ee_len = cpu_to_le16(allocated - max_blocks);
>  		ext4_ext_mark_uninitialized(ex3);
>  		err = ext4_ext_insert_extent(handle, inode, path, ex3, flags);
> -		if (err == -ENOSPC) {
> +		if (err == -ENOSPC && may_zeroout) {
>  			err =  ext4_ext_zeroout(inode, &orig_ex);
>  			if (err)
>  				goto fix_extent_len;
> @@ -3063,7 +3099,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
>  	goto out;
>  insert:
>  	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
> -	if (err == -ENOSPC) {
> +	if (err == -ENOSPC && may_zeroout) {
>  		err =  ext4_ext_zeroout(inode, &orig_ex);
>  		if (err)
>  			goto fix_extent_len;

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] ext4: Do not zeroout uninitialized extents beyond i_size
@ 2010-04-09 17:14 Dmitry Monakhov
  2010-04-09 17:18 ` Dmitry Monakhov
  0 siblings, 1 reply; 11+ messages in thread
From: Dmitry Monakhov @ 2010-04-09 17:14 UTC (permalink / raw)
  To: linux-ext4; +Cc: aneesh.kumar, tytso, Dmitry Monakhov

Zerrout trick allow us to optimize cases where it is more reasonable
to explicitly zeroout extent and mark it as initialized instead of
splitting to several small ones.
But this optimization is not acceptable is extent is beyond i_size
Because it is not possible to have initialized blocks after i_size.
Fsck treat this as incorrect inode size.

#BUG# (here suppose to be bug number, but bugzilla.kernel.org is too
       dammit slow)
##TESTCASE
/***********************************************
  gcc -Wall falloc_test.c -ofalloc_test
  This testcase check write to fallocated space
  mkfs.ext4 /dev/sdb1
  mount /dev/sdb1 /mnt
  ./falloc_test /mnt/F1
  umount /mnt
  fsck.ext4 -f /dev/sdb1
 ***********************************************/
#define _GNU_SOURCE
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>
int main(int argc, char **argv)
{
	loff_t len, offset;
	int fd, ret;

	if (argc != 2) {
		printf("Usage: %s <fname>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_CREAT|O_RDWR, 0777);
	/* 8192 is less than EXT4_EXT_ZERO_LEN */
	ret = fallocate(fd, 0x1, 0, 8192);
	if (ret)
		return ret;
	/* Provoke reserved space convertation */
	return write(fd, "1", 1) != 1;
}


Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
 fs/ext4/extents.c |   62 +++++++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 49 insertions(+), 13 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 8bdee27..54a9b80 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1,3 +1,4 @@
+
 /*
  * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
  * Written by Alex Tomas <alex@clusterfs.com>
@@ -2540,9 +2541,11 @@ static void bi_complete(struct bio *bio, int error)
 {
 	complete((struct completion *)bio->bi_private);
 }
-
+#define ext4_ext_zeroout(inode,ext_nn) \
+	__ext4_ext_zeroout(inode, ext_nn, iblock, max_blocks)
 /* FIXME!! we need to try to merge to left or right after zero-out  */
-static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
+static int __ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex,
+			ext4_lblk_t iblock, unsigned int max_blocks)
 {
 	int ret;
 	struct bio *bio;
@@ -2560,6 +2563,12 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
 	/* convert ee_pblock to 512 byte sectors */
 	ee_pblock = ee_pblock << (blkbits - 9);
 
+	printk("%s inode:%lu ext:[%d:%d] iblock:%d max:%d i_sz:%lld expand:%d\n",
+		__FUNCTION__, inode->i_ino,
+		le32_to_cpu(ex->ee_block), ee_len,
+		iblock, max_blocks, inode->i_size,
+		((inode->i_size +blocksize -1) >> blkbits) < iblock + max_blocks);
+
 	while (ee_len > 0) {
 
 		if (ee_len > BIO_MAX_PAGES)
@@ -2631,11 +2640,15 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	struct ext4_extent *ex2 = NULL;
 	struct ext4_extent *ex3 = NULL;
 	struct ext4_extent_header *eh;
-	ext4_lblk_t ee_block;
+	ext4_lblk_t ee_block, eof_block;
 	unsigned int allocated, ee_len, depth;
 	ext4_fsblk_t newblock;
 	int err = 0;
 	int ret = 0;
+	int may_zeroout;
+	ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
+		"block %llu, max_blocks %u ",
+		inode->i_ino, (unsigned long long)iblock, max_blocks);
 
 	depth = ext_depth(inode);
 	eh = path[depth].p_hdr;
@@ -2644,16 +2657,25 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	ee_len = ext4_ext_get_actual_len(ex);
 	allocated = ee_len - (iblock - ee_block);
 	newblock = iblock - ee_block + ext_pblock(ex);
+	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+		inode->i_sb->s_blocksize_bits;
 	ex2 = ex;
 	orig_ex.ee_block = ex->ee_block;
 	orig_ex.ee_len   = cpu_to_le16(ee_len);
 	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
 
+	/*
+	 * It is safe to convert extent to initialized via explicit
+	 * zeroout only if extent is fully insde i_size or new_size.
+	 */
+	may_zeroout = ee_block + ee_len <= iblock + max_blocks ||
+		ee_block + ee_len <= eof_block;
+
 	err = ext4_ext_get_access(handle, inode, path + depth);
 	if (err)
 		goto out;
 	/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
-	if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
+	if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) {
 		err =  ext4_ext_zeroout(inode, &orig_ex);
 		if (err)
 			goto fix_extent_len;
@@ -2684,7 +2706,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	if (allocated > max_blocks) {
 		unsigned int newdepth;
 		/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
-		if (allocated <= EXT4_EXT_ZERO_LEN) {
+		if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) {
 			/*
 			 * iblock == ee_block is handled by the zerouout
 			 * at the beginning.
@@ -2760,7 +2782,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		ex3->ee_len = cpu_to_le16(allocated - max_blocks);
 		ext4_ext_mark_uninitialized(ex3);
 		err = ext4_ext_insert_extent(handle, inode, path, ex3, 0);
-		if (err == -ENOSPC) {
+		if (err == -ENOSPC && may_zeroout) {
 			err =  ext4_ext_zeroout(inode, &orig_ex);
 			if (err)
 				goto fix_extent_len;
@@ -2784,8 +2806,11 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		 * update the extent length after successful insert of the
 		 * split extent
 		 */
-		orig_ex.ee_len = cpu_to_le16(ee_len -
-						ext4_ext_get_actual_len(ex3));
+		ee_len -= ext4_ext_get_actual_len(ex3);
+		orig_ex.ee_len = cpu_to_le16(ee_len);
+		may_zeroout = ee_block + ee_len <= iblock + max_blocks ||
+			ee_block + ee_len <= eof_block;
+
 		depth = newdepth;
 		ext4_ext_drop_refs(path);
 		path = ext4_ext_find_extent(inode, iblock, path);
@@ -2809,7 +2834,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		 * otherwise give the extent a chance to merge to left
 		 */
 		if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
-							iblock != ee_block) {
+			iblock != ee_block && may_zeroout) {
 			err =  ext4_ext_zeroout(inode, &orig_ex);
 			if (err)
 				goto fix_extent_len;
@@ -2878,7 +2903,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	goto out;
 insert:
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, 0);
-	if (err == -ENOSPC) {
+	if (err == -ENOSPC && may_zeroout) {
 		err =  ext4_ext_zeroout(inode, &orig_ex);
 		if (err)
 			goto fix_extent_len;
@@ -2938,14 +2963,16 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	struct ext4_extent *ex2 = NULL;
 	struct ext4_extent *ex3 = NULL;
 	struct ext4_extent_header *eh;
-	ext4_lblk_t ee_block;
+	ext4_lblk_t ee_block, eof_block;
 	unsigned int allocated, ee_len, depth;
 	ext4_fsblk_t newblock;
 	int err = 0;
+	int may_zeroout;
 
 	ext_debug("ext4_split_unwritten_extents: inode %lu,"
 		  "iblock %llu, max_blocks %u\n", inode->i_ino,
 		  (unsigned long long)iblock, max_blocks);
+
 	depth = ext_depth(inode);
 	eh = path[depth].p_hdr;
 	ex = path[depth].p_ext;
@@ -2953,10 +2980,19 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	ee_len = ext4_ext_get_actual_len(ex);
 	allocated = ee_len - (iblock - ee_block);
 	newblock = iblock - ee_block + ext_pblock(ex);
+	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+		inode->i_sb->s_blocksize_bits;
+
 	ex2 = ex;
 	orig_ex.ee_block = ex->ee_block;
 	orig_ex.ee_len   = cpu_to_le16(ee_len);
 	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
+	/*
+	 * It is safe to convert extent to initialized via explicit
+	 * zeroout only if extent is fully insde i_size or new_size.
+	 */
+	may_zeroout = ee_block + ee_len <= iblock + max_blocks ||
+		ee_block + ee_len <= eof_block;
 
 	/*
  	 * If the uninitialized extent begins at the same logical
@@ -2992,7 +3028,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 		ex3->ee_len = cpu_to_le16(allocated - max_blocks);
 		ext4_ext_mark_uninitialized(ex3);
 		err = ext4_ext_insert_extent(handle, inode, path, ex3, flags);
-		if (err == -ENOSPC) {
+		if (err == -ENOSPC && may_zeroout) {
 			err =  ext4_ext_zeroout(inode, &orig_ex);
 			if (err)
 				goto fix_extent_len;
@@ -3063,7 +3099,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	goto out;
 insert:
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
-	if (err == -ENOSPC) {
+	if (err == -ENOSPC && may_zeroout) {
 		err =  ext4_ext_zeroout(inode, &orig_ex);
 		if (err)
 			goto fix_extent_len;
-- 
1.6.6.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2010-06-08 21:46 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-04-09 17:22 [PATCH] ext4: Do not zeroout uninitialized extents beyond i_size Dmitry Monakhov
2010-04-14 21:28 ` tytso
2010-04-15  9:20   ` Dmitry Monakhov
2010-04-21 17:45     ` tytso
2010-04-28  4:40 ` Aneesh Kumar K. V
2010-04-28  7:38   ` Dmitry Monakhov
2010-05-27 17:19     ` Aneesh Kumar K. V
2010-06-03  8:32       ` Dmitry Monakhov
2010-06-08 21:46         ` tytso
  -- strict thread matches above, loose matches on Subject: below --
2010-04-09 17:14 Dmitry Monakhov
2010-04-09 17:18 ` Dmitry Monakhov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.