All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Add flag to files with blocks intentionally past EOF
@ 2010-01-19 21:45 Eric Sandeen
  2010-01-20  0:46 ` Mingming
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Eric Sandeen @ 2010-01-19 21:45 UTC (permalink / raw)
  To: ext4 development

From: Jiaying Zhang <jiayingz@google.com>

fallocate() may potentially instantiate blocks past EOF, depending
on the flags used when it is called.

e2fsck currently has a test for blocks past i_size, and it
sometimes trips up - noticeably on xfstests 013 which runs fsstress.

This patch from Jiaying does fix it up for me - it (along with
e2fsprogs updates and other patches recently from Aneesh) has
survived many fsstress runs in a row.

The setattr interface may also be used to clear the flag and remove
any blocks past EOF.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
---

(just resending this since it probably got lost in the previous
thread - Jiaying didn't have a SOB line, but maybe that should
be added.  I have included the proper From: line for authorship)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 874d169..4c7cd9b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -284,10 +284,11 @@ struct flex_groups {
 #define EXT4_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
 #define EXT4_HUGE_FILE_FL               0x00040000 /* Set to each huge file */
 #define EXT4_EXTENTS_FL			0x00080000 /* Inode uses extents */
+#define EXT4_EOFBLOCKS_FL		0x00400000 /* Blocks allocated beyond EOF (bit reserved in fs.h) */
 #define EXT4_RESERVED_FL		0x80000000 /* reserved for ext4 lib */
 
-#define EXT4_FL_USER_VISIBLE		0x000BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE		0x000B80FF /* User modifiable flags */
+#define EXT4_FL_USER_VISIBLE		0x004BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE		0x004B80FF /* User modifiable flags */
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 765a482..e7d5ba2 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3185,7 +3185,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 {
 	struct ext4_ext_path *path = NULL;
 	struct ext4_extent_header *eh;
-	struct ext4_extent newex, *ex;
+	struct ext4_extent newex, *ex, *last_ex;
 	ext4_fsblk_t newblock;
 	int err = 0, depth, ret, cache_type;
 	unsigned int allocated = 0;
@@ -3366,6 +3366,14 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 					EXT4_STATE_DIO_UNWRITTEN;;
 		}
 	}
+
+	if (unlikely(inode->i_flags & EXT4_EOFBLOCKS_FL)) {
+		BUG_ON(!eh->eh_entries);
+		last_ex = EXT_LAST_EXTENT(eh);
+		if (iblock + ar.len > le32_to_cpu(last_ex->ee_block)
+					+ ext4_ext_get_actual_len(last_ex))
+			inode->i_flags &= ~EXT4_EOFBLOCKS_FL;
+	}
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
 	if (err) {
 		/* free data blocks we just allocated */
@@ -3499,6 +3507,13 @@ static void ext4_falloc_update_inode(struct inode *inode,
 			i_size_write(inode, new_size);
 		if (new_size > EXT4_I(inode)->i_disksize)
 			ext4_update_i_disksize(inode, new_size);
+	} else {
+		/*
+		 * Mark that we allocate beyond EOF so the subsequent truncate
+		 * can proceed even if the new size is the same as i_size.
+		 */
+		if (new_size > i_size_read(inode))
+			inode->i_flags |= EXT4_EOFBLOCKS_FL;
 	}
 
 }
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cbf56da..bc31ea6 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4429,6 +4429,8 @@ void ext4_truncate(struct inode *inode)
 	if (!ext4_can_truncate(inode))
 		return;
 
+	inode->i_flags &= ~EXT4_EOFBLOCKS_FL;
+
 	if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
 		ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
 
@@ -4741,8 +4743,8 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei)
 {
 	unsigned int flags = ei->vfs_inode.i_flags;
 
-	ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
-			EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL);
+	ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|EXT4_IMMUTABLE_FL|
+			EXT4_NOATIME_FL|EXT4_DIRSYNC_FL|EXT4_EOFBLOCKS_FL);
 	if (flags & S_SYNC)
 		ei->i_flags |= EXT4_SYNC_FL;
 	if (flags & S_APPEND)
@@ -4753,6 +4755,8 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei)
 		ei->i_flags |= EXT4_NOATIME_FL;
 	if (flags & S_DIRSYNC)
 		ei->i_flags |= EXT4_DIRSYNC_FL;
+	if (flags & FS_EOFBLOCKS_FL)
+		ei->i_flags |= EXT4_EOFBLOCKS_FL;
 }
 
 static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
@@ -5284,7 +5288,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 	}
 
 	if (S_ISREG(inode->i_mode) &&
-	    attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
+	    attr->ia_valid & ATTR_SIZE &&
+	    (attr->ia_size < inode->i_size ||
+	     (inode->i_flags & EXT4_EOFBLOCKS_FL))) {
 		handle_t *handle;
 
 		handle = ext4_journal_start(inode, 3);
@@ -5315,6 +5321,11 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 				goto err_out;
 			}
 		}
+		if ((inode->i_flags & EXT4_EOFBLOCKS_FL)) {
+			rc = vmtruncate(inode, attr->ia_size);
+			if (rc)
+				goto err_out;
+		}
 	}
 
 	rc = inode_setattr(inode, attr);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index b63d193..71f578e 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -92,6 +92,16 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			flags &= ~EXT4_EXTENTS_FL;
 		}
 
+		if (flags & EXT4_EOFBLOCKS_FL) {
+			/* we don't support adding EOFBLOCKS flag */
+			if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
+				err = -EOPNOTSUPP;
+				goto flags_out;
+			}
+		} else if (oldflags & EXT4_EOFBLOCKS_FL)
+			/* free the space reserved with fallocate KEEPSIZE */
+			vmtruncate(inode, inode->i_size);
+
 		handle = ext4_journal_start(inode, 1);
 		if (IS_ERR(handle)) {
 			err = PTR_ERR(handle);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9147ca8..db3ffb6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -349,10 +349,11 @@ struct inodes_stat_t {
 #define FS_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
 #define FS_EXTENT_FL			0x00080000 /* Extents */
 #define FS_DIRECTIO_FL			0x00100000 /* Use direct i/o */
+#define FS_EOFBLOCKS_FL			0x00400000 /* Blocks allocated beyond EOF */
 #define FS_RESERVED_FL			0x80000000 /* reserved for ext2 lib */
 
-#define FS_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
-#define FS_FL_USER_MODIFIABLE		0x000380FF /* User modifiable flags */
+#define FS_FL_USER_VISIBLE		0x0043DFFF /* User visible flags */
+#define FS_FL_USER_MODIFIABLE		0x004380FF /* User modifiable flags */
 
 
 #define SYNC_FILE_RANGE_WAIT_BEFORE	1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] Add flag to files with blocks intentionally past EOF
  2010-01-19 21:45 [PATCH] Add flag to files with blocks intentionally past EOF Eric Sandeen
@ 2010-01-20  0:46 ` Mingming
  2010-01-20  3:58   ` Eric Sandeen
  2010-01-20  9:03 ` Aneesh Kumar K. V
  2010-01-21 18:00 ` [PATCH V2] " Eric Sandeen
  2 siblings, 1 reply; 11+ messages in thread
From: Mingming @ 2010-01-20  0:46 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: ext4 development

On Tue, 2010-01-19 at 15:45 -0600, Eric Sandeen wrote:
> From: Jiaying Zhang <jiayingz@google.com>
> 
> fallocate() may potentially instantiate blocks past EOF, depending
> on the flags used when it is called.
> 
> e2fsck currently has a test for blocks past i_size, and it
> sometimes trips up - noticeably on xfstests 013 which runs fsstress.
> 
> This patch from Jiayang does fix it up for me - it (along with
> e2fsprogs updates and other patches recently from Aneesh) has
> survived many fsstress runs in a row.
> 
> The setattr interface may also be used to clear the flag and remove
> any blocks past EOF.
> 
> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
> ---
> 
> (just resending this since it probably got lost in the previous
> thread - Jiaying didn't have a SOB line, but maybe that should
> be added.  I have included the proper From: line for authorship)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 874d169..4c7cd9b 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -284,10 +284,11 @@ struct flex_groups {
>  #define EXT4_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
>  #define EXT4_HUGE_FILE_FL               0x00040000 /* Set to each huge file */
>  #define EXT4_EXTENTS_FL			0x00080000 /* Inode uses extents */
> +#define EXT4_EOFBLOCKS_FL		0x00400000 /* Blocks allocated beyond EOF (bit reserved in fs.h) */
>  #define EXT4_RESERVED_FL		0x80000000 /* reserved for ext4 lib */
> 
> -#define EXT4_FL_USER_VISIBLE		0x000BDFFF /* User visible flags */
> -#define EXT4_FL_USER_MODIFIABLE		0x000B80FF /* User modifiable flags */
> +#define EXT4_FL_USER_VISIBLE		0x004BDFFF /* User visible flags */
> +#define EXT4_FL_USER_MODIFIABLE		0x004B80FF /* User modifiable flags */
> 
>  /* Flags that should be inherited by new inodes from their parent. */
>  #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 765a482..e7d5ba2 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -3185,7 +3185,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
>  {
>  	struct ext4_ext_path *path = NULL;
>  	struct ext4_extent_header *eh;
> -	struct ext4_extent newex, *ex;
> +	struct ext4_extent newex, *ex, *last_ex;
>  	ext4_fsblk_t newblock;
>  	int err = 0, depth, ret, cache_type;
>  	unsigned int allocated = 0;
> @@ -3366,6 +3366,14 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
>  					EXT4_STATE_DIO_UNWRITTEN;;
>  		}
>  	}
> +
> +	if (unlikely(inode->i_flags & EXT4_EOFBLOCKS_FL)) {
> +		BUG_ON(!eh->eh_entries);

Perhaps BUG_ON() is too strong? Maybe add some warning messages first.

> +		last_ex = EXT_LAST_EXTENT(eh);
> +		if (iblock + ar.len > le32_to_cpu(last_ex->ee_block)
> +					+ ext4_ext_get_actual_len(last_ex))
> +			inode->i_flags &= ~EXT4_EOFBLOCKS_FL;
> +	}
>  	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
>  	if (err) {
>  		/* free data blocks we just allocated */
> @@ -3499,6 +3507,13 @@ static void ext4_falloc_update_inode(struct inode *inode,
>  			i_size_write(inode, new_size);
>  		if (new_size > EXT4_I(inode)->i_disksize)
>  			ext4_update_i_disksize(inode, new_size);
> +	} else {
> +		/*
> +		 * Mark that we allocate beyond EOF so the subsequent truncate
> +		 * can proceed even if the new size is the same as i_size.
> +		 */
> +		if (new_size > i_size_read(inode))
> +			inode->i_flags |= EXT4_EOFBLOCKS_FL;
>  	}
> 
>  }
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index cbf56da..bc31ea6 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4429,6 +4429,8 @@ void ext4_truncate(struct inode *inode)
>  	if (!ext4_can_truncate(inode))
>  		return;
> 
> +	inode->i_flags &= ~EXT4_EOFBLOCKS_FL;
> +
>  	if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
>  		ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
> 
> @@ -4741,8 +4743,8 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei)
>  {
>  	unsigned int flags = ei->vfs_inode.i_flags;
> 
> -	ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
> -			EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL);
> +	ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|EXT4_IMMUTABLE_FL|
> +			EXT4_NOATIME_FL|EXT4_DIRSYNC_FL|EXT4_EOFBLOCKS_FL);
>  	if (flags & S_SYNC)
>  		ei->i_flags |= EXT4_SYNC_FL;
>  	if (flags & S_APPEND)
> @@ -4753,6 +4755,8 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei)
>  		ei->i_flags |= EXT4_NOATIME_FL;
>  	if (flags & S_DIRSYNC)
>  		ei->i_flags |= EXT4_DIRSYNC_FL;
> +	if (flags & FS_EOFBLOCKS_FL)
> +		ei->i_flags |= EXT4_EOFBLOCKS_FL;
>  }
> 
>  static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
> @@ -5284,7 +5288,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
>  	}
> 
>  	if (S_ISREG(inode->i_mode) &&
> -	    attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
> +	    attr->ia_valid & ATTR_SIZE &&
> +	    (attr->ia_size < inode->i_size ||
> +	     (inode->i_flags & EXT4_EOFBLOCKS_FL))) {
>  		handle_t *handle;
> 
>  		handle = ext4_journal_start(inode, 3);
> @@ -5315,6 +5321,11 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
>  				goto err_out;
>  			}
>  		}
> +		if ((inode->i_flags & EXT4_EOFBLOCKS_FL)) {
> +			rc = vmtruncate(inode, attr->ia_size);
> +			if (rc)
> +				goto err_out;
> +		}
>  	}
> 

I am a little lost why doing vmtruncate here...

>  	rc = inode_setattr(inode, attr);
> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> index b63d193..71f578e 100644
> --- a/fs/ext4/ioctl.c
> +++ b/fs/ext4/ioctl.c
> @@ -92,6 +92,16 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  			flags &= ~EXT4_EXTENTS_FL;
>  		}
> 
> +		if (flags & EXT4_EOFBLOCKS_FL) {
> +			/* we don't support adding EOFBLOCKS flag */
> +			if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
> +				err = -EOPNOTSUPP;
> +				goto flags_out;
> +			}
> +		} else if (oldflags & EXT4_EOFBLOCKS_FL)
> +			/* free the space reserved with fallocate KEEPSIZE */
> +			vmtruncate(inode, inode->i_size);
> +
>  		handle = ext4_journal_start(inode, 1);
>  		if (IS_ERR(handle)) {
>  			err = PTR_ERR(handle);

And here...
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 9147ca8..db3ffb6 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -349,10 +349,11 @@ struct inodes_stat_t {
>  #define FS_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
>  #define FS_EXTENT_FL			0x00080000 /* Extents */
>  #define FS_DIRECTIO_FL			0x00100000 /* Use direct i/o */
> +#define FS_EOFBLOCKS_FL			0x00400000 /* Blocks allocated beyond EOF */
>  #define FS_RESERVED_FL			0x80000000 /* reserved for ext2 lib */
> 
> -#define FS_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
> -#define FS_FL_USER_MODIFIABLE		0x000380FF /* User modifiable flags */
> +#define FS_FL_USER_VISIBLE		0x0043DFFF /* User visible flags */
> +#define FS_FL_USER_MODIFIABLE		0x004380FF /* User modifiable flags */
> 
> 
>  #define SYNC_FILE_RANGE_WAIT_BEFORE	1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Add flag to files with blocks intentionally past EOF
  2010-01-20  0:46 ` Mingming
@ 2010-01-20  3:58   ` Eric Sandeen
  2010-01-20  8:37     ` Andreas Dilger
  2010-01-20  9:15     ` Aneesh Kumar K. V
  0 siblings, 2 replies; 11+ messages in thread
From: Eric Sandeen @ 2010-01-20  3:58 UTC (permalink / raw)
  To: Mingming; +Cc: ext4 development

Mingming wrote:
> On Tue, 2010-01-19 at 15:45 -0600, Eric Sandeen wrote:
>> From: Jiaying Zhang <jiayingz@google.com>
>>
>> fallocate() may potentially instantiate blocks past EOF, depending
>> on the flags used when it is called.
>>
>> e2fsck currently has a test for blocks past i_size, and it
>> sometimes trips up - noticeably on xfstests 013 which runs fsstress.
>>
>> This patch from Jiayang does fix it up for me - it (along with
>> e2fsprogs updates and other patches recently from Aneesh) has
>> survived many fsstress runs in a row.
>>
>> The setattr interface may also be used to clear the flag and remove
>> any blocks past EOF.
>>
>> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
>> ---
>>
>> (just resending this since it probably got lost in the previous
>> thread - Jiaying didn't have a SOB line, but maybe that should
>> be added.  I have included the proper From: line for authorship)
>>
>> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
>> index 874d169..4c7cd9b 100644
>> --- a/fs/ext4/ext4.h
>> +++ b/fs/ext4/ext4.h
>> @@ -284,10 +284,11 @@ struct flex_groups {
>>  #define EXT4_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
>>  #define EXT4_HUGE_FILE_FL               0x00040000 /* Set to each huge file */
>>  #define EXT4_EXTENTS_FL			0x00080000 /* Inode uses extents */
>> +#define EXT4_EOFBLOCKS_FL		0x00400000 /* Blocks allocated beyond EOF (bit reserved in fs.h) */
>>  #define EXT4_RESERVED_FL		0x80000000 /* reserved for ext4 lib */
>>
>> -#define EXT4_FL_USER_VISIBLE		0x000BDFFF /* User visible flags */
>> -#define EXT4_FL_USER_MODIFIABLE		0x000B80FF /* User modifiable flags */
>> +#define EXT4_FL_USER_VISIBLE		0x004BDFFF /* User visible flags */
>> +#define EXT4_FL_USER_MODIFIABLE		0x004B80FF /* User modifiable flags */
>>
>>  /* Flags that should be inherited by new inodes from their parent. */
>>  #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>> index 765a482..e7d5ba2 100644
>> --- a/fs/ext4/extents.c
>> +++ b/fs/ext4/extents.c
>> @@ -3185,7 +3185,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
>>  {
>>  	struct ext4_ext_path *path = NULL;
>>  	struct ext4_extent_header *eh;
>> -	struct ext4_extent newex, *ex;
>> +	struct ext4_extent newex, *ex, *last_ex;
>>  	ext4_fsblk_t newblock;
>>  	int err = 0, depth, ret, cache_type;
>>  	unsigned int allocated = 0;
>> @@ -3366,6 +3366,14 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
>>  					EXT4_STATE_DIO_UNWRITTEN;;
>>  		}
>>  	}
>> +
>> +	if (unlikely(inode->i_flags & EXT4_EOFBLOCKS_FL)) {
>> +		BUG_ON(!eh->eh_entries);
> 
> Perhaps BUG_ON() is too strong? Maybe add some warning messages first.

perhaps ... not sure how we would get here w/ no entries.

...

>> @@ -5315,6 +5321,11 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
>>  				goto err_out;
>>  			}
>>  		}
>> +		if ((inode->i_flags & EXT4_EOFBLOCKS_FL)) {
>> +			rc = vmtruncate(inode, attr->ia_size);
>> +			if (rc)
>> +				goto err_out;
>> +		}
>>  	}
>>
> 
> I am a little lost why doing vmtruncate here...

Hm first off I assume vmtruncate will clear blocks past that size,
but tonight I'm not seeing how it gets there.

Anyway, it looks like any setting of the size, truncate up or down
(or to current size) will clear any blocks past EOF.

>>  	rc = inode_setattr(inode, attr);
>> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
>> index b63d193..71f578e 100644
>> --- a/fs/ext4/ioctl.c
>> +++ b/fs/ext4/ioctl.c
>> @@ -92,6 +92,16 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>>  			flags &= ~EXT4_EXTENTS_FL;
>>  		}
>>
>> +		if (flags & EXT4_EOFBLOCKS_FL) {
>> +			/* we don't support adding EOFBLOCKS flag */
>> +			if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
>> +				err = -EOPNOTSUPP;
>> +				goto flags_out;
>> +			}
>> +		} else if (oldflags & EXT4_EOFBLOCKS_FL)
>> +			/* free the space reserved with fallocate KEEPSIZE */
>> +			vmtruncate(inode, inode->i_size);
>> +
>>  		handle = ext4_journal_start(inode, 1);
>>  		if (IS_ERR(handle)) {
>>  			err = PTR_ERR(handle);
> 
> And here...

Well here we are clearing the EOFBLOCKS flag so we'd want to clear any
blocks past EOF.... but now, does vmtruncate do that?

Ok, count me as confused too, but mostly just as far as how
vmtruncate clears the blocks beyond EOF.  I guess I glossed over this when reading it.

-Eric

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Add flag to files with blocks intentionally past EOF
  2010-01-20  3:58   ` Eric Sandeen
@ 2010-01-20  8:37     ` Andreas Dilger
  2010-01-20  9:15     ` Aneesh Kumar K. V
  1 sibling, 0 replies; 11+ messages in thread
From: Andreas Dilger @ 2010-01-20  8:37 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: Mingming, ext4 development

On 2010-01-20, at 11:58, Eric Sandeen wrote:
> Mingming wrote:
>>
>> I am a little lost why doing vmtruncate here...
>
> Hm first off I assume vmtruncate will clear blocks past that size,
> but tonight I'm not seeing how it gets there.
>
> Anyway, it looks like any setting of the size, truncate up or down
> (or to current size) will clear any blocks past EOF.


Why not just call ext4_truncate(inode)?  Since these blocks are only  
fallocated, and pages couldn't (shouldn't?) be instantiated beyond  
i_size, we don't need to do anything at the mapping level, only at the  
inode level.

> Well here we are clearing the EOFBLOCKS flag so we'd want to clear any
> blocks past EOF.... but now, does vmtruncate do that?
>
> Ok, count me as confused too, but mostly jsut so far as how does
> vmtruncate clear the blocks beyond eof.  I guess I glossed over this  
> when reading it.


Has the new truncate API patchset gone into the kernel?  I recall  
there was some work to restructure the API and maybe you are a victim  
of understanding the old API?

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Add flag to files with blocks intentionally past EOF
  2010-01-19 21:45 [PATCH] Add flag to files with blocks intentionally past EOF Eric Sandeen
  2010-01-20  0:46 ` Mingming
@ 2010-01-20  9:03 ` Aneesh Kumar K. V
  2010-01-22  0:32   ` Andreas Dilger
  2010-01-21 18:00 ` [PATCH V2] " Eric Sandeen
  2 siblings, 1 reply; 11+ messages in thread
From: Aneesh Kumar K. V @ 2010-01-20  9:03 UTC (permalink / raw)
  To: Eric Sandeen, ext4 development

On Tue, 19 Jan 2010 15:45:19 -0600, Eric Sandeen <sandeen@redhat.com> wrote:
> From: Jiaying Zhang <jiayingz@google.com>
> 
> fallocate() may potentially instantiate blocks past EOF, depending
> on the flags used when it is called.
> 
> e2fsck currently has a test for blocks past i_size, and it
> sometimes trips up - noticeably on xfstests 013 which runs fsstress.
> 
> This patch from Jiayang does fix it up for me - it (along with
> e2fsprogs updates and other patches recently from Aneesh) has
> survived many fsstress runs in a row.
> 
> The setattr interface may also be used to clear the flag and remove
> any blocks past EOF.
> 
> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
> ---
> 
> (just resending this since it probably got lost in the previous
> thread - Jiaying didn't have a SOB line, but maybe that should
> be added.  I have included the proper From: line for authorship)
>

The patch I looked at earlier was an early RFC, and I didn't look through
the full thread to find that an updated patch had been posted. (Hmm, still
getting used to reading mail with notmuchmail.org.)

http://article.gmane.org/gmane.comp.file-systems.ext4/15035

 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 874d169..4c7cd9b 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -284,10 +284,11 @@ struct flex_groups {
>  #define EXT4_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
>  #define EXT4_HUGE_FILE_FL               0x00040000 /* Set to each huge file */
>  #define EXT4_EXTENTS_FL			0x00080000 /* Inode uses extents */
> +#define EXT4_EOFBLOCKS_FL		0x00400000 /* Blocks allocated beyond EOF (bit reserved in fs.h) */
>  #define EXT4_RESERVED_FL		0x80000000 /* reserved for ext4 lib */
> 
> -#define EXT4_FL_USER_VISIBLE		0x000BDFFF /* User visible flags */
> -#define EXT4_FL_USER_MODIFIABLE		0x000B80FF /* User modifiable flags */
> +#define EXT4_FL_USER_VISIBLE		0x004BDFFF /* User visible flags */
> +#define EXT4_FL_USER_MODIFIABLE		0x004B80FF /* User modifiable flags */
> 
>  /* Flags that should be inherited by new inodes from their parent. */
>  #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 765a482..e7d5ba2 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -3185,7 +3185,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
>  {
>  	struct ext4_ext_path *path = NULL;
>  	struct ext4_extent_header *eh;
> -	struct ext4_extent newex, *ex;
> +	struct ext4_extent newex, *ex, *last_ex;
>  	ext4_fsblk_t newblock;
>  	int err = 0, depth, ret, cache_type;
>  	unsigned int allocated = 0;
> @@ -3366,6 +3366,14 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
>  					EXT4_STATE_DIO_UNWRITTEN;;
>  		}
>  	}
> +
> +	if (unlikely(inode->i_flags & EXT4_EOFBLOCKS_FL)) {
> +		BUG_ON(!eh->eh_entries);
> +		last_ex = EXT_LAST_EXTENT(eh);
> +		if (iblock + ar.len > le32_to_cpu(last_ex->ee_block)
> +					+ ext4_ext_get_actual_len(last_ex))
> +			inode->i_flags &= ~EXT4_EOFBLOCKS_FL;
> +	}


Why do we need to set inode->i_flags? Can we make it ext4-specific and
look at ext4_inode->i_flags instead? Also, setting inode->i_flags with an
EXT4 flag value is confusing.



>  	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
>  	if (err) {
>  		/* free data blocks we just allocated */
> @@ -3499,6 +3507,13 @@ static void ext4_falloc_update_inode(struct inode *inode,
>  			i_size_write(inode, new_size);
>  		if (new_size > EXT4_I(inode)->i_disksize)
>  			ext4_update_i_disksize(inode, new_size);
> +	} else {
> +		/*
> +		 * Mark that we allocate beyond EOF so the subsequent truncate
> +		 * can proceed even if the new size is the same as i_size.
> +		 */
> +		if (new_size > i_size_read(inode))
> +			inode->i_flags |= EXT4_EOFBLOCKS_FL;
>  	}
> 
>  }
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index cbf56da..bc31ea6 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4429,6 +4429,8 @@ void ext4_truncate(struct inode *inode)
>  	if (!ext4_can_truncate(inode))
>  		return;
> 
> +	inode->i_flags &= ~EXT4_EOFBLOCKS_FL;
> +
>  	if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
>  		ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
> 
> @@ -4741,8 +4743,8 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei)
>  {
>  	unsigned int flags = ei->vfs_inode.i_flags;
> 
> -	ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
> -			EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL);
> +	ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|EXT4_IMMUTABLE_FL|
> +			EXT4_NOATIME_FL|EXT4_DIRSYNC_FL|EXT4_EOFBLOCKS_FL);


Do we really need to allow getting and setting this flag? IMHO a truncate
should be the only API, and the flag should be removed implicitly by it.


>  	if (flags & S_SYNC)
>  		ei->i_flags |= EXT4_SYNC_FL;
>  	if (flags & S_APPEND)
> @@ -4753,6 +4755,8 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei)
>  		ei->i_flags |= EXT4_NOATIME_FL;
>  	if (flags & S_DIRSYNC)
>  		ei->i_flags |= EXT4_DIRSYNC_FL;
> +	if (flags & FS_EOFBLOCKS_FL)
> +		ei->i_flags |= EXT4_EOFBLOCKS_FL;
>  }
> 
>  static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
> @@ -5284,7 +5288,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
>  	}
> 
>  	if (S_ISREG(inode->i_mode) &&
> -	    attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
> +	    attr->ia_valid & ATTR_SIZE &&
> +	    (attr->ia_size < inode->i_size ||
> +	     (inode->i_flags & EXT4_EOFBLOCKS_FL))) {
>  		handle_t *handle;
> 
>  		handle = ext4_journal_start(inode, 3);
> @@ -5315,6 +5321,11 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
>  				goto err_out;
>  			}
>  		}
> +		if ((inode->i_flags & EXT4_EOFBLOCKS_FL)) {
> +			rc = vmtruncate(inode, attr->ia_size);
> +			if (rc)
> +				goto err_out;
> +		}
>  	}
> 
>  	rc = inode_setattr(inode, attr);
> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> index b63d193..71f578e 100644
> --- a/fs/ext4/ioctl.c
> +++ b/fs/ext4/ioctl.c
> @@ -92,6 +92,16 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  			flags &= ~EXT4_EXTENTS_FL;
>  		}
> 
> +		if (flags & EXT4_EOFBLOCKS_FL) {
> +			/* we don't support adding EOFBLOCKS flag */
> +			if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
> +				err = -EOPNOTSUPP;
> +				goto flags_out;
> +			}
> +		} else if (oldflags & EXT4_EOFBLOCKS_FL)
> +			/* free the space reserved with fallocate KEEPSIZE */
> +			vmtruncate(inode, inode->i_size);
> +
>  		handle = ext4_journal_start(inode, 1);
>  		if (IS_ERR(handle)) {
>  			err = PTR_ERR(handle);
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 9147ca8..db3ffb6 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -349,10 +349,11 @@ struct inodes_stat_t {
>  #define FS_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
>  #define FS_EXTENT_FL			0x00080000 /* Extents */
>  #define FS_DIRECTIO_FL			0x00100000 /* Use direct i/o */
> +#define FS_EOFBLOCKS_FL			0x00400000 /* Blocks allocated beyond EOF */
>  #define FS_RESERVED_FL			0x80000000 /* reserved for ext2 lib */
> 
> -#define FS_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
> -#define FS_FL_USER_MODIFIABLE		0x000380FF /* User modifiable flags */
> +#define FS_FL_USER_VISIBLE		0x0043DFFF /* User visible flags */
> +#define FS_FL_USER_MODIFIABLE		0x004380FF /* User modifiable flags */
> 
> 
>  #define SYNC_FILE_RANGE_WAIT_BEFORE	1
> 

If we remove the ext4_ioctl support, I guess the patch can become much
simpler.

-aneesh

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Add flag to files with blocks intentionally past EOF
  2010-01-20  3:58   ` Eric Sandeen
  2010-01-20  8:37     ` Andreas Dilger
@ 2010-01-20  9:15     ` Aneesh Kumar K. V
  1 sibling, 0 replies; 11+ messages in thread
From: Aneesh Kumar K. V @ 2010-01-20  9:15 UTC (permalink / raw)
  To: Eric Sandeen, Mingming; +Cc: ext4 development

On Tue, 19 Jan 2010 21:58:47 -0600, Eric Sandeen <sandeen@redhat.com> wrote:
> Mingming wrote:
> > On Tue, 2010-01-19 at 15:45 -0600, Eric Sandeen wrote:
> >> From: Jiaying Zhang <jiayingz@google.com>
> >>
> >> fallocate() may potentially instantiate blocks past EOF, depending
> >> on the flags used when it is called.
> >>
> >> e2fsck currently has a test for blocks past i_size, and it
> >> sometimes trips up - noticeably on xfstests 013 which runs fsstress.
> >>
> >> This patch from Jiayang does fix it up for me - it (along with
> >> e2fsprogs updates and other patches recently from Aneesh) has
> >> survived many fsstress runs in a row.
> >>
> >> The setattr interface may also be used to clear the flag and remove
> >> any blocks past EOF.
> >>
> >> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
> >> ---
> >>
> >> (just resending this since it probably got lost in the previous
> >> thread - Jiaying didn't have a SOB line, but maybe that should
> >> be added.  I have included the proper From: line for authorship)
> >>
> >> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> >> index 874d169..4c7cd9b 100644
> >> --- a/fs/ext4/ext4.h
> >> +++ b/fs/ext4/ext4.h
> >> @@ -284,10 +284,11 @@ struct flex_groups {
> >>  #define EXT4_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
> >>  #define EXT4_HUGE_FILE_FL               0x00040000 /* Set to each huge file */
> >>  #define EXT4_EXTENTS_FL			0x00080000 /* Inode uses extents */
> >> +#define EXT4_EOFBLOCKS_FL		0x00400000 /* Blocks allocated beyond EOF (bit reserved in fs.h) */
> >>  #define EXT4_RESERVED_FL		0x80000000 /* reserved for ext4 lib */
> >>
> >> -#define EXT4_FL_USER_VISIBLE		0x000BDFFF /* User visible flags */
> >> -#define EXT4_FL_USER_MODIFIABLE		0x000B80FF /* User modifiable flags */
> >> +#define EXT4_FL_USER_VISIBLE		0x004BDFFF /* User visible flags */
> >> +#define EXT4_FL_USER_MODIFIABLE		0x004B80FF /* User modifiable flags */
> >>
> >>  /* Flags that should be inherited by new inodes from their parent. */
> >>  #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
> >> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> >> index 765a482..e7d5ba2 100644
> >> --- a/fs/ext4/extents.c
> >> +++ b/fs/ext4/extents.c
> >> @@ -3185,7 +3185,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
> >>  {
> >>  	struct ext4_ext_path *path = NULL;
> >>  	struct ext4_extent_header *eh;
> >> -	struct ext4_extent newex, *ex;
> >> +	struct ext4_extent newex, *ex, *last_ex;
> >>  	ext4_fsblk_t newblock;
> >>  	int err = 0, depth, ret, cache_type;
> >>  	unsigned int allocated = 0;
> >> @@ -3366,6 +3366,14 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
> >>  					EXT4_STATE_DIO_UNWRITTEN;;
> >>  		}
> >>  	}
> >> +
> >> +	if (unlikely(inode->i_flags & EXT4_EOFBLOCKS_FL)) {
> >> +		BUG_ON(!eh->eh_entries);
> > 
> > Perhaps BUG_ON() is too strong? Maybe add some warning messages first.
> 
> perhaps ... not sure how we would get here w/ no entries.
> 
> ...
> 
> >> @@ -5315,6 +5321,11 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
> >>  				goto err_out;
> >>  			}
> >>  		}
> >> +		if ((inode->i_flags & EXT4_EOFBLOCKS_FL)) {
> >> +			rc = vmtruncate(inode, attr->ia_size);
> >> +			if (rc)
> >> +				goto err_out;
> >> +		}
> >>  	}
> >>
> > 
> > I am a little lost why doing vmtruncate here...
> 
> Hm first off I assume vmtruncate will clear blocks past that size,
> but tonight I'm not seeing how it gets there.
> 
> Anyway, it looks like any setting of the size, truncate up or down
> (or to current size) will clear any blocks past EOF.


Yes. Earlier we were doing the truncate only when attr->ia_size <
inode->i_size. Now we do it when we find that the inode has
EXT4_EOFBLOCKS_FL set.


> 
> >>  	rc = inode_setattr(inode, attr);
> >> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> >> index b63d193..71f578e 100644
> >> --- a/fs/ext4/ioctl.c
> >> +++ b/fs/ext4/ioctl.c
> >> @@ -92,6 +92,16 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> >>  			flags &= ~EXT4_EXTENTS_FL;
> >>  		}
> >>
> >> +		if (flags & EXT4_EOFBLOCKS_FL) {
> >> +			/* we don't support adding EOFBLOCKS flag */
> >> +			if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
> >> +				err = -EOPNOTSUPP;
> >> +				goto flags_out;
> >> +			}
> >> +		} else if (oldflags & EXT4_EOFBLOCKS_FL)
> >> +			/* free the space reserved with fallocate KEEPSIZE */
> >> +			vmtruncate(inode, inode->i_size);
> >> +
> >>  		handle = ext4_journal_start(inode, 1);
> >>  		if (IS_ERR(handle)) {
> >>  			err = PTR_ERR(handle);
> > 
> > And here...
> 
> Well here we are clearing the EOFBLOCKS flag so we'd want to clear any
> blocks past EOF.... but now, does vmtruncate do that?
> 
> Ok, count me as confused too, but mostly just so far as how does
> vmtruncate clear the blocks beyond eof.  I guess I glossed over this when reading it.

Yes. vmtruncate ends up calling ext4_truncate, which will do that; if it
does not, that is a bug. When we failed to copy from user space in the
write path, we did call vmtruncate to free up the allocated blocks. So I
guess it should be working. But we should be able to check that easily.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH V2] Add flag to files with blocks intentionally past EOF
  2010-01-19 21:45 [PATCH] Add flag to files with blocks intentionally past EOF Eric Sandeen
  2010-01-20  0:46 ` Mingming
  2010-01-20  9:03 ` Aneesh Kumar K. V
@ 2010-01-21 18:00 ` Eric Sandeen
  2010-01-21 20:32   ` Jiaying Zhang
  2010-02-24 16:26   ` tytso
  2 siblings, 2 replies; 11+ messages in thread
From: Eric Sandeen @ 2010-01-21 18:00 UTC (permalink / raw)
  To: ext4 development; +Cc: Jiaying Zhang

From: Jiaying Zhang <jiayingz@google.com>

fallocate() may potentially instantiate blocks past EOF, depending
on the flags used when it is called.

e2fsck currently has a test for blocks past i_size, and it
sometimes trips up - noticeably on xfstests 013 which runs fsstress.

This patch from Jiaying does fix it up - it (along with
e2fsprogs updates and other patches recently from Aneesh) has
survived many fsstress runs in a row.


(Eric Sandeen: removed ioctl interface and minor cleanups)

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
---

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 874d169..1f6b936 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -284,6 +284,7 @@ struct flex_groups {
 #define EXT4_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
 #define EXT4_HUGE_FILE_FL               0x00040000 /* Set to each huge file */
 #define EXT4_EXTENTS_FL			0x00080000 /* Inode uses extents */
+#define EXT4_EOFBLOCKS_FL		0x00400000 /* Blocks allocated beyond EOF */
 #define EXT4_RESERVED_FL		0x80000000 /* reserved for ext4 lib */
 
 #define EXT4_FL_USER_VISIBLE		0x000BDFFF /* User visible flags */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 765a482..95e94ae 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3185,7 +3185,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 {
 	struct ext4_ext_path *path = NULL;
 	struct ext4_extent_header *eh;
-	struct ext4_extent newex, *ex;
+	struct ext4_extent newex, *ex, *last_ex;
 	ext4_fsblk_t newblock;
 	int err = 0, depth, ret, cache_type;
 	unsigned int allocated = 0;
@@ -3366,6 +3366,19 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 					EXT4_STATE_DIO_UNWRITTEN;;
 		}
 	}
+
+	if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) {
+                if (eh->eh_entries) {
+			last_ex = EXT_LAST_EXTENT(eh);
+		    	if (iblock + ar.len > le32_to_cpu(last_ex->ee_block)
+					    + ext4_ext_get_actual_len(last_ex))
+				EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
+		} else {
+                	WARN_ON(eh->eh_entries == 0);
+			ext4_error(inode->i_sb, __func__,
+				"inode#%lu, eh->eh_entries = 0!", inode->i_ino);
+		}
+	}
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
 	if (err) {
 		/* free data blocks we just allocated */
@@ -3499,6 +3512,13 @@ static void ext4_falloc_update_inode(struct inode *inode,
 			i_size_write(inode, new_size);
 		if (new_size > EXT4_I(inode)->i_disksize)
 			ext4_update_i_disksize(inode, new_size);
+	} else {
+		/*
+		 * Mark that we allocate beyond EOF so the subsequent truncate
+		 * can proceed even if the new size is the same as i_size.
+		 */
+		if (new_size > i_size_read(inode))
+			EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL;
 	}
 
 }
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cbf56da..f5802e9 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4429,6 +4429,8 @@ void ext4_truncate(struct inode *inode)
 	if (!ext4_can_truncate(inode))
 		return;
 
+	EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
+
 	if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
 		ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
 
@@ -5284,7 +5286,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 	}
 
 	if (S_ISREG(inode->i_mode) &&
-	    attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
+	    attr->ia_valid & ATTR_SIZE &&
+	    (attr->ia_size < inode->i_size ||
+	     (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) {
 		handle_t *handle;
 
 		handle = ext4_journal_start(inode, 3);
@@ -5315,6 +5319,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 				goto err_out;
 			}
 		}
+		/* ext4_truncate will clear the flag */
+		if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))
+			ext4_truncate(inode);
 	}
 
 	rc = inode_setattr(inode, attr);


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH V2] Add flag to files with blocks intentionally past EOF
  2010-01-21 18:00 ` [PATCH V2] " Eric Sandeen
@ 2010-01-21 20:32   ` Jiaying Zhang
  2010-02-24 16:26   ` tytso
  1 sibling, 0 replies; 11+ messages in thread
From: Jiaying Zhang @ 2010-01-21 20:32 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: ext4 development

Eric,

Thank you very much for bringing the patch up-to-date and improving it!
I have been meaning to re-sync it and send it again but was pulled away
by other things.

The patch looks good. I think it is ok to drop the ioctl support at this
time. We can add it later if there are users who need that feature.

Jiaying

On Thu, Jan 21, 2010 at 10:00 AM, Eric Sandeen <sandeen@redhat.com> wrote:
>
> From: Jiaying Zhang <jiayingz@google.com>
>
> fallocate() may potentially instantiate blocks past EOF, depending
> on the flags used when it is called.
>
> e2fsck currently has a test for blocks past i_size, and it
> sometimes trips up - noticeably on xfstests 013 which runs fsstress.
>
> This patch from Jiayang does fix it up - it (along with
> e2fsprogs updates and other patches recently from Aneesh) has
> survived many fsstress runs in a row.
>
>
> (Eric Sandeen: removed ioctl interface and minor cleanups)
>
> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
> ---
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 874d169..1f6b936 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -284,6 +284,7 @@ struct flex_groups {
>  #define EXT4_TOPDIR_FL                 0x00020000 /* Top of directory hierarchies*/
>  #define EXT4_HUGE_FILE_FL               0x00040000 /* Set to each huge file */
>  #define EXT4_EXTENTS_FL                        0x00080000 /* Inode uses extents */
> +#define EXT4_EOFBLOCKS_FL              0x00400000 /* Blocks allocated beyond EOF */
>  #define EXT4_RESERVED_FL               0x80000000 /* reserved for ext4 lib */
>
>  #define EXT4_FL_USER_VISIBLE           0x000BDFFF /* User visible flags */
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 765a482..95e94ae 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -3185,7 +3185,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
>  {
>        struct ext4_ext_path *path = NULL;
>        struct ext4_extent_header *eh;
> -       struct ext4_extent newex, *ex;
> +       struct ext4_extent newex, *ex, *last_ex;
>        ext4_fsblk_t newblock;
>        int err = 0, depth, ret, cache_type;
>        unsigned int allocated = 0;
> @@ -3366,6 +3366,19 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
>                                        EXT4_STATE_DIO_UNWRITTEN;;
>                }
>        }
> +
> +       if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) {
> +                if (eh->eh_entries) {
> +                       last_ex = EXT_LAST_EXTENT(eh);
> +                       if (iblock + ar.len > le32_to_cpu(last_ex->ee_block)
> +                                           + ext4_ext_get_actual_len(last_ex))
> +                               EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
> +               } else {
> +                       WARN_ON(eh->eh_entries == 0);
> +                       ext4_error(inode->i_sb, __func__,
> +                               "inode#%lu, eh->eh_entries = 0!", inode->i_ino);
> +               }
> +       }
>        err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
>        if (err) {
>                /* free data blocks we just allocated */
> @@ -3499,6 +3512,13 @@ static void ext4_falloc_update_inode(struct inode *inode,
>                        i_size_write(inode, new_size);
>                if (new_size > EXT4_I(inode)->i_disksize)
>                        ext4_update_i_disksize(inode, new_size);
> +       } else {
> +               /*
> +                * Mark that we allocate beyond EOF so the subsequent truncate
> +                * can proceed even if the new size is the same as i_size.
> +                */
> +               if (new_size > i_size_read(inode))
> +                       EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL;
>        }
>
>  }
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index cbf56da..f5802e9 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4429,6 +4429,8 @@ void ext4_truncate(struct inode *inode)
>        if (!ext4_can_truncate(inode))
>                return;
>
> +       EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
> +
>        if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
>                ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
>
> @@ -5284,7 +5286,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
>        }
>
>        if (S_ISREG(inode->i_mode) &&
> -           attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
> +           attr->ia_valid & ATTR_SIZE &&
> +           (attr->ia_size < inode->i_size ||
> +            (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) {
>                handle_t *handle;
>
>                handle = ext4_journal_start(inode, 3);
> @@ -5315,6 +5319,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
>                                goto err_out;
>                        }
>                }
> +               /* ext4_truncate will clear the flag */
> +               if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))
> +                       ext4_truncate(inode);
>        }
>
>        rc = inode_setattr(inode, attr);
>
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Add flag to files with blocks intentionally past EOF
  2010-01-20  9:03 ` Aneesh Kumar K. V
@ 2010-01-22  0:32   ` Andreas Dilger
  2010-01-22 17:40     ` Eric Sandeen
  0 siblings, 1 reply; 11+ messages in thread
From: Andreas Dilger @ 2010-01-22  0:32 UTC (permalink / raw)
  To: Aneesh Kumar K. V; +Cc: Eric Sandeen, ext4 development

On 2010-01-20, at 02:03, Aneesh Kumar K. V wrote:
>> @@ -4741,8 +4743,8 @@ void ext4_get_inode_flags(struct  
>> ext4_inode_info *ei)
>> {
>> 	unsigned int flags = ei->vfs_inode.i_flags;
>>
>> -	ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
>> -			EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL);
>> +	ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|EXT4_IMMUTABLE_FL|
>> +
>> EXT4_NOATIME_FL|EXT4_DIRSYNC_FL|EXT4_EOFBLOCKS_FL);
>
>
> Do we really need to allow the get and set of this flag. IMHO a  
> truncate
> should be the only API and the flag should be remove implicitly for  
> that.

Since this flag is set on disk in the inode flags, it makes sense to  
allow clearing it via "chattr", just like "chattr +e" will remap a  
file to extent format.  The risk with setting it via truncate() is  
that this is racy with some other process writing to the file.  If we  
allow setting it via "chattr" this can be done in a non-racy manner,  
by locking the inode and dropping only the blocks beyond EOF.

>> @@ -92,6 +92,16 @@ long ext4_ioctl(struct file *filp, unsigned int  
>> cmd, unsigned long arg)
>> 			flags &= ~EXT4_EXTENTS_FL;
>> 		}
>>
>> +		if (flags & EXT4_EOFBLOCKS_FL) {
>> +			/* we don't support adding EOFBLOCKS flag */
>> +			if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
>> +				err = -EOPNOTSUPP;
>> +				goto flags_out;
>> +			}
>> +		} else if (oldflags & EXT4_EOFBLOCKS_FL)
>> +			/* free the space reserved with fallocate KEEPSIZE */
>> +			vmtruncate(inode, inode->i_size);
>> +
>> 		handle = ext4_journal_start(inode, 1);
>> 		if (IS_ERR(handle)) {
>> 			err = PTR_ERR(handle);
>
> If we remove ext4_ioctl support i guess that patch can become much
> simpler.


Sure it will be simpler, but less useful.  The point of exposing this  
flag via lsattr is to allow userspace to determine which files are  
holding blocks beyond EOF, so that if the filesystem is getting too  
full it is possible to run e.g. "lsattr -R" and find files with this  
EOF attribute and truncate them.  Without keeping at least the  
EOFBLOCKS flag in USER_VISIBLE this is impossible.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Add flag to files with blocks intentionally past EOF
  2010-01-22  0:32   ` Andreas Dilger
@ 2010-01-22 17:40     ` Eric Sandeen
  0 siblings, 0 replies; 11+ messages in thread
From: Eric Sandeen @ 2010-01-22 17:40 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: Aneesh Kumar K. V, ext4 development

Andreas Dilger wrote:
> On 2010-01-20, at 02:03, Aneesh Kumar K. V wrote:

...

>> If we remove ext4_ioctl support i guess that patch can become much
>> simpler.
> 
> 
> Sure it will be simpler, but less useful.  The point of exposing this
> flag via lsattr is to allow userspace to determine which files are
> holding blocks beyond EOF, so that if the filesystem is getting too full
> it is possible to run e.g. "lsattr -R" and find files with this EOF
> attribute and truncate them.  Without keeping at least the EOFBLOCKS
> flag in USER_VISIBLE this is impossible.

Well, I submitted an updated patch without it.  We can add another when
we sort out what we really want with the flags, but fixing the
corruption e2fsck finds seems paramount.

I think the case of stray files w/ blocks past EOF is probably pretty
rare - but I guess I agree, at least seeing the flags would be nice.

But let's handle that separately...

-Eric

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH V2] Add flag to files with blocks intentionally past EOF
  2010-01-21 18:00 ` [PATCH V2] " Eric Sandeen
  2010-01-21 20:32   ` Jiaying Zhang
@ 2010-02-24 16:26   ` tytso
  1 sibling, 0 replies; 11+ messages in thread
From: tytso @ 2010-02-24 16:26 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: ext4 development, Jiaying Zhang

On Thu, Jan 21, 2010 at 12:00:30PM -0600, Eric Sandeen wrote:
> From: Jiaying Zhang <jiayingz@google.com>
> 
> fallocate() may potentially instantiate blocks past EOF, depending
> on the flags used when it is called.
> 
> e2fsck currently has a test for blocks past i_size, and it
> sometimes trips up - noticeably on xfstests 013 which runs fsstress.
> 
> This patch from Jiayang does fix it up - it (along with
> e2fsprogs updates and other patches recently from Aneesh) has
> survived many fsstress runs in a row.
> 
> 
> (Eric Sandeen: removed ioctl interface and minor cleanups)
> 
> Signed-off-by: Eric Sandeen <sandeen@redhat.com>

Applied to the maint branch, although I added back the ability to
truncate the fallocated blocks via chattr.

						- Ted

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2010-02-24 16:26 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-01-19 21:45 [PATCH] Add flag to files with blocks intentionally past EOF Eric Sandeen
2010-01-20  0:46 ` Mingming
2010-01-20  3:58   ` Eric Sandeen
2010-01-20  8:37     ` Andreas Dilger
2010-01-20  9:15     ` Aneesh Kumar K. V
2010-01-20  9:03 ` Aneesh Kumar K. V
2010-01-22  0:32   ` Andreas Dilger
2010-01-22 17:40     ` Eric Sandeen
2010-01-21 18:00 ` [PATCH V2] " Eric Sandeen
2010-01-21 20:32   ` Jiaying Zhang
2010-02-24 16:26   ` tytso

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.