From: Chao Yu f2fs_inode_info.flags is unsigned long variable, it has 32 bits in 32bit architecture, since we introduced FI_MMAP_FILE flag when we support data compression, we may access memory cross the border of .flags field, corrupting .i_sem field, result in below deadlock. To fix this issue, let's expand .flags as an array to grab enough space to store new flags. Call Trace: __schedule+0x8d0/0x13fc ? mark_held_locks+0xac/0x100 schedule+0xcc/0x260 rwsem_down_write_slowpath+0x3ab/0x65d down_write+0xc7/0xe0 f2fs_drop_nlink+0x3d/0x600 [f2fs] f2fs_delete_inline_entry+0x300/0x440 [f2fs] f2fs_delete_entry+0x3a1/0x7f0 [f2fs] f2fs_unlink+0x500/0x790 [f2fs] vfs_unlink+0x211/0x490 do_unlinkat+0x483/0x520 sys_unlink+0x4a/0x70 do_fast_syscall_32+0x12b/0x683 entry_SYSENTER_32+0xaa/0x102 Fixes: 4c8ff7095bef ("f2fs: support data compression") Signed-off-by: Chao Yu --- - change .flags as an array to expand scalability - fix wrong condition pointed out by Ondřej Jirman fs/f2fs/f2fs.h | 118 ++++++++++++++++++++++++++---------------------- fs/f2fs/inode.c | 4 +- 2 files changed, 68 insertions(+), 54 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fcafa68212eb..83496fd67a01 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -682,6 +682,47 @@ enum { MAX_GC_FAILURE }; +/* used for f2fs_inode_info->flags */ +enum { + FI_NEW_INODE, /* indicate newly allocated inode */ + FI_DIRTY_INODE, /* indicate inode is dirty or not */ + FI_AUTO_RECOVER, /* indicate inode is recoverable */ + FI_DIRTY_DIR, /* indicate directory has dirty pages */ + FI_INC_LINK, /* need to increment i_nlink */ + FI_ACL_MODE, /* indicate acl mode */ + FI_NO_ALLOC, /* should not allocate any blocks */ + FI_FREE_NID, /* free allocated nide */ + FI_NO_EXTENT, /* not to use the extent cache */ + FI_INLINE_XATTR, /* used for inline xattr */ + FI_INLINE_DATA, /* used for inline data*/ + FI_INLINE_DENTRY, /* used for inline dentry */ + FI_APPEND_WRITE, /* inode has appended data */ + FI_UPDATE_WRITE, /* inode has in-place-update data */ + FI_NEED_IPU, /* used for ipu per file */ + FI_ATOMIC_FILE, /* indicate atomic file */ + FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */ + FI_VOLATILE_FILE, /* indicate volatile file */ + FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ + FI_DROP_CACHE, /* drop dirty page cache */ + FI_DATA_EXIST, /* indicate data exists */ + FI_INLINE_DOTS, /* indicate inline dot dentries */ + FI_DO_DEFRAG, /* indicate defragment is running */ + FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ + FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ + FI_HOT_DATA, /* indicate file is hot */ + FI_EXTRA_ATTR, /* indicate file has extra attribute */ + FI_PROJ_INHERIT, /* indicate file inherits projectid */ + FI_PIN_FILE, /* indicate file should not be gced */ + FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */ + FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */ + FI_COMPRESSED_FILE, /* indicate file's data can be compressed */ + FI_MMAP_FILE, /* indicate file was mmapped */ + FI_MAX, /* max flag, never be used */ +}; + +/* f2fs_inode_info.flags array size */ +#define FI_ARRAY_SIZE (BIT_WORD(FI_MAX) + 1) + struct f2fs_inode_info { struct inode vfs_inode; /* serve a vfs inode */ unsigned long i_flags; /* keep an inode flags for ioctl */ @@ -694,7 +735,7 @@ struct f2fs_inode_info { umode_t i_acl_mode; /* keep file acl mode temporarily */ /* Use below internally in f2fs*/ - unsigned long flags; /* use to pass per-file flags */ + unsigned long flags[FI_ARRAY_SIZE]; /* use to pass per-file flags */ struct rw_semaphore i_sem; /* protect fi info */ atomic_t dirty_pages; /* # of dirty pages */ f2fs_hash_t chash; /* hash value of given file name */ @@ -2531,45 +2572,8 @@ static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) return flags & F2FS_OTHER_FLMASK; } -/* used for f2fs_inode_info->flags */ -enum { - FI_NEW_INODE, /* indicate newly allocated inode */ - FI_DIRTY_INODE, /* indicate inode is dirty or not */ - FI_AUTO_RECOVER, /* indicate inode is recoverable */ - FI_DIRTY_DIR, /* indicate directory has dirty pages */ - FI_INC_LINK, /* need to increment i_nlink */ - FI_ACL_MODE, /* indicate acl mode */ - FI_NO_ALLOC, /* should not allocate any blocks */ - FI_FREE_NID, /* free allocated nide */ - FI_NO_EXTENT, /* not to use the extent cache */ - FI_INLINE_XATTR, /* used for inline xattr */ - FI_INLINE_DATA, /* used for inline data*/ - FI_INLINE_DENTRY, /* used for inline dentry */ - FI_APPEND_WRITE, /* inode has appended data */ - FI_UPDATE_WRITE, /* inode has in-place-update data */ - FI_NEED_IPU, /* used for ipu per file */ - FI_ATOMIC_FILE, /* indicate atomic file */ - FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */ - FI_VOLATILE_FILE, /* indicate volatile file */ - FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ - FI_DROP_CACHE, /* drop dirty page cache */ - FI_DATA_EXIST, /* indicate data exists */ - FI_INLINE_DOTS, /* indicate inline dot dentries */ - FI_DO_DEFRAG, /* indicate defragment is running */ - FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ - FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ - FI_HOT_DATA, /* indicate file is hot */ - FI_EXTRA_ATTR, /* indicate file has extra attribute */ - FI_PROJ_INHERIT, /* indicate file inherits projectid */ - FI_PIN_FILE, /* indicate file should not be gced */ - FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */ - FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */ - FI_COMPRESSED_FILE, /* indicate file's data can be compressed */ - FI_MMAP_FILE, /* indicate file was mmapped */ -}; - static inline void __mark_inode_dirty_flag(struct inode *inode, - int flag, bool set) + unsigned long long flag, bool set) { switch (flag) { case FI_INLINE_XATTR: @@ -2586,22 +2590,32 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, } } +static inline void __set_inode_flag(struct inode *inode, int flag) +{ + if (!test_bit(flag % BITS_PER_LONG, + &F2FS_I(inode)->flags[BIT_WORD(flag)])) + set_bit(flag % BITS_PER_LONG, + &F2FS_I(inode)->flags[BIT_WORD(flag)]); +} + static inline void set_inode_flag(struct inode *inode, int flag) { - if (!test_bit(flag, &F2FS_I(inode)->flags)) - set_bit(flag, &F2FS_I(inode)->flags); + __set_inode_flag(inode, flag); __mark_inode_dirty_flag(inode, flag, true); } static inline int is_inode_flag_set(struct inode *inode, int flag) { - return test_bit(flag, &F2FS_I(inode)->flags); + return test_bit(flag % BITS_PER_LONG, + &F2FS_I(inode)->flags[BIT_WORD(flag)]); } static inline void clear_inode_flag(struct inode *inode, int flag) { - if (test_bit(flag, &F2FS_I(inode)->flags)) - clear_bit(flag, &F2FS_I(inode)->flags); + if (test_bit(flag % BITS_PER_LONG, + &F2FS_I(inode)->flags[BIT_WORD(flag)])) + clear_bit(flag % BITS_PER_LONG, + &F2FS_I(inode)->flags[BIT_WORD(flag)]); __mark_inode_dirty_flag(inode, flag, false); } @@ -2689,22 +2703,20 @@ static inline void f2fs_i_pino_write(struct inode *inode, nid_t pino) static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) { - struct f2fs_inode_info *fi = F2FS_I(inode); - if (ri->i_inline & F2FS_INLINE_XATTR) - set_bit(FI_INLINE_XATTR, &fi->flags); + __set_inode_flag(inode, FI_INLINE_XATTR); if (ri->i_inline & F2FS_INLINE_DATA) - set_bit(FI_INLINE_DATA, &fi->flags); + __set_inode_flag(inode, FI_INLINE_DATA); if (ri->i_inline & F2FS_INLINE_DENTRY) - set_bit(FI_INLINE_DENTRY, &fi->flags); + __set_inode_flag(inode, FI_INLINE_DENTRY); if (ri->i_inline & F2FS_DATA_EXIST) - set_bit(FI_DATA_EXIST, &fi->flags); + __set_inode_flag(inode, FI_DATA_EXIST); if (ri->i_inline & F2FS_INLINE_DOTS) - set_bit(FI_INLINE_DOTS, &fi->flags); + __set_inode_flag(inode, FI_INLINE_DOTS); if (ri->i_inline & F2FS_EXTRA_ATTR) - set_bit(FI_EXTRA_ATTR, &fi->flags); + __set_inode_flag(inode, FI_EXTRA_ATTR); if (ri->i_inline & F2FS_PIN_FILE) - set_bit(FI_PIN_FILE, &fi->flags); + __set_inode_flag(inode, FI_PIN_FILE); } static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 44e08bf2e2b4..5e441fdd4858 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -328,6 +328,7 @@ static int do_read_inode(struct inode *inode) struct f2fs_inode *ri; projid_t i_projid; int err; + int i; /* Check if ino is within scope */ if (f2fs_check_nid_range(sbi, inode->i_ino)) @@ -362,7 +363,8 @@ static int do_read_inode(struct inode *inode) fi->i_flags = le32_to_cpu(ri->i_flags); if (S_ISREG(inode->i_mode)) fi->i_flags &= ~F2FS_PROJINHERIT_FL; - fi->flags = 0; + for (i = 0; i < FI_ARRAY_SIZE; i++) + fi->flags[i] = 0; fi->i_advise = ri->i_advise; fi->i_pino = le32_to_cpu(ri->i_pino); fi->i_dir_level = ri->i_dir_level; -- 2.22.0