* [PATCH 1/5] f2fs: remove f2fs_bug_on in terms of max_depth
@ 2015-12-31 23:49 Jaegeuk Kim
2015-12-31 23:49 ` [PATCH 2/5] f2fs: write pending bios when cp_error is set Jaegeuk Kim
` (3 more replies)
0 siblings, 4 replies; 5+ messages in thread
From: Jaegeuk Kim @ 2015-12-31 23:49 UTC (permalink / raw)
To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim
There is no report on this bug_on case, but if malicious attacker changed this
field intentionally, we can just reset it as a MAX value.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
fs/f2fs/dir.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 3da5826..29bb8dd 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -172,8 +172,6 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
namehash = f2fs_dentry_hash(&name);
- f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH);
-
nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
@@ -238,6 +236,14 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
goto out;
max_depth = F2FS_I(dir)->i_current_depth;
+ if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) {
+ f2fs_msg(F2FS_I_SB(dir)->sb, KERN_WARNING,
+ "Corrupted max_depth of %lu: %u",
+ dir->i_ino, max_depth);
+ max_depth = MAX_DIR_HASH_DEPTH;
+ F2FS_I(dir)->i_current_depth = max_depth;
+ mark_inode_dirty(dir);
+ }
for (level = 0; level < max_depth; level++) {
de = find_in_level(dir, level, &fname, res_page);
--
2.6.3
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/5] f2fs: write pending bios when cp_error is set
2015-12-31 23:49 [PATCH 1/5] f2fs: remove f2fs_bug_on in terms of max_depth Jaegeuk Kim
@ 2015-12-31 23:49 ` Jaegeuk Kim
2015-12-31 23:49 ` [PATCH 3/5] f2fs: use IPU for fdatasync Jaegeuk Kim
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Jaegeuk Kim @ 2015-12-31 23:49 UTC (permalink / raw)
To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim
When testing ioc_shutdown, put_super is able to be hanged by waiting for
writebacking pages as follows.
INFO: task umount:2723 blocked for more than 120 seconds.
Tainted: G O 4.4.0-rc3+ #8
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
umount D ffff88000859f9d8 0 2723 2110 0x00000000
ffff88000859f9d8 0000000000000000 0000000000000000 ffffffff81e11540
ffff880078c225c0 ffff8800085a0000 ffff88007fc17440 7fffffffffffffff
ffffffff818239f0 ffff88000859fb48 ffff88000859f9f0 ffffffff8182310c
Call Trace:
[<ffffffff818239f0>] ? bit_wait+0x50/0x50
[<ffffffff8182310c>] schedule+0x3c/0x90
[<ffffffff81827fb9>] schedule_timeout+0x2d9/0x430
[<ffffffff810e0f8f>] ? mark_held_locks+0x6f/0xa0
[<ffffffff8111614d>] ? ktime_get+0x7d/0x140
[<ffffffff818239f0>] ? bit_wait+0x50/0x50
[<ffffffff8106a655>] ? kvm_clock_get_cycles+0x25/0x30
[<ffffffff8111617c>] ? ktime_get+0xac/0x140
[<ffffffff818239f0>] ? bit_wait+0x50/0x50
[<ffffffff81822564>] io_schedule_timeout+0xa4/0x110
[<ffffffff81823a25>] bit_wait_io+0x35/0x50
[<ffffffff818235bd>] __wait_on_bit+0x5d/0x90
[<ffffffff811b9e8b>] wait_on_page_bit+0xcb/0xf0
[<ffffffff810d5f90>] ? autoremove_wake_function+0x40/0x40
[<ffffffff811cf84c>] truncate_inode_pages_range+0x4bc/0x840
[<ffffffff811cfc3d>] truncate_inode_pages_final+0x4d/0x60
[<ffffffffc023ced5>] f2fs_evict_inode+0x75/0x400 [f2fs]
[<ffffffff812639bc>] evict+0xbc/0x190
[<ffffffff81263d19>] iput+0x229/0x2c0
[<ffffffffc0241885>] f2fs_put_super+0x105/0x1a0 [f2fs]
[<ffffffff8124756a>] generic_shutdown_super+0x6a/0xf0
[<ffffffff812478f7>] kill_block_super+0x27/0x70
[<ffffffffc0241290>] kill_f2fs_super+0x20/0x30 [f2fs]
[<ffffffff81247b03>] deactivate_locked_super+0x43/0x70
[<ffffffff81247f4c>] deactivate_super+0x5c/0x60
[<ffffffff81268d2f>] cleanup_mnt+0x3f/0x90
[<ffffffff81268dc2>] __cleanup_mnt+0x12/0x20
[<ffffffff810ac463>] task_work_run+0x73/0xa0
[<ffffffff810032ac>] exit_to_usermode_loop+0xcc/0xd0
[<ffffffff81003e7c>] syscall_return_slowpath+0xcc/0xe0
[<ffffffff81829ea2>] int_ret_from_sys_call+0x25/0x9f
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
fs/f2fs/checkpoint.c | 2 +-
fs/f2fs/data.c | 2 +-
fs/f2fs/node.c | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 6b89ac6..5dbafd5 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -237,7 +237,7 @@ static int f2fs_write_meta_page(struct page *page,
dec_page_count(sbi, F2FS_DIRTY_META);
unlock_page(page);
- if (wbc->for_reclaim)
+ if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
f2fs_submit_merged_bio(sbi, META, WRITE);
return 0;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 14b40a9..4851e84 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1179,7 +1179,7 @@ out:
unlock_page(page);
if (need_balance_fs)
f2fs_balance_fs(sbi);
- if (wbc->for_reclaim) {
+ if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) {
f2fs_submit_merged_bio(sbi, DATA, WRITE);
remove_dirty_inode(inode);
}
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 94d9753..669c44e 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1368,7 +1368,7 @@ static int f2fs_write_node_page(struct page *page,
up_read(&sbi->node_write);
unlock_page(page);
- if (wbc->for_reclaim)
+ if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
f2fs_submit_merged_bio(sbi, NODE, WRITE);
return 0;
--
2.6.3
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 3/5] f2fs: use IPU for fdatasync
2015-12-31 23:49 [PATCH 1/5] f2fs: remove f2fs_bug_on in terms of max_depth Jaegeuk Kim
2015-12-31 23:49 ` [PATCH 2/5] f2fs: write pending bios when cp_error is set Jaegeuk Kim
@ 2015-12-31 23:49 ` Jaegeuk Kim
2015-12-31 23:49 ` [PATCH 4/5] f2fs: monitor zombie_tree count Jaegeuk Kim
2015-12-31 23:49 ` [PATCH 5/5] f2fs: introduce zombie list for fast shrinking extent trees Jaegeuk Kim
3 siblings, 0 replies; 5+ messages in thread
From: Jaegeuk Kim @ 2015-12-31 23:49 UTC (permalink / raw)
To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim
This patch fixes missing IPU condition when fdatasync is called.
With this patch, fdatasync is able to avoid additional node writes for recovery.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
fs/f2fs/file.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index b04ab40..e3d32f6 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -202,7 +202,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
trace_f2fs_sync_file_enter(inode);
/* if fdatasync is triggered, let's do in-place-update */
- if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
+ if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
set_inode_flag(fi, FI_NEED_IPU);
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
clear_inode_flag(fi, FI_NEED_IPU);
--
2.6.3
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 4/5] f2fs: monitor zombie_tree count
2015-12-31 23:49 [PATCH 1/5] f2fs: remove f2fs_bug_on in terms of max_depth Jaegeuk Kim
2015-12-31 23:49 ` [PATCH 2/5] f2fs: write pending bios when cp_error is set Jaegeuk Kim
2015-12-31 23:49 ` [PATCH 3/5] f2fs: use IPU for fdatasync Jaegeuk Kim
@ 2015-12-31 23:49 ` Jaegeuk Kim
2015-12-31 23:49 ` [PATCH 5/5] f2fs: introduce zombie list for fast shrinking extent trees Jaegeuk Kim
3 siblings, 0 replies; 5+ messages in thread
From: Jaegeuk Kim @ 2015-12-31 23:49 UTC (permalink / raw)
To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim
This patch adds an entry to show the number of zombie extent_tree.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
fs/f2fs/debug.c | 5 +++--
fs/f2fs/f2fs.h | 2 +-
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index ed5dfcc..b73e8e1 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -39,6 +39,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree;
si->total_ext = atomic64_read(&sbi->total_hit_ext);
si->ext_tree = atomic_read(&sbi->total_ext_tree);
+ si->zombie_tree = atomic_read(&sbi->total_zombie_tree);
si->ext_node = atomic_read(&sbi->total_ext_node);
si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
@@ -292,8 +293,8 @@ static int stat_show(struct seq_file *s, void *v)
!si->total_ext ? 0 :
div64_u64(si->hit_total * 100, si->total_ext),
si->hit_total, si->total_ext);
- seq_printf(s, " - Inner Struct Count: tree: %d, node: %d\n",
- si->ext_tree, si->ext_node);
+ seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
+ si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
seq_printf(s, " - inmem: %4d, wb: %4d\n",
si->inmem_pages, si->wb_pages);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index a339508..d81bf5a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1890,7 +1890,7 @@ struct f2fs_stat_info {
int main_area_segs, main_area_sections, main_area_zones;
unsigned long long hit_largest, hit_cached, hit_rbtree;
unsigned long long hit_total, total_ext;
- int ext_tree, ext_node;
+ int ext_tree, zombie_tree, ext_node;
int ndirty_node, ndirty_meta;
int ndirty_dent, ndirty_dirs, ndirty_data, ndirty_files;
int nats, dirty_nats, sits, dirty_sits, fnids;
--
2.6.3
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 5/5] f2fs: introduce zombie list for fast shrinking extent trees
2015-12-31 23:49 [PATCH 1/5] f2fs: remove f2fs_bug_on in terms of max_depth Jaegeuk Kim
` (2 preceding siblings ...)
2015-12-31 23:49 ` [PATCH 4/5] f2fs: monitor zombie_tree count Jaegeuk Kim
@ 2015-12-31 23:49 ` Jaegeuk Kim
3 siblings, 0 replies; 5+ messages in thread
From: Jaegeuk Kim @ 2015-12-31 23:49 UTC (permalink / raw)
To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim
This patch removes refcount, and instead, adds zombie_list to shrink directly
without radix tree traverse.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
fs/f2fs/extent_cache.c | 49 +++++++++++++++++++++----------------------------
fs/f2fs/f2fs.h | 3 ++-
2 files changed, 23 insertions(+), 29 deletions(-)
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index b37184f..4dee2be 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -68,13 +68,13 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
et->root = RB_ROOT;
et->cached_en = NULL;
rwlock_init(&et->lock);
- atomic_set(&et->refcount, 0);
+ INIT_LIST_HEAD(&et->list);
et->count = 0;
atomic_inc(&sbi->total_ext_tree);
} else {
atomic_dec(&sbi->total_zombie_tree);
+ list_del_init(&et->list);
}
- atomic_inc(&et->refcount);
up_write(&sbi->extent_tree_lock);
/* never died until evict_inode */
@@ -551,9 +551,9 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
+ struct extent_tree *et, *next;
struct extent_node *en, *tmp;
unsigned long ino = F2FS_ROOT_INO(sbi);
- struct radix_tree_root *root = &sbi->extent_tree_root;
unsigned int found;
unsigned int node_cnt = 0, tree_cnt = 0;
int remained;
@@ -569,29 +569,20 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
goto out;
/* 1. remove unreferenced extent tree */
- while ((found = radix_tree_gang_lookup(root,
- (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
- unsigned i;
-
- ino = treevec[found - 1]->ino + 1;
- for (i = 0; i < found; i++) {
- struct extent_tree *et = treevec[i];
-
- if (!atomic_read(&et->refcount)) {
- write_lock(&et->lock);
- node_cnt += __free_extent_tree(sbi, et, true);
- write_unlock(&et->lock);
+ list_for_each_entry_safe(et, next, &sbi->zombie_list, list) {
+ write_lock(&et->lock);
+ node_cnt += __free_extent_tree(sbi, et, true);
+ write_unlock(&et->lock);
- radix_tree_delete(root, et->ino);
- kmem_cache_free(extent_tree_slab, et);
- atomic_dec(&sbi->total_ext_tree);
- atomic_dec(&sbi->total_zombie_tree);
- tree_cnt++;
+ list_del_init(&et->list);
+ radix_tree_delete(&sbi->extent_tree_root, et->ino);
+ kmem_cache_free(extent_tree_slab, et);
+ atomic_dec(&sbi->total_ext_tree);
+ atomic_dec(&sbi->total_zombie_tree);
+ tree_cnt++;
- if (node_cnt + tree_cnt >= nr_shrink)
- goto unlock_out;
- }
- }
+ if (node_cnt + tree_cnt >= nr_shrink)
+ goto unlock_out;
}
up_write(&sbi->extent_tree_lock);
@@ -619,7 +610,7 @@ free_node:
*/
ino = F2FS_ROOT_INO(sbi);
- while ((found = radix_tree_gang_lookup(root,
+ while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
unsigned i;
@@ -670,8 +661,10 @@ void f2fs_destroy_extent_tree(struct inode *inode)
return;
if (inode->i_nlink && !is_bad_inode(inode) && et->count) {
- atomic_dec(&et->refcount);
+ down_write(&sbi->extent_tree_lock);
+ list_add_tail(&et->list, &sbi->zombie_list);
atomic_inc(&sbi->total_zombie_tree);
+ up_write(&sbi->extent_tree_lock);
return;
}
@@ -680,8 +673,7 @@ void f2fs_destroy_extent_tree(struct inode *inode)
/* delete extent tree entry in radix tree */
down_write(&sbi->extent_tree_lock);
- atomic_dec(&et->refcount);
- f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
+ f2fs_bug_on(sbi, et->count);
radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
kmem_cache_free(extent_tree_slab, et);
atomic_dec(&sbi->total_ext_tree);
@@ -737,6 +729,7 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&sbi->extent_list);
spin_lock_init(&sbi->extent_lock);
atomic_set(&sbi->total_ext_tree, 0);
+ INIT_LIST_HEAD(&sbi->zombie_list);
atomic_set(&sbi->total_zombie_tree, 0);
atomic_set(&sbi->total_ext_node, 0);
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index d81bf5a..e2990c9 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -359,8 +359,8 @@ struct extent_tree {
struct rb_root root; /* root of extent info rb-tree */
struct extent_node *cached_en; /* recently accessed extent node */
struct extent_info largest; /* largested extent info */
+ struct list_head list; /* to be used by sbi->zombie_list */
rwlock_t lock; /* protect extent info rb-tree */
- atomic_t refcount; /* reference count of rb-tree */
unsigned int count; /* # of extent node in rb-tree*/
};
@@ -764,6 +764,7 @@ struct f2fs_sb_info {
struct list_head extent_list; /* lru list for shrinker */
spinlock_t extent_lock; /* locking extent lru list */
atomic_t total_ext_tree; /* extent tree count */
+ struct list_head zombie_list; /* extent zombie tree list */
atomic_t total_zombie_tree; /* extent zombie tree count */
atomic_t total_ext_node; /* extent info count */
--
2.6.3
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2015-12-31 23:51 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-31 23:49 [PATCH 1/5] f2fs: remove f2fs_bug_on in terms of max_depth Jaegeuk Kim
2015-12-31 23:49 ` [PATCH 2/5] f2fs: write pending bios when cp_error is set Jaegeuk Kim
2015-12-31 23:49 ` [PATCH 3/5] f2fs: use IPU for fdatasync Jaegeuk Kim
2015-12-31 23:49 ` [PATCH 4/5] f2fs: monitor zombie_tree count Jaegeuk Kim
2015-12-31 23:49 ` [PATCH 5/5] f2fs: introduce zombie list for fast shrinking extent trees Jaegeuk Kim
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).