linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/5] f2fs: remove f2fs_bug_on in terms of max_depth
@ 2015-12-31 23:49 Jaegeuk Kim
  2015-12-31 23:49 ` [PATCH 2/5] f2fs: write pending bios when cp_error is set Jaegeuk Kim
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Jaegeuk Kim @ 2015-12-31 23:49 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

There is no report on this bug_on case, but if malicious attacker changed this
field intentionally, we can just reset it as a MAX value.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/dir.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 3da5826..29bb8dd 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -172,8 +172,6 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 
 	namehash = f2fs_dentry_hash(&name);
 
-	f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH);
-
 	nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
 	nblock = bucket_blocks(level);
 
@@ -238,6 +236,14 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
 		goto out;
 
 	max_depth = F2FS_I(dir)->i_current_depth;
+	if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) {
+		f2fs_msg(F2FS_I_SB(dir)->sb, KERN_WARNING,
+				"Corrupted max_depth of %lu: %u",
+				dir->i_ino, max_depth);
+		max_depth = MAX_DIR_HASH_DEPTH;
+		F2FS_I(dir)->i_current_depth = max_depth;
+		mark_inode_dirty(dir);
+	}
 
 	for (level = 0; level < max_depth; level++) {
 		de = find_in_level(dir, level, &fname, res_page);
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/5] f2fs: write pending bios when cp_error is set
  2015-12-31 23:49 [PATCH 1/5] f2fs: remove f2fs_bug_on in terms of max_depth Jaegeuk Kim
@ 2015-12-31 23:49 ` Jaegeuk Kim
  2015-12-31 23:49 ` [PATCH 3/5] f2fs: use IPU for fdatasync Jaegeuk Kim
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Jaegeuk Kim @ 2015-12-31 23:49 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

When testing ioc_shutdown, put_super is able to be hanged by waiting for
writebacking pages as follows.

INFO: task umount:2723 blocked for more than 120 seconds.
      Tainted: G           O    4.4.0-rc3+ #8
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
umount          D ffff88000859f9d8     0  2723   2110 0x00000000
 ffff88000859f9d8 0000000000000000 0000000000000000 ffffffff81e11540
 ffff880078c225c0 ffff8800085a0000 ffff88007fc17440 7fffffffffffffff
 ffffffff818239f0 ffff88000859fb48 ffff88000859f9f0 ffffffff8182310c
Call Trace:
 [<ffffffff818239f0>] ? bit_wait+0x50/0x50
 [<ffffffff8182310c>] schedule+0x3c/0x90
 [<ffffffff81827fb9>] schedule_timeout+0x2d9/0x430
 [<ffffffff810e0f8f>] ? mark_held_locks+0x6f/0xa0
 [<ffffffff8111614d>] ? ktime_get+0x7d/0x140
 [<ffffffff818239f0>] ? bit_wait+0x50/0x50
 [<ffffffff8106a655>] ? kvm_clock_get_cycles+0x25/0x30
 [<ffffffff8111617c>] ? ktime_get+0xac/0x140
 [<ffffffff818239f0>] ? bit_wait+0x50/0x50
 [<ffffffff81822564>] io_schedule_timeout+0xa4/0x110
 [<ffffffff81823a25>] bit_wait_io+0x35/0x50
 [<ffffffff818235bd>] __wait_on_bit+0x5d/0x90
 [<ffffffff811b9e8b>] wait_on_page_bit+0xcb/0xf0
 [<ffffffff810d5f90>] ? autoremove_wake_function+0x40/0x40
 [<ffffffff811cf84c>] truncate_inode_pages_range+0x4bc/0x840
 [<ffffffff811cfc3d>] truncate_inode_pages_final+0x4d/0x60
 [<ffffffffc023ced5>] f2fs_evict_inode+0x75/0x400 [f2fs]
 [<ffffffff812639bc>] evict+0xbc/0x190
 [<ffffffff81263d19>] iput+0x229/0x2c0
 [<ffffffffc0241885>] f2fs_put_super+0x105/0x1a0 [f2fs]
 [<ffffffff8124756a>] generic_shutdown_super+0x6a/0xf0
 [<ffffffff812478f7>] kill_block_super+0x27/0x70
 [<ffffffffc0241290>] kill_f2fs_super+0x20/0x30 [f2fs]
 [<ffffffff81247b03>] deactivate_locked_super+0x43/0x70
 [<ffffffff81247f4c>] deactivate_super+0x5c/0x60
 [<ffffffff81268d2f>] cleanup_mnt+0x3f/0x90
 [<ffffffff81268dc2>] __cleanup_mnt+0x12/0x20
 [<ffffffff810ac463>] task_work_run+0x73/0xa0
 [<ffffffff810032ac>] exit_to_usermode_loop+0xcc/0xd0
 [<ffffffff81003e7c>] syscall_return_slowpath+0xcc/0xe0
 [<ffffffff81829ea2>] int_ret_from_sys_call+0x25/0x9f

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c | 2 +-
 fs/f2fs/data.c       | 2 +-
 fs/f2fs/node.c       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 6b89ac6..5dbafd5 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -237,7 +237,7 @@ static int f2fs_write_meta_page(struct page *page,
 	dec_page_count(sbi, F2FS_DIRTY_META);
 	unlock_page(page);
 
-	if (wbc->for_reclaim)
+	if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
 		f2fs_submit_merged_bio(sbi, META, WRITE);
 	return 0;
 
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 14b40a9..4851e84 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1179,7 +1179,7 @@ out:
 	unlock_page(page);
 	if (need_balance_fs)
 		f2fs_balance_fs(sbi);
-	if (wbc->for_reclaim) {
+	if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) {
 		f2fs_submit_merged_bio(sbi, DATA, WRITE);
 		remove_dirty_inode(inode);
 	}
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 94d9753..669c44e 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1368,7 +1368,7 @@ static int f2fs_write_node_page(struct page *page,
 	up_read(&sbi->node_write);
 	unlock_page(page);
 
-	if (wbc->for_reclaim)
+	if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
 		f2fs_submit_merged_bio(sbi, NODE, WRITE);
 
 	return 0;
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/5] f2fs: use IPU for fdatasync
  2015-12-31 23:49 [PATCH 1/5] f2fs: remove f2fs_bug_on in terms of max_depth Jaegeuk Kim
  2015-12-31 23:49 ` [PATCH 2/5] f2fs: write pending bios when cp_error is set Jaegeuk Kim
@ 2015-12-31 23:49 ` Jaegeuk Kim
  2015-12-31 23:49 ` [PATCH 4/5] f2fs: monitor zombie_tree count Jaegeuk Kim
  2015-12-31 23:49 ` [PATCH 5/5] f2fs: introduce zombie list for fast shrinking extent trees Jaegeuk Kim
  3 siblings, 0 replies; 5+ messages in thread
From: Jaegeuk Kim @ 2015-12-31 23:49 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This patch fixes missing IPU condition when fdatasync is called.
With this patch, fdatasync is able to avoid additional node writes for recovery.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index b04ab40..e3d32f6 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -202,7 +202,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	trace_f2fs_sync_file_enter(inode);
 
 	/* if fdatasync is triggered, let's do in-place-update */
-	if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
+	if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
 		set_inode_flag(fi, FI_NEED_IPU);
 	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
 	clear_inode_flag(fi, FI_NEED_IPU);
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 4/5] f2fs: monitor zombie_tree count
  2015-12-31 23:49 [PATCH 1/5] f2fs: remove f2fs_bug_on in terms of max_depth Jaegeuk Kim
  2015-12-31 23:49 ` [PATCH 2/5] f2fs: write pending bios when cp_error is set Jaegeuk Kim
  2015-12-31 23:49 ` [PATCH 3/5] f2fs: use IPU for fdatasync Jaegeuk Kim
@ 2015-12-31 23:49 ` Jaegeuk Kim
  2015-12-31 23:49 ` [PATCH 5/5] f2fs: introduce zombie list for fast shrinking extent trees Jaegeuk Kim
  3 siblings, 0 replies; 5+ messages in thread
From: Jaegeuk Kim @ 2015-12-31 23:49 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This patch adds an entry to show the number of zombie extent_tree.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/debug.c | 5 +++--
 fs/f2fs/f2fs.h  | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index ed5dfcc..b73e8e1 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -39,6 +39,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
 	si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree;
 	si->total_ext = atomic64_read(&sbi->total_hit_ext);
 	si->ext_tree = atomic_read(&sbi->total_ext_tree);
+	si->zombie_tree = atomic_read(&sbi->total_zombie_tree);
 	si->ext_node = atomic_read(&sbi->total_ext_node);
 	si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
 	si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
@@ -292,8 +293,8 @@ static int stat_show(struct seq_file *s, void *v)
 				!si->total_ext ? 0 :
 				div64_u64(si->hit_total * 100, si->total_ext),
 				si->hit_total, si->total_ext);
-		seq_printf(s, "  - Inner Struct Count: tree: %d, node: %d\n",
-				si->ext_tree, si->ext_node);
+		seq_printf(s, "  - Inner Struct Count: tree: %d(%d), node: %d\n",
+				si->ext_tree, si->zombie_tree, si->ext_node);
 		seq_puts(s, "\nBalancing F2FS Async:\n");
 		seq_printf(s, "  - inmem: %4d, wb: %4d\n",
 			   si->inmem_pages, si->wb_pages);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index a339508..d81bf5a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1890,7 +1890,7 @@ struct f2fs_stat_info {
 	int main_area_segs, main_area_sections, main_area_zones;
 	unsigned long long hit_largest, hit_cached, hit_rbtree;
 	unsigned long long hit_total, total_ext;
-	int ext_tree, ext_node;
+	int ext_tree, zombie_tree, ext_node;
 	int ndirty_node, ndirty_meta;
 	int ndirty_dent, ndirty_dirs, ndirty_data, ndirty_files;
 	int nats, dirty_nats, sits, dirty_sits, fnids;
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 5/5] f2fs: introduce zombie list for fast shrinking extent trees
  2015-12-31 23:49 [PATCH 1/5] f2fs: remove f2fs_bug_on in terms of max_depth Jaegeuk Kim
                   ` (2 preceding siblings ...)
  2015-12-31 23:49 ` [PATCH 4/5] f2fs: monitor zombie_tree count Jaegeuk Kim
@ 2015-12-31 23:49 ` Jaegeuk Kim
  3 siblings, 0 replies; 5+ messages in thread
From: Jaegeuk Kim @ 2015-12-31 23:49 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This patch removes refcount, and instead, adds zombie_list to shrink directly
without radix tree traverse.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/extent_cache.c | 49 +++++++++++++++++++++----------------------------
 fs/f2fs/f2fs.h         |  3 ++-
 2 files changed, 23 insertions(+), 29 deletions(-)

diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index b37184f..4dee2be 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -68,13 +68,13 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
 		et->root = RB_ROOT;
 		et->cached_en = NULL;
 		rwlock_init(&et->lock);
-		atomic_set(&et->refcount, 0);
+		INIT_LIST_HEAD(&et->list);
 		et->count = 0;
 		atomic_inc(&sbi->total_ext_tree);
 	} else {
 		atomic_dec(&sbi->total_zombie_tree);
+		list_del_init(&et->list);
 	}
-	atomic_inc(&et->refcount);
 	up_write(&sbi->extent_tree_lock);
 
 	/* never died until evict_inode */
@@ -551,9 +551,9 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
 unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
 {
 	struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
+	struct extent_tree *et, *next;
 	struct extent_node *en, *tmp;
 	unsigned long ino = F2FS_ROOT_INO(sbi);
-	struct radix_tree_root *root = &sbi->extent_tree_root;
 	unsigned int found;
 	unsigned int node_cnt = 0, tree_cnt = 0;
 	int remained;
@@ -569,29 +569,20 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
 		goto out;
 
 	/* 1. remove unreferenced extent tree */
-	while ((found = radix_tree_gang_lookup(root,
-				(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
-		unsigned i;
-
-		ino = treevec[found - 1]->ino + 1;
-		for (i = 0; i < found; i++) {
-			struct extent_tree *et = treevec[i];
-
-			if (!atomic_read(&et->refcount)) {
-				write_lock(&et->lock);
-				node_cnt += __free_extent_tree(sbi, et, true);
-				write_unlock(&et->lock);
+	list_for_each_entry_safe(et, next, &sbi->zombie_list, list) {
+		write_lock(&et->lock);
+		node_cnt += __free_extent_tree(sbi, et, true);
+		write_unlock(&et->lock);
 
-				radix_tree_delete(root, et->ino);
-				kmem_cache_free(extent_tree_slab, et);
-				atomic_dec(&sbi->total_ext_tree);
-				atomic_dec(&sbi->total_zombie_tree);
-				tree_cnt++;
+		list_del_init(&et->list);
+		radix_tree_delete(&sbi->extent_tree_root, et->ino);
+		kmem_cache_free(extent_tree_slab, et);
+		atomic_dec(&sbi->total_ext_tree);
+		atomic_dec(&sbi->total_zombie_tree);
+		tree_cnt++;
 
-				if (node_cnt + tree_cnt >= nr_shrink)
-					goto unlock_out;
-			}
-		}
+		if (node_cnt + tree_cnt >= nr_shrink)
+			goto unlock_out;
 	}
 	up_write(&sbi->extent_tree_lock);
 
@@ -619,7 +610,7 @@ free_node:
 	 */
 	ino = F2FS_ROOT_INO(sbi);
 
-	while ((found = radix_tree_gang_lookup(root,
+	while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
 				(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
 		unsigned i;
 
@@ -670,8 +661,10 @@ void f2fs_destroy_extent_tree(struct inode *inode)
 		return;
 
 	if (inode->i_nlink && !is_bad_inode(inode) && et->count) {
-		atomic_dec(&et->refcount);
+		down_write(&sbi->extent_tree_lock);
+		list_add_tail(&et->list, &sbi->zombie_list);
 		atomic_inc(&sbi->total_zombie_tree);
+		up_write(&sbi->extent_tree_lock);
 		return;
 	}
 
@@ -680,8 +673,7 @@ void f2fs_destroy_extent_tree(struct inode *inode)
 
 	/* delete extent tree entry in radix tree */
 	down_write(&sbi->extent_tree_lock);
-	atomic_dec(&et->refcount);
-	f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
+	f2fs_bug_on(sbi, et->count);
 	radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
 	kmem_cache_free(extent_tree_slab, et);
 	atomic_dec(&sbi->total_ext_tree);
@@ -737,6 +729,7 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi)
 	INIT_LIST_HEAD(&sbi->extent_list);
 	spin_lock_init(&sbi->extent_lock);
 	atomic_set(&sbi->total_ext_tree, 0);
+	INIT_LIST_HEAD(&sbi->zombie_list);
 	atomic_set(&sbi->total_zombie_tree, 0);
 	atomic_set(&sbi->total_ext_node, 0);
 }
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index d81bf5a..e2990c9 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -359,8 +359,8 @@ struct extent_tree {
 	struct rb_root root;		/* root of extent info rb-tree */
 	struct extent_node *cached_en;	/* recently accessed extent node */
 	struct extent_info largest;	/* largested extent info */
+	struct list_head list;		/* to be used by sbi->zombie_list */
 	rwlock_t lock;			/* protect extent info rb-tree */
-	atomic_t refcount;		/* reference count of rb-tree */
 	unsigned int count;		/* # of extent node in rb-tree*/
 };
 
@@ -764,6 +764,7 @@ struct f2fs_sb_info {
 	struct list_head extent_list;		/* lru list for shrinker */
 	spinlock_t extent_lock;			/* locking extent lru list */
 	atomic_t total_ext_tree;		/* extent tree count */
+	struct list_head zombie_list;		/* extent zombie tree list */
 	atomic_t total_zombie_tree;		/* extent zombie tree count */
 	atomic_t total_ext_node;		/* extent info count */
 
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-12-31 23:51 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-31 23:49 [PATCH 1/5] f2fs: remove f2fs_bug_on in terms of max_depth Jaegeuk Kim
2015-12-31 23:49 ` [PATCH 2/5] f2fs: write pending bios when cp_error is set Jaegeuk Kim
2015-12-31 23:49 ` [PATCH 3/5] f2fs: use IPU for fdatasync Jaegeuk Kim
2015-12-31 23:49 ` [PATCH 4/5] f2fs: monitor zombie_tree count Jaegeuk Kim
2015-12-31 23:49 ` [PATCH 5/5] f2fs: introduce zombie list for fast shrinking extent trees Jaegeuk Kim

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).