linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] f2fs: remove build_free_nids() during checkpoint
@ 2017-02-14  2:06 Jaegeuk Kim
  2017-02-14  2:06 ` [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks Jaegeuk Kim
  2017-02-14  2:06 ` [PATCH 3/3] f2fs: avoid reading NAT page by get_node_info Jaegeuk Kim
  0 siblings, 2 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2017-02-14  2:06 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

Let's avoid build_free_nids() in checkpoint path.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index e6e42a4b1344..042f8d9afe44 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -999,8 +999,6 @@ static int block_operations(struct f2fs_sb_info *sbi)
 static void unblock_operations(struct f2fs_sb_info *sbi)
 {
 	up_write(&sbi->node_write);
-
-	build_free_nids(sbi, false);
 	f2fs_unlock_all(sbi);
 }
 
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks
  2017-02-14  2:06 [PATCH 1/3] f2fs: remove build_free_nids() during checkpoint Jaegeuk Kim
@ 2017-02-14  2:06 ` Jaegeuk Kim
  2017-02-23 11:41   ` [f2fs-dev] " Chao Yu
  2017-02-23 22:58   ` [PATCH 2/3 v2] " Jaegeuk Kim
  2017-02-14  2:06 ` [PATCH 3/3] f2fs: avoid reading NAT page by get_node_info Jaegeuk Kim
  1 sibling, 2 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2017-02-14  2:06 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This patches adds bitmaps to represent empty or full NAT blocks containing
free nid entries.

If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll
use these bitmaps when building free nids. In order to avoid checkpointing
burden, up-to-date bitmaps will be flushed only during umount time. So,
normally we can get this gain, but when power-cut happens, we rely on fsck.f2fs
which recovers this bitmap again.

After this patch, we build free nids from nid #0 at mount time to make more
full NAT blocks, but in runtime, we check empty NAT blocks to load free nids
without loading any NAT pages from disk.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c    |  29 +++++++-
 fs/f2fs/debug.c         |   1 +
 fs/f2fs/f2fs.h          |  23 +++++-
 fs/f2fs/node.c          | 188 +++++++++++++++++++++++++++++++++++++++++++-----
 fs/f2fs/segment.c       |   2 +-
 include/linux/f2fs_fs.h |   1 +
 6 files changed, 224 insertions(+), 20 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 042f8d9afe44..783c5c3f16a4 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1024,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	spin_lock(&sbi->cp_lock);
 
+	if (ckpt->cp_pack_total_block_count >
+			sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
+		disable_nat_bits(sbi, false);
+
 	if (cpc->reason == CP_UMOUNT)
 		__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
 	else
@@ -1136,6 +1140,29 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	start_blk = __start_cp_next_addr(sbi);
 
+	/* write nat bits */
+	if (cpc->reason == CP_UMOUNT &&
+				is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
+		__u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
+		unsigned int i;
+		block_t blk;
+
+		cp_ver |= ((__u64)crc32 << 32);
+		*(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);
+
+		blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
+		for (i = 0; i < nm_i->nat_bits_blocks; i++)
+			update_meta_page(sbi, nm_i->nat_bits +
+					(i << F2FS_BLKSIZE_BITS), blk + i);
+
+		/* Flush all the NAT BITS pages */
+		while (get_pages(sbi, F2FS_DIRTY_META)) {
+			sync_meta_pages(sbi, META, LONG_MAX);
+			if (unlikely(f2fs_cp_error(sbi)))
+				return -EIO;
+		}
+	}
+
 	/* need to wait for end_io results */
 	wait_on_all_pages_writeback(sbi);
 	if (unlikely(f2fs_cp_error(sbi)))
@@ -1272,7 +1299,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
 
 	/* write cached NAT/SIT entries to NAT/SIT area */
-	flush_nat_entries(sbi);
+	flush_nat_entries(sbi, cpc);
 	flush_sit_entries(sbi, cpc);
 
 	/* unlock all the fs_lock[] in do_checkpoint() */
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index de8da9fc5c99..015ad2b73a92 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -193,6 +193,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
 	/* build nm */
 	si->base_mem += sizeof(struct f2fs_nm_info);
 	si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
+	si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
 
 get_cache:
 	si->cache_mem = 0;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 5c87d763a347..d263dade5e4c 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -547,6 +547,7 @@ struct f2fs_nm_info {
 	struct list_head nat_entries;	/* cached nat entry list (clean) */
 	unsigned int nat_cnt;		/* the # of cached nat entries */
 	unsigned int dirty_nat_cnt;	/* total num of nat entries in set */
+	unsigned int nat_blocks;	/* # of nat blocks */
 
 	/* free node ids management */
 	struct radix_tree_root free_nid_root;/* root of the free_nid cache */
@@ -557,6 +558,11 @@ struct f2fs_nm_info {
 
 	/* for checkpoint */
 	char *nat_bitmap;		/* NAT bitmap pointer */
+
+	unsigned int nat_bits_blocks;	/* # of nat bits blocks */
+	unsigned char *nat_bits;	/* NAT bits blocks */
+	unsigned char *full_nat_bits;	/* full NAT pages */
+	unsigned char *empty_nat_bits;	/* empty NAT pages */
 #ifdef CONFIG_F2FS_CHECK_FS
 	char *nat_bitmap_mir;		/* NAT bitmap mirror */
 #endif
@@ -1158,6 +1164,19 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
 	spin_unlock(&sbi->cp_lock);
 }
 
+static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
+{
+	set_sbi_flag(sbi, SBI_NEED_FSCK);
+
+	if (lock)
+		spin_lock(&sbi->cp_lock);
+	__clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
+	kfree(NM_I(sbi)->nat_bits);
+	NM_I(sbi)->nat_bits = NULL;
+	if (lock)
+		spin_unlock(&sbi->cp_lock);
+}
+
 static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
 {
 	down_read(&sbi->cp_rwsem);
@@ -2118,7 +2137,7 @@ void move_node_page(struct page *node_page, int gc_type);
 int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
 			struct writeback_control *wbc, bool atomic);
 int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc);
-void build_free_nids(struct f2fs_sb_info *sbi, bool sync);
+void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
 bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
 void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
 void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
@@ -2129,7 +2148,7 @@ int recover_xattr_data(struct inode *inode, struct page *page,
 int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
 int restore_node_summary(struct f2fs_sb_info *sbi,
 			unsigned int segno, struct f2fs_summary_block *sum);
-void flush_nat_entries(struct f2fs_sb_info *sbi);
+void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
 int build_node_manager(struct f2fs_sb_info *sbi);
 void destroy_node_manager(struct f2fs_sb_info *sbi);
 int __init create_node_manager_caches(void);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index f8fb25a27b46..e001d084667f 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -338,6 +338,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
 		set_nat_flag(e, IS_CHECKPOINTED, false);
 	__set_nat_cache_dirty(nm_i, e);
 
+	if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG) &&
+					new_blkaddr == NEW_ADDR)
+		test_and_clear_bit_le(ni->nid / NAT_ENTRY_PER_BLOCK,
+					nm_i->empty_nat_bits);
+
 	/* update fsync_mark if its inode nat entry is still alive */
 	if (ni->nid != ni->ino)
 		e = __lookup_nat_cache(nm_i, ni->ino);
@@ -1839,7 +1844,59 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
 	}
 }
 
-static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
+static int scan_nat_bits(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	struct page *page;
+	unsigned int i = 0;
+	nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
+	nid_t nid;
+
+	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
+		return -EAGAIN;
+
+	down_read(&nm_i->nat_tree_lock);
+check_empty:
+	i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
+	if (i >= nm_i->nat_blocks) {
+		i = 0;
+		goto check_partial;
+	}
+
+	for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
+									nid++) {
+		if (unlikely(nid >= nm_i->max_nid))
+			break;
+		add_free_nid(sbi, nid, true);
+	}
+
+	if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
+		goto out;
+	i++;
+	goto check_empty;
+
+check_partial:
+	i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
+	if (i >= nm_i->nat_blocks) {
+		disable_nat_bits(sbi, true);
+		return -EINVAL;
+	}
+
+	nid = i * NAT_ENTRY_PER_BLOCK;
+	page = get_current_nat_page(sbi, nid);
+	scan_nat_page(sbi, page, nid);
+	f2fs_put_page(page, 1);
+
+	if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
+		i++;
+		goto check_partial;
+	}
+out:
+	up_read(&nm_i->nat_tree_lock);
+	return 0;
+}
+
+static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
 	if (!sync && !available_free_memory(sbi, FREE_NIDS))
 		return;
 
+	/* try to find free nids with nat_bits */
+	if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
+		return;
+
+	/* find next valid candidate */
+	if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
+		int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
+					nm_i->nat_blocks, 0);
+		if (idx >= nm_i->nat_blocks)
+			set_sbi_flag(sbi, SBI_NEED_FSCK);
+		else
+			nid = idx * NAT_ENTRY_PER_BLOCK;
+	}
+
 	/* readahead nat pages to be scanned */
 	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
 							META_NAT, true);
@@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
 					nm_i->ra_nid_pages, META_NAT, false);
 }
 
-void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
+void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
 {
 	mutex_lock(&NM_I(sbi)->build_lock);
-	__build_free_nids(sbi, sync);
+	__build_free_nids(sbi, sync, mount);
 	mutex_unlock(&NM_I(sbi)->build_lock);
 }
 
@@ -1941,7 +2012,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
 	spin_unlock(&nm_i->nid_list_lock);
 
 	/* Let's scan nat pages and its caches to get free nids */
-	build_free_nids(sbi, true);
+	build_free_nids(sbi, true, false);
 	goto retry;
 }
 
@@ -2233,8 +2304,39 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes,
 	list_add_tail(&nes->set_list, head);
 }
 
+void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
+						struct page *page)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
+	struct f2fs_nat_block *nat_blk = page_address(page);
+	int valid = 0;
+	int i;
+
+	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
+		return;
+
+	for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
+		if (start_nid == 0 && i == 0)
+			valid++;
+		if (nat_blk->entries[i].block_addr)
+			valid++;
+	}
+	if (valid == 0) {
+		test_and_set_bit_le(nat_index, nm_i->empty_nat_bits);
+		test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
+		return;
+	}
+
+	test_and_clear_bit_le(nat_index, nm_i->empty_nat_bits);
+	if (valid == NAT_ENTRY_PER_BLOCK)
+		test_and_set_bit_le(nat_index, nm_i->full_nat_bits);
+	else
+		test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
+}
+
 static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
-					struct nat_entry_set *set)
+		struct nat_entry_set *set, struct cp_control *cpc)
 {
 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
 	struct f2fs_journal *journal = curseg->journal;
@@ -2249,7 +2351,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 	 * #1, flush nat entries to journal in current hot data summary block.
 	 * #2, flush nat entries to nat page.
 	 */
-	if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
+	if (cpc->reason == CP_UMOUNT ||
+		!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
 		to_journal = false;
 
 	if (to_journal) {
@@ -2289,10 +2392,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 		}
 	}
 
-	if (to_journal)
+	if (to_journal) {
 		up_write(&curseg->journal_rwsem);
-	else
+	} else {
+		__update_nat_bits(sbi, start_nid, page);
 		f2fs_put_page(page, 1);
+	}
 
 	f2fs_bug_on(sbi, set->entry_cnt);
 
@@ -2303,7 +2408,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 /*
  * This function is called during the checkpointing process.
  */
-void flush_nat_entries(struct f2fs_sb_info *sbi)
+void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -2324,7 +2429,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
 	 * entries, remove all entries from journal and merge them
 	 * into nat entry set.
 	 */
-	if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
+	if (cpc->reason == CP_UMOUNT ||
+		!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
 		remove_nats_in_journal(sbi);
 
 	while ((found = __gang_lookup_nat_set(nm_i,
@@ -2338,27 +2444,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
 
 	/* flush dirty nats in nat entry set */
 	list_for_each_entry_safe(set, tmp, &sets, set_list)
-		__flush_nat_entry_set(sbi, set);
+		__flush_nat_entry_set(sbi, set, cpc);
 
 	up_write(&nm_i->nat_tree_lock);
 
 	f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
 }
 
+static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
+	unsigned int i;
+	__u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
+	size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
+	__u64 crc = le32_to_cpu(*((__le32 *)
+				((unsigned char *)ckpt + crc_offset)));
+	block_t nat_bits_addr;
+
+	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
+		return 0;
+
+	nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
+						F2FS_BLKSIZE - 1);
+	nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
+						GFP_KERNEL);
+	if (!nm_i->nat_bits)
+		return -ENOMEM;
+
+	nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
+						nm_i->nat_bits_blocks;
+	for (i = 0; i < nm_i->nat_bits_blocks; i++) {
+		struct page *page = get_meta_page(sbi, nat_bits_addr++);
+
+		memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
+					page_address(page), F2FS_BLKSIZE);
+		f2fs_put_page(page, 1);
+	}
+
+	cp_ver |= (crc << 32);
+	if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
+		disable_nat_bits(sbi, true);
+		return 0;
+	}
+
+	nm_i->full_nat_bits = nm_i->nat_bits + 8;
+	nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
+
+	f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
+	return 0;
+}
+
 static int init_node_manager(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	unsigned char *version_bitmap;
-	unsigned int nat_segs, nat_blocks;
+	unsigned int nat_segs;
+	int err;
 
 	nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
 
 	/* segment_count_nat includes pair segment so divide to 2. */
 	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
-	nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
-
-	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
+	nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
+	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
 
 	/* not used nids: 0, node, meta, (and root counted as valid node) */
 	nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
@@ -2392,6 +2543,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
 	if (!nm_i->nat_bitmap)
 		return -ENOMEM;
 
+	err = __get_nat_bitmaps(sbi);
+	if (err)
+		return err;
+
 #ifdef CONFIG_F2FS_CHECK_FS
 	nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
 					GFP_KERNEL);
@@ -2414,7 +2569,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
 	if (err)
 		return err;
 
-	build_free_nids(sbi, true);
+	build_free_nids(sbi, true, true);
 	return 0;
 }
 
@@ -2473,6 +2628,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
 	up_write(&nm_i->nat_tree_lock);
 
 	kfree(nm_i->nat_bitmap);
+	kfree(nm_i->nat_bits);
 #ifdef CONFIG_F2FS_CHECK_FS
 	kfree(nm_i->nat_bitmap_mir);
 #endif
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index df2ff5cfe8f4..8e1ec248c653 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
 	if (!available_free_memory(sbi, FREE_NIDS))
 		try_to_free_nids(sbi, MAX_FREE_NIDS);
 	else
-		build_free_nids(sbi, false);
+		build_free_nids(sbi, false, false);
 
 	if (!is_idle(sbi))
 		return;
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index f0748524ca8c..1c92ace2e8f8 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -114,6 +114,7 @@ struct f2fs_super_block {
 /*
  * For checkpoint
  */
+#define CP_NAT_BITS_FLAG	0x00000080
 #define CP_CRC_RECOVERY_FLAG	0x00000040
 #define CP_FASTBOOT_FLAG	0x00000020
 #define CP_FSCK_FLAG		0x00000010
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 3/3] f2fs: avoid reading NAT page by get_node_info
  2017-02-14  2:06 [PATCH 1/3] f2fs: remove build_free_nids() during checkpoint Jaegeuk Kim
  2017-02-14  2:06 ` [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks Jaegeuk Kim
@ 2017-02-14  2:06 ` Jaegeuk Kim
  2017-02-23 11:47   ` Chao Yu
  1 sibling, 1 reply; 16+ messages in thread
From: Jaegeuk Kim @ 2017-02-14  2:06 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

We've not seen this buggy case for a long time, so it's time to avoid this
unnecessary get_node_info() call which reading NAT page to cache nat entry.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/node.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index e001d084667f..b63bdb85ad66 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1033,7 +1033,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
 				unsigned int ofs, struct page *ipage)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
-	struct node_info old_ni, new_ni;
+	struct node_info new_ni;
 	struct page *page;
 	int err;
 
@@ -1049,12 +1049,11 @@ struct page *new_node_page(struct dnode_of_data *dn,
 		goto fail;
 	}
 
-	get_node_info(sbi, dn->nid, &old_ni);
-
-	/* Reinitialize old_ni with new node page */
-	f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR);
-	new_ni = old_ni;
+	new_ni.nid = dn->nid;
 	new_ni.ino = dn->inode->i_ino;
+	new_ni.blk_addr = NULL_ADDR;
+	new_ni.flag = 0;
+	new_ni.version = 0;
 	set_node_addr(sbi, &new_ni, NEW_ADDR, false);
 
 	f2fs_wait_on_page_writeback(page, NODE, true);
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks
  2017-02-14  2:06 ` [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks Jaegeuk Kim
@ 2017-02-23 11:41   ` Chao Yu
  2017-02-23 22:54     ` Jaegeuk Kim
  2017-02-23 22:58   ` [PATCH 2/3 v2] " Jaegeuk Kim
  1 sibling, 1 reply; 16+ messages in thread
From: Chao Yu @ 2017-02-23 11:41 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2017/2/14 10:06, Jaegeuk Kim wrote:
> This patches adds bitmaps to represent empty or full NAT blocks containing
> free nid entries.
> 
> If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll
> use these bitmaps when building free nids. In order to avoid checkpointing
> burden, up-to-date bitmaps will be flushed only during umount time. So,
> normally we can get this gain, but when power-cut happens, we rely on fsck.f2fs
> which recovers this bitmap again.
> 
> After this patch, we build free nids from nid #0 at mount time to make more
> full NAT blocks, but in runtime, we check empty NAT blocks to load free nids
> without loading any NAT pages from disk.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>  fs/f2fs/checkpoint.c    |  29 +++++++-
>  fs/f2fs/debug.c         |   1 +
>  fs/f2fs/f2fs.h          |  23 +++++-
>  fs/f2fs/node.c          | 188 +++++++++++++++++++++++++++++++++++++++++++-----
>  fs/f2fs/segment.c       |   2 +-
>  include/linux/f2fs_fs.h |   1 +
>  6 files changed, 224 insertions(+), 20 deletions(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 042f8d9afe44..783c5c3f16a4 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1024,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  
>  	spin_lock(&sbi->cp_lock);
>  
> +	if (ckpt->cp_pack_total_block_count >
> +			sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
> +		disable_nat_bits(sbi, false);

I think we need to drop nat full/empty bitmap only if there is no enough space
in CP area while doing umount, otherwise we can keep this in memory.

> +
>  	if (cpc->reason == CP_UMOUNT)
>  		__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
>  	else
> @@ -1136,6 +1140,29 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  
>  	start_blk = __start_cp_next_addr(sbi);
>  
> +	/* write nat bits */
> +	if (cpc->reason == CP_UMOUNT &&
> +				is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> +		__u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);

__u64 cp_ver = cur_cp_version(ckpt);

> +		unsigned int i;
> +		block_t blk;
> +
> +		cp_ver |= ((__u64)crc32 << 32);
> +		*(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);
> +
> +		blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
> +		for (i = 0; i < nm_i->nat_bits_blocks; i++)
> +			update_meta_page(sbi, nm_i->nat_bits +
> +					(i << F2FS_BLKSIZE_BITS), blk + i);
> +
> +		/* Flush all the NAT BITS pages */
> +		while (get_pages(sbi, F2FS_DIRTY_META)) {
> +			sync_meta_pages(sbi, META, LONG_MAX);
> +			if (unlikely(f2fs_cp_error(sbi)))
> +				return -EIO;
> +		}
> +	}
> +
>  	/* need to wait for end_io results */
>  	wait_on_all_pages_writeback(sbi);
>  	if (unlikely(f2fs_cp_error(sbi)))
> @@ -1272,7 +1299,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  	ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
>  
>  	/* write cached NAT/SIT entries to NAT/SIT area */
> -	flush_nat_entries(sbi);
> +	flush_nat_entries(sbi, cpc);
>  	flush_sit_entries(sbi, cpc);
>  
>  	/* unlock all the fs_lock[] in do_checkpoint() */
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index de8da9fc5c99..015ad2b73a92 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -193,6 +193,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
>  	/* build nm */
>  	si->base_mem += sizeof(struct f2fs_nm_info);
>  	si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
> +	si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
>  
>  get_cache:
>  	si->cache_mem = 0;
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 5c87d763a347..d263dade5e4c 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -547,6 +547,7 @@ struct f2fs_nm_info {
>  	struct list_head nat_entries;	/* cached nat entry list (clean) */
>  	unsigned int nat_cnt;		/* the # of cached nat entries */
>  	unsigned int dirty_nat_cnt;	/* total num of nat entries in set */
> +	unsigned int nat_blocks;	/* # of nat blocks */
>  
>  	/* free node ids management */
>  	struct radix_tree_root free_nid_root;/* root of the free_nid cache */
> @@ -557,6 +558,11 @@ struct f2fs_nm_info {
>  
>  	/* for checkpoint */
>  	char *nat_bitmap;		/* NAT bitmap pointer */
> +
> +	unsigned int nat_bits_blocks;	/* # of nat bits blocks */
> +	unsigned char *nat_bits;	/* NAT bits blocks */
> +	unsigned char *full_nat_bits;	/* full NAT pages */
> +	unsigned char *empty_nat_bits;	/* empty NAT pages */
>  #ifdef CONFIG_F2FS_CHECK_FS
>  	char *nat_bitmap_mir;		/* NAT bitmap mirror */
>  #endif
> @@ -1158,6 +1164,19 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
>  	spin_unlock(&sbi->cp_lock);
>  }
>  
> +static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
> +{
> +	set_sbi_flag(sbi, SBI_NEED_FSCK);
> +
> +	if (lock)
> +		spin_lock(&sbi->cp_lock);
> +	__clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
> +	kfree(NM_I(sbi)->nat_bits);
> +	NM_I(sbi)->nat_bits = NULL;
> +	if (lock)
> +		spin_unlock(&sbi->cp_lock);
> +}
> +
>  static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
>  {
>  	down_read(&sbi->cp_rwsem);
> @@ -2118,7 +2137,7 @@ void move_node_page(struct page *node_page, int gc_type);
>  int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
>  			struct writeback_control *wbc, bool atomic);
>  int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc);
> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync);
> +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
>  bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
>  void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
>  void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
> @@ -2129,7 +2148,7 @@ int recover_xattr_data(struct inode *inode, struct page *page,
>  int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
>  int restore_node_summary(struct f2fs_sb_info *sbi,
>  			unsigned int segno, struct f2fs_summary_block *sum);
> -void flush_nat_entries(struct f2fs_sb_info *sbi);
> +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
>  int build_node_manager(struct f2fs_sb_info *sbi);
>  void destroy_node_manager(struct f2fs_sb_info *sbi);
>  int __init create_node_manager_caches(void);
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index f8fb25a27b46..e001d084667f 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -338,6 +338,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
>  		set_nat_flag(e, IS_CHECKPOINTED, false);
>  	__set_nat_cache_dirty(nm_i, e);
>  
> +	if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG) &&
> +					new_blkaddr == NEW_ADDR)
> +		test_and_clear_bit_le(ni->nid / NAT_ENTRY_PER_BLOCK,
> +					nm_i->empty_nat_bits);

clear_bit_le(NAT_BLOCK_OFFSET,)

> +
>  	/* update fsync_mark if its inode nat entry is still alive */
>  	if (ni->nid != ni->ino)
>  		e = __lookup_nat_cache(nm_i, ni->ino);
> @@ -1839,7 +1844,59 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
>  	}
>  }
>  
> -static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
> +{
> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> +	struct page *page;
> +	unsigned int i = 0;
> +	nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
> +	nid_t nid;
> +
> +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> +		return -EAGAIN;
> +
> +	down_read(&nm_i->nat_tree_lock);
> +check_empty:
> +	i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
> +	if (i >= nm_i->nat_blocks) {
> +		i = 0;
> +		goto check_partial;
> +	}
> +
> +	for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
> +									nid++) {
> +		if (unlikely(nid >= nm_i->max_nid))
> +			break;
> +		add_free_nid(sbi, nid, true);
> +	}
> +
> +	if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
> +		goto out;
> +	i++;
> +	goto check_empty;
> +
> +check_partial:
> +	i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
> +	if (i >= nm_i->nat_blocks) {
> +		disable_nat_bits(sbi, true);

Can this happen in real world? Should be a bug in somewhere?

> +		return -EINVAL;
> +	}
> +
> +	nid = i * NAT_ENTRY_PER_BLOCK;
> +	page = get_current_nat_page(sbi, nid);
> +	scan_nat_page(sbi, page, nid);
> +	f2fs_put_page(page, 1);
> +
> +	if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
> +		i++;
> +		goto check_partial;
> +	}
> +out:
> +	up_read(&nm_i->nat_tree_lock);
> +	return 0;
> +}
> +
> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>  	if (!sync && !available_free_memory(sbi, FREE_NIDS))
>  		return;
>  
> +	/* try to find free nids with nat_bits */
> +	if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
> +		return;
> +
> +	/* find next valid candidate */

This is just for mount case?

> +	if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> +		int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
> +					nm_i->nat_blocks, 0);
> +		if (idx >= nm_i->nat_blocks)
> +			set_sbi_flag(sbi, SBI_NEED_FSCK);
> +		else
> +			nid = idx * NAT_ENTRY_PER_BLOCK;
> +	}
> +
>  	/* readahead nat pages to be scanned */
>  	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
>  							META_NAT, true);
> @@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>  					nm_i->ra_nid_pages, META_NAT, false);
>  }
>  
> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>  {
>  	mutex_lock(&NM_I(sbi)->build_lock);
> -	__build_free_nids(sbi, sync);
> +	__build_free_nids(sbi, sync, mount);
>  	mutex_unlock(&NM_I(sbi)->build_lock);
>  }
>  
> @@ -1941,7 +2012,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
>  	spin_unlock(&nm_i->nid_list_lock);
>  
>  	/* Let's scan nat pages and its caches to get free nids */
> -	build_free_nids(sbi, true);
> +	build_free_nids(sbi, true, false);
>  	goto retry;
>  }
>  
> @@ -2233,8 +2304,39 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes,
>  	list_add_tail(&nes->set_list, head);
>  }
>  
> +void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
> +						struct page *page)
> +{
> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> +	unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
> +	struct f2fs_nat_block *nat_blk = page_address(page);
> +	int valid = 0;
> +	int i;
> +
> +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> +		return;
> +
> +	for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
> +		if (start_nid == 0 && i == 0)
> +			valid++;
> +		if (nat_blk->entries[i].block_addr)
> +			valid++;
> +	}
> +	if (valid == 0) {
> +		test_and_set_bit_le(nat_index, nm_i->empty_nat_bits);
> +		test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);

set_bit_le/clear_bit_le

> +		return;
> +	}
> +
> +	test_and_clear_bit_le(nat_index, nm_i->empty_nat_bits);

ditto

> +	if (valid == NAT_ENTRY_PER_BLOCK)
> +		test_and_set_bit_le(nat_index, nm_i->full_nat_bits);
> +	else
> +		test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);

ditto

> +}
> +
>  static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> -					struct nat_entry_set *set)
> +		struct nat_entry_set *set, struct cp_control *cpc)
>  {
>  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>  	struct f2fs_journal *journal = curseg->journal;
> @@ -2249,7 +2351,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>  	 * #1, flush nat entries to journal in current hot data summary block.
>  	 * #2, flush nat entries to nat page.
>  	 */
> -	if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
> +	if (cpc->reason == CP_UMOUNT ||

if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||

> +		!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
>  		to_journal = false;
>  
>  	if (to_journal) {
> @@ -2289,10 +2392,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>  		}
>  	}
>  
> -	if (to_journal)
> +	if (to_journal) {
>  		up_write(&curseg->journal_rwsem);
> -	else
> +	} else {
> +		__update_nat_bits(sbi, start_nid, page);
>  		f2fs_put_page(page, 1);
> +	}
>  
>  	f2fs_bug_on(sbi, set->entry_cnt);
>  
> @@ -2303,7 +2408,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>  /*
>   * This function is called during the checkpointing process.
>   */
> -void flush_nat_entries(struct f2fs_sb_info *sbi)
> +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> @@ -2324,7 +2429,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>  	 * entries, remove all entries from journal and merge them
>  	 * into nat entry set.
>  	 */
> -	if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> +	if (cpc->reason == CP_UMOUNT ||

if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||

> +		!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
>  		remove_nats_in_journal(sbi);
>  
>  	while ((found = __gang_lookup_nat_set(nm_i,
> @@ -2338,27 +2444,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>  
>  	/* flush dirty nats in nat entry set */
>  	list_for_each_entry_safe(set, tmp, &sets, set_list)
> -		__flush_nat_entry_set(sbi, set);
> +		__flush_nat_entry_set(sbi, set, cpc);
>  
>  	up_write(&nm_i->nat_tree_lock);
>  
>  	f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
>  }
>  
> +static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
> +{
> +	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> +	unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
> +	unsigned int i;
> +	__u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);

__u64 cp_ver = cur_cp_version(ckpt);

Thanks,

> +	size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
> +	__u64 crc = le32_to_cpu(*((__le32 *)
> +				((unsigned char *)ckpt + crc_offset)));
> +	block_t nat_bits_addr;
> +
> +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> +		return 0;
> +
> +	nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
> +						F2FS_BLKSIZE - 1);
> +	nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
> +						GFP_KERNEL);
> +	if (!nm_i->nat_bits)
> +		return -ENOMEM;
> +
> +	nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
> +						nm_i->nat_bits_blocks;
> +	for (i = 0; i < nm_i->nat_bits_blocks; i++) {
> +		struct page *page = get_meta_page(sbi, nat_bits_addr++);
> +
> +		memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
> +					page_address(page), F2FS_BLKSIZE);
> +		f2fs_put_page(page, 1);
> +	}
> +
> +	cp_ver |= (crc << 32);
> +	if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
> +		disable_nat_bits(sbi, true);
> +		return 0;
> +	}
> +
> +	nm_i->full_nat_bits = nm_i->nat_bits + 8;
> +	nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
> +
> +	f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
> +	return 0;
> +}
> +
>  static int init_node_manager(struct f2fs_sb_info *sbi)
>  {
>  	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  	unsigned char *version_bitmap;
> -	unsigned int nat_segs, nat_blocks;
> +	unsigned int nat_segs;
> +	int err;
>  
>  	nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
>  
>  	/* segment_count_nat includes pair segment so divide to 2. */
>  	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
> -	nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
> -
> -	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
> +	nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
> +	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
>  
>  	/* not used nids: 0, node, meta, (and root counted as valid node) */
>  	nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
> @@ -2392,6 +2543,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
>  	if (!nm_i->nat_bitmap)
>  		return -ENOMEM;
>  
> +	err = __get_nat_bitmaps(sbi);
> +	if (err)
> +		return err;
> +
>  #ifdef CONFIG_F2FS_CHECK_FS
>  	nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
>  					GFP_KERNEL);
> @@ -2414,7 +2569,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
>  	if (err)
>  		return err;
>  
> -	build_free_nids(sbi, true);
> +	build_free_nids(sbi, true, true);
>  	return 0;
>  }
>  
> @@ -2473,6 +2628,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
>  	up_write(&nm_i->nat_tree_lock);
>  
>  	kfree(nm_i->nat_bitmap);
> +	kfree(nm_i->nat_bits);
>  #ifdef CONFIG_F2FS_CHECK_FS
>  	kfree(nm_i->nat_bitmap_mir);
>  #endif
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index df2ff5cfe8f4..8e1ec248c653 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
>  	if (!available_free_memory(sbi, FREE_NIDS))
>  		try_to_free_nids(sbi, MAX_FREE_NIDS);
>  	else
> -		build_free_nids(sbi, false);
> +		build_free_nids(sbi, false, false);
>  
>  	if (!is_idle(sbi))
>  		return;
> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
> index f0748524ca8c..1c92ace2e8f8 100644
> --- a/include/linux/f2fs_fs.h
> +++ b/include/linux/f2fs_fs.h
> @@ -114,6 +114,7 @@ struct f2fs_super_block {
>  /*
>   * For checkpoint
>   */
> +#define CP_NAT_BITS_FLAG	0x00000080
>  #define CP_CRC_RECOVERY_FLAG	0x00000040
>  #define CP_FASTBOOT_FLAG	0x00000020
>  #define CP_FSCK_FLAG		0x00000010
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] f2fs: avoid reading NAT page by get_node_info
  2017-02-14  2:06 ` [PATCH 3/3] f2fs: avoid reading NAT page by get_node_info Jaegeuk Kim
@ 2017-02-23 11:47   ` Chao Yu
  2017-02-23 18:20     ` [f2fs-dev] " Jaegeuk Kim
  0 siblings, 1 reply; 16+ messages in thread
From: Chao Yu @ 2017-02-23 11:47 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2017/2/14 10:06, Jaegeuk Kim wrote:
> We've not seen this buggy case for a long time, so it's time to avoid this
> unnecessary get_node_info() call which reading NAT page to cache nat entry.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>  fs/f2fs/node.c | 11 +++++------
>  1 file changed, 5 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index e001d084667f..b63bdb85ad66 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -1033,7 +1033,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
>  				unsigned int ofs, struct page *ipage)
>  {
>  	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
> -	struct node_info old_ni, new_ni;
> +	struct node_info new_ni;
>  	struct page *page;
>  	int err;
>  
> @@ -1049,12 +1049,11 @@ struct page *new_node_page(struct dnode_of_data *dn,
>  		goto fail;
>  	}
>  
> -	get_node_info(sbi, dn->nid, &old_ni);
> -
> -	/* Reinitialize old_ni with new node page */
> -	f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR);

What about keeping get_node_info covered with CONFIG_F2FS_CHECK_FS for
consistence as we just introduce two new free nid caches, I think it will be
helpful for detecting bugs of these new codes.

Thanks,

> -	new_ni = old_ni;
> +	new_ni.nid = dn->nid;
>  	new_ni.ino = dn->inode->i_ino;
> +	new_ni.blk_addr = NULL_ADDR;
> +	new_ni.flag = 0;
> +	new_ni.version = 0;
>  	set_node_addr(sbi, &new_ni, NEW_ADDR, false);
>  
>  	f2fs_wait_on_page_writeback(page, NODE, true);
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH 3/3] f2fs: avoid reading NAT page by get_node_info
  2017-02-23 11:47   ` Chao Yu
@ 2017-02-23 18:20     ` Jaegeuk Kim
  0 siblings, 0 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2017-02-23 18:20 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

On 02/23, Chao Yu wrote:
> On 2017/2/14 10:06, Jaegeuk Kim wrote:
> > We've not seen this buggy case for a long time, so it's time to avoid this
> > unnecessary get_node_info() call which reading NAT page to cache nat entry.
> > 
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > ---
> >  fs/f2fs/node.c | 11 +++++------
> >  1 file changed, 5 insertions(+), 6 deletions(-)
> > 
> > diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> > index e001d084667f..b63bdb85ad66 100644
> > --- a/fs/f2fs/node.c
> > +++ b/fs/f2fs/node.c
> > @@ -1033,7 +1033,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
> >  				unsigned int ofs, struct page *ipage)
> >  {
> >  	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
> > -	struct node_info old_ni, new_ni;
> > +	struct node_info new_ni;
> >  	struct page *page;
> >  	int err;
> >  
> > @@ -1049,12 +1049,11 @@ struct page *new_node_page(struct dnode_of_data *dn,
> >  		goto fail;
> >  	}
> >  
> > -	get_node_info(sbi, dn->nid, &old_ni);
> > -
> > -	/* Reinitialize old_ni with new node page */
> > -	f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR);
> 
> What about keeping get_node_info covered with CONFIG_F2FS_CHECK_FS for
> consistence as we just introduce two new free nid caches, I think it will be
> helpful for detecting bugs of these new codes.

Good idea!
I added it like this.

>From a60572a7e9826b26f9634073ba9178762a9e0e7a Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Mon, 13 Feb 2017 17:02:44 -0800
Subject: [PATCH] f2fs: avoid reading NAT page by get_node_info

We've not seen this buggy case for a long time, so it's time to avoid this
unnecessary get_node_info() call which reading NAT page to cache nat entry.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/node.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index baef30e230d9..42bb6f3f4757 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1033,7 +1033,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
 				unsigned int ofs, struct page *ipage)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
-	struct node_info old_ni, new_ni;
+	struct node_info new_ni;
 	struct page *page;
 	int err;
 
@@ -1048,13 +1048,15 @@ struct page *new_node_page(struct dnode_of_data *dn,
 		err = -ENOSPC;
 		goto fail;
 	}
-
-	get_node_info(sbi, dn->nid, &old_ni);
-
-	/* Reinitialize old_ni with new node page */
-	f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR);
-	new_ni = old_ni;
+#ifdef CONFIG_F2FS_CHECK_FS
+	get_node_info(sbi, dn->nid, &new_ni);
+	f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
+#endif
+	new_ni.nid = dn->nid;
 	new_ni.ino = dn->inode->i_ino;
+	new_ni.blk_addr = NULL_ADDR;
+	new_ni.flag = 0;
+	new_ni.version = 0;
 	set_node_addr(sbi, &new_ni, NEW_ADDR, false);
 
 	f2fs_wait_on_page_writeback(page, NODE, true);
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks
  2017-02-23 11:41   ` [f2fs-dev] " Chao Yu
@ 2017-02-23 22:54     ` Jaegeuk Kim
  2017-02-25  3:26       ` Chao Yu
  0 siblings, 1 reply; 16+ messages in thread
From: Jaegeuk Kim @ 2017-02-23 22:54 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

On 02/23, Chao Yu wrote:
> On 2017/2/14 10:06, Jaegeuk Kim wrote:
> > This patches adds bitmaps to represent empty or full NAT blocks containing
> > free nid entries.
> > 
> > If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll
> > use these bitmaps when building free nids. In order to avoid checkpointing
> > burden, up-to-date bitmaps will be flushed only during umount time. So,
> > normally we can get this gain, but when power-cut happens, we rely on fsck.f2fs
> > which recovers this bitmap again.
> > 
> > After this patch, we build free nids from nid #0 at mount time to make more
> > full NAT blocks, but in runtime, we check empty NAT blocks to load free nids
> > without loading any NAT pages from disk.
> > 
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > ---
> >  fs/f2fs/checkpoint.c    |  29 +++++++-
> >  fs/f2fs/debug.c         |   1 +
> >  fs/f2fs/f2fs.h          |  23 +++++-
> >  fs/f2fs/node.c          | 188 +++++++++++++++++++++++++++++++++++++++++++-----
> >  fs/f2fs/segment.c       |   2 +-
> >  include/linux/f2fs_fs.h |   1 +
> >  6 files changed, 224 insertions(+), 20 deletions(-)
> > 
> > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > index 042f8d9afe44..783c5c3f16a4 100644
> > --- a/fs/f2fs/checkpoint.c
> > +++ b/fs/f2fs/checkpoint.c
> > @@ -1024,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >  
> >  	spin_lock(&sbi->cp_lock);
> >  
> > +	if (ckpt->cp_pack_total_block_count >
> > +			sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
> > +		disable_nat_bits(sbi, false);
> 
> I think we need to drop nat full/empty bitmap only if there is no enough space
> in CP area while doing umount, otherwise we can keep this in memory.

Yup.

> 
> > +
> >  	if (cpc->reason == CP_UMOUNT)
> >  		__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
> >  	else
> > @@ -1136,6 +1140,29 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >  
> >  	start_blk = __start_cp_next_addr(sbi);
> >  
> > +	/* write nat bits */

...

> > +static int scan_nat_bits(struct f2fs_sb_info *sbi)
> > +{
> > +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> > +	struct page *page;
> > +	unsigned int i = 0;
> > +	nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
> > +	nid_t nid;
> > +
> > +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> > +		return -EAGAIN;
> > +
> > +	down_read(&nm_i->nat_tree_lock);
> > +check_empty:
> > +	i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
> > +	if (i >= nm_i->nat_blocks) {
> > +		i = 0;
> > +		goto check_partial;
> > +	}
> > +
> > +	for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
> > +									nid++) {
> > +		if (unlikely(nid >= nm_i->max_nid))
> > +			break;
> > +		add_free_nid(sbi, nid, true);
> > +	}
> > +
> > +	if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
> > +		goto out;
> > +	i++;
> > +	goto check_empty;
> > +
> > +check_partial:
> > +	i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
> > +	if (i >= nm_i->nat_blocks) {
> > +		disable_nat_bits(sbi, true);
> 
> Can this happen in real world? Should be a bug in somewhere?

It happens, since current design handles full_nat_bits optionally in order
to avoid scanning a whole NAT page to set it back as 1 from 0.

> 
> > +		return -EINVAL;
> > +	}
> > +
> > +	nid = i * NAT_ENTRY_PER_BLOCK;
> > +	page = get_current_nat_page(sbi, nid);
> > +	scan_nat_page(sbi, page, nid);
> > +	f2fs_put_page(page, 1);
> > +
> > +	if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
> > +		i++;
> > +		goto check_partial;
> > +	}
> > +out:
> > +	up_read(&nm_i->nat_tree_lock);
> > +	return 0;
> > +}
> > +
> > +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> >  {
> >  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> >  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> > @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> >  	if (!sync && !available_free_memory(sbi, FREE_NIDS))
> >  		return;
> >  
> > +	/* try to find free nids with nat_bits */
> > +	if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
> > +		return;
> > +
> > +	/* find next valid candidate */
> 
> This is just for mount case?

Yup, it reuses free nids in dirty NAT blocks, so that we can make them as full
NAT pages.

Thanks,

> 
> > +	if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> > +		int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
> > +					nm_i->nat_blocks, 0);
> > +		if (idx >= nm_i->nat_blocks)
> > +			set_sbi_flag(sbi, SBI_NEED_FSCK);
> > +		else
> > +			nid = idx * NAT_ENTRY_PER_BLOCK;
> > +	}
> > +
> >  	/* readahead nat pages to be scanned */
> >  	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
> >  							META_NAT, true);
> > @@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> >  					nm_i->ra_nid_pages, META_NAT, false);
> >  }
> >  
> > -void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> > +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> >  {
> >  	mutex_lock(&NM_I(sbi)->build_lock);
> > -	__build_free_nids(sbi, sync);
> > +	__build_free_nids(sbi, sync, mount);
> >  	mutex_unlock(&NM_I(sbi)->build_lock);
> >  }
> >  
> > @@ -1941,7 +2012,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
> >  	spin_unlock(&nm_i->nid_list_lock);
> >  
> >  	/* Let's scan nat pages and its caches to get free nids */
> > -	build_free_nids(sbi, true);
> > +	build_free_nids(sbi, true, false);
> >  	goto retry;
> >  }
> >  
> > @@ -2233,8 +2304,39 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes,
> >  	list_add_tail(&nes->set_list, head);
> >  }
> >  
> > +void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
> > +						struct page *page)
> > +{
> > +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> > +	unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
> > +	struct f2fs_nat_block *nat_blk = page_address(page);
> > +	int valid = 0;
> > +	int i;
> > +
> > +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> > +		return;
> > +
> > +	for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
> > +		if (start_nid == 0 && i == 0)
> > +			valid++;
> > +		if (nat_blk->entries[i].block_addr)
> > +			valid++;
> > +	}
> > +	if (valid == 0) {
> > +		test_and_set_bit_le(nat_index, nm_i->empty_nat_bits);
> > +		test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
> 
> set_bit_le/clear_bit_le
> 
> > +		return;
> > +	}
> > +
> > +	test_and_clear_bit_le(nat_index, nm_i->empty_nat_bits);
> 
> ditto
> 
> > +	if (valid == NAT_ENTRY_PER_BLOCK)
> > +		test_and_set_bit_le(nat_index, nm_i->full_nat_bits);
> > +	else
> > +		test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
> 
> ditto
> 
> > +}
> > +
> >  static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> > -					struct nat_entry_set *set)
> > +		struct nat_entry_set *set, struct cp_control *cpc)
> >  {
> >  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> >  	struct f2fs_journal *journal = curseg->journal;
> > @@ -2249,7 +2351,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >  	 * #1, flush nat entries to journal in current hot data summary block.
> >  	 * #2, flush nat entries to nat page.
> >  	 */
> > -	if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
> > +	if (cpc->reason == CP_UMOUNT ||
> 
> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
> 
> > +		!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
> >  		to_journal = false;
> >  
> >  	if (to_journal) {
> > @@ -2289,10 +2392,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >  		}
> >  	}
> >  
> > -	if (to_journal)
> > +	if (to_journal) {
> >  		up_write(&curseg->journal_rwsem);
> > -	else
> > +	} else {
> > +		__update_nat_bits(sbi, start_nid, page);
> >  		f2fs_put_page(page, 1);
> > +	}
> >  
> >  	f2fs_bug_on(sbi, set->entry_cnt);
> >  
> > @@ -2303,7 +2408,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >  /*
> >   * This function is called during the checkpointing process.
> >   */
> > -void flush_nat_entries(struct f2fs_sb_info *sbi)
> > +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >  {
> >  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> >  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> > @@ -2324,7 +2429,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> >  	 * entries, remove all entries from journal and merge them
> >  	 * into nat entry set.
> >  	 */
> > -	if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> > +	if (cpc->reason == CP_UMOUNT ||
> 
> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
> 
> > +		!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> >  		remove_nats_in_journal(sbi);
> >  
> >  	while ((found = __gang_lookup_nat_set(nm_i,
> > @@ -2338,27 +2444,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> >  
> >  	/* flush dirty nats in nat entry set */
> >  	list_for_each_entry_safe(set, tmp, &sets, set_list)
> > -		__flush_nat_entry_set(sbi, set);
> > +		__flush_nat_entry_set(sbi, set, cpc);
> >  
> >  	up_write(&nm_i->nat_tree_lock);
> >  
> >  	f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
> >  }
> >  
> > +static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
> > +{
> > +	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
> > +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> > +	unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
> > +	unsigned int i;
> > +	__u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
> 
> __u64 cp_ver = cur_cp_version(ckpt);
> 
> Thanks,
> 
> > +	size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
> > +	__u64 crc = le32_to_cpu(*((__le32 *)
> > +				((unsigned char *)ckpt + crc_offset)));
> > +	block_t nat_bits_addr;
> > +
> > +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> > +		return 0;
> > +
> > +	nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
> > +						F2FS_BLKSIZE - 1);
> > +	nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
> > +						GFP_KERNEL);
> > +	if (!nm_i->nat_bits)
> > +		return -ENOMEM;
> > +
> > +	nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
> > +						nm_i->nat_bits_blocks;
> > +	for (i = 0; i < nm_i->nat_bits_blocks; i++) {
> > +		struct page *page = get_meta_page(sbi, nat_bits_addr++);
> > +
> > +		memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
> > +					page_address(page), F2FS_BLKSIZE);
> > +		f2fs_put_page(page, 1);
> > +	}
> > +
> > +	cp_ver |= (crc << 32);
> > +	if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
> > +		disable_nat_bits(sbi, true);
> > +		return 0;
> > +	}
> > +
> > +	nm_i->full_nat_bits = nm_i->nat_bits + 8;
> > +	nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
> > +
> > +	f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
> > +	return 0;
> > +}
> > +
> >  static int init_node_manager(struct f2fs_sb_info *sbi)
> >  {
> >  	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
> >  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> >  	unsigned char *version_bitmap;
> > -	unsigned int nat_segs, nat_blocks;
> > +	unsigned int nat_segs;
> > +	int err;
> >  
> >  	nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
> >  
> >  	/* segment_count_nat includes pair segment so divide to 2. */
> >  	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
> > -	nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
> > -
> > -	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
> > +	nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
> > +	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
> >  
> >  	/* not used nids: 0, node, meta, (and root counted as valid node) */
> >  	nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
> > @@ -2392,6 +2543,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
> >  	if (!nm_i->nat_bitmap)
> >  		return -ENOMEM;
> >  
> > +	err = __get_nat_bitmaps(sbi);
> > +	if (err)
> > +		return err;
> > +
> >  #ifdef CONFIG_F2FS_CHECK_FS
> >  	nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
> >  					GFP_KERNEL);
> > @@ -2414,7 +2569,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
> >  	if (err)
> >  		return err;
> >  
> > -	build_free_nids(sbi, true);
> > +	build_free_nids(sbi, true, true);
> >  	return 0;
> >  }
> >  
> > @@ -2473,6 +2628,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
> >  	up_write(&nm_i->nat_tree_lock);
> >  
> >  	kfree(nm_i->nat_bitmap);
> > +	kfree(nm_i->nat_bits);
> >  #ifdef CONFIG_F2FS_CHECK_FS
> >  	kfree(nm_i->nat_bitmap_mir);
> >  #endif
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index df2ff5cfe8f4..8e1ec248c653 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
> >  	if (!available_free_memory(sbi, FREE_NIDS))
> >  		try_to_free_nids(sbi, MAX_FREE_NIDS);
> >  	else
> > -		build_free_nids(sbi, false);
> > +		build_free_nids(sbi, false, false);
> >  
> >  	if (!is_idle(sbi))
> >  		return;
> > diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
> > index f0748524ca8c..1c92ace2e8f8 100644
> > --- a/include/linux/f2fs_fs.h
> > +++ b/include/linux/f2fs_fs.h
> > @@ -114,6 +114,7 @@ struct f2fs_super_block {
> >  /*
> >   * For checkpoint
> >   */
> > +#define CP_NAT_BITS_FLAG	0x00000080
> >  #define CP_CRC_RECOVERY_FLAG	0x00000040
> >  #define CP_FASTBOOT_FLAG	0x00000020
> >  #define CP_FSCK_FLAG		0x00000010
> > 
> 
> 
> ------------------------------------------------------------------------------
> Check out the vibrant tech community on one of the world's most
> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3 v2] f2fs: add bitmaps for empty or full NAT blocks
  2017-02-14  2:06 ` [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks Jaegeuk Kim
  2017-02-23 11:41   ` [f2fs-dev] " Chao Yu
@ 2017-02-23 22:58   ` Jaegeuk Kim
  2017-02-28  3:34     ` [f2fs-dev] " Chao Yu
  1 sibling, 1 reply; 16+ messages in thread
From: Jaegeuk Kim @ 2017-02-23 22:58 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel

Change log from v1:
 - modified some condition checks suggested by Chao

This patches adds bitmaps to represent empty or full NAT blocks containing
free nid entries.

If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll
use these bitmaps when building free nids. In order to avoid checkpointing
burden, up-to-date bitmaps will be flushed only during umount time. So,
normally we can get this gain, but when power-cut happens, we rely on fsck.f2fs
which recovers this bitmap again.

After this patch, we build free nids from nid #0 at mount time to make more
full NAT blocks, but in runtime, we check empty NAT blocks to load free nids
without loading any NAT pages from disk.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c    |  28 +++++++-
 fs/f2fs/debug.c         |   1 +
 fs/f2fs/f2fs.h          |  31 +++++++-
 fs/f2fs/node.c          | 188 +++++++++++++++++++++++++++++++++++++++++++-----
 fs/f2fs/segment.c       |   2 +-
 include/linux/f2fs_fs.h |   1 +
 6 files changed, 231 insertions(+), 20 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 042f8d9afe44..cd7132121573 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1024,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	spin_lock(&sbi->cp_lock);
 
+	if (cpc->reason == CP_UMOUNT && ckpt->cp_pack_total_block_count >
+			sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
+		disable_nat_bits(sbi, false);
+
 	if (cpc->reason == CP_UMOUNT)
 		__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
 	else
@@ -1136,6 +1140,28 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	start_blk = __start_cp_next_addr(sbi);
 
+	/* write nat bits */
+	if (enabled_nat_bits(sbi, cpc)) {
+		__u64 cp_ver = cur_cp_version(ckpt);
+		unsigned int i;
+		block_t blk;
+
+		cp_ver |= ((__u64)crc32 << 32);
+		*(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);
+
+		blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
+		for (i = 0; i < nm_i->nat_bits_blocks; i++)
+			update_meta_page(sbi, nm_i->nat_bits +
+					(i << F2FS_BLKSIZE_BITS), blk + i);
+
+		/* Flush all the NAT BITS pages */
+		while (get_pages(sbi, F2FS_DIRTY_META)) {
+			sync_meta_pages(sbi, META, LONG_MAX);
+			if (unlikely(f2fs_cp_error(sbi)))
+				return -EIO;
+		}
+	}
+
 	/* need to wait for end_io results */
 	wait_on_all_pages_writeback(sbi);
 	if (unlikely(f2fs_cp_error(sbi)))
@@ -1272,7 +1298,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
 
 	/* write cached NAT/SIT entries to NAT/SIT area */
-	flush_nat_entries(sbi);
+	flush_nat_entries(sbi, cpc);
 	flush_sit_entries(sbi, cpc);
 
 	/* unlock all the fs_lock[] in do_checkpoint() */
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index de8da9fc5c99..015ad2b73a92 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -193,6 +193,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
 	/* build nm */
 	si->base_mem += sizeof(struct f2fs_nm_info);
 	si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
+	si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
 
 get_cache:
 	si->cache_mem = 0;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e26cc6909a54..d1156cdd211e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -554,6 +554,7 @@ struct f2fs_nm_info {
 	struct list_head nat_entries;	/* cached nat entry list (clean) */
 	unsigned int nat_cnt;		/* the # of cached nat entries */
 	unsigned int dirty_nat_cnt;	/* total num of nat entries in set */
+	unsigned int nat_blocks;	/* # of nat blocks */
 
 	/* free node ids management */
 	struct radix_tree_root free_nid_root;/* root of the free_nid cache */
@@ -564,6 +565,11 @@ struct f2fs_nm_info {
 
 	/* for checkpoint */
 	char *nat_bitmap;		/* NAT bitmap pointer */
+
+	unsigned int nat_bits_blocks;	/* # of nat bits blocks */
+	unsigned char *nat_bits;	/* NAT bits blocks */
+	unsigned char *full_nat_bits;	/* full NAT pages */
+	unsigned char *empty_nat_bits;	/* empty NAT pages */
 #ifdef CONFIG_F2FS_CHECK_FS
 	char *nat_bitmap_mir;		/* NAT bitmap mirror */
 #endif
@@ -1171,6 +1177,27 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
 	spin_unlock(&sbi->cp_lock);
 }
 
+static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
+{
+	set_sbi_flag(sbi, SBI_NEED_FSCK);
+
+	if (lock)
+		spin_lock(&sbi->cp_lock);
+	__clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
+	kfree(NM_I(sbi)->nat_bits);
+	NM_I(sbi)->nat_bits = NULL;
+	if (lock)
+		spin_unlock(&sbi->cp_lock);
+}
+
+static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi,
+					struct cp_control *cpc)
+{
+	bool set = is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
+
+	return (cpc) ? (cpc->reason == CP_UMOUNT) && set : set;
+}
+
 static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
 {
 	down_read(&sbi->cp_rwsem);
@@ -2131,7 +2158,7 @@ void move_node_page(struct page *node_page, int gc_type);
 int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
 			struct writeback_control *wbc, bool atomic);
 int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc);
-void build_free_nids(struct f2fs_sb_info *sbi, bool sync);
+void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
 bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
 void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
 void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
@@ -2142,7 +2169,7 @@ int recover_xattr_data(struct inode *inode, struct page *page,
 int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
 int restore_node_summary(struct f2fs_sb_info *sbi,
 			unsigned int segno, struct f2fs_summary_block *sum);
-void flush_nat_entries(struct f2fs_sb_info *sbi);
+void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
 int build_node_manager(struct f2fs_sb_info *sbi);
 void destroy_node_manager(struct f2fs_sb_info *sbi);
 int __init create_node_manager_caches(void);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 8ebc4c78e6a4..43d35ec11851 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -338,6 +338,9 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
 		set_nat_flag(e, IS_CHECKPOINTED, false);
 	__set_nat_cache_dirty(nm_i, e);
 
+	if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR)
+		clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits);
+
 	/* update fsync_mark if its inode nat entry is still alive */
 	if (ni->nid != ni->ino)
 		e = __lookup_nat_cache(nm_i, ni->ino);
@@ -1841,7 +1844,60 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
 	}
 }
 
-static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
+static int scan_nat_bits(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	struct page *page;
+	unsigned int i = 0;
+	nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
+	nid_t nid;
+
+	if (!enabled_nat_bits(sbi, NULL))
+		return -EAGAIN;
+
+	down_read(&nm_i->nat_tree_lock);
+check_empty:
+	i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
+	if (i >= nm_i->nat_blocks) {
+		i = 0;
+		goto check_partial;
+	}
+
+	for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
+									nid++) {
+		if (unlikely(nid >= nm_i->max_nid))
+			break;
+		add_free_nid(sbi, nid, true);
+	}
+
+	if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
+		goto out;
+	i++;
+	goto check_empty;
+
+check_partial:
+	i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
+	if (i >= nm_i->nat_blocks) {
+		disable_nat_bits(sbi, true);
+		up_read(&nm_i->nat_tree_lock);
+		return -EINVAL;
+	}
+
+	nid = i * NAT_ENTRY_PER_BLOCK;
+	page = get_current_nat_page(sbi, nid);
+	scan_nat_page(sbi, page, nid);
+	f2fs_put_page(page, 1);
+
+	if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
+		i++;
+		goto check_partial;
+	}
+out:
+	up_read(&nm_i->nat_tree_lock);
+	return 0;
+}
+
+static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1856,6 +1912,21 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
 	if (!sync && !available_free_memory(sbi, FREE_NIDS))
 		return;
 
+	/* try to find free nids with nat_bits */
+	if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
+		return;
+
+	/* find next valid candidate */
+	if (enabled_nat_bits(sbi, NULL)) {
+		int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
+					nm_i->nat_blocks, 0);
+
+		if (idx >= nm_i->nat_blocks)
+			set_sbi_flag(sbi, SBI_NEED_FSCK);
+		else
+			nid = idx * NAT_ENTRY_PER_BLOCK;
+	}
+
 	/* readahead nat pages to be scanned */
 	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
 							META_NAT, true);
@@ -1898,10 +1969,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
 					nm_i->ra_nid_pages, META_NAT, false);
 }
 
-void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
+void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
 {
 	mutex_lock(&NM_I(sbi)->build_lock);
-	__build_free_nids(sbi, sync);
+	__build_free_nids(sbi, sync, mount);
 	mutex_unlock(&NM_I(sbi)->build_lock);
 }
 
@@ -1943,7 +2014,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
 	spin_unlock(&nm_i->nid_list_lock);
 
 	/* Let's scan nat pages and its caches to get free nids */
-	build_free_nids(sbi, true);
+	build_free_nids(sbi, true, false);
 	goto retry;
 }
 
@@ -2235,8 +2306,39 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes,
 	list_add_tail(&nes->set_list, head);
 }
 
+void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
+						struct page *page)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
+	struct f2fs_nat_block *nat_blk = page_address(page);
+	int valid = 0;
+	int i;
+
+	if (!enabled_nat_bits(sbi, NULL))
+		return;
+
+	for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
+		if (start_nid == 0 && i == 0)
+			valid++;
+		if (nat_blk->entries[i].block_addr)
+			valid++;
+	}
+	if (valid == 0) {
+		set_bit_le(nat_index, nm_i->empty_nat_bits);
+		clear_bit_le(nat_index, nm_i->full_nat_bits);
+		return;
+	}
+
+	clear_bit_le(nat_index, nm_i->empty_nat_bits);
+	if (valid == NAT_ENTRY_PER_BLOCK)
+		set_bit_le(nat_index, nm_i->full_nat_bits);
+	else
+		clear_bit_le(nat_index, nm_i->full_nat_bits);
+}
+
 static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
-					struct nat_entry_set *set)
+		struct nat_entry_set *set, struct cp_control *cpc)
 {
 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
 	struct f2fs_journal *journal = curseg->journal;
@@ -2251,7 +2353,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 	 * #1, flush nat entries to journal in current hot data summary block.
 	 * #2, flush nat entries to nat page.
 	 */
-	if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
+	if (enabled_nat_bits(sbi, cpc) ||
+		!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
 		to_journal = false;
 
 	if (to_journal) {
@@ -2291,10 +2394,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 		}
 	}
 
-	if (to_journal)
+	if (to_journal) {
 		up_write(&curseg->journal_rwsem);
-	else
+	} else {
+		__update_nat_bits(sbi, start_nid, page);
 		f2fs_put_page(page, 1);
+	}
 
 	f2fs_bug_on(sbi, set->entry_cnt);
 
@@ -2305,7 +2410,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 /*
  * This function is called during the checkpointing process.
  */
-void flush_nat_entries(struct f2fs_sb_info *sbi)
+void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -2326,7 +2431,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
 	 * entries, remove all entries from journal and merge them
 	 * into nat entry set.
 	 */
-	if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
+	if (cpc->reason == CP_UMOUNT ||
+		!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
 		remove_nats_in_journal(sbi);
 
 	while ((found = __gang_lookup_nat_set(nm_i,
@@ -2340,27 +2446,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
 
 	/* flush dirty nats in nat entry set */
 	list_for_each_entry_safe(set, tmp, &sets, set_list)
-		__flush_nat_entry_set(sbi, set);
+		__flush_nat_entry_set(sbi, set, cpc);
 
 	up_write(&nm_i->nat_tree_lock);
 
 	f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
 }
 
+static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
+	unsigned int i;
+	__u64 cp_ver = cur_cp_version(ckpt);
+	size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
+	__u64 crc = le32_to_cpu(*((__le32 *)
+				((unsigned char *)ckpt + crc_offset)));
+	block_t nat_bits_addr;
+
+	if (!enabled_nat_bits(sbi, NULL))
+		return 0;
+
+	nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
+						F2FS_BLKSIZE - 1);
+	nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
+						GFP_KERNEL);
+	if (!nm_i->nat_bits)
+		return -ENOMEM;
+
+	nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
+						nm_i->nat_bits_blocks;
+	for (i = 0; i < nm_i->nat_bits_blocks; i++) {
+		struct page *page = get_meta_page(sbi, nat_bits_addr++);
+
+		memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
+					page_address(page), F2FS_BLKSIZE);
+		f2fs_put_page(page, 1);
+	}
+
+	cp_ver |= (crc << 32);
+	if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
+		disable_nat_bits(sbi, true);
+		return 0;
+	}
+
+	nm_i->full_nat_bits = nm_i->nat_bits + 8;
+	nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
+
+	f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
+	return 0;
+}
+
 static int init_node_manager(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	unsigned char *version_bitmap;
-	unsigned int nat_segs, nat_blocks;
+	unsigned int nat_segs;
+	int err;
 
 	nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
 
 	/* segment_count_nat includes pair segment so divide to 2. */
 	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
-	nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
-
-	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
+	nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
+	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
 
 	/* not used nids: 0, node, meta, (and root counted as valid node) */
 	nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
@@ -2394,6 +2545,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
 	if (!nm_i->nat_bitmap)
 		return -ENOMEM;
 
+	err = __get_nat_bitmaps(sbi);
+	if (err)
+		return err;
+
 #ifdef CONFIG_F2FS_CHECK_FS
 	nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
 					GFP_KERNEL);
@@ -2416,7 +2571,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
 	if (err)
 		return err;
 
-	build_free_nids(sbi, true);
+	build_free_nids(sbi, true, true);
 	return 0;
 }
 
@@ -2475,6 +2630,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
 	up_write(&nm_i->nat_tree_lock);
 
 	kfree(nm_i->nat_bitmap);
+	kfree(nm_i->nat_bits);
 #ifdef CONFIG_F2FS_CHECK_FS
 	kfree(nm_i->nat_bitmap_mir);
 #endif
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 3e95db5375ed..9eb6d89bf9e2 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
 	if (!available_free_memory(sbi, FREE_NIDS))
 		try_to_free_nids(sbi, MAX_FREE_NIDS);
 	else
-		build_free_nids(sbi, false);
+		build_free_nids(sbi, false, false);
 
 	if (!is_idle(sbi))
 		return;
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index f0748524ca8c..1c92ace2e8f8 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -114,6 +114,7 @@ struct f2fs_super_block {
 /*
  * For checkpoint
  */
+#define CP_NAT_BITS_FLAG	0x00000080
 #define CP_CRC_RECOVERY_FLAG	0x00000040
 #define CP_FASTBOOT_FLAG	0x00000020
 #define CP_FSCK_FLAG		0x00000010
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks
  2017-02-23 22:54     ` Jaegeuk Kim
@ 2017-02-25  3:26       ` Chao Yu
  2017-02-25 18:34         ` Jaegeuk Kim
  0 siblings, 1 reply; 16+ messages in thread
From: Chao Yu @ 2017-02-25  3:26 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2017/2/24 6:54, Jaegeuk Kim wrote:
> On 02/23, Chao Yu wrote:
>> On 2017/2/14 10:06, Jaegeuk Kim wrote:
>>> This patches adds bitmaps to represent empty or full NAT blocks containing
>>> free nid entries.
>>>
>>> If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll
>>> use these bitmaps when building free nids. In order to avoid checkpointing
>>> burden, up-to-date bitmaps will be flushed only during umount time. So,
>>> normally we can get this gain, but when power-cut happens, we rely on fsck.f2fs
>>> which recovers this bitmap again.
>>>
>>> After this patch, we build free nids from nid #0 at mount time to make more
>>> full NAT blocks, but in runtime, we check empty NAT blocks to load free nids
>>> without loading any NAT pages from disk.
>>>
>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>> ---
>>>  fs/f2fs/checkpoint.c    |  29 +++++++-
>>>  fs/f2fs/debug.c         |   1 +
>>>  fs/f2fs/f2fs.h          |  23 +++++-
>>>  fs/f2fs/node.c          | 188 +++++++++++++++++++++++++++++++++++++++++++-----
>>>  fs/f2fs/segment.c       |   2 +-
>>>  include/linux/f2fs_fs.h |   1 +
>>>  6 files changed, 224 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>> index 042f8d9afe44..783c5c3f16a4 100644
>>> --- a/fs/f2fs/checkpoint.c
>>> +++ b/fs/f2fs/checkpoint.c
>>> @@ -1024,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>  
>>>  	spin_lock(&sbi->cp_lock);
>>>  
>>> +	if (ckpt->cp_pack_total_block_count >
>>> +			sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
>>> +		disable_nat_bits(sbi, false);
>>
>> I think we need to drop nat full/empty bitmap only if there is no enough space
>> in CP area while doing umount, otherwise we can keep this in memory.
> 
> Yup.
> 
>>
>>> +
>>>  	if (cpc->reason == CP_UMOUNT)
>>>  		__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
>>>  	else
>>> @@ -1136,6 +1140,29 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>  
>>>  	start_blk = __start_cp_next_addr(sbi);
>>>  
>>> +	/* write nat bits */
> 
> ...
> 
>>> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
>>> +{
>>> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
>>> +	struct page *page;
>>> +	unsigned int i = 0;
>>> +	nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
>>> +	nid_t nid;
>>> +
>>> +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>> +		return -EAGAIN;
>>> +
>>> +	down_read(&nm_i->nat_tree_lock);
>>> +check_empty:
>>> +	i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
>>> +	if (i >= nm_i->nat_blocks) {
>>> +		i = 0;
>>> +		goto check_partial;
>>> +	}
>>> +
>>> +	for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
>>> +									nid++) {
>>> +		if (unlikely(nid >= nm_i->max_nid))
>>> +			break;
>>> +		add_free_nid(sbi, nid, true);
>>> +	}
>>> +
>>> +	if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
>>> +		goto out;
>>> +	i++;
>>> +	goto check_empty;
>>> +
>>> +check_partial:
>>> +	i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
>>> +	if (i >= nm_i->nat_blocks) {
>>> +		disable_nat_bits(sbi, true);
>>
>> Can this happen in real world? Should be a bug in somewhere?
> 
> It happens, since current design handles full_nat_bits optionally in order
> to avoid scanning a whole NAT page to set it back as 1 from 0.

All 0 value in full_nat_bits means the NAT block has at least one free nid,
right? so if we traverse all such NAT blocks, and still haven't collect enough
*target* number of free nids, we don't have to disable nat bit cache, we can
just break out here.

Thanks,

> 
>>
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	nid = i * NAT_ENTRY_PER_BLOCK;
>>> +	page = get_current_nat_page(sbi, nid);
>>> +	scan_nat_page(sbi, page, nid);
>>> +	f2fs_put_page(page, 1);
>>> +
>>> +	if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
>>> +		i++;
>>> +		goto check_partial;
>>> +	}
>>> +out:
>>> +	up_read(&nm_i->nat_tree_lock);
>>> +	return 0;
>>> +}
>>> +
>>> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>>>  {
>>>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>>>  	if (!sync && !available_free_memory(sbi, FREE_NIDS))
>>>  		return;
>>>  
>>> +	/* try to find free nids with nat_bits */
>>> +	if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
>>> +		return;
>>> +
>>> +	/* find next valid candidate */
>>
>> This is just for mount case?
> 
> Yup, it reuses free nids in dirty NAT blocks, so that we can make them as full
> NAT pages.
> 
> Thanks,
> 
>>
>>> +	if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
>>> +		int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
>>> +					nm_i->nat_blocks, 0);
>>> +		if (idx >= nm_i->nat_blocks)
>>> +			set_sbi_flag(sbi, SBI_NEED_FSCK);
>>> +		else
>>> +			nid = idx * NAT_ENTRY_PER_BLOCK;
>>> +	}
>>> +
>>>  	/* readahead nat pages to be scanned */
>>>  	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
>>>  							META_NAT, true);
>>> @@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>>>  					nm_i->ra_nid_pages, META_NAT, false);
>>>  }
>>>  
>>> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>>> +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>>>  {
>>>  	mutex_lock(&NM_I(sbi)->build_lock);
>>> -	__build_free_nids(sbi, sync);
>>> +	__build_free_nids(sbi, sync, mount);
>>>  	mutex_unlock(&NM_I(sbi)->build_lock);
>>>  }
>>>  
>>> @@ -1941,7 +2012,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
>>>  	spin_unlock(&nm_i->nid_list_lock);
>>>  
>>>  	/* Let's scan nat pages and its caches to get free nids */
>>> -	build_free_nids(sbi, true);
>>> +	build_free_nids(sbi, true, false);
>>>  	goto retry;
>>>  }
>>>  
>>> @@ -2233,8 +2304,39 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes,
>>>  	list_add_tail(&nes->set_list, head);
>>>  }
>>>  
>>> +void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
>>> +						struct page *page)
>>> +{
>>> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
>>> +	unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
>>> +	struct f2fs_nat_block *nat_blk = page_address(page);
>>> +	int valid = 0;
>>> +	int i;
>>> +
>>> +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>> +		return;
>>> +
>>> +	for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
>>> +		if (start_nid == 0 && i == 0)
>>> +			valid++;
>>> +		if (nat_blk->entries[i].block_addr)
>>> +			valid++;
>>> +	}
>>> +	if (valid == 0) {
>>> +		test_and_set_bit_le(nat_index, nm_i->empty_nat_bits);
>>> +		test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
>>
>> set_bit_le/clear_bit_le
>>
>>> +		return;
>>> +	}
>>> +
>>> +	test_and_clear_bit_le(nat_index, nm_i->empty_nat_bits);
>>
>> ditto
>>
>>> +	if (valid == NAT_ENTRY_PER_BLOCK)
>>> +		test_and_set_bit_le(nat_index, nm_i->full_nat_bits);
>>> +	else
>>> +		test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
>>
>> ditto
>>
>>> +}
>>> +
>>>  static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>> -					struct nat_entry_set *set)
>>> +		struct nat_entry_set *set, struct cp_control *cpc)
>>>  {
>>>  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>>  	struct f2fs_journal *journal = curseg->journal;
>>> @@ -2249,7 +2351,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>>  	 * #1, flush nat entries to journal in current hot data summary block.
>>>  	 * #2, flush nat entries to nat page.
>>>  	 */
>>> -	if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
>>> +	if (cpc->reason == CP_UMOUNT ||
>>
>> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
>>
>>> +		!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
>>>  		to_journal = false;
>>>  
>>>  	if (to_journal) {
>>> @@ -2289,10 +2392,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>>  		}
>>>  	}
>>>  
>>> -	if (to_journal)
>>> +	if (to_journal) {
>>>  		up_write(&curseg->journal_rwsem);
>>> -	else
>>> +	} else {
>>> +		__update_nat_bits(sbi, start_nid, page);
>>>  		f2fs_put_page(page, 1);
>>> +	}
>>>  
>>>  	f2fs_bug_on(sbi, set->entry_cnt);
>>>  
>>> @@ -2303,7 +2408,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>>  /*
>>>   * This function is called during the checkpointing process.
>>>   */
>>> -void flush_nat_entries(struct f2fs_sb_info *sbi)
>>> +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>  {
>>>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>> @@ -2324,7 +2429,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>>>  	 * entries, remove all entries from journal and merge them
>>>  	 * into nat entry set.
>>>  	 */
>>> -	if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
>>> +	if (cpc->reason == CP_UMOUNT ||
>>
>> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
>>
>>> +		!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
>>>  		remove_nats_in_journal(sbi);
>>>  
>>>  	while ((found = __gang_lookup_nat_set(nm_i,
>>> @@ -2338,27 +2444,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>>>  
>>>  	/* flush dirty nats in nat entry set */
>>>  	list_for_each_entry_safe(set, tmp, &sets, set_list)
>>> -		__flush_nat_entry_set(sbi, set);
>>> +		__flush_nat_entry_set(sbi, set, cpc);
>>>  
>>>  	up_write(&nm_i->nat_tree_lock);
>>>  
>>>  	f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
>>>  }
>>>  
>>> +static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
>>> +{
>>> +	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
>>> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
>>> +	unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
>>> +	unsigned int i;
>>> +	__u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
>>
>> __u64 cp_ver = cur_cp_version(ckpt);
>>
>> Thanks,
>>
>>> +	size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
>>> +	__u64 crc = le32_to_cpu(*((__le32 *)
>>> +				((unsigned char *)ckpt + crc_offset)));
>>> +	block_t nat_bits_addr;
>>> +
>>> +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>> +		return 0;
>>> +
>>> +	nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
>>> +						F2FS_BLKSIZE - 1);
>>> +	nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
>>> +						GFP_KERNEL);
>>> +	if (!nm_i->nat_bits)
>>> +		return -ENOMEM;
>>> +
>>> +	nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
>>> +						nm_i->nat_bits_blocks;
>>> +	for (i = 0; i < nm_i->nat_bits_blocks; i++) {
>>> +		struct page *page = get_meta_page(sbi, nat_bits_addr++);
>>> +
>>> +		memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
>>> +					page_address(page), F2FS_BLKSIZE);
>>> +		f2fs_put_page(page, 1);
>>> +	}
>>> +
>>> +	cp_ver |= (crc << 32);
>>> +	if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
>>> +		disable_nat_bits(sbi, true);
>>> +		return 0;
>>> +	}
>>> +
>>> +	nm_i->full_nat_bits = nm_i->nat_bits + 8;
>>> +	nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
>>> +
>>> +	f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
>>> +	return 0;
>>> +}
>>> +
>>>  static int init_node_manager(struct f2fs_sb_info *sbi)
>>>  {
>>>  	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
>>>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>  	unsigned char *version_bitmap;
>>> -	unsigned int nat_segs, nat_blocks;
>>> +	unsigned int nat_segs;
>>> +	int err;
>>>  
>>>  	nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
>>>  
>>>  	/* segment_count_nat includes pair segment so divide to 2. */
>>>  	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
>>> -	nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
>>> -
>>> -	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
>>> +	nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
>>> +	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
>>>  
>>>  	/* not used nids: 0, node, meta, (and root counted as valid node) */
>>>  	nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
>>> @@ -2392,6 +2543,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
>>>  	if (!nm_i->nat_bitmap)
>>>  		return -ENOMEM;
>>>  
>>> +	err = __get_nat_bitmaps(sbi);
>>> +	if (err)
>>> +		return err;
>>> +
>>>  #ifdef CONFIG_F2FS_CHECK_FS
>>>  	nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
>>>  					GFP_KERNEL);
>>> @@ -2414,7 +2569,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
>>>  	if (err)
>>>  		return err;
>>>  
>>> -	build_free_nids(sbi, true);
>>> +	build_free_nids(sbi, true, true);
>>>  	return 0;
>>>  }
>>>  
>>> @@ -2473,6 +2628,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
>>>  	up_write(&nm_i->nat_tree_lock);
>>>  
>>>  	kfree(nm_i->nat_bitmap);
>>> +	kfree(nm_i->nat_bits);
>>>  #ifdef CONFIG_F2FS_CHECK_FS
>>>  	kfree(nm_i->nat_bitmap_mir);
>>>  #endif
>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>> index df2ff5cfe8f4..8e1ec248c653 100644
>>> --- a/fs/f2fs/segment.c
>>> +++ b/fs/f2fs/segment.c
>>> @@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
>>>  	if (!available_free_memory(sbi, FREE_NIDS))
>>>  		try_to_free_nids(sbi, MAX_FREE_NIDS);
>>>  	else
>>> -		build_free_nids(sbi, false);
>>> +		build_free_nids(sbi, false, false);
>>>  
>>>  	if (!is_idle(sbi))
>>>  		return;
>>> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
>>> index f0748524ca8c..1c92ace2e8f8 100644
>>> --- a/include/linux/f2fs_fs.h
>>> +++ b/include/linux/f2fs_fs.h
>>> @@ -114,6 +114,7 @@ struct f2fs_super_block {
>>>  /*
>>>   * For checkpoint
>>>   */
>>> +#define CP_NAT_BITS_FLAG	0x00000080
>>>  #define CP_CRC_RECOVERY_FLAG	0x00000040
>>>  #define CP_FASTBOOT_FLAG	0x00000020
>>>  #define CP_FSCK_FLAG		0x00000010
>>>
>>
>>
>> ------------------------------------------------------------------------------
>> Check out the vibrant tech community on one of the world's most
>> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
>> _______________________________________________
>> Linux-f2fs-devel mailing list
>> Linux-f2fs-devel@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> 
> .
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks
  2017-02-25  3:26       ` Chao Yu
@ 2017-02-25 18:34         ` Jaegeuk Kim
  2017-02-27  3:03           ` Chao Yu
  0 siblings, 1 reply; 16+ messages in thread
From: Jaegeuk Kim @ 2017-02-25 18:34 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-fsdevel, linux-kernel, linux-f2fs-devel

On 02/25, Chao Yu wrote:
> On 2017/2/24 6:54, Jaegeuk Kim wrote:
> > On 02/23, Chao Yu wrote:
> >> On 2017/2/14 10:06, Jaegeuk Kim wrote:
...
> > 
> >>> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
> >>> +{
> >>> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>> +	struct page *page;
> >>> +	unsigned int i = 0;
> >>> +	nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
> >>> +	nid_t nid;
> >>> +
> >>> +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> >>> +		return -EAGAIN;
> >>> +
> >>> +	down_read(&nm_i->nat_tree_lock);
> >>> +check_empty:
> >>> +	i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
> >>> +	if (i >= nm_i->nat_blocks) {
> >>> +		i = 0;
> >>> +		goto check_partial;
> >>> +	}
> >>> +
> >>> +	for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
> >>> +									nid++) {
> >>> +		if (unlikely(nid >= nm_i->max_nid))
> >>> +			break;
> >>> +		add_free_nid(sbi, nid, true);
> >>> +	}
> >>> +
> >>> +	if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
> >>> +		goto out;
> >>> +	i++;
> >>> +	goto check_empty;
> >>> +
> >>> +check_partial:
> >>> +	i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
> >>> +	if (i >= nm_i->nat_blocks) {
> >>> +		disable_nat_bits(sbi, true);
> >>
> >> Can this happen in real world? Should be a bug in somewhere?
> > 
> > It happens, since current design handles full_nat_bits optionally in order
> > to avoid scanning a whole NAT page to set it back as 1 from 0.
> 
> All 0 value in full_nat_bits means the NAT block has at least one free nid,
> right? so if we traverse all such NAT blocks, and still haven't collect enough
> *target* number of free nids, we don't have to disable nat bit cache, we can
> just break out here.

No, I'm seeing this case:
1. mount with full=0, empty=0, which indicates some free nids.
2. allocate all there-in free nids, but still full=0, empty=0.
3. allocate more free nids.
...
... checkpoint makes full=1, empty=0

If there is no checkpoint and it consumes all the free nids, we can see all 0
value in full_nat_bits which is quite impossible. In that case, I'd prefer
to stop nat_bits and give fsck.f2fs to revive it back.

Thanks,

> 
> Thanks,
> 
> > 
> >>
> >>> +		return -EINVAL;
> >>> +	}
> >>> +
> >>> +	nid = i * NAT_ENTRY_PER_BLOCK;
> >>> +	page = get_current_nat_page(sbi, nid);
> >>> +	scan_nat_page(sbi, page, nid);
> >>> +	f2fs_put_page(page, 1);
> >>> +
> >>> +	if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
> >>> +		i++;
> >>> +		goto check_partial;
> >>> +	}
> >>> +out:
> >>> +	up_read(&nm_i->nat_tree_lock);
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> >>>  {
> >>>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> >>> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> >>>  	if (!sync && !available_free_memory(sbi, FREE_NIDS))
> >>>  		return;
> >>>  
> >>> +	/* try to find free nids with nat_bits */
> >>> +	if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
> >>> +		return;
> >>> +
> >>> +	/* find next valid candidate */
> >>
> >> This is just for mount case?
> > 
> > Yup, it reuses free nids in dirty NAT blocks, so that we can make them as full
> > NAT pages.
> > 
> > Thanks,
> > 
> >>
> >>> +	if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> >>> +		int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
> >>> +					nm_i->nat_blocks, 0);
> >>> +		if (idx >= nm_i->nat_blocks)
> >>> +			set_sbi_flag(sbi, SBI_NEED_FSCK);
> >>> +		else
> >>> +			nid = idx * NAT_ENTRY_PER_BLOCK;
> >>> +	}
> >>> +
> >>>  	/* readahead nat pages to be scanned */
> >>>  	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
> >>>  							META_NAT, true);
> >>> @@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> >>>  					nm_i->ra_nid_pages, META_NAT, false);
> >>>  }
> >>>  
> >>> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> >>> +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> >>>  {
> >>>  	mutex_lock(&NM_I(sbi)->build_lock);
> >>> -	__build_free_nids(sbi, sync);
> >>> +	__build_free_nids(sbi, sync, mount);
> >>>  	mutex_unlock(&NM_I(sbi)->build_lock);
> >>>  }
> >>>  
> >>> @@ -1941,7 +2012,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
> >>>  	spin_unlock(&nm_i->nid_list_lock);
> >>>  
> >>>  	/* Let's scan nat pages and its caches to get free nids */
> >>> -	build_free_nids(sbi, true);
> >>> +	build_free_nids(sbi, true, false);
> >>>  	goto retry;
> >>>  }
> >>>  
> >>> @@ -2233,8 +2304,39 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes,
> >>>  	list_add_tail(&nes->set_list, head);
> >>>  }
> >>>  
> >>> +void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
> >>> +						struct page *page)
> >>> +{
> >>> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>> +	unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
> >>> +	struct f2fs_nat_block *nat_blk = page_address(page);
> >>> +	int valid = 0;
> >>> +	int i;
> >>> +
> >>> +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> >>> +		return;
> >>> +
> >>> +	for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
> >>> +		if (start_nid == 0 && i == 0)
> >>> +			valid++;
> >>> +		if (nat_blk->entries[i].block_addr)
> >>> +			valid++;
> >>> +	}
> >>> +	if (valid == 0) {
> >>> +		test_and_set_bit_le(nat_index, nm_i->empty_nat_bits);
> >>> +		test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
> >>
> >> set_bit_le/clear_bit_le
> >>
> >>> +		return;
> >>> +	}
> >>> +
> >>> +	test_and_clear_bit_le(nat_index, nm_i->empty_nat_bits);
> >>
> >> ditto
> >>
> >>> +	if (valid == NAT_ENTRY_PER_BLOCK)
> >>> +		test_and_set_bit_le(nat_index, nm_i->full_nat_bits);
> >>> +	else
> >>> +		test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
> >>
> >> ditto
> >>
> >>> +}
> >>> +
> >>>  static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >>> -					struct nat_entry_set *set)
> >>> +		struct nat_entry_set *set, struct cp_control *cpc)
> >>>  {
> >>>  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> >>>  	struct f2fs_journal *journal = curseg->journal;
> >>> @@ -2249,7 +2351,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >>>  	 * #1, flush nat entries to journal in current hot data summary block.
> >>>  	 * #2, flush nat entries to nat page.
> >>>  	 */
> >>> -	if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
> >>> +	if (cpc->reason == CP_UMOUNT ||
> >>
> >> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
> >>
> >>> +		!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
> >>>  		to_journal = false;
> >>>  
> >>>  	if (to_journal) {
> >>> @@ -2289,10 +2392,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >>>  		}
> >>>  	}
> >>>  
> >>> -	if (to_journal)
> >>> +	if (to_journal) {
> >>>  		up_write(&curseg->journal_rwsem);
> >>> -	else
> >>> +	} else {
> >>> +		__update_nat_bits(sbi, start_nid, page);
> >>>  		f2fs_put_page(page, 1);
> >>> +	}
> >>>  
> >>>  	f2fs_bug_on(sbi, set->entry_cnt);
> >>>  
> >>> @@ -2303,7 +2408,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >>>  /*
> >>>   * This function is called during the checkpointing process.
> >>>   */
> >>> -void flush_nat_entries(struct f2fs_sb_info *sbi)
> >>> +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >>>  {
> >>>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> >>> @@ -2324,7 +2429,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> >>>  	 * entries, remove all entries from journal and merge them
> >>>  	 * into nat entry set.
> >>>  	 */
> >>> -	if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> >>> +	if (cpc->reason == CP_UMOUNT ||
> >>
> >> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
> >>
> >>> +		!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> >>>  		remove_nats_in_journal(sbi);
> >>>  
> >>>  	while ((found = __gang_lookup_nat_set(nm_i,
> >>> @@ -2338,27 +2444,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> >>>  
> >>>  	/* flush dirty nats in nat entry set */
> >>>  	list_for_each_entry_safe(set, tmp, &sets, set_list)
> >>> -		__flush_nat_entry_set(sbi, set);
> >>> +		__flush_nat_entry_set(sbi, set, cpc);
> >>>  
> >>>  	up_write(&nm_i->nat_tree_lock);
> >>>  
> >>>  	f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
> >>>  }
> >>>  
> >>> +static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
> >>> +{
> >>> +	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
> >>> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>> +	unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
> >>> +	unsigned int i;
> >>> +	__u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
> >>
> >> __u64 cp_ver = cur_cp_version(ckpt);
> >>
> >> Thanks,
> >>
> >>> +	size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
> >>> +	__u64 crc = le32_to_cpu(*((__le32 *)
> >>> +				((unsigned char *)ckpt + crc_offset)));
> >>> +	block_t nat_bits_addr;
> >>> +
> >>> +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> >>> +		return 0;
> >>> +
> >>> +	nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
> >>> +						F2FS_BLKSIZE - 1);
> >>> +	nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
> >>> +						GFP_KERNEL);
> >>> +	if (!nm_i->nat_bits)
> >>> +		return -ENOMEM;
> >>> +
> >>> +	nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
> >>> +						nm_i->nat_bits_blocks;
> >>> +	for (i = 0; i < nm_i->nat_bits_blocks; i++) {
> >>> +		struct page *page = get_meta_page(sbi, nat_bits_addr++);
> >>> +
> >>> +		memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
> >>> +					page_address(page), F2FS_BLKSIZE);
> >>> +		f2fs_put_page(page, 1);
> >>> +	}
> >>> +
> >>> +	cp_ver |= (crc << 32);
> >>> +	if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
> >>> +		disable_nat_bits(sbi, true);
> >>> +		return 0;
> >>> +	}
> >>> +
> >>> +	nm_i->full_nat_bits = nm_i->nat_bits + 8;
> >>> +	nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
> >>> +
> >>> +	f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
> >>> +	return 0;
> >>> +}
> >>> +
> >>>  static int init_node_manager(struct f2fs_sb_info *sbi)
> >>>  {
> >>>  	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
> >>>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>  	unsigned char *version_bitmap;
> >>> -	unsigned int nat_segs, nat_blocks;
> >>> +	unsigned int nat_segs;
> >>> +	int err;
> >>>  
> >>>  	nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
> >>>  
> >>>  	/* segment_count_nat includes pair segment so divide to 2. */
> >>>  	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
> >>> -	nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
> >>> -
> >>> -	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
> >>> +	nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
> >>> +	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
> >>>  
> >>>  	/* not used nids: 0, node, meta, (and root counted as valid node) */
> >>>  	nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
> >>> @@ -2392,6 +2543,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
> >>>  	if (!nm_i->nat_bitmap)
> >>>  		return -ENOMEM;
> >>>  
> >>> +	err = __get_nat_bitmaps(sbi);
> >>> +	if (err)
> >>> +		return err;
> >>> +
> >>>  #ifdef CONFIG_F2FS_CHECK_FS
> >>>  	nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
> >>>  					GFP_KERNEL);
> >>> @@ -2414,7 +2569,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
> >>>  	if (err)
> >>>  		return err;
> >>>  
> >>> -	build_free_nids(sbi, true);
> >>> +	build_free_nids(sbi, true, true);
> >>>  	return 0;
> >>>  }
> >>>  
> >>> @@ -2473,6 +2628,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
> >>>  	up_write(&nm_i->nat_tree_lock);
> >>>  
> >>>  	kfree(nm_i->nat_bitmap);
> >>> +	kfree(nm_i->nat_bits);
> >>>  #ifdef CONFIG_F2FS_CHECK_FS
> >>>  	kfree(nm_i->nat_bitmap_mir);
> >>>  #endif
> >>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>> index df2ff5cfe8f4..8e1ec248c653 100644
> >>> --- a/fs/f2fs/segment.c
> >>> +++ b/fs/f2fs/segment.c
> >>> @@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
> >>>  	if (!available_free_memory(sbi, FREE_NIDS))
> >>>  		try_to_free_nids(sbi, MAX_FREE_NIDS);
> >>>  	else
> >>> -		build_free_nids(sbi, false);
> >>> +		build_free_nids(sbi, false, false);
> >>>  
> >>>  	if (!is_idle(sbi))
> >>>  		return;
> >>> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
> >>> index f0748524ca8c..1c92ace2e8f8 100644
> >>> --- a/include/linux/f2fs_fs.h
> >>> +++ b/include/linux/f2fs_fs.h
> >>> @@ -114,6 +114,7 @@ struct f2fs_super_block {
> >>>  /*
> >>>   * For checkpoint
> >>>   */
> >>> +#define CP_NAT_BITS_FLAG	0x00000080
> >>>  #define CP_CRC_RECOVERY_FLAG	0x00000040
> >>>  #define CP_FASTBOOT_FLAG	0x00000020
> >>>  #define CP_FSCK_FLAG		0x00000010
> >>>
> >>
> >>
> >> ------------------------------------------------------------------------------
> >> Check out the vibrant tech community on one of the world's most
> >> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> >> _______________________________________________
> >> Linux-f2fs-devel mailing list
> >> Linux-f2fs-devel@lists.sourceforge.net
> >> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> > 
> > .
> > 
> 
> 
> ------------------------------------------------------------------------------
> Check out the vibrant tech community on one of the world's most
> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks
  2017-02-25 18:34         ` Jaegeuk Kim
@ 2017-02-27  3:03           ` Chao Yu
  2017-02-27 22:19             ` Jaegeuk Kim
  0 siblings, 1 reply; 16+ messages in thread
From: Chao Yu @ 2017-02-27  3:03 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-fsdevel, linux-kernel, linux-f2fs-devel

On 2017/2/26 2:34, Jaegeuk Kim wrote:
> On 02/25, Chao Yu wrote:
>> On 2017/2/24 6:54, Jaegeuk Kim wrote:
>>> On 02/23, Chao Yu wrote:
>>>> On 2017/2/14 10:06, Jaegeuk Kim wrote:
> ...
>>>
>>>>> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
>>>>> +{
>>>>> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>> +	struct page *page;
>>>>> +	unsigned int i = 0;
>>>>> +	nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
>>>>> +	nid_t nid;
>>>>> +
>>>>> +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>>>> +		return -EAGAIN;
>>>>> +
>>>>> +	down_read(&nm_i->nat_tree_lock);
>>>>> +check_empty:
>>>>> +	i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
>>>>> +	if (i >= nm_i->nat_blocks) {
>>>>> +		i = 0;
>>>>> +		goto check_partial;
>>>>> +	}
>>>>> +
>>>>> +	for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
>>>>> +									nid++) {
>>>>> +		if (unlikely(nid >= nm_i->max_nid))
>>>>> +			break;
>>>>> +		add_free_nid(sbi, nid, true);
>>>>> +	}
>>>>> +
>>>>> +	if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
>>>>> +		goto out;
>>>>> +	i++;
>>>>> +	goto check_empty;
>>>>> +
>>>>> +check_partial:
>>>>> +	i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
>>>>> +	if (i >= nm_i->nat_blocks) {
>>>>> +		disable_nat_bits(sbi, true);
>>>>
>>>> Can this happen in real world? Should be a bug in somewhere?
>>>
>>> It happens, since current design handles full_nat_bits optionally in order
>>> to avoid scanning a whole NAT page to set it back as 1 from 0.
>>
>> All 0 value in full_nat_bits means the NAT block has at least one free nid,
>> right? so if we traverse all such NAT blocks, and still haven't collect enough
>> *target* number of free nids, we don't have to disable nat bit cache, we can
>> just break out here.
> 
> No, I'm seeing this case:
> 1. mount with full=0, empty=0, which indicates some free nids.
> 2. allocate all there-in free nids, but still full=0, empty=0.
> 3. allocate more free nids.
> ...
> ... checkpoint makes full=1, empty=0
> 
> If there is no checkpoint and it consumes all the free nids, we can see all 0
> value in full_nat_bits which is quite impossible. In that case, I'd prefer
> to stop nat_bits and give fsck.f2fs to revive it back.

Yeah, I can understand that, but what I concern is when there is few free nids
(< target), we still try to load nids of 8 NAT blocks until ending the loop of
caching free nids, so it will be very easy to enter the case of disabling
nid_bits cache here, so how about doing more check?

if (i >= nm_i->nat_blocks &&
	nm_i->nid_cnt[FREE_NID_LIST] != nm_i->available_nids)
	disable_nat_bits

Thanks,

> 
> Thanks,
> 
>>
>> Thanks,
>>
>>>
>>>>
>>>>> +		return -EINVAL;
>>>>> +	}
>>>>> +
>>>>> +	nid = i * NAT_ENTRY_PER_BLOCK;
>>>>> +	page = get_current_nat_page(sbi, nid);
>>>>> +	scan_nat_page(sbi, page, nid);
>>>>> +	f2fs_put_page(page, 1);
>>>>> +
>>>>> +	if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
>>>>> +		i++;
>>>>> +		goto check_partial;
>>>>> +	}
>>>>> +out:
>>>>> +	up_read(&nm_i->nat_tree_lock);
>>>>> +	return 0;
>>>>> +}
>>>>> +
>>>>> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>>>>>  {
>>>>>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>>  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>>>> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>>>>>  	if (!sync && !available_free_memory(sbi, FREE_NIDS))
>>>>>  		return;
>>>>>  
>>>>> +	/* try to find free nids with nat_bits */
>>>>> +	if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
>>>>> +		return;
>>>>> +
>>>>> +	/* find next valid candidate */
>>>>
>>>> This is just for mount case?
>>>
>>> Yup, it reuses free nids in dirty NAT blocks, so that we can make them as full
>>> NAT pages.
>>>
>>> Thanks,
>>>
>>>>
>>>>> +	if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
>>>>> +		int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
>>>>> +					nm_i->nat_blocks, 0);
>>>>> +		if (idx >= nm_i->nat_blocks)
>>>>> +			set_sbi_flag(sbi, SBI_NEED_FSCK);
>>>>> +		else
>>>>> +			nid = idx * NAT_ENTRY_PER_BLOCK;
>>>>> +	}
>>>>> +
>>>>>  	/* readahead nat pages to be scanned */
>>>>>  	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
>>>>>  							META_NAT, true);
>>>>> @@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>>>>>  					nm_i->ra_nid_pages, META_NAT, false);
>>>>>  }
>>>>>  
>>>>> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>>>>> +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>>>>>  {
>>>>>  	mutex_lock(&NM_I(sbi)->build_lock);
>>>>> -	__build_free_nids(sbi, sync);
>>>>> +	__build_free_nids(sbi, sync, mount);
>>>>>  	mutex_unlock(&NM_I(sbi)->build_lock);
>>>>>  }
>>>>>  
>>>>> @@ -1941,7 +2012,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
>>>>>  	spin_unlock(&nm_i->nid_list_lock);
>>>>>  
>>>>>  	/* Let's scan nat pages and its caches to get free nids */
>>>>> -	build_free_nids(sbi, true);
>>>>> +	build_free_nids(sbi, true, false);
>>>>>  	goto retry;
>>>>>  }
>>>>>  
>>>>> @@ -2233,8 +2304,39 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes,
>>>>>  	list_add_tail(&nes->set_list, head);
>>>>>  }
>>>>>  
>>>>> +void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
>>>>> +						struct page *page)
>>>>> +{
>>>>> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>> +	unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
>>>>> +	struct f2fs_nat_block *nat_blk = page_address(page);
>>>>> +	int valid = 0;
>>>>> +	int i;
>>>>> +
>>>>> +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>>>> +		return;
>>>>> +
>>>>> +	for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
>>>>> +		if (start_nid == 0 && i == 0)
>>>>> +			valid++;
>>>>> +		if (nat_blk->entries[i].block_addr)
>>>>> +			valid++;
>>>>> +	}
>>>>> +	if (valid == 0) {
>>>>> +		test_and_set_bit_le(nat_index, nm_i->empty_nat_bits);
>>>>> +		test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
>>>>
>>>> set_bit_le/clear_bit_le
>>>>
>>>>> +		return;
>>>>> +	}
>>>>> +
>>>>> +	test_and_clear_bit_le(nat_index, nm_i->empty_nat_bits);
>>>>
>>>> ditto
>>>>
>>>>> +	if (valid == NAT_ENTRY_PER_BLOCK)
>>>>> +		test_and_set_bit_le(nat_index, nm_i->full_nat_bits);
>>>>> +	else
>>>>> +		test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
>>>>
>>>> ditto
>>>>
>>>>> +}
>>>>> +
>>>>>  static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>>>> -					struct nat_entry_set *set)
>>>>> +		struct nat_entry_set *set, struct cp_control *cpc)
>>>>>  {
>>>>>  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>>>>  	struct f2fs_journal *journal = curseg->journal;
>>>>> @@ -2249,7 +2351,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>>>>  	 * #1, flush nat entries to journal in current hot data summary block.
>>>>>  	 * #2, flush nat entries to nat page.
>>>>>  	 */
>>>>> -	if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
>>>>> +	if (cpc->reason == CP_UMOUNT ||
>>>>
>>>> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
>>>>
>>>>> +		!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
>>>>>  		to_journal = false;
>>>>>  
>>>>>  	if (to_journal) {
>>>>> @@ -2289,10 +2392,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>>>>  		}
>>>>>  	}
>>>>>  
>>>>> -	if (to_journal)
>>>>> +	if (to_journal) {
>>>>>  		up_write(&curseg->journal_rwsem);
>>>>> -	else
>>>>> +	} else {
>>>>> +		__update_nat_bits(sbi, start_nid, page);
>>>>>  		f2fs_put_page(page, 1);
>>>>> +	}
>>>>>  
>>>>>  	f2fs_bug_on(sbi, set->entry_cnt);
>>>>>  
>>>>> @@ -2303,7 +2408,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>>>>  /*
>>>>>   * This function is called during the checkpointing process.
>>>>>   */
>>>>> -void flush_nat_entries(struct f2fs_sb_info *sbi)
>>>>> +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>>>  {
>>>>>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>>  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>>>> @@ -2324,7 +2429,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>>>>>  	 * entries, remove all entries from journal and merge them
>>>>>  	 * into nat entry set.
>>>>>  	 */
>>>>> -	if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
>>>>> +	if (cpc->reason == CP_UMOUNT ||
>>>>
>>>> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
>>>>
>>>>> +		!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
>>>>>  		remove_nats_in_journal(sbi);
>>>>>  
>>>>>  	while ((found = __gang_lookup_nat_set(nm_i,
>>>>> @@ -2338,27 +2444,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>>>>>  
>>>>>  	/* flush dirty nats in nat entry set */
>>>>>  	list_for_each_entry_safe(set, tmp, &sets, set_list)
>>>>> -		__flush_nat_entry_set(sbi, set);
>>>>> +		__flush_nat_entry_set(sbi, set, cpc);
>>>>>  
>>>>>  	up_write(&nm_i->nat_tree_lock);
>>>>>  
>>>>>  	f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
>>>>>  }
>>>>>  
>>>>> +static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
>>>>> +{
>>>>> +	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
>>>>> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>> +	unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
>>>>> +	unsigned int i;
>>>>> +	__u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
>>>>
>>>> __u64 cp_ver = cur_cp_version(ckpt);
>>>>
>>>> Thanks,
>>>>
>>>>> +	size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
>>>>> +	__u64 crc = le32_to_cpu(*((__le32 *)
>>>>> +				((unsigned char *)ckpt + crc_offset)));
>>>>> +	block_t nat_bits_addr;
>>>>> +
>>>>> +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>>>> +		return 0;
>>>>> +
>>>>> +	nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
>>>>> +						F2FS_BLKSIZE - 1);
>>>>> +	nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
>>>>> +						GFP_KERNEL);
>>>>> +	if (!nm_i->nat_bits)
>>>>> +		return -ENOMEM;
>>>>> +
>>>>> +	nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
>>>>> +						nm_i->nat_bits_blocks;
>>>>> +	for (i = 0; i < nm_i->nat_bits_blocks; i++) {
>>>>> +		struct page *page = get_meta_page(sbi, nat_bits_addr++);
>>>>> +
>>>>> +		memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
>>>>> +					page_address(page), F2FS_BLKSIZE);
>>>>> +		f2fs_put_page(page, 1);
>>>>> +	}
>>>>> +
>>>>> +	cp_ver |= (crc << 32);
>>>>> +	if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
>>>>> +		disable_nat_bits(sbi, true);
>>>>> +		return 0;
>>>>> +	}
>>>>> +
>>>>> +	nm_i->full_nat_bits = nm_i->nat_bits + 8;
>>>>> +	nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
>>>>> +
>>>>> +	f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
>>>>> +	return 0;
>>>>> +}
>>>>> +
>>>>>  static int init_node_manager(struct f2fs_sb_info *sbi)
>>>>>  {
>>>>>  	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
>>>>>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>>  	unsigned char *version_bitmap;
>>>>> -	unsigned int nat_segs, nat_blocks;
>>>>> +	unsigned int nat_segs;
>>>>> +	int err;
>>>>>  
>>>>>  	nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
>>>>>  
>>>>>  	/* segment_count_nat includes pair segment so divide to 2. */
>>>>>  	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
>>>>> -	nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
>>>>> -
>>>>> -	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
>>>>> +	nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
>>>>> +	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
>>>>>  
>>>>>  	/* not used nids: 0, node, meta, (and root counted as valid node) */
>>>>>  	nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
>>>>> @@ -2392,6 +2543,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
>>>>>  	if (!nm_i->nat_bitmap)
>>>>>  		return -ENOMEM;
>>>>>  
>>>>> +	err = __get_nat_bitmaps(sbi);
>>>>> +	if (err)
>>>>> +		return err;
>>>>> +
>>>>>  #ifdef CONFIG_F2FS_CHECK_FS
>>>>>  	nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
>>>>>  					GFP_KERNEL);
>>>>> @@ -2414,7 +2569,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
>>>>>  	if (err)
>>>>>  		return err;
>>>>>  
>>>>> -	build_free_nids(sbi, true);
>>>>> +	build_free_nids(sbi, true, true);
>>>>>  	return 0;
>>>>>  }
>>>>>  
>>>>> @@ -2473,6 +2628,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
>>>>>  	up_write(&nm_i->nat_tree_lock);
>>>>>  
>>>>>  	kfree(nm_i->nat_bitmap);
>>>>> +	kfree(nm_i->nat_bits);
>>>>>  #ifdef CONFIG_F2FS_CHECK_FS
>>>>>  	kfree(nm_i->nat_bitmap_mir);
>>>>>  #endif
>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>> index df2ff5cfe8f4..8e1ec248c653 100644
>>>>> --- a/fs/f2fs/segment.c
>>>>> +++ b/fs/f2fs/segment.c
>>>>> @@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
>>>>>  	if (!available_free_memory(sbi, FREE_NIDS))
>>>>>  		try_to_free_nids(sbi, MAX_FREE_NIDS);
>>>>>  	else
>>>>> -		build_free_nids(sbi, false);
>>>>> +		build_free_nids(sbi, false, false);
>>>>>  
>>>>>  	if (!is_idle(sbi))
>>>>>  		return;
>>>>> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
>>>>> index f0748524ca8c..1c92ace2e8f8 100644
>>>>> --- a/include/linux/f2fs_fs.h
>>>>> +++ b/include/linux/f2fs_fs.h
>>>>> @@ -114,6 +114,7 @@ struct f2fs_super_block {
>>>>>  /*
>>>>>   * For checkpoint
>>>>>   */
>>>>> +#define CP_NAT_BITS_FLAG	0x00000080
>>>>>  #define CP_CRC_RECOVERY_FLAG	0x00000040
>>>>>  #define CP_FASTBOOT_FLAG	0x00000020
>>>>>  #define CP_FSCK_FLAG		0x00000010
>>>>>
>>>>
>>>>
>>>> ------------------------------------------------------------------------------
>>>> Check out the vibrant tech community on one of the world's most
>>>> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
>>>> _______________________________________________
>>>> Linux-f2fs-devel mailing list
>>>> Linux-f2fs-devel@lists.sourceforge.net
>>>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>>>
>>> .
>>>
>>
>>
>> ------------------------------------------------------------------------------
>> Check out the vibrant tech community on one of the world's most
>> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
>> _______________________________________________
>> Linux-f2fs-devel mailing list
>> Linux-f2fs-devel@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> 
> .
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks
  2017-02-27  3:03           ` Chao Yu
@ 2017-02-27 22:19             ` Jaegeuk Kim
  2017-02-28 10:38               ` Chao Yu
  0 siblings, 1 reply; 16+ messages in thread
From: Jaegeuk Kim @ 2017-02-27 22:19 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-fsdevel, linux-kernel, linux-f2fs-devel

On 02/27, Chao Yu wrote:
> On 2017/2/26 2:34, Jaegeuk Kim wrote:
> > On 02/25, Chao Yu wrote:
> >> On 2017/2/24 6:54, Jaegeuk Kim wrote:
> >>> On 02/23, Chao Yu wrote:
> >>>> On 2017/2/14 10:06, Jaegeuk Kim wrote:
> > ...
> >>>
> >>>>> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
> >>>>> +{
> >>>>> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>>> +	struct page *page;
> >>>>> +	unsigned int i = 0;
> >>>>> +	nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
> >>>>> +	nid_t nid;
> >>>>> +
> >>>>> +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> >>>>> +		return -EAGAIN;
> >>>>> +
> >>>>> +	down_read(&nm_i->nat_tree_lock);
> >>>>> +check_empty:
> >>>>> +	i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
> >>>>> +	if (i >= nm_i->nat_blocks) {
> >>>>> +		i = 0;
> >>>>> +		goto check_partial;
> >>>>> +	}
> >>>>> +
> >>>>> +	for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
> >>>>> +									nid++) {
> >>>>> +		if (unlikely(nid >= nm_i->max_nid))
> >>>>> +			break;
> >>>>> +		add_free_nid(sbi, nid, true);
> >>>>> +	}
> >>>>> +
> >>>>> +	if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
> >>>>> +		goto out;
> >>>>> +	i++;
> >>>>> +	goto check_empty;
> >>>>> +
> >>>>> +check_partial:
> >>>>> +	i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
> >>>>> +	if (i >= nm_i->nat_blocks) {
> >>>>> +		disable_nat_bits(sbi, true);
> >>>>
> >>>> Can this happen in real world? Should be a bug in somewhere?
> >>>
> >>> It happens, since current design handles full_nat_bits optionally in order
> >>> to avoid scanning a whole NAT page to set it back as 1 from 0.
> >>
> >> All 0 value in full_nat_bits means the NAT block has at least one free nid,
> >> right? so if we traverse all such NAT blocks, and still haven't collect enough
> >> *target* number of free nids, we don't have to disable nat bit cache, we can
> >> just break out here.
> > 
> > No, I'm seeing this case:
> > 1. mount with full=0, empty=0, which indicates some free nids.
> > 2. allocate all there-in free nids, but still full=0, empty=0.
> > 3. allocate more free nids.
> > ...
> > ... checkpoint makes full=1, empty=0
> > 
> > If there is no checkpoint and it consumes all the free nids, we can see all 0
> > value in full_nat_bits which is quite impossible. In that case, I'd prefer
> > to stop nat_bits and give fsck.f2fs to revive it back.
> 
> Yeah, I can understand that, but what I concern is when there is few free nids
> (< target), we still try to load nids of 8 NAT blocks until ending the loop of
> caching free nids, so it will be very easy to enter the case of disabling
> nid_bits cache here, so how about doing more check?
> 
> if (i >= nm_i->nat_blocks &&

This indicates full_nat_bits consists of all zeros, which means, whatever in
the next time, we need to return -EINVAL. If we do not disable it here, there is
no way to turn any bit into zero.

BTW, I'm trying to freeze all the patches for a pull request, so let me have
another patch for any pending issues which are not urgent for now as I think.

Thanks,

> 	nm_i->nid_cnt[FREE_NID_LIST] != nm_i->available_nids)
> 	disable_nat_bits
> 
> Thanks,
> 
> > 
> > Thanks,
> > 
> >>
> >> Thanks,
> >>
> >>>
> >>>>
> >>>>> +		return -EINVAL;
> >>>>> +	}
> >>>>> +
> >>>>> +	nid = i * NAT_ENTRY_PER_BLOCK;
> >>>>> +	page = get_current_nat_page(sbi, nid);
> >>>>> +	scan_nat_page(sbi, page, nid);
> >>>>> +	f2fs_put_page(page, 1);
> >>>>> +
> >>>>> +	if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
> >>>>> +		i++;
> >>>>> +		goto check_partial;
> >>>>> +	}
> >>>>> +out:
> >>>>> +	up_read(&nm_i->nat_tree_lock);
> >>>>> +	return 0;
> >>>>> +}
> >>>>> +
> >>>>> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> >>>>>  {
> >>>>>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>>>  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> >>>>> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> >>>>>  	if (!sync && !available_free_memory(sbi, FREE_NIDS))
> >>>>>  		return;
> >>>>>  
> >>>>> +	/* try to find free nids with nat_bits */
> >>>>> +	if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
> >>>>> +		return;
> >>>>> +
> >>>>> +	/* find next valid candidate */
> >>>>
> >>>> This is just for mount case?
> >>>
> >>> Yup, it reuses free nids in dirty NAT blocks, so that we can make them as full
> >>> NAT pages.
> >>>
> >>> Thanks,
> >>>
> >>>>
> >>>>> +	if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> >>>>> +		int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
> >>>>> +					nm_i->nat_blocks, 0);
> >>>>> +		if (idx >= nm_i->nat_blocks)
> >>>>> +			set_sbi_flag(sbi, SBI_NEED_FSCK);
> >>>>> +		else
> >>>>> +			nid = idx * NAT_ENTRY_PER_BLOCK;
> >>>>> +	}
> >>>>> +
> >>>>>  	/* readahead nat pages to be scanned */
> >>>>>  	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
> >>>>>  							META_NAT, true);
> >>>>> @@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> >>>>>  					nm_i->ra_nid_pages, META_NAT, false);
> >>>>>  }
> >>>>>  
> >>>>> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> >>>>> +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> >>>>>  {
> >>>>>  	mutex_lock(&NM_I(sbi)->build_lock);
> >>>>> -	__build_free_nids(sbi, sync);
> >>>>> +	__build_free_nids(sbi, sync, mount);
> >>>>>  	mutex_unlock(&NM_I(sbi)->build_lock);
> >>>>>  }
> >>>>>  
> >>>>> @@ -1941,7 +2012,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
> >>>>>  	spin_unlock(&nm_i->nid_list_lock);
> >>>>>  
> >>>>>  	/* Let's scan nat pages and its caches to get free nids */
> >>>>> -	build_free_nids(sbi, true);
> >>>>> +	build_free_nids(sbi, true, false);
> >>>>>  	goto retry;
> >>>>>  }
> >>>>>  
> >>>>> @@ -2233,8 +2304,39 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes,
> >>>>>  	list_add_tail(&nes->set_list, head);
> >>>>>  }
> >>>>>  
> >>>>> +void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
> >>>>> +						struct page *page)
> >>>>> +{
> >>>>> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>>> +	unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
> >>>>> +	struct f2fs_nat_block *nat_blk = page_address(page);
> >>>>> +	int valid = 0;
> >>>>> +	int i;
> >>>>> +
> >>>>> +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> >>>>> +		return;
> >>>>> +
> >>>>> +	for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
> >>>>> +		if (start_nid == 0 && i == 0)
> >>>>> +			valid++;
> >>>>> +		if (nat_blk->entries[i].block_addr)
> >>>>> +			valid++;
> >>>>> +	}
> >>>>> +	if (valid == 0) {
> >>>>> +		test_and_set_bit_le(nat_index, nm_i->empty_nat_bits);
> >>>>> +		test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
> >>>>
> >>>> set_bit_le/clear_bit_le
> >>>>
> >>>>> +		return;
> >>>>> +	}
> >>>>> +
> >>>>> +	test_and_clear_bit_le(nat_index, nm_i->empty_nat_bits);
> >>>>
> >>>> ditto
> >>>>
> >>>>> +	if (valid == NAT_ENTRY_PER_BLOCK)
> >>>>> +		test_and_set_bit_le(nat_index, nm_i->full_nat_bits);
> >>>>> +	else
> >>>>> +		test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
> >>>>
> >>>> ditto
> >>>>
> >>>>> +}
> >>>>> +
> >>>>>  static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >>>>> -					struct nat_entry_set *set)
> >>>>> +		struct nat_entry_set *set, struct cp_control *cpc)
> >>>>>  {
> >>>>>  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> >>>>>  	struct f2fs_journal *journal = curseg->journal;
> >>>>> @@ -2249,7 +2351,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >>>>>  	 * #1, flush nat entries to journal in current hot data summary block.
> >>>>>  	 * #2, flush nat entries to nat page.
> >>>>>  	 */
> >>>>> -	if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
> >>>>> +	if (cpc->reason == CP_UMOUNT ||
> >>>>
> >>>> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
> >>>>
> >>>>> +		!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
> >>>>>  		to_journal = false;
> >>>>>  
> >>>>>  	if (to_journal) {
> >>>>> @@ -2289,10 +2392,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >>>>>  		}
> >>>>>  	}
> >>>>>  
> >>>>> -	if (to_journal)
> >>>>> +	if (to_journal) {
> >>>>>  		up_write(&curseg->journal_rwsem);
> >>>>> -	else
> >>>>> +	} else {
> >>>>> +		__update_nat_bits(sbi, start_nid, page);
> >>>>>  		f2fs_put_page(page, 1);
> >>>>> +	}
> >>>>>  
> >>>>>  	f2fs_bug_on(sbi, set->entry_cnt);
> >>>>>  
> >>>>> @@ -2303,7 +2408,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >>>>>  /*
> >>>>>   * This function is called during the checkpointing process.
> >>>>>   */
> >>>>> -void flush_nat_entries(struct f2fs_sb_info *sbi)
> >>>>> +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >>>>>  {
> >>>>>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>>>  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> >>>>> @@ -2324,7 +2429,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> >>>>>  	 * entries, remove all entries from journal and merge them
> >>>>>  	 * into nat entry set.
> >>>>>  	 */
> >>>>> -	if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> >>>>> +	if (cpc->reason == CP_UMOUNT ||
> >>>>
> >>>> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
> >>>>
> >>>>> +		!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> >>>>>  		remove_nats_in_journal(sbi);
> >>>>>  
> >>>>>  	while ((found = __gang_lookup_nat_set(nm_i,
> >>>>> @@ -2338,27 +2444,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> >>>>>  
> >>>>>  	/* flush dirty nats in nat entry set */
> >>>>>  	list_for_each_entry_safe(set, tmp, &sets, set_list)
> >>>>> -		__flush_nat_entry_set(sbi, set);
> >>>>> +		__flush_nat_entry_set(sbi, set, cpc);
> >>>>>  
> >>>>>  	up_write(&nm_i->nat_tree_lock);
> >>>>>  
> >>>>>  	f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
> >>>>>  }
> >>>>>  
> >>>>> +static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
> >>>>> +{
> >>>>> +	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
> >>>>> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>>> +	unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
> >>>>> +	unsigned int i;
> >>>>> +	__u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
> >>>>
> >>>> __u64 cp_ver = cur_cp_version(ckpt);
> >>>>
> >>>> Thanks,
> >>>>
> >>>>> +	size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
> >>>>> +	__u64 crc = le32_to_cpu(*((__le32 *)
> >>>>> +				((unsigned char *)ckpt + crc_offset)));
> >>>>> +	block_t nat_bits_addr;
> >>>>> +
> >>>>> +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> >>>>> +		return 0;
> >>>>> +
> >>>>> +	nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
> >>>>> +						F2FS_BLKSIZE - 1);
> >>>>> +	nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
> >>>>> +						GFP_KERNEL);
> >>>>> +	if (!nm_i->nat_bits)
> >>>>> +		return -ENOMEM;
> >>>>> +
> >>>>> +	nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
> >>>>> +						nm_i->nat_bits_blocks;
> >>>>> +	for (i = 0; i < nm_i->nat_bits_blocks; i++) {
> >>>>> +		struct page *page = get_meta_page(sbi, nat_bits_addr++);
> >>>>> +
> >>>>> +		memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
> >>>>> +					page_address(page), F2FS_BLKSIZE);
> >>>>> +		f2fs_put_page(page, 1);
> >>>>> +	}
> >>>>> +
> >>>>> +	cp_ver |= (crc << 32);
> >>>>> +	if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
> >>>>> +		disable_nat_bits(sbi, true);
> >>>>> +		return 0;
> >>>>> +	}
> >>>>> +
> >>>>> +	nm_i->full_nat_bits = nm_i->nat_bits + 8;
> >>>>> +	nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
> >>>>> +
> >>>>> +	f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
> >>>>> +	return 0;
> >>>>> +}
> >>>>> +
> >>>>>  static int init_node_manager(struct f2fs_sb_info *sbi)
> >>>>>  {
> >>>>>  	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
> >>>>>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>>>  	unsigned char *version_bitmap;
> >>>>> -	unsigned int nat_segs, nat_blocks;
> >>>>> +	unsigned int nat_segs;
> >>>>> +	int err;
> >>>>>  
> >>>>>  	nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
> >>>>>  
> >>>>>  	/* segment_count_nat includes pair segment so divide to 2. */
> >>>>>  	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
> >>>>> -	nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
> >>>>> -
> >>>>> -	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
> >>>>> +	nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
> >>>>> +	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
> >>>>>  
> >>>>>  	/* not used nids: 0, node, meta, (and root counted as valid node) */
> >>>>>  	nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
> >>>>> @@ -2392,6 +2543,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
> >>>>>  	if (!nm_i->nat_bitmap)
> >>>>>  		return -ENOMEM;
> >>>>>  
> >>>>> +	err = __get_nat_bitmaps(sbi);
> >>>>> +	if (err)
> >>>>> +		return err;
> >>>>> +
> >>>>>  #ifdef CONFIG_F2FS_CHECK_FS
> >>>>>  	nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
> >>>>>  					GFP_KERNEL);
> >>>>> @@ -2414,7 +2569,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
> >>>>>  	if (err)
> >>>>>  		return err;
> >>>>>  
> >>>>> -	build_free_nids(sbi, true);
> >>>>> +	build_free_nids(sbi, true, true);
> >>>>>  	return 0;
> >>>>>  }
> >>>>>  
> >>>>> @@ -2473,6 +2628,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
> >>>>>  	up_write(&nm_i->nat_tree_lock);
> >>>>>  
> >>>>>  	kfree(nm_i->nat_bitmap);
> >>>>> +	kfree(nm_i->nat_bits);
> >>>>>  #ifdef CONFIG_F2FS_CHECK_FS
> >>>>>  	kfree(nm_i->nat_bitmap_mir);
> >>>>>  #endif
> >>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>>>> index df2ff5cfe8f4..8e1ec248c653 100644
> >>>>> --- a/fs/f2fs/segment.c
> >>>>> +++ b/fs/f2fs/segment.c
> >>>>> @@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
> >>>>>  	if (!available_free_memory(sbi, FREE_NIDS))
> >>>>>  		try_to_free_nids(sbi, MAX_FREE_NIDS);
> >>>>>  	else
> >>>>> -		build_free_nids(sbi, false);
> >>>>> +		build_free_nids(sbi, false, false);
> >>>>>  
> >>>>>  	if (!is_idle(sbi))
> >>>>>  		return;
> >>>>> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
> >>>>> index f0748524ca8c..1c92ace2e8f8 100644
> >>>>> --- a/include/linux/f2fs_fs.h
> >>>>> +++ b/include/linux/f2fs_fs.h
> >>>>> @@ -114,6 +114,7 @@ struct f2fs_super_block {
> >>>>>  /*
> >>>>>   * For checkpoint
> >>>>>   */
> >>>>> +#define CP_NAT_BITS_FLAG	0x00000080
> >>>>>  #define CP_CRC_RECOVERY_FLAG	0x00000040
> >>>>>  #define CP_FASTBOOT_FLAG	0x00000020
> >>>>>  #define CP_FSCK_FLAG		0x00000010
> >>>>>
> >>>>
> >>>>
> >>>> ------------------------------------------------------------------------------
> >>>> Check out the vibrant tech community on one of the world's most
> >>>> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> >>>> _______________________________________________
> >>>> Linux-f2fs-devel mailing list
> >>>> Linux-f2fs-devel@lists.sourceforge.net
> >>>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> >>>
> >>> .
> >>>
> >>
> >>
> >> ------------------------------------------------------------------------------
> >> Check out the vibrant tech community on one of the world's most
> >> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> >> _______________________________________________
> >> Linux-f2fs-devel mailing list
> >> Linux-f2fs-devel@lists.sourceforge.net
> >> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> > 
> > .
> > 
> 
> 
> ------------------------------------------------------------------------------
> Check out the vibrant tech community on one of the world's most
> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH 2/3 v2] f2fs: add bitmaps for empty or full NAT blocks
  2017-02-23 22:58   ` [PATCH 2/3 v2] " Jaegeuk Kim
@ 2017-02-28  3:34     ` Chao Yu
  2017-02-28  5:33       ` Jaegeuk Kim
  0 siblings, 1 reply; 16+ messages in thread
From: Chao Yu @ 2017-02-28  3:34 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2017/2/24 6:58, Jaegeuk Kim wrote:
> @@ -2326,7 +2431,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>  	 * entries, remove all entries from journal and merge them
>  	 * into nat entry set.
>  	 */
> -	if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> +	if (cpc->reason == CP_UMOUNT ||

enabled_nat_bits(sbi, cpc)

Thanks,

> +		!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
>  		remove_nats_in_journal(sbi);

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH 2/3 v2] f2fs: add bitmaps for empty or full NAT blocks
  2017-02-28  3:34     ` [f2fs-dev] " Chao Yu
@ 2017-02-28  5:33       ` Jaegeuk Kim
  2017-02-28 10:52         ` Chao Yu
  0 siblings, 1 reply; 16+ messages in thread
From: Jaegeuk Kim @ 2017-02-28  5:33 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

On 02/28, Chao Yu wrote:
> On 2017/2/24 6:58, Jaegeuk Kim wrote:
> > @@ -2326,7 +2431,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> >  	 * entries, remove all entries from journal and merge them
> >  	 * into nat entry set.
> >  	 */
> > -	if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> > +	if (cpc->reason == CP_UMOUNT ||
> 
> enabled_nat_bits(sbi, cpc)

Thanks.
I added another patch to fix this.

>From b15f7112c3daced4c7f70c4d5fcba9013743cb6b Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Mon, 27 Feb 2017 21:28:53 -0800
Subject: [PATCH] f2fs: avoid to flush nat journal entries

This patch adds a missing condition which flushes nat journal entries
unnecessarily introduced by:

    f2fs: add bitmaps for empty or full NAT blocks

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/node.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b3aead4c5a0f..94967171dee8 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -2513,7 +2513,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	 * entries, remove all entries from journal and merge them
 	 * into nat entry set.
 	 */
-	if (cpc->reason == CP_UMOUNT ||
+	if (enabled_nat_bits(sbi, cpc) ||
 		!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
 		remove_nats_in_journal(sbi);
 
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks
  2017-02-27 22:19             ` Jaegeuk Kim
@ 2017-02-28 10:38               ` Chao Yu
  0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2017-02-28 10:38 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-fsdevel, linux-kernel, linux-f2fs-devel

On 2017/2/28 6:19, Jaegeuk Kim wrote:
> On 02/27, Chao Yu wrote:
>> On 2017/2/26 2:34, Jaegeuk Kim wrote:
>>> On 02/25, Chao Yu wrote:
>>>> On 2017/2/24 6:54, Jaegeuk Kim wrote:
>>>>> On 02/23, Chao Yu wrote:
>>>>>> On 2017/2/14 10:06, Jaegeuk Kim wrote:
>>> ...
>>>>>
>>>>>>> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
>>>>>>> +{
>>>>>>> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>>>> +	struct page *page;
>>>>>>> +	unsigned int i = 0;
>>>>>>> +	nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
>>>>>>> +	nid_t nid;
>>>>>>> +
>>>>>>> +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>>>>>> +		return -EAGAIN;
>>>>>>> +
>>>>>>> +	down_read(&nm_i->nat_tree_lock);
>>>>>>> +check_empty:
>>>>>>> +	i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
>>>>>>> +	if (i >= nm_i->nat_blocks) {
>>>>>>> +		i = 0;
>>>>>>> +		goto check_partial;
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
>>>>>>> +									nid++) {
>>>>>>> +		if (unlikely(nid >= nm_i->max_nid))
>>>>>>> +			break;
>>>>>>> +		add_free_nid(sbi, nid, true);
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
>>>>>>> +		goto out;
>>>>>>> +	i++;
>>>>>>> +	goto check_empty;
>>>>>>> +
>>>>>>> +check_partial:
>>>>>>> +	i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
>>>>>>> +	if (i >= nm_i->nat_blocks) {
>>>>>>> +		disable_nat_bits(sbi, true);
>>>>>>
>>>>>> Can this happen in real world? Should be a bug in somewhere?
>>>>>
>>>>> It happens, since current design handles full_nat_bits optionally in order
>>>>> to avoid scanning a whole NAT page to set it back as 1 from 0.
>>>>
>>>> All 0 value in full_nat_bits means the NAT block has at least one free nid,
>>>> right? so if we traverse all such NAT blocks, and still haven't collect enough
>>>> *target* number of free nids, we don't have to disable nat bit cache, we can
>>>> just break out here.
>>>
>>> No, I'm seeing this case:
>>> 1. mount with full=0, empty=0, which indicates some free nids.
>>> 2. allocate all there-in free nids, but still full=0, empty=0.
>>> 3. allocate more free nids.
>>> ...
>>> ... checkpoint makes full=1, empty=0
>>>
>>> If there is no checkpoint and it consumes all the free nids, we can see all 0
>>> value in full_nat_bits which is quite impossible. In that case, I'd prefer
>>> to stop nat_bits and give fsck.f2fs to revive it back.
>>
>> Yeah, I can understand that, but what I concern is when there is few free nids
>> (< target), we still try to load nids of 8 NAT blocks until ending the loop of
>> caching free nids, so it will be very easy to enter the case of disabling
>> nid_bits cache here, so how about doing more check?
>>
>> if (i >= nm_i->nat_blocks &&
> 
> This indicates full_nat_bits consists of all zeros, which means, whatever in
> the next time, we need to return -EINVAL. If we do not disable it here, there is
> no way to turn any bit into zero.

We can expect late checkpoint will refresh full_nat_bits, __update_nat_bits will
turn bit to one or zero there.

> 
> BTW, I'm trying to freeze all the patches for a pull request, so let me have
> another patch for any pending issues which are not urgent for now as I think.

Got it. I will send you patch if I find explicit bug. :)

Thanks,

> 
> Thanks,
> 
>> 	nm_i->nid_cnt[FREE_NID_LIST] != nm_i->available_nids)
>> 	disable_nat_bits
>>
>> Thanks,
>>
>>>
>>> Thanks,
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>>>
>>>>>>> +		return -EINVAL;
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	nid = i * NAT_ENTRY_PER_BLOCK;
>>>>>>> +	page = get_current_nat_page(sbi, nid);
>>>>>>> +	scan_nat_page(sbi, page, nid);
>>>>>>> +	f2fs_put_page(page, 1);
>>>>>>> +
>>>>>>> +	if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
>>>>>>> +		i++;
>>>>>>> +		goto check_partial;
>>>>>>> +	}
>>>>>>> +out:
>>>>>>> +	up_read(&nm_i->nat_tree_lock);
>>>>>>> +	return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>>>>>>>  {
>>>>>>>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>>>>  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>>>>>> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>>>>>>>  	if (!sync && !available_free_memory(sbi, FREE_NIDS))
>>>>>>>  		return;
>>>>>>>  
>>>>>>> +	/* try to find free nids with nat_bits */
>>>>>>> +	if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
>>>>>>> +		return;
>>>>>>> +
>>>>>>> +	/* find next valid candidate */
>>>>>>
>>>>>> This is just for mount case?
>>>>>
>>>>> Yup, it reuses free nids in dirty NAT blocks, so that we can make them as full
>>>>> NAT pages.
>>>>>
>>>>> Thanks,
>>>>>
>>>>>>
>>>>>>> +	if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
>>>>>>> +		int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
>>>>>>> +					nm_i->nat_blocks, 0);
>>>>>>> +		if (idx >= nm_i->nat_blocks)
>>>>>>> +			set_sbi_flag(sbi, SBI_NEED_FSCK);
>>>>>>> +		else
>>>>>>> +			nid = idx * NAT_ENTRY_PER_BLOCK;
>>>>>>> +	}
>>>>>>> +
>>>>>>>  	/* readahead nat pages to be scanned */
>>>>>>>  	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
>>>>>>>  							META_NAT, true);
>>>>>>> @@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>>>>>>>  					nm_i->ra_nid_pages, META_NAT, false);
>>>>>>>  }
>>>>>>>  
>>>>>>> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>>>>>>> +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>>>>>>>  {
>>>>>>>  	mutex_lock(&NM_I(sbi)->build_lock);
>>>>>>> -	__build_free_nids(sbi, sync);
>>>>>>> +	__build_free_nids(sbi, sync, mount);
>>>>>>>  	mutex_unlock(&NM_I(sbi)->build_lock);
>>>>>>>  }
>>>>>>>  
>>>>>>> @@ -1941,7 +2012,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
>>>>>>>  	spin_unlock(&nm_i->nid_list_lock);
>>>>>>>  
>>>>>>>  	/* Let's scan nat pages and its caches to get free nids */
>>>>>>> -	build_free_nids(sbi, true);
>>>>>>> +	build_free_nids(sbi, true, false);
>>>>>>>  	goto retry;
>>>>>>>  }
>>>>>>>  
>>>>>>> @@ -2233,8 +2304,39 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes,
>>>>>>>  	list_add_tail(&nes->set_list, head);
>>>>>>>  }
>>>>>>>  
>>>>>>> +void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
>>>>>>> +						struct page *page)
>>>>>>> +{
>>>>>>> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>>>> +	unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
>>>>>>> +	struct f2fs_nat_block *nat_blk = page_address(page);
>>>>>>> +	int valid = 0;
>>>>>>> +	int i;
>>>>>>> +
>>>>>>> +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>>>>>> +		return;
>>>>>>> +
>>>>>>> +	for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
>>>>>>> +		if (start_nid == 0 && i == 0)
>>>>>>> +			valid++;
>>>>>>> +		if (nat_blk->entries[i].block_addr)
>>>>>>> +			valid++;
>>>>>>> +	}
>>>>>>> +	if (valid == 0) {
>>>>>>> +		test_and_set_bit_le(nat_index, nm_i->empty_nat_bits);
>>>>>>> +		test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
>>>>>>
>>>>>> set_bit_le/clear_bit_le
>>>>>>
>>>>>>> +		return;
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	test_and_clear_bit_le(nat_index, nm_i->empty_nat_bits);
>>>>>>
>>>>>> ditto
>>>>>>
>>>>>>> +	if (valid == NAT_ENTRY_PER_BLOCK)
>>>>>>> +		test_and_set_bit_le(nat_index, nm_i->full_nat_bits);
>>>>>>> +	else
>>>>>>> +		test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
>>>>>>
>>>>>> ditto
>>>>>>
>>>>>>> +}
>>>>>>> +
>>>>>>>  static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>>>>>> -					struct nat_entry_set *set)
>>>>>>> +		struct nat_entry_set *set, struct cp_control *cpc)
>>>>>>>  {
>>>>>>>  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>>>>>>  	struct f2fs_journal *journal = curseg->journal;
>>>>>>> @@ -2249,7 +2351,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>>>>>>  	 * #1, flush nat entries to journal in current hot data summary block.
>>>>>>>  	 * #2, flush nat entries to nat page.
>>>>>>>  	 */
>>>>>>> -	if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
>>>>>>> +	if (cpc->reason == CP_UMOUNT ||
>>>>>>
>>>>>> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
>>>>>>
>>>>>>> +		!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
>>>>>>>  		to_journal = false;
>>>>>>>  
>>>>>>>  	if (to_journal) {
>>>>>>> @@ -2289,10 +2392,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>>>>>>  		}
>>>>>>>  	}
>>>>>>>  
>>>>>>> -	if (to_journal)
>>>>>>> +	if (to_journal) {
>>>>>>>  		up_write(&curseg->journal_rwsem);
>>>>>>> -	else
>>>>>>> +	} else {
>>>>>>> +		__update_nat_bits(sbi, start_nid, page);
>>>>>>>  		f2fs_put_page(page, 1);
>>>>>>> +	}
>>>>>>>  
>>>>>>>  	f2fs_bug_on(sbi, set->entry_cnt);
>>>>>>>  
>>>>>>> @@ -2303,7 +2408,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>>>>>>  /*
>>>>>>>   * This function is called during the checkpointing process.
>>>>>>>   */
>>>>>>> -void flush_nat_entries(struct f2fs_sb_info *sbi)
>>>>>>> +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>>>>>  {
>>>>>>>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>>>>  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>>>>>> @@ -2324,7 +2429,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>>>>>>>  	 * entries, remove all entries from journal and merge them
>>>>>>>  	 * into nat entry set.
>>>>>>>  	 */
>>>>>>> -	if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
>>>>>>> +	if (cpc->reason == CP_UMOUNT ||
>>>>>>
>>>>>> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
>>>>>>
>>>>>>> +		!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
>>>>>>>  		remove_nats_in_journal(sbi);
>>>>>>>  
>>>>>>>  	while ((found = __gang_lookup_nat_set(nm_i,
>>>>>>> @@ -2338,27 +2444,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>>>>>>>  
>>>>>>>  	/* flush dirty nats in nat entry set */
>>>>>>>  	list_for_each_entry_safe(set, tmp, &sets, set_list)
>>>>>>> -		__flush_nat_entry_set(sbi, set);
>>>>>>> +		__flush_nat_entry_set(sbi, set, cpc);
>>>>>>>  
>>>>>>>  	up_write(&nm_i->nat_tree_lock);
>>>>>>>  
>>>>>>>  	f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
>>>>>>>  }
>>>>>>>  
>>>>>>> +static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
>>>>>>> +{
>>>>>>> +	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
>>>>>>> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>>>> +	unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
>>>>>>> +	unsigned int i;
>>>>>>> +	__u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
>>>>>>
>>>>>> __u64 cp_ver = cur_cp_version(ckpt);
>>>>>>
>>>>>> Thanks,
>>>>>>
>>>>>>> +	size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
>>>>>>> +	__u64 crc = le32_to_cpu(*((__le32 *)
>>>>>>> +				((unsigned char *)ckpt + crc_offset)));
>>>>>>> +	block_t nat_bits_addr;
>>>>>>> +
>>>>>>> +	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>>>>>> +		return 0;
>>>>>>> +
>>>>>>> +	nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
>>>>>>> +						F2FS_BLKSIZE - 1);
>>>>>>> +	nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
>>>>>>> +						GFP_KERNEL);
>>>>>>> +	if (!nm_i->nat_bits)
>>>>>>> +		return -ENOMEM;
>>>>>>> +
>>>>>>> +	nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
>>>>>>> +						nm_i->nat_bits_blocks;
>>>>>>> +	for (i = 0; i < nm_i->nat_bits_blocks; i++) {
>>>>>>> +		struct page *page = get_meta_page(sbi, nat_bits_addr++);
>>>>>>> +
>>>>>>> +		memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
>>>>>>> +					page_address(page), F2FS_BLKSIZE);
>>>>>>> +		f2fs_put_page(page, 1);
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	cp_ver |= (crc << 32);
>>>>>>> +	if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
>>>>>>> +		disable_nat_bits(sbi, true);
>>>>>>> +		return 0;
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	nm_i->full_nat_bits = nm_i->nat_bits + 8;
>>>>>>> +	nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
>>>>>>> +
>>>>>>> +	f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
>>>>>>> +	return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>>  static int init_node_manager(struct f2fs_sb_info *sbi)
>>>>>>>  {
>>>>>>>  	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
>>>>>>>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>>>>  	unsigned char *version_bitmap;
>>>>>>> -	unsigned int nat_segs, nat_blocks;
>>>>>>> +	unsigned int nat_segs;
>>>>>>> +	int err;
>>>>>>>  
>>>>>>>  	nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
>>>>>>>  
>>>>>>>  	/* segment_count_nat includes pair segment so divide to 2. */
>>>>>>>  	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
>>>>>>> -	nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
>>>>>>> -
>>>>>>> -	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
>>>>>>> +	nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
>>>>>>> +	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
>>>>>>>  
>>>>>>>  	/* not used nids: 0, node, meta, (and root counted as valid node) */
>>>>>>>  	nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
>>>>>>> @@ -2392,6 +2543,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
>>>>>>>  	if (!nm_i->nat_bitmap)
>>>>>>>  		return -ENOMEM;
>>>>>>>  
>>>>>>> +	err = __get_nat_bitmaps(sbi);
>>>>>>> +	if (err)
>>>>>>> +		return err;
>>>>>>> +
>>>>>>>  #ifdef CONFIG_F2FS_CHECK_FS
>>>>>>>  	nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
>>>>>>>  					GFP_KERNEL);
>>>>>>> @@ -2414,7 +2569,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
>>>>>>>  	if (err)
>>>>>>>  		return err;
>>>>>>>  
>>>>>>> -	build_free_nids(sbi, true);
>>>>>>> +	build_free_nids(sbi, true, true);
>>>>>>>  	return 0;
>>>>>>>  }
>>>>>>>  
>>>>>>> @@ -2473,6 +2628,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
>>>>>>>  	up_write(&nm_i->nat_tree_lock);
>>>>>>>  
>>>>>>>  	kfree(nm_i->nat_bitmap);
>>>>>>> +	kfree(nm_i->nat_bits);
>>>>>>>  #ifdef CONFIG_F2FS_CHECK_FS
>>>>>>>  	kfree(nm_i->nat_bitmap_mir);
>>>>>>>  #endif
>>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>>>> index df2ff5cfe8f4..8e1ec248c653 100644
>>>>>>> --- a/fs/f2fs/segment.c
>>>>>>> +++ b/fs/f2fs/segment.c
>>>>>>> @@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
>>>>>>>  	if (!available_free_memory(sbi, FREE_NIDS))
>>>>>>>  		try_to_free_nids(sbi, MAX_FREE_NIDS);
>>>>>>>  	else
>>>>>>> -		build_free_nids(sbi, false);
>>>>>>> +		build_free_nids(sbi, false, false);
>>>>>>>  
>>>>>>>  	if (!is_idle(sbi))
>>>>>>>  		return;
>>>>>>> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
>>>>>>> index f0748524ca8c..1c92ace2e8f8 100644
>>>>>>> --- a/include/linux/f2fs_fs.h
>>>>>>> +++ b/include/linux/f2fs_fs.h
>>>>>>> @@ -114,6 +114,7 @@ struct f2fs_super_block {
>>>>>>>  /*
>>>>>>>   * For checkpoint
>>>>>>>   */
>>>>>>> +#define CP_NAT_BITS_FLAG	0x00000080
>>>>>>>  #define CP_CRC_RECOVERY_FLAG	0x00000040
>>>>>>>  #define CP_FASTBOOT_FLAG	0x00000020
>>>>>>>  #define CP_FSCK_FLAG		0x00000010
>>>>>>>
>>>>>>
>>>>>>
>>>>>> ------------------------------------------------------------------------------
>>>>>> Check out the vibrant tech community on one of the world's most
>>>>>> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
>>>>>> _______________________________________________
>>>>>> Linux-f2fs-devel mailing list
>>>>>> Linux-f2fs-devel@lists.sourceforge.net
>>>>>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>>>>>
>>>>> .
>>>>>
>>>>
>>>>
>>>> ------------------------------------------------------------------------------
>>>> Check out the vibrant tech community on one of the world's most
>>>> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
>>>> _______________________________________________
>>>> Linux-f2fs-devel mailing list
>>>> Linux-f2fs-devel@lists.sourceforge.net
>>>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>>>
>>> .
>>>
>>
>>
>> ------------------------------------------------------------------------------
>> Check out the vibrant tech community on one of the world's most
>> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
>> _______________________________________________
>> Linux-f2fs-devel mailing list
>> Linux-f2fs-devel@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> 
> .
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH 2/3 v2] f2fs: add bitmaps for empty or full NAT blocks
  2017-02-28  5:33       ` Jaegeuk Kim
@ 2017-02-28 10:52         ` Chao Yu
  0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2017-02-28 10:52 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2017/2/28 13:33, Jaegeuk Kim wrote:
> On 02/28, Chao Yu wrote:
>> On 2017/2/24 6:58, Jaegeuk Kim wrote:
>>> @@ -2326,7 +2431,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>>>  	 * entries, remove all entries from journal and merge them
>>>  	 * into nat entry set.
>>>  	 */
>>> -	if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
>>> +	if (cpc->reason == CP_UMOUNT ||
>>
>> enabled_nat_bits(sbi, cpc)
> 
> Thanks.
> I added another patch to fix this.

I'm OK with this change. Thanks, :)

Thanks,

> 
>>From b15f7112c3daced4c7f70c4d5fcba9013743cb6b Mon Sep 17 00:00:00 2001
> From: Jaegeuk Kim <jaegeuk@kernel.org>
> Date: Mon, 27 Feb 2017 21:28:53 -0800
> Subject: [PATCH] f2fs: avoid to flush nat journal entries
> 
> This patch adds a missing condition which flushes nat journal entries
> unnecessarily introduced by:
> 
>     f2fs: add bitmaps for empty or full NAT blocks
> 
> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>  fs/f2fs/node.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index b3aead4c5a0f..94967171dee8 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -2513,7 +2513,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  	 * entries, remove all entries from journal and merge them
>  	 * into nat entry set.
>  	 */
> -	if (cpc->reason == CP_UMOUNT ||
> +	if (enabled_nat_bits(sbi, cpc) ||
>  		!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
>  		remove_nats_in_journal(sbi);
>  
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2017-02-28 10:58 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-14  2:06 [PATCH 1/3] f2fs: remove build_free_nids() during checkpoint Jaegeuk Kim
2017-02-14  2:06 ` [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks Jaegeuk Kim
2017-02-23 11:41   ` [f2fs-dev] " Chao Yu
2017-02-23 22:54     ` Jaegeuk Kim
2017-02-25  3:26       ` Chao Yu
2017-02-25 18:34         ` Jaegeuk Kim
2017-02-27  3:03           ` Chao Yu
2017-02-27 22:19             ` Jaegeuk Kim
2017-02-28 10:38               ` Chao Yu
2017-02-23 22:58   ` [PATCH 2/3 v2] " Jaegeuk Kim
2017-02-28  3:34     ` [f2fs-dev] " Chao Yu
2017-02-28  5:33       ` Jaegeuk Kim
2017-02-28 10:52         ` Chao Yu
2017-02-14  2:06 ` [PATCH 3/3] f2fs: avoid reading NAT page by get_node_info Jaegeuk Kim
2017-02-23 11:47   ` Chao Yu
2017-02-23 18:20     ` [f2fs-dev] " Jaegeuk Kim

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).