* [PATCH 1/3] f2fs: remove build_free_nids() during checkpoint
@ 2017-02-14 2:06 Jaegeuk Kim
2017-02-14 2:06 ` [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks Jaegeuk Kim
2017-02-14 2:06 ` [PATCH 3/3] f2fs: avoid reading NAT page by get_node_info Jaegeuk Kim
0 siblings, 2 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2017-02-14 2:06 UTC (permalink / raw)
To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim
Let's avoid build_free_nids() in checkpoint path.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
fs/f2fs/checkpoint.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index e6e42a4b1344..042f8d9afe44 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -999,8 +999,6 @@ static int block_operations(struct f2fs_sb_info *sbi)
static void unblock_operations(struct f2fs_sb_info *sbi)
{
up_write(&sbi->node_write);
-
- build_free_nids(sbi, false);
f2fs_unlock_all(sbi);
}
--
2.11.0
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks
2017-02-14 2:06 [PATCH 1/3] f2fs: remove build_free_nids() during checkpoint Jaegeuk Kim
@ 2017-02-14 2:06 ` Jaegeuk Kim
2017-02-23 11:41 ` [f2fs-dev] " Chao Yu
2017-02-23 22:58 ` [PATCH 2/3 v2] " Jaegeuk Kim
2017-02-14 2:06 ` [PATCH 3/3] f2fs: avoid reading NAT page by get_node_info Jaegeuk Kim
1 sibling, 2 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2017-02-14 2:06 UTC (permalink / raw)
To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim
This patches adds bitmaps to represent empty or full NAT blocks containing
free nid entries.
If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll
use these bitmaps when building free nids. In order to avoid checkpointing
burden, up-to-date bitmaps will be flushed only during umount time. So,
normally we can get this gain, but when power-cut happens, we rely on fsck.f2fs
which recovers this bitmap again.
After this patch, we build free nids from nid #0 at mount time to make more
full NAT blocks, but in runtime, we check empty NAT blocks to load free nids
without loading any NAT pages from disk.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
fs/f2fs/checkpoint.c | 29 +++++++-
fs/f2fs/debug.c | 1 +
fs/f2fs/f2fs.h | 23 +++++-
fs/f2fs/node.c | 188 +++++++++++++++++++++++++++++++++++++++++++-----
fs/f2fs/segment.c | 2 +-
include/linux/f2fs_fs.h | 1 +
6 files changed, 224 insertions(+), 20 deletions(-)
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 042f8d9afe44..783c5c3f16a4 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1024,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
spin_lock(&sbi->cp_lock);
+ if (ckpt->cp_pack_total_block_count >
+ sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
+ disable_nat_bits(sbi, false);
+
if (cpc->reason == CP_UMOUNT)
__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
else
@@ -1136,6 +1140,29 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
start_blk = __start_cp_next_addr(sbi);
+ /* write nat bits */
+ if (cpc->reason == CP_UMOUNT &&
+ is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
+ __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
+ unsigned int i;
+ block_t blk;
+
+ cp_ver |= ((__u64)crc32 << 32);
+ *(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);
+
+ blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
+ for (i = 0; i < nm_i->nat_bits_blocks; i++)
+ update_meta_page(sbi, nm_i->nat_bits +
+ (i << F2FS_BLKSIZE_BITS), blk + i);
+
+ /* Flush all the NAT BITS pages */
+ while (get_pages(sbi, F2FS_DIRTY_META)) {
+ sync_meta_pages(sbi, META, LONG_MAX);
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+ }
+ }
+
/* need to wait for end_io results */
wait_on_all_pages_writeback(sbi);
if (unlikely(f2fs_cp_error(sbi)))
@@ -1272,7 +1299,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
/* write cached NAT/SIT entries to NAT/SIT area */
- flush_nat_entries(sbi);
+ flush_nat_entries(sbi, cpc);
flush_sit_entries(sbi, cpc);
/* unlock all the fs_lock[] in do_checkpoint() */
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index de8da9fc5c99..015ad2b73a92 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -193,6 +193,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
/* build nm */
si->base_mem += sizeof(struct f2fs_nm_info);
si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
+ si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
get_cache:
si->cache_mem = 0;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 5c87d763a347..d263dade5e4c 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -547,6 +547,7 @@ struct f2fs_nm_info {
struct list_head nat_entries; /* cached nat entry list (clean) */
unsigned int nat_cnt; /* the # of cached nat entries */
unsigned int dirty_nat_cnt; /* total num of nat entries in set */
+ unsigned int nat_blocks; /* # of nat blocks */
/* free node ids management */
struct radix_tree_root free_nid_root;/* root of the free_nid cache */
@@ -557,6 +558,11 @@ struct f2fs_nm_info {
/* for checkpoint */
char *nat_bitmap; /* NAT bitmap pointer */
+
+ unsigned int nat_bits_blocks; /* # of nat bits blocks */
+ unsigned char *nat_bits; /* NAT bits blocks */
+ unsigned char *full_nat_bits; /* full NAT pages */
+ unsigned char *empty_nat_bits; /* empty NAT pages */
#ifdef CONFIG_F2FS_CHECK_FS
char *nat_bitmap_mir; /* NAT bitmap mirror */
#endif
@@ -1158,6 +1164,19 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
spin_unlock(&sbi->cp_lock);
}
+static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
+{
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+
+ if (lock)
+ spin_lock(&sbi->cp_lock);
+ __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
+ kfree(NM_I(sbi)->nat_bits);
+ NM_I(sbi)->nat_bits = NULL;
+ if (lock)
+ spin_unlock(&sbi->cp_lock);
+}
+
static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
{
down_read(&sbi->cp_rwsem);
@@ -2118,7 +2137,7 @@ void move_node_page(struct page *node_page, int gc_type);
int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
struct writeback_control *wbc, bool atomic);
int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc);
-void build_free_nids(struct f2fs_sb_info *sbi, bool sync);
+void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
@@ -2129,7 +2148,7 @@ int recover_xattr_data(struct inode *inode, struct page *page,
int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
int restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum);
-void flush_nat_entries(struct f2fs_sb_info *sbi);
+void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
int build_node_manager(struct f2fs_sb_info *sbi);
void destroy_node_manager(struct f2fs_sb_info *sbi);
int __init create_node_manager_caches(void);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index f8fb25a27b46..e001d084667f 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -338,6 +338,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
set_nat_flag(e, IS_CHECKPOINTED, false);
__set_nat_cache_dirty(nm_i, e);
+ if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG) &&
+ new_blkaddr == NEW_ADDR)
+ test_and_clear_bit_le(ni->nid / NAT_ENTRY_PER_BLOCK,
+ nm_i->empty_nat_bits);
+
/* update fsync_mark if its inode nat entry is still alive */
if (ni->nid != ni->ino)
e = __lookup_nat_cache(nm_i, ni->ino);
@@ -1839,7 +1844,59 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
}
}
-static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
+static int scan_nat_bits(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct page *page;
+ unsigned int i = 0;
+ nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
+ nid_t nid;
+
+ if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
+ return -EAGAIN;
+
+ down_read(&nm_i->nat_tree_lock);
+check_empty:
+ i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
+ if (i >= nm_i->nat_blocks) {
+ i = 0;
+ goto check_partial;
+ }
+
+ for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
+ nid++) {
+ if (unlikely(nid >= nm_i->max_nid))
+ break;
+ add_free_nid(sbi, nid, true);
+ }
+
+ if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
+ goto out;
+ i++;
+ goto check_empty;
+
+check_partial:
+ i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
+ if (i >= nm_i->nat_blocks) {
+ disable_nat_bits(sbi, true);
+ return -EINVAL;
+ }
+
+ nid = i * NAT_ENTRY_PER_BLOCK;
+ page = get_current_nat_page(sbi, nid);
+ scan_nat_page(sbi, page, nid);
+ f2fs_put_page(page, 1);
+
+ if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
+ i++;
+ goto check_partial;
+ }
+out:
+ up_read(&nm_i->nat_tree_lock);
+ return 0;
+}
+
+static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
if (!sync && !available_free_memory(sbi, FREE_NIDS))
return;
+ /* try to find free nids with nat_bits */
+ if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
+ return;
+
+ /* find next valid candidate */
+ if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
+ int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
+ nm_i->nat_blocks, 0);
+ if (idx >= nm_i->nat_blocks)
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ else
+ nid = idx * NAT_ENTRY_PER_BLOCK;
+ }
+
/* readahead nat pages to be scanned */
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
META_NAT, true);
@@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
nm_i->ra_nid_pages, META_NAT, false);
}
-void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
+void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
mutex_lock(&NM_I(sbi)->build_lock);
- __build_free_nids(sbi, sync);
+ __build_free_nids(sbi, sync, mount);
mutex_unlock(&NM_I(sbi)->build_lock);
}
@@ -1941,7 +2012,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
spin_unlock(&nm_i->nid_list_lock);
/* Let's scan nat pages and its caches to get free nids */
- build_free_nids(sbi, true);
+ build_free_nids(sbi, true, false);
goto retry;
}
@@ -2233,8 +2304,39 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes,
list_add_tail(&nes->set_list, head);
}
+void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
+ struct page *page)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
+ struct f2fs_nat_block *nat_blk = page_address(page);
+ int valid = 0;
+ int i;
+
+ if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
+ return;
+
+ for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
+ if (start_nid == 0 && i == 0)
+ valid++;
+ if (nat_blk->entries[i].block_addr)
+ valid++;
+ }
+ if (valid == 0) {
+ test_and_set_bit_le(nat_index, nm_i->empty_nat_bits);
+ test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
+ return;
+ }
+
+ test_and_clear_bit_le(nat_index, nm_i->empty_nat_bits);
+ if (valid == NAT_ENTRY_PER_BLOCK)
+ test_and_set_bit_le(nat_index, nm_i->full_nat_bits);
+ else
+ test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
+}
+
static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
- struct nat_entry_set *set)
+ struct nat_entry_set *set, struct cp_control *cpc)
{
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_journal *journal = curseg->journal;
@@ -2249,7 +2351,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
* #1, flush nat entries to journal in current hot data summary block.
* #2, flush nat entries to nat page.
*/
- if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
+ if (cpc->reason == CP_UMOUNT ||
+ !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
to_journal = false;
if (to_journal) {
@@ -2289,10 +2392,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
}
}
- if (to_journal)
+ if (to_journal) {
up_write(&curseg->journal_rwsem);
- else
+ } else {
+ __update_nat_bits(sbi, start_nid, page);
f2fs_put_page(page, 1);
+ }
f2fs_bug_on(sbi, set->entry_cnt);
@@ -2303,7 +2408,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
/*
* This function is called during the checkpointing process.
*/
-void flush_nat_entries(struct f2fs_sb_info *sbi)
+void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -2324,7 +2429,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
* entries, remove all entries from journal and merge them
* into nat entry set.
*/
- if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
+ if (cpc->reason == CP_UMOUNT ||
+ !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
remove_nats_in_journal(sbi);
while ((found = __gang_lookup_nat_set(nm_i,
@@ -2338,27 +2444,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
/* flush dirty nats in nat entry set */
list_for_each_entry_safe(set, tmp, &sets, set_list)
- __flush_nat_entry_set(sbi, set);
+ __flush_nat_entry_set(sbi, set, cpc);
up_write(&nm_i->nat_tree_lock);
f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
}
+static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
+ unsigned int i;
+ __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
+ size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
+ __u64 crc = le32_to_cpu(*((__le32 *)
+ ((unsigned char *)ckpt + crc_offset)));
+ block_t nat_bits_addr;
+
+ if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
+ return 0;
+
+ nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
+ F2FS_BLKSIZE - 1);
+ nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
+ GFP_KERNEL);
+ if (!nm_i->nat_bits)
+ return -ENOMEM;
+
+ nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
+ nm_i->nat_bits_blocks;
+ for (i = 0; i < nm_i->nat_bits_blocks; i++) {
+ struct page *page = get_meta_page(sbi, nat_bits_addr++);
+
+ memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
+ page_address(page), F2FS_BLKSIZE);
+ f2fs_put_page(page, 1);
+ }
+
+ cp_ver |= (crc << 32);
+ if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
+ disable_nat_bits(sbi, true);
+ return 0;
+ }
+
+ nm_i->full_nat_bits = nm_i->nat_bits + 8;
+ nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
+
+ f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
+ return 0;
+}
+
static int init_node_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
struct f2fs_nm_info *nm_i = NM_I(sbi);
unsigned char *version_bitmap;
- unsigned int nat_segs, nat_blocks;
+ unsigned int nat_segs;
+ int err;
nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
/* segment_count_nat includes pair segment so divide to 2. */
nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
- nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
-
- nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
+ nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
+ nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
/* not used nids: 0, node, meta, (and root counted as valid node) */
nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
@@ -2392,6 +2543,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
if (!nm_i->nat_bitmap)
return -ENOMEM;
+ err = __get_nat_bitmaps(sbi);
+ if (err)
+ return err;
+
#ifdef CONFIG_F2FS_CHECK_FS
nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
GFP_KERNEL);
@@ -2414,7 +2569,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
if (err)
return err;
- build_free_nids(sbi, true);
+ build_free_nids(sbi, true, true);
return 0;
}
@@ -2473,6 +2628,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
up_write(&nm_i->nat_tree_lock);
kfree(nm_i->nat_bitmap);
+ kfree(nm_i->nat_bits);
#ifdef CONFIG_F2FS_CHECK_FS
kfree(nm_i->nat_bitmap_mir);
#endif
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index df2ff5cfe8f4..8e1ec248c653 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
if (!available_free_memory(sbi, FREE_NIDS))
try_to_free_nids(sbi, MAX_FREE_NIDS);
else
- build_free_nids(sbi, false);
+ build_free_nids(sbi, false, false);
if (!is_idle(sbi))
return;
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index f0748524ca8c..1c92ace2e8f8 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -114,6 +114,7 @@ struct f2fs_super_block {
/*
* For checkpoint
*/
+#define CP_NAT_BITS_FLAG 0x00000080
#define CP_CRC_RECOVERY_FLAG 0x00000040
#define CP_FASTBOOT_FLAG 0x00000020
#define CP_FSCK_FLAG 0x00000010
--
2.11.0
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 3/3] f2fs: avoid reading NAT page by get_node_info
2017-02-14 2:06 [PATCH 1/3] f2fs: remove build_free_nids() during checkpoint Jaegeuk Kim
2017-02-14 2:06 ` [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks Jaegeuk Kim
@ 2017-02-14 2:06 ` Jaegeuk Kim
2017-02-23 11:47 ` Chao Yu
1 sibling, 1 reply; 16+ messages in thread
From: Jaegeuk Kim @ 2017-02-14 2:06 UTC (permalink / raw)
To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim
We've not seen this buggy case for a long time, so it's time to avoid this
unnecessary get_node_info() call which reading NAT page to cache nat entry.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
fs/f2fs/node.c | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index e001d084667f..b63bdb85ad66 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1033,7 +1033,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
unsigned int ofs, struct page *ipage)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
- struct node_info old_ni, new_ni;
+ struct node_info new_ni;
struct page *page;
int err;
@@ -1049,12 +1049,11 @@ struct page *new_node_page(struct dnode_of_data *dn,
goto fail;
}
- get_node_info(sbi, dn->nid, &old_ni);
-
- /* Reinitialize old_ni with new node page */
- f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR);
- new_ni = old_ni;
+ new_ni.nid = dn->nid;
new_ni.ino = dn->inode->i_ino;
+ new_ni.blk_addr = NULL_ADDR;
+ new_ni.flag = 0;
+ new_ni.version = 0;
set_node_addr(sbi, &new_ni, NEW_ADDR, false);
f2fs_wait_on_page_writeback(page, NODE, true);
--
2.11.0
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks
2017-02-14 2:06 ` [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks Jaegeuk Kim
@ 2017-02-23 11:41 ` Chao Yu
2017-02-23 22:54 ` Jaegeuk Kim
2017-02-23 22:58 ` [PATCH 2/3 v2] " Jaegeuk Kim
1 sibling, 1 reply; 16+ messages in thread
From: Chao Yu @ 2017-02-23 11:41 UTC (permalink / raw)
To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel
On 2017/2/14 10:06, Jaegeuk Kim wrote:
> This patches adds bitmaps to represent empty or full NAT blocks containing
> free nid entries.
>
> If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll
> use these bitmaps when building free nids. In order to avoid checkpointing
> burden, up-to-date bitmaps will be flushed only during umount time. So,
> normally we can get this gain, but when power-cut happens, we rely on fsck.f2fs
> which recovers this bitmap again.
>
> After this patch, we build free nids from nid #0 at mount time to make more
> full NAT blocks, but in runtime, we check empty NAT blocks to load free nids
> without loading any NAT pages from disk.
>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
> fs/f2fs/checkpoint.c | 29 +++++++-
> fs/f2fs/debug.c | 1 +
> fs/f2fs/f2fs.h | 23 +++++-
> fs/f2fs/node.c | 188 +++++++++++++++++++++++++++++++++++++++++++-----
> fs/f2fs/segment.c | 2 +-
> include/linux/f2fs_fs.h | 1 +
> 6 files changed, 224 insertions(+), 20 deletions(-)
>
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 042f8d9afe44..783c5c3f16a4 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1024,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>
> spin_lock(&sbi->cp_lock);
>
> + if (ckpt->cp_pack_total_block_count >
> + sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
> + disable_nat_bits(sbi, false);
I think we need to drop nat full/empty bitmap only if there is no enough space
in CP area while doing umount, otherwise we can keep this in memory.
> +
> if (cpc->reason == CP_UMOUNT)
> __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
> else
> @@ -1136,6 +1140,29 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>
> start_blk = __start_cp_next_addr(sbi);
>
> + /* write nat bits */
> + if (cpc->reason == CP_UMOUNT &&
> + is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> + __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
__u64 cp_ver = cur_cp_version(ckpt);
> + unsigned int i;
> + block_t blk;
> +
> + cp_ver |= ((__u64)crc32 << 32);
> + *(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);
> +
> + blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
> + for (i = 0; i < nm_i->nat_bits_blocks; i++)
> + update_meta_page(sbi, nm_i->nat_bits +
> + (i << F2FS_BLKSIZE_BITS), blk + i);
> +
> + /* Flush all the NAT BITS pages */
> + while (get_pages(sbi, F2FS_DIRTY_META)) {
> + sync_meta_pages(sbi, META, LONG_MAX);
> + if (unlikely(f2fs_cp_error(sbi)))
> + return -EIO;
> + }
> + }
> +
> /* need to wait for end_io results */
> wait_on_all_pages_writeback(sbi);
> if (unlikely(f2fs_cp_error(sbi)))
> @@ -1272,7 +1299,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
>
> /* write cached NAT/SIT entries to NAT/SIT area */
> - flush_nat_entries(sbi);
> + flush_nat_entries(sbi, cpc);
> flush_sit_entries(sbi, cpc);
>
> /* unlock all the fs_lock[] in do_checkpoint() */
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index de8da9fc5c99..015ad2b73a92 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -193,6 +193,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
> /* build nm */
> si->base_mem += sizeof(struct f2fs_nm_info);
> si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
> + si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
>
> get_cache:
> si->cache_mem = 0;
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 5c87d763a347..d263dade5e4c 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -547,6 +547,7 @@ struct f2fs_nm_info {
> struct list_head nat_entries; /* cached nat entry list (clean) */
> unsigned int nat_cnt; /* the # of cached nat entries */
> unsigned int dirty_nat_cnt; /* total num of nat entries in set */
> + unsigned int nat_blocks; /* # of nat blocks */
>
> /* free node ids management */
> struct radix_tree_root free_nid_root;/* root of the free_nid cache */
> @@ -557,6 +558,11 @@ struct f2fs_nm_info {
>
> /* for checkpoint */
> char *nat_bitmap; /* NAT bitmap pointer */
> +
> + unsigned int nat_bits_blocks; /* # of nat bits blocks */
> + unsigned char *nat_bits; /* NAT bits blocks */
> + unsigned char *full_nat_bits; /* full NAT pages */
> + unsigned char *empty_nat_bits; /* empty NAT pages */
> #ifdef CONFIG_F2FS_CHECK_FS
> char *nat_bitmap_mir; /* NAT bitmap mirror */
> #endif
> @@ -1158,6 +1164,19 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
> spin_unlock(&sbi->cp_lock);
> }
>
> +static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
> +{
> + set_sbi_flag(sbi, SBI_NEED_FSCK);
> +
> + if (lock)
> + spin_lock(&sbi->cp_lock);
> + __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
> + kfree(NM_I(sbi)->nat_bits);
> + NM_I(sbi)->nat_bits = NULL;
> + if (lock)
> + spin_unlock(&sbi->cp_lock);
> +}
> +
> static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
> {
> down_read(&sbi->cp_rwsem);
> @@ -2118,7 +2137,7 @@ void move_node_page(struct page *node_page, int gc_type);
> int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
> struct writeback_control *wbc, bool atomic);
> int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc);
> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync);
> +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
> bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
> void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
> void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
> @@ -2129,7 +2148,7 @@ int recover_xattr_data(struct inode *inode, struct page *page,
> int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
> int restore_node_summary(struct f2fs_sb_info *sbi,
> unsigned int segno, struct f2fs_summary_block *sum);
> -void flush_nat_entries(struct f2fs_sb_info *sbi);
> +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
> int build_node_manager(struct f2fs_sb_info *sbi);
> void destroy_node_manager(struct f2fs_sb_info *sbi);
> int __init create_node_manager_caches(void);
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index f8fb25a27b46..e001d084667f 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -338,6 +338,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
> set_nat_flag(e, IS_CHECKPOINTED, false);
> __set_nat_cache_dirty(nm_i, e);
>
> + if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG) &&
> + new_blkaddr == NEW_ADDR)
> + test_and_clear_bit_le(ni->nid / NAT_ENTRY_PER_BLOCK,
> + nm_i->empty_nat_bits);
clear_bit_le(NAT_BLOCK_OFFSET,)
> +
> /* update fsync_mark if its inode nat entry is still alive */
> if (ni->nid != ni->ino)
> e = __lookup_nat_cache(nm_i, ni->ino);
> @@ -1839,7 +1844,59 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
> }
> }
>
> -static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
> +{
> + struct f2fs_nm_info *nm_i = NM_I(sbi);
> + struct page *page;
> + unsigned int i = 0;
> + nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
> + nid_t nid;
> +
> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> + return -EAGAIN;
> +
> + down_read(&nm_i->nat_tree_lock);
> +check_empty:
> + i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
> + if (i >= nm_i->nat_blocks) {
> + i = 0;
> + goto check_partial;
> + }
> +
> + for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
> + nid++) {
> + if (unlikely(nid >= nm_i->max_nid))
> + break;
> + add_free_nid(sbi, nid, true);
> + }
> +
> + if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
> + goto out;
> + i++;
> + goto check_empty;
> +
> +check_partial:
> + i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
> + if (i >= nm_i->nat_blocks) {
> + disable_nat_bits(sbi, true);
Can this happen in real world? Should be a bug in somewhere?
> + return -EINVAL;
> + }
> +
> + nid = i * NAT_ENTRY_PER_BLOCK;
> + page = get_current_nat_page(sbi, nid);
> + scan_nat_page(sbi, page, nid);
> + f2fs_put_page(page, 1);
> +
> + if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
> + i++;
> + goto check_partial;
> + }
> +out:
> + up_read(&nm_i->nat_tree_lock);
> + return 0;
> +}
> +
> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> {
> struct f2fs_nm_info *nm_i = NM_I(sbi);
> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> if (!sync && !available_free_memory(sbi, FREE_NIDS))
> return;
>
> + /* try to find free nids with nat_bits */
> + if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
> + return;
> +
> + /* find next valid candidate */
This is just for mount case?
> + if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> + int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
> + nm_i->nat_blocks, 0);
> + if (idx >= nm_i->nat_blocks)
> + set_sbi_flag(sbi, SBI_NEED_FSCK);
> + else
> + nid = idx * NAT_ENTRY_PER_BLOCK;
> + }
> +
> /* readahead nat pages to be scanned */
> ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
> META_NAT, true);
> @@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> nm_i->ra_nid_pages, META_NAT, false);
> }
>
> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> {
> mutex_lock(&NM_I(sbi)->build_lock);
> - __build_free_nids(sbi, sync);
> + __build_free_nids(sbi, sync, mount);
> mutex_unlock(&NM_I(sbi)->build_lock);
> }
>
> @@ -1941,7 +2012,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
> spin_unlock(&nm_i->nid_list_lock);
>
> /* Let's scan nat pages and its caches to get free nids */
> - build_free_nids(sbi, true);
> + build_free_nids(sbi, true, false);
> goto retry;
> }
>
> @@ -2233,8 +2304,39 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes,
> list_add_tail(&nes->set_list, head);
> }
>
> +void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
> + struct page *page)
> +{
> + struct f2fs_nm_info *nm_i = NM_I(sbi);
> + unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
> + struct f2fs_nat_block *nat_blk = page_address(page);
> + int valid = 0;
> + int i;
> +
> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> + return;
> +
> + for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
> + if (start_nid == 0 && i == 0)
> + valid++;
> + if (nat_blk->entries[i].block_addr)
> + valid++;
> + }
> + if (valid == 0) {
> + test_and_set_bit_le(nat_index, nm_i->empty_nat_bits);
> + test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
set_bit_le/clear_bit_le
> + return;
> + }
> +
> + test_and_clear_bit_le(nat_index, nm_i->empty_nat_bits);
ditto
> + if (valid == NAT_ENTRY_PER_BLOCK)
> + test_and_set_bit_le(nat_index, nm_i->full_nat_bits);
> + else
> + test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
ditto
> +}
> +
> static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> - struct nat_entry_set *set)
> + struct nat_entry_set *set, struct cp_control *cpc)
> {
> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> struct f2fs_journal *journal = curseg->journal;
> @@ -2249,7 +2351,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> * #1, flush nat entries to journal in current hot data summary block.
> * #2, flush nat entries to nat page.
> */
> - if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
> + if (cpc->reason == CP_UMOUNT ||
if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
> + !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
> to_journal = false;
>
> if (to_journal) {
> @@ -2289,10 +2392,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> }
> }
>
> - if (to_journal)
> + if (to_journal) {
> up_write(&curseg->journal_rwsem);
> - else
> + } else {
> + __update_nat_bits(sbi, start_nid, page);
> f2fs_put_page(page, 1);
> + }
>
> f2fs_bug_on(sbi, set->entry_cnt);
>
> @@ -2303,7 +2408,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> /*
> * This function is called during the checkpointing process.
> */
> -void flush_nat_entries(struct f2fs_sb_info *sbi)
> +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> {
> struct f2fs_nm_info *nm_i = NM_I(sbi);
> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> @@ -2324,7 +2429,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> * entries, remove all entries from journal and merge them
> * into nat entry set.
> */
> - if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> + if (cpc->reason == CP_UMOUNT ||
if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
> + !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> remove_nats_in_journal(sbi);
>
> while ((found = __gang_lookup_nat_set(nm_i,
> @@ -2338,27 +2444,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>
> /* flush dirty nats in nat entry set */
> list_for_each_entry_safe(set, tmp, &sets, set_list)
> - __flush_nat_entry_set(sbi, set);
> + __flush_nat_entry_set(sbi, set, cpc);
>
> up_write(&nm_i->nat_tree_lock);
>
> f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
> }
>
> +static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
> +{
> + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
> + struct f2fs_nm_info *nm_i = NM_I(sbi);
> + unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
> + unsigned int i;
> + __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
__u64 cp_ver = cur_cp_version(ckpt);
Thanks,
> + size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
> + __u64 crc = le32_to_cpu(*((__le32 *)
> + ((unsigned char *)ckpt + crc_offset)));
> + block_t nat_bits_addr;
> +
> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> + return 0;
> +
> + nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
> + F2FS_BLKSIZE - 1);
> + nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
> + GFP_KERNEL);
> + if (!nm_i->nat_bits)
> + return -ENOMEM;
> +
> + nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
> + nm_i->nat_bits_blocks;
> + for (i = 0; i < nm_i->nat_bits_blocks; i++) {
> + struct page *page = get_meta_page(sbi, nat_bits_addr++);
> +
> + memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
> + page_address(page), F2FS_BLKSIZE);
> + f2fs_put_page(page, 1);
> + }
> +
> + cp_ver |= (crc << 32);
> + if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
> + disable_nat_bits(sbi, true);
> + return 0;
> + }
> +
> + nm_i->full_nat_bits = nm_i->nat_bits + 8;
> + nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
> +
> + f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
> + return 0;
> +}
> +
> static int init_node_manager(struct f2fs_sb_info *sbi)
> {
> struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
> struct f2fs_nm_info *nm_i = NM_I(sbi);
> unsigned char *version_bitmap;
> - unsigned int nat_segs, nat_blocks;
> + unsigned int nat_segs;
> + int err;
>
> nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
>
> /* segment_count_nat includes pair segment so divide to 2. */
> nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
> - nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
> -
> - nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
> + nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
> + nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
>
> /* not used nids: 0, node, meta, (and root counted as valid node) */
> nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
> @@ -2392,6 +2543,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
> if (!nm_i->nat_bitmap)
> return -ENOMEM;
>
> + err = __get_nat_bitmaps(sbi);
> + if (err)
> + return err;
> +
> #ifdef CONFIG_F2FS_CHECK_FS
> nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
> GFP_KERNEL);
> @@ -2414,7 +2569,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
> if (err)
> return err;
>
> - build_free_nids(sbi, true);
> + build_free_nids(sbi, true, true);
> return 0;
> }
>
> @@ -2473,6 +2628,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
> up_write(&nm_i->nat_tree_lock);
>
> kfree(nm_i->nat_bitmap);
> + kfree(nm_i->nat_bits);
> #ifdef CONFIG_F2FS_CHECK_FS
> kfree(nm_i->nat_bitmap_mir);
> #endif
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index df2ff5cfe8f4..8e1ec248c653 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
> if (!available_free_memory(sbi, FREE_NIDS))
> try_to_free_nids(sbi, MAX_FREE_NIDS);
> else
> - build_free_nids(sbi, false);
> + build_free_nids(sbi, false, false);
>
> if (!is_idle(sbi))
> return;
> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
> index f0748524ca8c..1c92ace2e8f8 100644
> --- a/include/linux/f2fs_fs.h
> +++ b/include/linux/f2fs_fs.h
> @@ -114,6 +114,7 @@ struct f2fs_super_block {
> /*
> * For checkpoint
> */
> +#define CP_NAT_BITS_FLAG 0x00000080
> #define CP_CRC_RECOVERY_FLAG 0x00000040
> #define CP_FASTBOOT_FLAG 0x00000020
> #define CP_FSCK_FLAG 0x00000010
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 3/3] f2fs: avoid reading NAT page by get_node_info
2017-02-14 2:06 ` [PATCH 3/3] f2fs: avoid reading NAT page by get_node_info Jaegeuk Kim
@ 2017-02-23 11:47 ` Chao Yu
2017-02-23 18:20 ` [f2fs-dev] " Jaegeuk Kim
0 siblings, 1 reply; 16+ messages in thread
From: Chao Yu @ 2017-02-23 11:47 UTC (permalink / raw)
To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel
On 2017/2/14 10:06, Jaegeuk Kim wrote:
> We've not seen this buggy case for a long time, so it's time to avoid this
> unnecessary get_node_info() call which reading NAT page to cache nat entry.
>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
> fs/f2fs/node.c | 11 +++++------
> 1 file changed, 5 insertions(+), 6 deletions(-)
>
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index e001d084667f..b63bdb85ad66 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -1033,7 +1033,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
> unsigned int ofs, struct page *ipage)
> {
> struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
> - struct node_info old_ni, new_ni;
> + struct node_info new_ni;
> struct page *page;
> int err;
>
> @@ -1049,12 +1049,11 @@ struct page *new_node_page(struct dnode_of_data *dn,
> goto fail;
> }
>
> - get_node_info(sbi, dn->nid, &old_ni);
> -
> - /* Reinitialize old_ni with new node page */
> - f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR);
What about keeping get_node_info covered with CONFIG_F2FS_CHECK_FS for
consistence as we just introduce two new free nid caches, I think it will be
helpful for detecting bugs of these new codes.
Thanks,
> - new_ni = old_ni;
> + new_ni.nid = dn->nid;
> new_ni.ino = dn->inode->i_ino;
> + new_ni.blk_addr = NULL_ADDR;
> + new_ni.flag = 0;
> + new_ni.version = 0;
> set_node_addr(sbi, &new_ni, NEW_ADDR, false);
>
> f2fs_wait_on_page_writeback(page, NODE, true);
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH 3/3] f2fs: avoid reading NAT page by get_node_info
2017-02-23 11:47 ` Chao Yu
@ 2017-02-23 18:20 ` Jaegeuk Kim
0 siblings, 0 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2017-02-23 18:20 UTC (permalink / raw)
To: Chao Yu; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel
On 02/23, Chao Yu wrote:
> On 2017/2/14 10:06, Jaegeuk Kim wrote:
> > We've not seen this buggy case for a long time, so it's time to avoid this
> > unnecessary get_node_info() call which reading NAT page to cache nat entry.
> >
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > ---
> > fs/f2fs/node.c | 11 +++++------
> > 1 file changed, 5 insertions(+), 6 deletions(-)
> >
> > diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> > index e001d084667f..b63bdb85ad66 100644
> > --- a/fs/f2fs/node.c
> > +++ b/fs/f2fs/node.c
> > @@ -1033,7 +1033,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
> > unsigned int ofs, struct page *ipage)
> > {
> > struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
> > - struct node_info old_ni, new_ni;
> > + struct node_info new_ni;
> > struct page *page;
> > int err;
> >
> > @@ -1049,12 +1049,11 @@ struct page *new_node_page(struct dnode_of_data *dn,
> > goto fail;
> > }
> >
> > - get_node_info(sbi, dn->nid, &old_ni);
> > -
> > - /* Reinitialize old_ni with new node page */
> > - f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR);
>
> What about keeping get_node_info covered with CONFIG_F2FS_CHECK_FS for
> consistence as we just introduce two new free nid caches, I think it will be
> helpful for detecting bugs of these new codes.
Good idea!
I added it like this.
>From a60572a7e9826b26f9634073ba9178762a9e0e7a Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Mon, 13 Feb 2017 17:02:44 -0800
Subject: [PATCH] f2fs: avoid reading NAT page by get_node_info
We've not seen this buggy case for a long time, so it's time to avoid this
unnecessary get_node_info() call which reading NAT page to cache nat entry.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
fs/f2fs/node.c | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index baef30e230d9..42bb6f3f4757 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1033,7 +1033,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
unsigned int ofs, struct page *ipage)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
- struct node_info old_ni, new_ni;
+ struct node_info new_ni;
struct page *page;
int err;
@@ -1048,13 +1048,15 @@ struct page *new_node_page(struct dnode_of_data *dn,
err = -ENOSPC;
goto fail;
}
-
- get_node_info(sbi, dn->nid, &old_ni);
-
- /* Reinitialize old_ni with new node page */
- f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR);
- new_ni = old_ni;
+#ifdef CONFIG_F2FS_CHECK_FS
+ get_node_info(sbi, dn->nid, &new_ni);
+ f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
+#endif
+ new_ni.nid = dn->nid;
new_ni.ino = dn->inode->i_ino;
+ new_ni.blk_addr = NULL_ADDR;
+ new_ni.flag = 0;
+ new_ni.version = 0;
set_node_addr(sbi, &new_ni, NEW_ADDR, false);
f2fs_wait_on_page_writeback(page, NODE, true);
--
2.11.0
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks
2017-02-23 11:41 ` [f2fs-dev] " Chao Yu
@ 2017-02-23 22:54 ` Jaegeuk Kim
2017-02-25 3:26 ` Chao Yu
0 siblings, 1 reply; 16+ messages in thread
From: Jaegeuk Kim @ 2017-02-23 22:54 UTC (permalink / raw)
To: Chao Yu; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel
On 02/23, Chao Yu wrote:
> On 2017/2/14 10:06, Jaegeuk Kim wrote:
> > This patches adds bitmaps to represent empty or full NAT blocks containing
> > free nid entries.
> >
> > If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll
> > use these bitmaps when building free nids. In order to avoid checkpointing
> > burden, up-to-date bitmaps will be flushed only during umount time. So,
> > normally we can get this gain, but when power-cut happens, we rely on fsck.f2fs
> > which recovers this bitmap again.
> >
> > After this patch, we build free nids from nid #0 at mount time to make more
> > full NAT blocks, but in runtime, we check empty NAT blocks to load free nids
> > without loading any NAT pages from disk.
> >
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > ---
> > fs/f2fs/checkpoint.c | 29 +++++++-
> > fs/f2fs/debug.c | 1 +
> > fs/f2fs/f2fs.h | 23 +++++-
> > fs/f2fs/node.c | 188 +++++++++++++++++++++++++++++++++++++++++++-----
> > fs/f2fs/segment.c | 2 +-
> > include/linux/f2fs_fs.h | 1 +
> > 6 files changed, 224 insertions(+), 20 deletions(-)
> >
> > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > index 042f8d9afe44..783c5c3f16a4 100644
> > --- a/fs/f2fs/checkpoint.c
> > +++ b/fs/f2fs/checkpoint.c
> > @@ -1024,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >
> > spin_lock(&sbi->cp_lock);
> >
> > + if (ckpt->cp_pack_total_block_count >
> > + sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
> > + disable_nat_bits(sbi, false);
>
> I think we need to drop nat full/empty bitmap only if there is no enough space
> in CP area while doing umount, otherwise we can keep this in memory.
Yup.
>
> > +
> > if (cpc->reason == CP_UMOUNT)
> > __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
> > else
> > @@ -1136,6 +1140,29 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >
> > start_blk = __start_cp_next_addr(sbi);
> >
> > + /* write nat bits */
...
> > +static int scan_nat_bits(struct f2fs_sb_info *sbi)
> > +{
> > + struct f2fs_nm_info *nm_i = NM_I(sbi);
> > + struct page *page;
> > + unsigned int i = 0;
> > + nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
> > + nid_t nid;
> > +
> > + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> > + return -EAGAIN;
> > +
> > + down_read(&nm_i->nat_tree_lock);
> > +check_empty:
> > + i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
> > + if (i >= nm_i->nat_blocks) {
> > + i = 0;
> > + goto check_partial;
> > + }
> > +
> > + for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
> > + nid++) {
> > + if (unlikely(nid >= nm_i->max_nid))
> > + break;
> > + add_free_nid(sbi, nid, true);
> > + }
> > +
> > + if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
> > + goto out;
> > + i++;
> > + goto check_empty;
> > +
> > +check_partial:
> > + i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
> > + if (i >= nm_i->nat_blocks) {
> > + disable_nat_bits(sbi, true);
>
> Can this happen in real world? Should be a bug in somewhere?
It happens, since current design handles full_nat_bits optionally in order
to avoid scanning a whole NAT page to set it back as 1 from 0.
>
> > + return -EINVAL;
> > + }
> > +
> > + nid = i * NAT_ENTRY_PER_BLOCK;
> > + page = get_current_nat_page(sbi, nid);
> > + scan_nat_page(sbi, page, nid);
> > + f2fs_put_page(page, 1);
> > +
> > + if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
> > + i++;
> > + goto check_partial;
> > + }
> > +out:
> > + up_read(&nm_i->nat_tree_lock);
> > + return 0;
> > +}
> > +
> > +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> > {
> > struct f2fs_nm_info *nm_i = NM_I(sbi);
> > struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> > @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> > if (!sync && !available_free_memory(sbi, FREE_NIDS))
> > return;
> >
> > + /* try to find free nids with nat_bits */
> > + if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
> > + return;
> > +
> > + /* find next valid candidate */
>
> This is just for mount case?
Yup, it reuses free nids in dirty NAT blocks, so that we can make them as full
NAT pages.
Thanks,
>
> > + if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> > + int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
> > + nm_i->nat_blocks, 0);
> > + if (idx >= nm_i->nat_blocks)
> > + set_sbi_flag(sbi, SBI_NEED_FSCK);
> > + else
> > + nid = idx * NAT_ENTRY_PER_BLOCK;
> > + }
> > +
> > /* readahead nat pages to be scanned */
> > ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
> > META_NAT, true);
> > @@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> > nm_i->ra_nid_pages, META_NAT, false);
> > }
> >
> > -void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> > +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> > {
> > mutex_lock(&NM_I(sbi)->build_lock);
> > - __build_free_nids(sbi, sync);
> > + __build_free_nids(sbi, sync, mount);
> > mutex_unlock(&NM_I(sbi)->build_lock);
> > }
> >
> > @@ -1941,7 +2012,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
> > spin_unlock(&nm_i->nid_list_lock);
> >
> > /* Let's scan nat pages and its caches to get free nids */
> > - build_free_nids(sbi, true);
> > + build_free_nids(sbi, true, false);
> > goto retry;
> > }
> >
> > @@ -2233,8 +2304,39 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes,
> > list_add_tail(&nes->set_list, head);
> > }
> >
> > +void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
> > + struct page *page)
> > +{
> > + struct f2fs_nm_info *nm_i = NM_I(sbi);
> > + unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
> > + struct f2fs_nat_block *nat_blk = page_address(page);
> > + int valid = 0;
> > + int i;
> > +
> > + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> > + return;
> > +
> > + for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
> > + if (start_nid == 0 && i == 0)
> > + valid++;
> > + if (nat_blk->entries[i].block_addr)
> > + valid++;
> > + }
> > + if (valid == 0) {
> > + test_and_set_bit_le(nat_index, nm_i->empty_nat_bits);
> > + test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
>
> set_bit_le/clear_bit_le
>
> > + return;
> > + }
> > +
> > + test_and_clear_bit_le(nat_index, nm_i->empty_nat_bits);
>
> ditto
>
> > + if (valid == NAT_ENTRY_PER_BLOCK)
> > + test_and_set_bit_le(nat_index, nm_i->full_nat_bits);
> > + else
> > + test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
>
> ditto
>
> > +}
> > +
> > static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> > - struct nat_entry_set *set)
> > + struct nat_entry_set *set, struct cp_control *cpc)
> > {
> > struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> > struct f2fs_journal *journal = curseg->journal;
> > @@ -2249,7 +2351,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> > * #1, flush nat entries to journal in current hot data summary block.
> > * #2, flush nat entries to nat page.
> > */
> > - if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
> > + if (cpc->reason == CP_UMOUNT ||
>
> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
>
> > + !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
> > to_journal = false;
> >
> > if (to_journal) {
> > @@ -2289,10 +2392,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> > }
> > }
> >
> > - if (to_journal)
> > + if (to_journal) {
> > up_write(&curseg->journal_rwsem);
> > - else
> > + } else {
> > + __update_nat_bits(sbi, start_nid, page);
> > f2fs_put_page(page, 1);
> > + }
> >
> > f2fs_bug_on(sbi, set->entry_cnt);
> >
> > @@ -2303,7 +2408,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> > /*
> > * This function is called during the checkpointing process.
> > */
> > -void flush_nat_entries(struct f2fs_sb_info *sbi)
> > +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> > {
> > struct f2fs_nm_info *nm_i = NM_I(sbi);
> > struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> > @@ -2324,7 +2429,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> > * entries, remove all entries from journal and merge them
> > * into nat entry set.
> > */
> > - if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> > + if (cpc->reason == CP_UMOUNT ||
>
> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
>
> > + !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> > remove_nats_in_journal(sbi);
> >
> > while ((found = __gang_lookup_nat_set(nm_i,
> > @@ -2338,27 +2444,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> >
> > /* flush dirty nats in nat entry set */
> > list_for_each_entry_safe(set, tmp, &sets, set_list)
> > - __flush_nat_entry_set(sbi, set);
> > + __flush_nat_entry_set(sbi, set, cpc);
> >
> > up_write(&nm_i->nat_tree_lock);
> >
> > f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
> > }
> >
> > +static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
> > +{
> > + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
> > + struct f2fs_nm_info *nm_i = NM_I(sbi);
> > + unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
> > + unsigned int i;
> > + __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
>
> __u64 cp_ver = cur_cp_version(ckpt);
>
> Thanks,
>
> > + size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
> > + __u64 crc = le32_to_cpu(*((__le32 *)
> > + ((unsigned char *)ckpt + crc_offset)));
> > + block_t nat_bits_addr;
> > +
> > + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> > + return 0;
> > +
> > + nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
> > + F2FS_BLKSIZE - 1);
> > + nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
> > + GFP_KERNEL);
> > + if (!nm_i->nat_bits)
> > + return -ENOMEM;
> > +
> > + nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
> > + nm_i->nat_bits_blocks;
> > + for (i = 0; i < nm_i->nat_bits_blocks; i++) {
> > + struct page *page = get_meta_page(sbi, nat_bits_addr++);
> > +
> > + memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
> > + page_address(page), F2FS_BLKSIZE);
> > + f2fs_put_page(page, 1);
> > + }
> > +
> > + cp_ver |= (crc << 32);
> > + if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
> > + disable_nat_bits(sbi, true);
> > + return 0;
> > + }
> > +
> > + nm_i->full_nat_bits = nm_i->nat_bits + 8;
> > + nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
> > +
> > + f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
> > + return 0;
> > +}
> > +
> > static int init_node_manager(struct f2fs_sb_info *sbi)
> > {
> > struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
> > struct f2fs_nm_info *nm_i = NM_I(sbi);
> > unsigned char *version_bitmap;
> > - unsigned int nat_segs, nat_blocks;
> > + unsigned int nat_segs;
> > + int err;
> >
> > nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
> >
> > /* segment_count_nat includes pair segment so divide to 2. */
> > nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
> > - nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
> > -
> > - nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
> > + nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
> > + nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
> >
> > /* not used nids: 0, node, meta, (and root counted as valid node) */
> > nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
> > @@ -2392,6 +2543,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
> > if (!nm_i->nat_bitmap)
> > return -ENOMEM;
> >
> > + err = __get_nat_bitmaps(sbi);
> > + if (err)
> > + return err;
> > +
> > #ifdef CONFIG_F2FS_CHECK_FS
> > nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
> > GFP_KERNEL);
> > @@ -2414,7 +2569,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
> > if (err)
> > return err;
> >
> > - build_free_nids(sbi, true);
> > + build_free_nids(sbi, true, true);
> > return 0;
> > }
> >
> > @@ -2473,6 +2628,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
> > up_write(&nm_i->nat_tree_lock);
> >
> > kfree(nm_i->nat_bitmap);
> > + kfree(nm_i->nat_bits);
> > #ifdef CONFIG_F2FS_CHECK_FS
> > kfree(nm_i->nat_bitmap_mir);
> > #endif
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index df2ff5cfe8f4..8e1ec248c653 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
> > if (!available_free_memory(sbi, FREE_NIDS))
> > try_to_free_nids(sbi, MAX_FREE_NIDS);
> > else
> > - build_free_nids(sbi, false);
> > + build_free_nids(sbi, false, false);
> >
> > if (!is_idle(sbi))
> > return;
> > diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
> > index f0748524ca8c..1c92ace2e8f8 100644
> > --- a/include/linux/f2fs_fs.h
> > +++ b/include/linux/f2fs_fs.h
> > @@ -114,6 +114,7 @@ struct f2fs_super_block {
> > /*
> > * For checkpoint
> > */
> > +#define CP_NAT_BITS_FLAG 0x00000080
> > #define CP_CRC_RECOVERY_FLAG 0x00000040
> > #define CP_FASTBOOT_FLAG 0x00000020
> > #define CP_FSCK_FLAG 0x00000010
> >
>
>
> ------------------------------------------------------------------------------
> Check out the vibrant tech community on one of the world's most
> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/3 v2] f2fs: add bitmaps for empty or full NAT blocks
2017-02-14 2:06 ` [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks Jaegeuk Kim
2017-02-23 11:41 ` [f2fs-dev] " Chao Yu
@ 2017-02-23 22:58 ` Jaegeuk Kim
2017-02-28 3:34 ` [f2fs-dev] " Chao Yu
1 sibling, 1 reply; 16+ messages in thread
From: Jaegeuk Kim @ 2017-02-23 22:58 UTC (permalink / raw)
To: linux-kernel, linux-fsdevel, linux-f2fs-devel
Change log from v1:
- modified some condition checks suggested by Chao
This patches adds bitmaps to represent empty or full NAT blocks containing
free nid entries.
If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll
use these bitmaps when building free nids. In order to avoid checkpointing
burden, up-to-date bitmaps will be flushed only during umount time. So,
normally we can get this gain, but when power-cut happens, we rely on fsck.f2fs
which recovers this bitmap again.
After this patch, we build free nids from nid #0 at mount time to make more
full NAT blocks, but in runtime, we check empty NAT blocks to load free nids
without loading any NAT pages from disk.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
fs/f2fs/checkpoint.c | 28 +++++++-
fs/f2fs/debug.c | 1 +
fs/f2fs/f2fs.h | 31 +++++++-
fs/f2fs/node.c | 188 +++++++++++++++++++++++++++++++++++++++++++-----
fs/f2fs/segment.c | 2 +-
include/linux/f2fs_fs.h | 1 +
6 files changed, 231 insertions(+), 20 deletions(-)
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 042f8d9afe44..cd7132121573 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1024,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
spin_lock(&sbi->cp_lock);
+ if (cpc->reason == CP_UMOUNT && ckpt->cp_pack_total_block_count >
+ sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
+ disable_nat_bits(sbi, false);
+
if (cpc->reason == CP_UMOUNT)
__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
else
@@ -1136,6 +1140,28 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
start_blk = __start_cp_next_addr(sbi);
+ /* write nat bits */
+ if (enabled_nat_bits(sbi, cpc)) {
+ __u64 cp_ver = cur_cp_version(ckpt);
+ unsigned int i;
+ block_t blk;
+
+ cp_ver |= ((__u64)crc32 << 32);
+ *(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);
+
+ blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
+ for (i = 0; i < nm_i->nat_bits_blocks; i++)
+ update_meta_page(sbi, nm_i->nat_bits +
+ (i << F2FS_BLKSIZE_BITS), blk + i);
+
+ /* Flush all the NAT BITS pages */
+ while (get_pages(sbi, F2FS_DIRTY_META)) {
+ sync_meta_pages(sbi, META, LONG_MAX);
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+ }
+ }
+
/* need to wait for end_io results */
wait_on_all_pages_writeback(sbi);
if (unlikely(f2fs_cp_error(sbi)))
@@ -1272,7 +1298,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
/* write cached NAT/SIT entries to NAT/SIT area */
- flush_nat_entries(sbi);
+ flush_nat_entries(sbi, cpc);
flush_sit_entries(sbi, cpc);
/* unlock all the fs_lock[] in do_checkpoint() */
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index de8da9fc5c99..015ad2b73a92 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -193,6 +193,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
/* build nm */
si->base_mem += sizeof(struct f2fs_nm_info);
si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
+ si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
get_cache:
si->cache_mem = 0;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e26cc6909a54..d1156cdd211e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -554,6 +554,7 @@ struct f2fs_nm_info {
struct list_head nat_entries; /* cached nat entry list (clean) */
unsigned int nat_cnt; /* the # of cached nat entries */
unsigned int dirty_nat_cnt; /* total num of nat entries in set */
+ unsigned int nat_blocks; /* # of nat blocks */
/* free node ids management */
struct radix_tree_root free_nid_root;/* root of the free_nid cache */
@@ -564,6 +565,11 @@ struct f2fs_nm_info {
/* for checkpoint */
char *nat_bitmap; /* NAT bitmap pointer */
+
+ unsigned int nat_bits_blocks; /* # of nat bits blocks */
+ unsigned char *nat_bits; /* NAT bits blocks */
+ unsigned char *full_nat_bits; /* full NAT pages */
+ unsigned char *empty_nat_bits; /* empty NAT pages */
#ifdef CONFIG_F2FS_CHECK_FS
char *nat_bitmap_mir; /* NAT bitmap mirror */
#endif
@@ -1171,6 +1177,27 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
spin_unlock(&sbi->cp_lock);
}
+static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
+{
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+
+ if (lock)
+ spin_lock(&sbi->cp_lock);
+ __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
+ kfree(NM_I(sbi)->nat_bits);
+ NM_I(sbi)->nat_bits = NULL;
+ if (lock)
+ spin_unlock(&sbi->cp_lock);
+}
+
+static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi,
+ struct cp_control *cpc)
+{
+ bool set = is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
+
+ return (cpc) ? (cpc->reason == CP_UMOUNT) && set : set;
+}
+
static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
{
down_read(&sbi->cp_rwsem);
@@ -2131,7 +2158,7 @@ void move_node_page(struct page *node_page, int gc_type);
int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
struct writeback_control *wbc, bool atomic);
int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc);
-void build_free_nids(struct f2fs_sb_info *sbi, bool sync);
+void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
@@ -2142,7 +2169,7 @@ int recover_xattr_data(struct inode *inode, struct page *page,
int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
int restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum);
-void flush_nat_entries(struct f2fs_sb_info *sbi);
+void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
int build_node_manager(struct f2fs_sb_info *sbi);
void destroy_node_manager(struct f2fs_sb_info *sbi);
int __init create_node_manager_caches(void);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 8ebc4c78e6a4..43d35ec11851 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -338,6 +338,9 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
set_nat_flag(e, IS_CHECKPOINTED, false);
__set_nat_cache_dirty(nm_i, e);
+ if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR)
+ clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits);
+
/* update fsync_mark if its inode nat entry is still alive */
if (ni->nid != ni->ino)
e = __lookup_nat_cache(nm_i, ni->ino);
@@ -1841,7 +1844,60 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
}
}
-static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
+static int scan_nat_bits(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct page *page;
+ unsigned int i = 0;
+ nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
+ nid_t nid;
+
+ if (!enabled_nat_bits(sbi, NULL))
+ return -EAGAIN;
+
+ down_read(&nm_i->nat_tree_lock);
+check_empty:
+ i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
+ if (i >= nm_i->nat_blocks) {
+ i = 0;
+ goto check_partial;
+ }
+
+ for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
+ nid++) {
+ if (unlikely(nid >= nm_i->max_nid))
+ break;
+ add_free_nid(sbi, nid, true);
+ }
+
+ if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
+ goto out;
+ i++;
+ goto check_empty;
+
+check_partial:
+ i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
+ if (i >= nm_i->nat_blocks) {
+ disable_nat_bits(sbi, true);
+ up_read(&nm_i->nat_tree_lock);
+ return -EINVAL;
+ }
+
+ nid = i * NAT_ENTRY_PER_BLOCK;
+ page = get_current_nat_page(sbi, nid);
+ scan_nat_page(sbi, page, nid);
+ f2fs_put_page(page, 1);
+
+ if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
+ i++;
+ goto check_partial;
+ }
+out:
+ up_read(&nm_i->nat_tree_lock);
+ return 0;
+}
+
+static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1856,6 +1912,21 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
if (!sync && !available_free_memory(sbi, FREE_NIDS))
return;
+ /* try to find free nids with nat_bits */
+ if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
+ return;
+
+ /* find next valid candidate */
+ if (enabled_nat_bits(sbi, NULL)) {
+ int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
+ nm_i->nat_blocks, 0);
+
+ if (idx >= nm_i->nat_blocks)
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ else
+ nid = idx * NAT_ENTRY_PER_BLOCK;
+ }
+
/* readahead nat pages to be scanned */
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
META_NAT, true);
@@ -1898,10 +1969,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
nm_i->ra_nid_pages, META_NAT, false);
}
-void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
+void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
mutex_lock(&NM_I(sbi)->build_lock);
- __build_free_nids(sbi, sync);
+ __build_free_nids(sbi, sync, mount);
mutex_unlock(&NM_I(sbi)->build_lock);
}
@@ -1943,7 +2014,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
spin_unlock(&nm_i->nid_list_lock);
/* Let's scan nat pages and its caches to get free nids */
- build_free_nids(sbi, true);
+ build_free_nids(sbi, true, false);
goto retry;
}
@@ -2235,8 +2306,39 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes,
list_add_tail(&nes->set_list, head);
}
+void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
+ struct page *page)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
+ struct f2fs_nat_block *nat_blk = page_address(page);
+ int valid = 0;
+ int i;
+
+ if (!enabled_nat_bits(sbi, NULL))
+ return;
+
+ for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
+ if (start_nid == 0 && i == 0)
+ valid++;
+ if (nat_blk->entries[i].block_addr)
+ valid++;
+ }
+ if (valid == 0) {
+ set_bit_le(nat_index, nm_i->empty_nat_bits);
+ clear_bit_le(nat_index, nm_i->full_nat_bits);
+ return;
+ }
+
+ clear_bit_le(nat_index, nm_i->empty_nat_bits);
+ if (valid == NAT_ENTRY_PER_BLOCK)
+ set_bit_le(nat_index, nm_i->full_nat_bits);
+ else
+ clear_bit_le(nat_index, nm_i->full_nat_bits);
+}
+
static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
- struct nat_entry_set *set)
+ struct nat_entry_set *set, struct cp_control *cpc)
{
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_journal *journal = curseg->journal;
@@ -2251,7 +2353,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
* #1, flush nat entries to journal in current hot data summary block.
* #2, flush nat entries to nat page.
*/
- if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
+ if (enabled_nat_bits(sbi, cpc) ||
+ !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
to_journal = false;
if (to_journal) {
@@ -2291,10 +2394,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
}
}
- if (to_journal)
+ if (to_journal) {
up_write(&curseg->journal_rwsem);
- else
+ } else {
+ __update_nat_bits(sbi, start_nid, page);
f2fs_put_page(page, 1);
+ }
f2fs_bug_on(sbi, set->entry_cnt);
@@ -2305,7 +2410,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
/*
* This function is called during the checkpointing process.
*/
-void flush_nat_entries(struct f2fs_sb_info *sbi)
+void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -2326,7 +2431,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
* entries, remove all entries from journal and merge them
* into nat entry set.
*/
- if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
+ if (cpc->reason == CP_UMOUNT ||
+ !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
remove_nats_in_journal(sbi);
while ((found = __gang_lookup_nat_set(nm_i,
@@ -2340,27 +2446,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
/* flush dirty nats in nat entry set */
list_for_each_entry_safe(set, tmp, &sets, set_list)
- __flush_nat_entry_set(sbi, set);
+ __flush_nat_entry_set(sbi, set, cpc);
up_write(&nm_i->nat_tree_lock);
f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
}
+static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
+ unsigned int i;
+ __u64 cp_ver = cur_cp_version(ckpt);
+ size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
+ __u64 crc = le32_to_cpu(*((__le32 *)
+ ((unsigned char *)ckpt + crc_offset)));
+ block_t nat_bits_addr;
+
+ if (!enabled_nat_bits(sbi, NULL))
+ return 0;
+
+ nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
+ F2FS_BLKSIZE - 1);
+ nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
+ GFP_KERNEL);
+ if (!nm_i->nat_bits)
+ return -ENOMEM;
+
+ nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
+ nm_i->nat_bits_blocks;
+ for (i = 0; i < nm_i->nat_bits_blocks; i++) {
+ struct page *page = get_meta_page(sbi, nat_bits_addr++);
+
+ memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
+ page_address(page), F2FS_BLKSIZE);
+ f2fs_put_page(page, 1);
+ }
+
+ cp_ver |= (crc << 32);
+ if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
+ disable_nat_bits(sbi, true);
+ return 0;
+ }
+
+ nm_i->full_nat_bits = nm_i->nat_bits + 8;
+ nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
+
+ f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
+ return 0;
+}
+
static int init_node_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
struct f2fs_nm_info *nm_i = NM_I(sbi);
unsigned char *version_bitmap;
- unsigned int nat_segs, nat_blocks;
+ unsigned int nat_segs;
+ int err;
nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
/* segment_count_nat includes pair segment so divide to 2. */
nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
- nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
-
- nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
+ nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
+ nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
/* not used nids: 0, node, meta, (and root counted as valid node) */
nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
@@ -2394,6 +2545,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
if (!nm_i->nat_bitmap)
return -ENOMEM;
+ err = __get_nat_bitmaps(sbi);
+ if (err)
+ return err;
+
#ifdef CONFIG_F2FS_CHECK_FS
nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
GFP_KERNEL);
@@ -2416,7 +2571,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
if (err)
return err;
- build_free_nids(sbi, true);
+ build_free_nids(sbi, true, true);
return 0;
}
@@ -2475,6 +2630,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
up_write(&nm_i->nat_tree_lock);
kfree(nm_i->nat_bitmap);
+ kfree(nm_i->nat_bits);
#ifdef CONFIG_F2FS_CHECK_FS
kfree(nm_i->nat_bitmap_mir);
#endif
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 3e95db5375ed..9eb6d89bf9e2 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
if (!available_free_memory(sbi, FREE_NIDS))
try_to_free_nids(sbi, MAX_FREE_NIDS);
else
- build_free_nids(sbi, false);
+ build_free_nids(sbi, false, false);
if (!is_idle(sbi))
return;
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index f0748524ca8c..1c92ace2e8f8 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -114,6 +114,7 @@ struct f2fs_super_block {
/*
* For checkpoint
*/
+#define CP_NAT_BITS_FLAG 0x00000080
#define CP_CRC_RECOVERY_FLAG 0x00000040
#define CP_FASTBOOT_FLAG 0x00000020
#define CP_FSCK_FLAG 0x00000010
--
2.11.0
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks
2017-02-23 22:54 ` Jaegeuk Kim
@ 2017-02-25 3:26 ` Chao Yu
2017-02-25 18:34 ` Jaegeuk Kim
0 siblings, 1 reply; 16+ messages in thread
From: Chao Yu @ 2017-02-25 3:26 UTC (permalink / raw)
To: Jaegeuk Kim; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel
On 2017/2/24 6:54, Jaegeuk Kim wrote:
> On 02/23, Chao Yu wrote:
>> On 2017/2/14 10:06, Jaegeuk Kim wrote:
>>> This patches adds bitmaps to represent empty or full NAT blocks containing
>>> free nid entries.
>>>
>>> If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll
>>> use these bitmaps when building free nids. In order to avoid checkpointing
>>> burden, up-to-date bitmaps will be flushed only during umount time. So,
>>> normally we can get this gain, but when power-cut happens, we rely on fsck.f2fs
>>> which recovers this bitmap again.
>>>
>>> After this patch, we build free nids from nid #0 at mount time to make more
>>> full NAT blocks, but in runtime, we check empty NAT blocks to load free nids
>>> without loading any NAT pages from disk.
>>>
>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>> ---
>>> fs/f2fs/checkpoint.c | 29 +++++++-
>>> fs/f2fs/debug.c | 1 +
>>> fs/f2fs/f2fs.h | 23 +++++-
>>> fs/f2fs/node.c | 188 +++++++++++++++++++++++++++++++++++++++++++-----
>>> fs/f2fs/segment.c | 2 +-
>>> include/linux/f2fs_fs.h | 1 +
>>> 6 files changed, 224 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>> index 042f8d9afe44..783c5c3f16a4 100644
>>> --- a/fs/f2fs/checkpoint.c
>>> +++ b/fs/f2fs/checkpoint.c
>>> @@ -1024,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>
>>> spin_lock(&sbi->cp_lock);
>>>
>>> + if (ckpt->cp_pack_total_block_count >
>>> + sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
>>> + disable_nat_bits(sbi, false);
>>
>> I think we need to drop nat full/empty bitmap only if there is no enough space
>> in CP area while doing umount, otherwise we can keep this in memory.
>
> Yup.
>
>>
>>> +
>>> if (cpc->reason == CP_UMOUNT)
>>> __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
>>> else
>>> @@ -1136,6 +1140,29 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>
>>> start_blk = __start_cp_next_addr(sbi);
>>>
>>> + /* write nat bits */
>
> ...
>
>>> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
>>> +{
>>> + struct f2fs_nm_info *nm_i = NM_I(sbi);
>>> + struct page *page;
>>> + unsigned int i = 0;
>>> + nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
>>> + nid_t nid;
>>> +
>>> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>> + return -EAGAIN;
>>> +
>>> + down_read(&nm_i->nat_tree_lock);
>>> +check_empty:
>>> + i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
>>> + if (i >= nm_i->nat_blocks) {
>>> + i = 0;
>>> + goto check_partial;
>>> + }
>>> +
>>> + for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
>>> + nid++) {
>>> + if (unlikely(nid >= nm_i->max_nid))
>>> + break;
>>> + add_free_nid(sbi, nid, true);
>>> + }
>>> +
>>> + if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
>>> + goto out;
>>> + i++;
>>> + goto check_empty;
>>> +
>>> +check_partial:
>>> + i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
>>> + if (i >= nm_i->nat_blocks) {
>>> + disable_nat_bits(sbi, true);
>>
>> Can this happen in real world? Should be a bug in somewhere?
>
> It happens, since current design handles full_nat_bits optionally in order
> to avoid scanning a whole NAT page to set it back as 1 from 0.
All 0 value in full_nat_bits means the NAT block has at least one free nid,
right? so if we traverse all such NAT blocks, and still haven't collect enough
*target* number of free nids, we don't have to disable nat bit cache, we can
just break out here.
Thanks,
>
>>
>>> + return -EINVAL;
>>> + }
>>> +
>>> + nid = i * NAT_ENTRY_PER_BLOCK;
>>> + page = get_current_nat_page(sbi, nid);
>>> + scan_nat_page(sbi, page, nid);
>>> + f2fs_put_page(page, 1);
>>> +
>>> + if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
>>> + i++;
>>> + goto check_partial;
>>> + }
>>> +out:
>>> + up_read(&nm_i->nat_tree_lock);
>>> + return 0;
>>> +}
>>> +
>>> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>>> {
>>> struct f2fs_nm_info *nm_i = NM_I(sbi);
>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>>> if (!sync && !available_free_memory(sbi, FREE_NIDS))
>>> return;
>>>
>>> + /* try to find free nids with nat_bits */
>>> + if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
>>> + return;
>>> +
>>> + /* find next valid candidate */
>>
>> This is just for mount case?
>
> Yup, it reuses free nids in dirty NAT blocks, so that we can make them as full
> NAT pages.
>
> Thanks,
>
>>
>>> + if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
>>> + int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
>>> + nm_i->nat_blocks, 0);
>>> + if (idx >= nm_i->nat_blocks)
>>> + set_sbi_flag(sbi, SBI_NEED_FSCK);
>>> + else
>>> + nid = idx * NAT_ENTRY_PER_BLOCK;
>>> + }
>>> +
>>> /* readahead nat pages to be scanned */
>>> ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
>>> META_NAT, true);
>>> @@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>>> nm_i->ra_nid_pages, META_NAT, false);
>>> }
>>>
>>> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>>> +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>>> {
>>> mutex_lock(&NM_I(sbi)->build_lock);
>>> - __build_free_nids(sbi, sync);
>>> + __build_free_nids(sbi, sync, mount);
>>> mutex_unlock(&NM_I(sbi)->build_lock);
>>> }
>>>
>>> @@ -1941,7 +2012,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
>>> spin_unlock(&nm_i->nid_list_lock);
>>>
>>> /* Let's scan nat pages and its caches to get free nids */
>>> - build_free_nids(sbi, true);
>>> + build_free_nids(sbi, true, false);
>>> goto retry;
>>> }
>>>
>>> @@ -2233,8 +2304,39 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes,
>>> list_add_tail(&nes->set_list, head);
>>> }
>>>
>>> +void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
>>> + struct page *page)
>>> +{
>>> + struct f2fs_nm_info *nm_i = NM_I(sbi);
>>> + unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
>>> + struct f2fs_nat_block *nat_blk = page_address(page);
>>> + int valid = 0;
>>> + int i;
>>> +
>>> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>> + return;
>>> +
>>> + for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
>>> + if (start_nid == 0 && i == 0)
>>> + valid++;
>>> + if (nat_blk->entries[i].block_addr)
>>> + valid++;
>>> + }
>>> + if (valid == 0) {
>>> + test_and_set_bit_le(nat_index, nm_i->empty_nat_bits);
>>> + test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
>>
>> set_bit_le/clear_bit_le
>>
>>> + return;
>>> + }
>>> +
>>> + test_and_clear_bit_le(nat_index, nm_i->empty_nat_bits);
>>
>> ditto
>>
>>> + if (valid == NAT_ENTRY_PER_BLOCK)
>>> + test_and_set_bit_le(nat_index, nm_i->full_nat_bits);
>>> + else
>>> + test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
>>
>> ditto
>>
>>> +}
>>> +
>>> static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>> - struct nat_entry_set *set)
>>> + struct nat_entry_set *set, struct cp_control *cpc)
>>> {
>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>> struct f2fs_journal *journal = curseg->journal;
>>> @@ -2249,7 +2351,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>> * #1, flush nat entries to journal in current hot data summary block.
>>> * #2, flush nat entries to nat page.
>>> */
>>> - if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
>>> + if (cpc->reason == CP_UMOUNT ||
>>
>> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
>>
>>> + !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
>>> to_journal = false;
>>>
>>> if (to_journal) {
>>> @@ -2289,10 +2392,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>> }
>>> }
>>>
>>> - if (to_journal)
>>> + if (to_journal) {
>>> up_write(&curseg->journal_rwsem);
>>> - else
>>> + } else {
>>> + __update_nat_bits(sbi, start_nid, page);
>>> f2fs_put_page(page, 1);
>>> + }
>>>
>>> f2fs_bug_on(sbi, set->entry_cnt);
>>>
>>> @@ -2303,7 +2408,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>> /*
>>> * This function is called during the checkpointing process.
>>> */
>>> -void flush_nat_entries(struct f2fs_sb_info *sbi)
>>> +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>> {
>>> struct f2fs_nm_info *nm_i = NM_I(sbi);
>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>> @@ -2324,7 +2429,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>>> * entries, remove all entries from journal and merge them
>>> * into nat entry set.
>>> */
>>> - if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
>>> + if (cpc->reason == CP_UMOUNT ||
>>
>> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
>>
>>> + !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
>>> remove_nats_in_journal(sbi);
>>>
>>> while ((found = __gang_lookup_nat_set(nm_i,
>>> @@ -2338,27 +2444,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>>>
>>> /* flush dirty nats in nat entry set */
>>> list_for_each_entry_safe(set, tmp, &sets, set_list)
>>> - __flush_nat_entry_set(sbi, set);
>>> + __flush_nat_entry_set(sbi, set, cpc);
>>>
>>> up_write(&nm_i->nat_tree_lock);
>>>
>>> f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
>>> }
>>>
>>> +static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
>>> +{
>>> + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
>>> + struct f2fs_nm_info *nm_i = NM_I(sbi);
>>> + unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
>>> + unsigned int i;
>>> + __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
>>
>> __u64 cp_ver = cur_cp_version(ckpt);
>>
>> Thanks,
>>
>>> + size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
>>> + __u64 crc = le32_to_cpu(*((__le32 *)
>>> + ((unsigned char *)ckpt + crc_offset)));
>>> + block_t nat_bits_addr;
>>> +
>>> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>> + return 0;
>>> +
>>> + nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
>>> + F2FS_BLKSIZE - 1);
>>> + nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
>>> + GFP_KERNEL);
>>> + if (!nm_i->nat_bits)
>>> + return -ENOMEM;
>>> +
>>> + nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
>>> + nm_i->nat_bits_blocks;
>>> + for (i = 0; i < nm_i->nat_bits_blocks; i++) {
>>> + struct page *page = get_meta_page(sbi, nat_bits_addr++);
>>> +
>>> + memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
>>> + page_address(page), F2FS_BLKSIZE);
>>> + f2fs_put_page(page, 1);
>>> + }
>>> +
>>> + cp_ver |= (crc << 32);
>>> + if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
>>> + disable_nat_bits(sbi, true);
>>> + return 0;
>>> + }
>>> +
>>> + nm_i->full_nat_bits = nm_i->nat_bits + 8;
>>> + nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
>>> +
>>> + f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
>>> + return 0;
>>> +}
>>> +
>>> static int init_node_manager(struct f2fs_sb_info *sbi)
>>> {
>>> struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
>>> struct f2fs_nm_info *nm_i = NM_I(sbi);
>>> unsigned char *version_bitmap;
>>> - unsigned int nat_segs, nat_blocks;
>>> + unsigned int nat_segs;
>>> + int err;
>>>
>>> nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
>>>
>>> /* segment_count_nat includes pair segment so divide to 2. */
>>> nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
>>> - nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
>>> -
>>> - nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
>>> + nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
>>> + nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
>>>
>>> /* not used nids: 0, node, meta, (and root counted as valid node) */
>>> nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
>>> @@ -2392,6 +2543,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
>>> if (!nm_i->nat_bitmap)
>>> return -ENOMEM;
>>>
>>> + err = __get_nat_bitmaps(sbi);
>>> + if (err)
>>> + return err;
>>> +
>>> #ifdef CONFIG_F2FS_CHECK_FS
>>> nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
>>> GFP_KERNEL);
>>> @@ -2414,7 +2569,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
>>> if (err)
>>> return err;
>>>
>>> - build_free_nids(sbi, true);
>>> + build_free_nids(sbi, true, true);
>>> return 0;
>>> }
>>>
>>> @@ -2473,6 +2628,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
>>> up_write(&nm_i->nat_tree_lock);
>>>
>>> kfree(nm_i->nat_bitmap);
>>> + kfree(nm_i->nat_bits);
>>> #ifdef CONFIG_F2FS_CHECK_FS
>>> kfree(nm_i->nat_bitmap_mir);
>>> #endif
>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>> index df2ff5cfe8f4..8e1ec248c653 100644
>>> --- a/fs/f2fs/segment.c
>>> +++ b/fs/f2fs/segment.c
>>> @@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
>>> if (!available_free_memory(sbi, FREE_NIDS))
>>> try_to_free_nids(sbi, MAX_FREE_NIDS);
>>> else
>>> - build_free_nids(sbi, false);
>>> + build_free_nids(sbi, false, false);
>>>
>>> if (!is_idle(sbi))
>>> return;
>>> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
>>> index f0748524ca8c..1c92ace2e8f8 100644
>>> --- a/include/linux/f2fs_fs.h
>>> +++ b/include/linux/f2fs_fs.h
>>> @@ -114,6 +114,7 @@ struct f2fs_super_block {
>>> /*
>>> * For checkpoint
>>> */
>>> +#define CP_NAT_BITS_FLAG 0x00000080
>>> #define CP_CRC_RECOVERY_FLAG 0x00000040
>>> #define CP_FASTBOOT_FLAG 0x00000020
>>> #define CP_FSCK_FLAG 0x00000010
>>>
>>
>>
>> ------------------------------------------------------------------------------
>> Check out the vibrant tech community on one of the world's most
>> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
>> _______________________________________________
>> Linux-f2fs-devel mailing list
>> Linux-f2fs-devel@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>
> .
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks
2017-02-25 3:26 ` Chao Yu
@ 2017-02-25 18:34 ` Jaegeuk Kim
2017-02-27 3:03 ` Chao Yu
0 siblings, 1 reply; 16+ messages in thread
From: Jaegeuk Kim @ 2017-02-25 18:34 UTC (permalink / raw)
To: Chao Yu; +Cc: linux-fsdevel, linux-kernel, linux-f2fs-devel
On 02/25, Chao Yu wrote:
> On 2017/2/24 6:54, Jaegeuk Kim wrote:
> > On 02/23, Chao Yu wrote:
> >> On 2017/2/14 10:06, Jaegeuk Kim wrote:
...
> >
> >>> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
> >>> +{
> >>> + struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>> + struct page *page;
> >>> + unsigned int i = 0;
> >>> + nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
> >>> + nid_t nid;
> >>> +
> >>> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> >>> + return -EAGAIN;
> >>> +
> >>> + down_read(&nm_i->nat_tree_lock);
> >>> +check_empty:
> >>> + i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
> >>> + if (i >= nm_i->nat_blocks) {
> >>> + i = 0;
> >>> + goto check_partial;
> >>> + }
> >>> +
> >>> + for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
> >>> + nid++) {
> >>> + if (unlikely(nid >= nm_i->max_nid))
> >>> + break;
> >>> + add_free_nid(sbi, nid, true);
> >>> + }
> >>> +
> >>> + if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
> >>> + goto out;
> >>> + i++;
> >>> + goto check_empty;
> >>> +
> >>> +check_partial:
> >>> + i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
> >>> + if (i >= nm_i->nat_blocks) {
> >>> + disable_nat_bits(sbi, true);
> >>
> >> Can this happen in real world? Should be a bug in somewhere?
> >
> > It happens, since current design handles full_nat_bits optionally in order
> > to avoid scanning a whole NAT page to set it back as 1 from 0.
>
> All 0 value in full_nat_bits means the NAT block has at least one free nid,
> right? so if we traverse all such NAT blocks, and still haven't collect enough
> *target* number of free nids, we don't have to disable nat bit cache, we can
> just break out here.
No, I'm seeing this case:
1. mount with full=0, empty=0, which indicates some free nids.
2. allocate all there-in free nids, but still full=0, empty=0.
3. allocate more free nids.
...
... checkpoint makes full=1, empty=0
If there is no checkpoint and it consumes all the free nids, we can see all 0
value in full_nat_bits which is quite impossible. In that case, I'd prefer
to stop nat_bits and give fsck.f2fs to revive it back.
Thanks,
>
> Thanks,
>
> >
> >>
> >>> + return -EINVAL;
> >>> + }
> >>> +
> >>> + nid = i * NAT_ENTRY_PER_BLOCK;
> >>> + page = get_current_nat_page(sbi, nid);
> >>> + scan_nat_page(sbi, page, nid);
> >>> + f2fs_put_page(page, 1);
> >>> +
> >>> + if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
> >>> + i++;
> >>> + goto check_partial;
> >>> + }
> >>> +out:
> >>> + up_read(&nm_i->nat_tree_lock);
> >>> + return 0;
> >>> +}
> >>> +
> >>> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> >>> {
> >>> struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> >>> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> >>> if (!sync && !available_free_memory(sbi, FREE_NIDS))
> >>> return;
> >>>
> >>> + /* try to find free nids with nat_bits */
> >>> + if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
> >>> + return;
> >>> +
> >>> + /* find next valid candidate */
> >>
> >> This is just for mount case?
> >
> > Yup, it reuses free nids in dirty NAT blocks, so that we can make them as full
> > NAT pages.
> >
> > Thanks,
> >
> >>
> >>> + if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> >>> + int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
> >>> + nm_i->nat_blocks, 0);
> >>> + if (idx >= nm_i->nat_blocks)
> >>> + set_sbi_flag(sbi, SBI_NEED_FSCK);
> >>> + else
> >>> + nid = idx * NAT_ENTRY_PER_BLOCK;
> >>> + }
> >>> +
> >>> /* readahead nat pages to be scanned */
> >>> ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
> >>> META_NAT, true);
> >>> @@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> >>> nm_i->ra_nid_pages, META_NAT, false);
> >>> }
> >>>
> >>> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> >>> +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> >>> {
> >>> mutex_lock(&NM_I(sbi)->build_lock);
> >>> - __build_free_nids(sbi, sync);
> >>> + __build_free_nids(sbi, sync, mount);
> >>> mutex_unlock(&NM_I(sbi)->build_lock);
> >>> }
> >>>
> >>> @@ -1941,7 +2012,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
> >>> spin_unlock(&nm_i->nid_list_lock);
> >>>
> >>> /* Let's scan nat pages and its caches to get free nids */
> >>> - build_free_nids(sbi, true);
> >>> + build_free_nids(sbi, true, false);
> >>> goto retry;
> >>> }
> >>>
> >>> @@ -2233,8 +2304,39 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes,
> >>> list_add_tail(&nes->set_list, head);
> >>> }
> >>>
> >>> +void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
> >>> + struct page *page)
> >>> +{
> >>> + struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>> + unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
> >>> + struct f2fs_nat_block *nat_blk = page_address(page);
> >>> + int valid = 0;
> >>> + int i;
> >>> +
> >>> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> >>> + return;
> >>> +
> >>> + for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
> >>> + if (start_nid == 0 && i == 0)
> >>> + valid++;
> >>> + if (nat_blk->entries[i].block_addr)
> >>> + valid++;
> >>> + }
> >>> + if (valid == 0) {
> >>> + test_and_set_bit_le(nat_index, nm_i->empty_nat_bits);
> >>> + test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
> >>
> >> set_bit_le/clear_bit_le
> >>
> >>> + return;
> >>> + }
> >>> +
> >>> + test_and_clear_bit_le(nat_index, nm_i->empty_nat_bits);
> >>
> >> ditto
> >>
> >>> + if (valid == NAT_ENTRY_PER_BLOCK)
> >>> + test_and_set_bit_le(nat_index, nm_i->full_nat_bits);
> >>> + else
> >>> + test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
> >>
> >> ditto
> >>
> >>> +}
> >>> +
> >>> static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >>> - struct nat_entry_set *set)
> >>> + struct nat_entry_set *set, struct cp_control *cpc)
> >>> {
> >>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> >>> struct f2fs_journal *journal = curseg->journal;
> >>> @@ -2249,7 +2351,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >>> * #1, flush nat entries to journal in current hot data summary block.
> >>> * #2, flush nat entries to nat page.
> >>> */
> >>> - if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
> >>> + if (cpc->reason == CP_UMOUNT ||
> >>
> >> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
> >>
> >>> + !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
> >>> to_journal = false;
> >>>
> >>> if (to_journal) {
> >>> @@ -2289,10 +2392,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >>> }
> >>> }
> >>>
> >>> - if (to_journal)
> >>> + if (to_journal) {
> >>> up_write(&curseg->journal_rwsem);
> >>> - else
> >>> + } else {
> >>> + __update_nat_bits(sbi, start_nid, page);
> >>> f2fs_put_page(page, 1);
> >>> + }
> >>>
> >>> f2fs_bug_on(sbi, set->entry_cnt);
> >>>
> >>> @@ -2303,7 +2408,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >>> /*
> >>> * This function is called during the checkpointing process.
> >>> */
> >>> -void flush_nat_entries(struct f2fs_sb_info *sbi)
> >>> +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >>> {
> >>> struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> >>> @@ -2324,7 +2429,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> >>> * entries, remove all entries from journal and merge them
> >>> * into nat entry set.
> >>> */
> >>> - if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> >>> + if (cpc->reason == CP_UMOUNT ||
> >>
> >> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
> >>
> >>> + !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> >>> remove_nats_in_journal(sbi);
> >>>
> >>> while ((found = __gang_lookup_nat_set(nm_i,
> >>> @@ -2338,27 +2444,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> >>>
> >>> /* flush dirty nats in nat entry set */
> >>> list_for_each_entry_safe(set, tmp, &sets, set_list)
> >>> - __flush_nat_entry_set(sbi, set);
> >>> + __flush_nat_entry_set(sbi, set, cpc);
> >>>
> >>> up_write(&nm_i->nat_tree_lock);
> >>>
> >>> f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
> >>> }
> >>>
> >>> +static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
> >>> +{
> >>> + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
> >>> + struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>> + unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
> >>> + unsigned int i;
> >>> + __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
> >>
> >> __u64 cp_ver = cur_cp_version(ckpt);
> >>
> >> Thanks,
> >>
> >>> + size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
> >>> + __u64 crc = le32_to_cpu(*((__le32 *)
> >>> + ((unsigned char *)ckpt + crc_offset)));
> >>> + block_t nat_bits_addr;
> >>> +
> >>> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> >>> + return 0;
> >>> +
> >>> + nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
> >>> + F2FS_BLKSIZE - 1);
> >>> + nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
> >>> + GFP_KERNEL);
> >>> + if (!nm_i->nat_bits)
> >>> + return -ENOMEM;
> >>> +
> >>> + nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
> >>> + nm_i->nat_bits_blocks;
> >>> + for (i = 0; i < nm_i->nat_bits_blocks; i++) {
> >>> + struct page *page = get_meta_page(sbi, nat_bits_addr++);
> >>> +
> >>> + memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
> >>> + page_address(page), F2FS_BLKSIZE);
> >>> + f2fs_put_page(page, 1);
> >>> + }
> >>> +
> >>> + cp_ver |= (crc << 32);
> >>> + if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
> >>> + disable_nat_bits(sbi, true);
> >>> + return 0;
> >>> + }
> >>> +
> >>> + nm_i->full_nat_bits = nm_i->nat_bits + 8;
> >>> + nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
> >>> +
> >>> + f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
> >>> + return 0;
> >>> +}
> >>> +
> >>> static int init_node_manager(struct f2fs_sb_info *sbi)
> >>> {
> >>> struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
> >>> struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>> unsigned char *version_bitmap;
> >>> - unsigned int nat_segs, nat_blocks;
> >>> + unsigned int nat_segs;
> >>> + int err;
> >>>
> >>> nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
> >>>
> >>> /* segment_count_nat includes pair segment so divide to 2. */
> >>> nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
> >>> - nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
> >>> -
> >>> - nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
> >>> + nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
> >>> + nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
> >>>
> >>> /* not used nids: 0, node, meta, (and root counted as valid node) */
> >>> nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
> >>> @@ -2392,6 +2543,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
> >>> if (!nm_i->nat_bitmap)
> >>> return -ENOMEM;
> >>>
> >>> + err = __get_nat_bitmaps(sbi);
> >>> + if (err)
> >>> + return err;
> >>> +
> >>> #ifdef CONFIG_F2FS_CHECK_FS
> >>> nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
> >>> GFP_KERNEL);
> >>> @@ -2414,7 +2569,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
> >>> if (err)
> >>> return err;
> >>>
> >>> - build_free_nids(sbi, true);
> >>> + build_free_nids(sbi, true, true);
> >>> return 0;
> >>> }
> >>>
> >>> @@ -2473,6 +2628,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
> >>> up_write(&nm_i->nat_tree_lock);
> >>>
> >>> kfree(nm_i->nat_bitmap);
> >>> + kfree(nm_i->nat_bits);
> >>> #ifdef CONFIG_F2FS_CHECK_FS
> >>> kfree(nm_i->nat_bitmap_mir);
> >>> #endif
> >>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>> index df2ff5cfe8f4..8e1ec248c653 100644
> >>> --- a/fs/f2fs/segment.c
> >>> +++ b/fs/f2fs/segment.c
> >>> @@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
> >>> if (!available_free_memory(sbi, FREE_NIDS))
> >>> try_to_free_nids(sbi, MAX_FREE_NIDS);
> >>> else
> >>> - build_free_nids(sbi, false);
> >>> + build_free_nids(sbi, false, false);
> >>>
> >>> if (!is_idle(sbi))
> >>> return;
> >>> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
> >>> index f0748524ca8c..1c92ace2e8f8 100644
> >>> --- a/include/linux/f2fs_fs.h
> >>> +++ b/include/linux/f2fs_fs.h
> >>> @@ -114,6 +114,7 @@ struct f2fs_super_block {
> >>> /*
> >>> * For checkpoint
> >>> */
> >>> +#define CP_NAT_BITS_FLAG 0x00000080
> >>> #define CP_CRC_RECOVERY_FLAG 0x00000040
> >>> #define CP_FASTBOOT_FLAG 0x00000020
> >>> #define CP_FSCK_FLAG 0x00000010
> >>>
> >>
> >>
> >> ------------------------------------------------------------------------------
> >> Check out the vibrant tech community on one of the world's most
> >> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> >> _______________________________________________
> >> Linux-f2fs-devel mailing list
> >> Linux-f2fs-devel@lists.sourceforge.net
> >> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> >
> > .
> >
>
>
> ------------------------------------------------------------------------------
> Check out the vibrant tech community on one of the world's most
> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks
2017-02-25 18:34 ` Jaegeuk Kim
@ 2017-02-27 3:03 ` Chao Yu
2017-02-27 22:19 ` Jaegeuk Kim
0 siblings, 1 reply; 16+ messages in thread
From: Chao Yu @ 2017-02-27 3:03 UTC (permalink / raw)
To: Jaegeuk Kim; +Cc: linux-fsdevel, linux-kernel, linux-f2fs-devel
On 2017/2/26 2:34, Jaegeuk Kim wrote:
> On 02/25, Chao Yu wrote:
>> On 2017/2/24 6:54, Jaegeuk Kim wrote:
>>> On 02/23, Chao Yu wrote:
>>>> On 2017/2/14 10:06, Jaegeuk Kim wrote:
> ...
>>>
>>>>> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
>>>>> +{
>>>>> + struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>> + struct page *page;
>>>>> + unsigned int i = 0;
>>>>> + nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
>>>>> + nid_t nid;
>>>>> +
>>>>> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>>>> + return -EAGAIN;
>>>>> +
>>>>> + down_read(&nm_i->nat_tree_lock);
>>>>> +check_empty:
>>>>> + i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
>>>>> + if (i >= nm_i->nat_blocks) {
>>>>> + i = 0;
>>>>> + goto check_partial;
>>>>> + }
>>>>> +
>>>>> + for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
>>>>> + nid++) {
>>>>> + if (unlikely(nid >= nm_i->max_nid))
>>>>> + break;
>>>>> + add_free_nid(sbi, nid, true);
>>>>> + }
>>>>> +
>>>>> + if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
>>>>> + goto out;
>>>>> + i++;
>>>>> + goto check_empty;
>>>>> +
>>>>> +check_partial:
>>>>> + i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
>>>>> + if (i >= nm_i->nat_blocks) {
>>>>> + disable_nat_bits(sbi, true);
>>>>
>>>> Can this happen in real world? Should be a bug in somewhere?
>>>
>>> It happens, since current design handles full_nat_bits optionally in order
>>> to avoid scanning a whole NAT page to set it back as 1 from 0.
>>
>> All 0 value in full_nat_bits means the NAT block has at least one free nid,
>> right? so if we traverse all such NAT blocks, and still haven't collect enough
>> *target* number of free nids, we don't have to disable nat bit cache, we can
>> just break out here.
>
> No, I'm seeing this case:
> 1. mount with full=0, empty=0, which indicates some free nids.
> 2. allocate all there-in free nids, but still full=0, empty=0.
> 3. allocate more free nids.
> ...
> ... checkpoint makes full=1, empty=0
>
> If there is no checkpoint and it consumes all the free nids, we can see all 0
> value in full_nat_bits which is quite impossible. In that case, I'd prefer
> to stop nat_bits and give fsck.f2fs to revive it back.
Yeah, I can understand that, but what I concern is when there is few free nids
(< target), we still try to load nids of 8 NAT blocks until ending the loop of
caching free nids, so it will be very easy to enter the case of disabling
nid_bits cache here, so how about doing more check?
if (i >= nm_i->nat_blocks &&
nm_i->nid_cnt[FREE_NID_LIST] != nm_i->available_nids)
disable_nat_bits
Thanks,
>
> Thanks,
>
>>
>> Thanks,
>>
>>>
>>>>
>>>>> + return -EINVAL;
>>>>> + }
>>>>> +
>>>>> + nid = i * NAT_ENTRY_PER_BLOCK;
>>>>> + page = get_current_nat_page(sbi, nid);
>>>>> + scan_nat_page(sbi, page, nid);
>>>>> + f2fs_put_page(page, 1);
>>>>> +
>>>>> + if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
>>>>> + i++;
>>>>> + goto check_partial;
>>>>> + }
>>>>> +out:
>>>>> + up_read(&nm_i->nat_tree_lock);
>>>>> + return 0;
>>>>> +}
>>>>> +
>>>>> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>>>>> {
>>>>> struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>>>> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>>>>> if (!sync && !available_free_memory(sbi, FREE_NIDS))
>>>>> return;
>>>>>
>>>>> + /* try to find free nids with nat_bits */
>>>>> + if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
>>>>> + return;
>>>>> +
>>>>> + /* find next valid candidate */
>>>>
>>>> This is just for mount case?
>>>
>>> Yup, it reuses free nids in dirty NAT blocks, so that we can make them as full
>>> NAT pages.
>>>
>>> Thanks,
>>>
>>>>
>>>>> + if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
>>>>> + int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
>>>>> + nm_i->nat_blocks, 0);
>>>>> + if (idx >= nm_i->nat_blocks)
>>>>> + set_sbi_flag(sbi, SBI_NEED_FSCK);
>>>>> + else
>>>>> + nid = idx * NAT_ENTRY_PER_BLOCK;
>>>>> + }
>>>>> +
>>>>> /* readahead nat pages to be scanned */
>>>>> ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
>>>>> META_NAT, true);
>>>>> @@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>>>>> nm_i->ra_nid_pages, META_NAT, false);
>>>>> }
>>>>>
>>>>> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>>>>> +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>>>>> {
>>>>> mutex_lock(&NM_I(sbi)->build_lock);
>>>>> - __build_free_nids(sbi, sync);
>>>>> + __build_free_nids(sbi, sync, mount);
>>>>> mutex_unlock(&NM_I(sbi)->build_lock);
>>>>> }
>>>>>
>>>>> @@ -1941,7 +2012,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
>>>>> spin_unlock(&nm_i->nid_list_lock);
>>>>>
>>>>> /* Let's scan nat pages and its caches to get free nids */
>>>>> - build_free_nids(sbi, true);
>>>>> + build_free_nids(sbi, true, false);
>>>>> goto retry;
>>>>> }
>>>>>
>>>>> @@ -2233,8 +2304,39 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes,
>>>>> list_add_tail(&nes->set_list, head);
>>>>> }
>>>>>
>>>>> +void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
>>>>> + struct page *page)
>>>>> +{
>>>>> + struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>> + unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
>>>>> + struct f2fs_nat_block *nat_blk = page_address(page);
>>>>> + int valid = 0;
>>>>> + int i;
>>>>> +
>>>>> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>>>> + return;
>>>>> +
>>>>> + for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
>>>>> + if (start_nid == 0 && i == 0)
>>>>> + valid++;
>>>>> + if (nat_blk->entries[i].block_addr)
>>>>> + valid++;
>>>>> + }
>>>>> + if (valid == 0) {
>>>>> + test_and_set_bit_le(nat_index, nm_i->empty_nat_bits);
>>>>> + test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
>>>>
>>>> set_bit_le/clear_bit_le
>>>>
>>>>> + return;
>>>>> + }
>>>>> +
>>>>> + test_and_clear_bit_le(nat_index, nm_i->empty_nat_bits);
>>>>
>>>> ditto
>>>>
>>>>> + if (valid == NAT_ENTRY_PER_BLOCK)
>>>>> + test_and_set_bit_le(nat_index, nm_i->full_nat_bits);
>>>>> + else
>>>>> + test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
>>>>
>>>> ditto
>>>>
>>>>> +}
>>>>> +
>>>>> static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>>>> - struct nat_entry_set *set)
>>>>> + struct nat_entry_set *set, struct cp_control *cpc)
>>>>> {
>>>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>>>> struct f2fs_journal *journal = curseg->journal;
>>>>> @@ -2249,7 +2351,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>>>> * #1, flush nat entries to journal in current hot data summary block.
>>>>> * #2, flush nat entries to nat page.
>>>>> */
>>>>> - if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
>>>>> + if (cpc->reason == CP_UMOUNT ||
>>>>
>>>> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
>>>>
>>>>> + !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
>>>>> to_journal = false;
>>>>>
>>>>> if (to_journal) {
>>>>> @@ -2289,10 +2392,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>>>> }
>>>>> }
>>>>>
>>>>> - if (to_journal)
>>>>> + if (to_journal) {
>>>>> up_write(&curseg->journal_rwsem);
>>>>> - else
>>>>> + } else {
>>>>> + __update_nat_bits(sbi, start_nid, page);
>>>>> f2fs_put_page(page, 1);
>>>>> + }
>>>>>
>>>>> f2fs_bug_on(sbi, set->entry_cnt);
>>>>>
>>>>> @@ -2303,7 +2408,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>>>> /*
>>>>> * This function is called during the checkpointing process.
>>>>> */
>>>>> -void flush_nat_entries(struct f2fs_sb_info *sbi)
>>>>> +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>>> {
>>>>> struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>>>> @@ -2324,7 +2429,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>>>>> * entries, remove all entries from journal and merge them
>>>>> * into nat entry set.
>>>>> */
>>>>> - if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
>>>>> + if (cpc->reason == CP_UMOUNT ||
>>>>
>>>> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
>>>>
>>>>> + !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
>>>>> remove_nats_in_journal(sbi);
>>>>>
>>>>> while ((found = __gang_lookup_nat_set(nm_i,
>>>>> @@ -2338,27 +2444,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>>>>>
>>>>> /* flush dirty nats in nat entry set */
>>>>> list_for_each_entry_safe(set, tmp, &sets, set_list)
>>>>> - __flush_nat_entry_set(sbi, set);
>>>>> + __flush_nat_entry_set(sbi, set, cpc);
>>>>>
>>>>> up_write(&nm_i->nat_tree_lock);
>>>>>
>>>>> f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
>>>>> }
>>>>>
>>>>> +static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
>>>>> +{
>>>>> + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
>>>>> + struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>> + unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
>>>>> + unsigned int i;
>>>>> + __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
>>>>
>>>> __u64 cp_ver = cur_cp_version(ckpt);
>>>>
>>>> Thanks,
>>>>
>>>>> + size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
>>>>> + __u64 crc = le32_to_cpu(*((__le32 *)
>>>>> + ((unsigned char *)ckpt + crc_offset)));
>>>>> + block_t nat_bits_addr;
>>>>> +
>>>>> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>>>> + return 0;
>>>>> +
>>>>> + nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
>>>>> + F2FS_BLKSIZE - 1);
>>>>> + nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
>>>>> + GFP_KERNEL);
>>>>> + if (!nm_i->nat_bits)
>>>>> + return -ENOMEM;
>>>>> +
>>>>> + nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
>>>>> + nm_i->nat_bits_blocks;
>>>>> + for (i = 0; i < nm_i->nat_bits_blocks; i++) {
>>>>> + struct page *page = get_meta_page(sbi, nat_bits_addr++);
>>>>> +
>>>>> + memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
>>>>> + page_address(page), F2FS_BLKSIZE);
>>>>> + f2fs_put_page(page, 1);
>>>>> + }
>>>>> +
>>>>> + cp_ver |= (crc << 32);
>>>>> + if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
>>>>> + disable_nat_bits(sbi, true);
>>>>> + return 0;
>>>>> + }
>>>>> +
>>>>> + nm_i->full_nat_bits = nm_i->nat_bits + 8;
>>>>> + nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
>>>>> +
>>>>> + f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
>>>>> + return 0;
>>>>> +}
>>>>> +
>>>>> static int init_node_manager(struct f2fs_sb_info *sbi)
>>>>> {
>>>>> struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
>>>>> struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>> unsigned char *version_bitmap;
>>>>> - unsigned int nat_segs, nat_blocks;
>>>>> + unsigned int nat_segs;
>>>>> + int err;
>>>>>
>>>>> nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
>>>>>
>>>>> /* segment_count_nat includes pair segment so divide to 2. */
>>>>> nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
>>>>> - nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
>>>>> -
>>>>> - nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
>>>>> + nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
>>>>> + nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
>>>>>
>>>>> /* not used nids: 0, node, meta, (and root counted as valid node) */
>>>>> nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
>>>>> @@ -2392,6 +2543,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
>>>>> if (!nm_i->nat_bitmap)
>>>>> return -ENOMEM;
>>>>>
>>>>> + err = __get_nat_bitmaps(sbi);
>>>>> + if (err)
>>>>> + return err;
>>>>> +
>>>>> #ifdef CONFIG_F2FS_CHECK_FS
>>>>> nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
>>>>> GFP_KERNEL);
>>>>> @@ -2414,7 +2569,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
>>>>> if (err)
>>>>> return err;
>>>>>
>>>>> - build_free_nids(sbi, true);
>>>>> + build_free_nids(sbi, true, true);
>>>>> return 0;
>>>>> }
>>>>>
>>>>> @@ -2473,6 +2628,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
>>>>> up_write(&nm_i->nat_tree_lock);
>>>>>
>>>>> kfree(nm_i->nat_bitmap);
>>>>> + kfree(nm_i->nat_bits);
>>>>> #ifdef CONFIG_F2FS_CHECK_FS
>>>>> kfree(nm_i->nat_bitmap_mir);
>>>>> #endif
>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>> index df2ff5cfe8f4..8e1ec248c653 100644
>>>>> --- a/fs/f2fs/segment.c
>>>>> +++ b/fs/f2fs/segment.c
>>>>> @@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
>>>>> if (!available_free_memory(sbi, FREE_NIDS))
>>>>> try_to_free_nids(sbi, MAX_FREE_NIDS);
>>>>> else
>>>>> - build_free_nids(sbi, false);
>>>>> + build_free_nids(sbi, false, false);
>>>>>
>>>>> if (!is_idle(sbi))
>>>>> return;
>>>>> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
>>>>> index f0748524ca8c..1c92ace2e8f8 100644
>>>>> --- a/include/linux/f2fs_fs.h
>>>>> +++ b/include/linux/f2fs_fs.h
>>>>> @@ -114,6 +114,7 @@ struct f2fs_super_block {
>>>>> /*
>>>>> * For checkpoint
>>>>> */
>>>>> +#define CP_NAT_BITS_FLAG 0x00000080
>>>>> #define CP_CRC_RECOVERY_FLAG 0x00000040
>>>>> #define CP_FASTBOOT_FLAG 0x00000020
>>>>> #define CP_FSCK_FLAG 0x00000010
>>>>>
>>>>
>>>>
>>>> ------------------------------------------------------------------------------
>>>> Check out the vibrant tech community on one of the world's most
>>>> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
>>>> _______________________________________________
>>>> Linux-f2fs-devel mailing list
>>>> Linux-f2fs-devel@lists.sourceforge.net
>>>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>>>
>>> .
>>>
>>
>>
>> ------------------------------------------------------------------------------
>> Check out the vibrant tech community on one of the world's most
>> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
>> _______________________________________________
>> Linux-f2fs-devel mailing list
>> Linux-f2fs-devel@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>
> .
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks
2017-02-27 3:03 ` Chao Yu
@ 2017-02-27 22:19 ` Jaegeuk Kim
2017-02-28 10:38 ` Chao Yu
0 siblings, 1 reply; 16+ messages in thread
From: Jaegeuk Kim @ 2017-02-27 22:19 UTC (permalink / raw)
To: Chao Yu; +Cc: linux-fsdevel, linux-kernel, linux-f2fs-devel
On 02/27, Chao Yu wrote:
> On 2017/2/26 2:34, Jaegeuk Kim wrote:
> > On 02/25, Chao Yu wrote:
> >> On 2017/2/24 6:54, Jaegeuk Kim wrote:
> >>> On 02/23, Chao Yu wrote:
> >>>> On 2017/2/14 10:06, Jaegeuk Kim wrote:
> > ...
> >>>
> >>>>> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
> >>>>> +{
> >>>>> + struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>>> + struct page *page;
> >>>>> + unsigned int i = 0;
> >>>>> + nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
> >>>>> + nid_t nid;
> >>>>> +
> >>>>> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> >>>>> + return -EAGAIN;
> >>>>> +
> >>>>> + down_read(&nm_i->nat_tree_lock);
> >>>>> +check_empty:
> >>>>> + i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
> >>>>> + if (i >= nm_i->nat_blocks) {
> >>>>> + i = 0;
> >>>>> + goto check_partial;
> >>>>> + }
> >>>>> +
> >>>>> + for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
> >>>>> + nid++) {
> >>>>> + if (unlikely(nid >= nm_i->max_nid))
> >>>>> + break;
> >>>>> + add_free_nid(sbi, nid, true);
> >>>>> + }
> >>>>> +
> >>>>> + if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
> >>>>> + goto out;
> >>>>> + i++;
> >>>>> + goto check_empty;
> >>>>> +
> >>>>> +check_partial:
> >>>>> + i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
> >>>>> + if (i >= nm_i->nat_blocks) {
> >>>>> + disable_nat_bits(sbi, true);
> >>>>
> >>>> Can this happen in real world? Should be a bug in somewhere?
> >>>
> >>> It happens, since current design handles full_nat_bits optionally in order
> >>> to avoid scanning a whole NAT page to set it back as 1 from 0.
> >>
> >> All 0 value in full_nat_bits means the NAT block has at least one free nid,
> >> right? so if we traverse all such NAT blocks, and still haven't collect enough
> >> *target* number of free nids, we don't have to disable nat bit cache, we can
> >> just break out here.
> >
> > No, I'm seeing this case:
> > 1. mount with full=0, empty=0, which indicates some free nids.
> > 2. allocate all there-in free nids, but still full=0, empty=0.
> > 3. allocate more free nids.
> > ...
> > ... checkpoint makes full=1, empty=0
> >
> > If there is no checkpoint and it consumes all the free nids, we can see all 0
> > value in full_nat_bits which is quite impossible. In that case, I'd prefer
> > to stop nat_bits and give fsck.f2fs to revive it back.
>
> Yeah, I can understand that, but what I concern is when there is few free nids
> (< target), we still try to load nids of 8 NAT blocks until ending the loop of
> caching free nids, so it will be very easy to enter the case of disabling
> nid_bits cache here, so how about doing more check?
>
> if (i >= nm_i->nat_blocks &&
This indicates full_nat_bits consists of all zeros, which means, whatever in
the next time, we need to return -EINVAL. If we do not disable it here, there is
no way to turn any bit into zero.
BTW, I'm trying to freeze all the patches for a pull request, so let me have
another patch for any pending issues which are not urgent for now as I think.
Thanks,
> nm_i->nid_cnt[FREE_NID_LIST] != nm_i->available_nids)
> disable_nat_bits
>
> Thanks,
>
> >
> > Thanks,
> >
> >>
> >> Thanks,
> >>
> >>>
> >>>>
> >>>>> + return -EINVAL;
> >>>>> + }
> >>>>> +
> >>>>> + nid = i * NAT_ENTRY_PER_BLOCK;
> >>>>> + page = get_current_nat_page(sbi, nid);
> >>>>> + scan_nat_page(sbi, page, nid);
> >>>>> + f2fs_put_page(page, 1);
> >>>>> +
> >>>>> + if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
> >>>>> + i++;
> >>>>> + goto check_partial;
> >>>>> + }
> >>>>> +out:
> >>>>> + up_read(&nm_i->nat_tree_lock);
> >>>>> + return 0;
> >>>>> +}
> >>>>> +
> >>>>> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> >>>>> {
> >>>>> struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> >>>>> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> >>>>> if (!sync && !available_free_memory(sbi, FREE_NIDS))
> >>>>> return;
> >>>>>
> >>>>> + /* try to find free nids with nat_bits */
> >>>>> + if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
> >>>>> + return;
> >>>>> +
> >>>>> + /* find next valid candidate */
> >>>>
> >>>> This is just for mount case?
> >>>
> >>> Yup, it reuses free nids in dirty NAT blocks, so that we can make them as full
> >>> NAT pages.
> >>>
> >>> Thanks,
> >>>
> >>>>
> >>>>> + if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> >>>>> + int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
> >>>>> + nm_i->nat_blocks, 0);
> >>>>> + if (idx >= nm_i->nat_blocks)
> >>>>> + set_sbi_flag(sbi, SBI_NEED_FSCK);
> >>>>> + else
> >>>>> + nid = idx * NAT_ENTRY_PER_BLOCK;
> >>>>> + }
> >>>>> +
> >>>>> /* readahead nat pages to be scanned */
> >>>>> ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
> >>>>> META_NAT, true);
> >>>>> @@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> >>>>> nm_i->ra_nid_pages, META_NAT, false);
> >>>>> }
> >>>>>
> >>>>> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> >>>>> +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> >>>>> {
> >>>>> mutex_lock(&NM_I(sbi)->build_lock);
> >>>>> - __build_free_nids(sbi, sync);
> >>>>> + __build_free_nids(sbi, sync, mount);
> >>>>> mutex_unlock(&NM_I(sbi)->build_lock);
> >>>>> }
> >>>>>
> >>>>> @@ -1941,7 +2012,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
> >>>>> spin_unlock(&nm_i->nid_list_lock);
> >>>>>
> >>>>> /* Let's scan nat pages and its caches to get free nids */
> >>>>> - build_free_nids(sbi, true);
> >>>>> + build_free_nids(sbi, true, false);
> >>>>> goto retry;
> >>>>> }
> >>>>>
> >>>>> @@ -2233,8 +2304,39 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes,
> >>>>> list_add_tail(&nes->set_list, head);
> >>>>> }
> >>>>>
> >>>>> +void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
> >>>>> + struct page *page)
> >>>>> +{
> >>>>> + struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>>> + unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
> >>>>> + struct f2fs_nat_block *nat_blk = page_address(page);
> >>>>> + int valid = 0;
> >>>>> + int i;
> >>>>> +
> >>>>> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> >>>>> + return;
> >>>>> +
> >>>>> + for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
> >>>>> + if (start_nid == 0 && i == 0)
> >>>>> + valid++;
> >>>>> + if (nat_blk->entries[i].block_addr)
> >>>>> + valid++;
> >>>>> + }
> >>>>> + if (valid == 0) {
> >>>>> + test_and_set_bit_le(nat_index, nm_i->empty_nat_bits);
> >>>>> + test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
> >>>>
> >>>> set_bit_le/clear_bit_le
> >>>>
> >>>>> + return;
> >>>>> + }
> >>>>> +
> >>>>> + test_and_clear_bit_le(nat_index, nm_i->empty_nat_bits);
> >>>>
> >>>> ditto
> >>>>
> >>>>> + if (valid == NAT_ENTRY_PER_BLOCK)
> >>>>> + test_and_set_bit_le(nat_index, nm_i->full_nat_bits);
> >>>>> + else
> >>>>> + test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
> >>>>
> >>>> ditto
> >>>>
> >>>>> +}
> >>>>> +
> >>>>> static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >>>>> - struct nat_entry_set *set)
> >>>>> + struct nat_entry_set *set, struct cp_control *cpc)
> >>>>> {
> >>>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> >>>>> struct f2fs_journal *journal = curseg->journal;
> >>>>> @@ -2249,7 +2351,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >>>>> * #1, flush nat entries to journal in current hot data summary block.
> >>>>> * #2, flush nat entries to nat page.
> >>>>> */
> >>>>> - if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
> >>>>> + if (cpc->reason == CP_UMOUNT ||
> >>>>
> >>>> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
> >>>>
> >>>>> + !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
> >>>>> to_journal = false;
> >>>>>
> >>>>> if (to_journal) {
> >>>>> @@ -2289,10 +2392,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >>>>> }
> >>>>> }
> >>>>>
> >>>>> - if (to_journal)
> >>>>> + if (to_journal) {
> >>>>> up_write(&curseg->journal_rwsem);
> >>>>> - else
> >>>>> + } else {
> >>>>> + __update_nat_bits(sbi, start_nid, page);
> >>>>> f2fs_put_page(page, 1);
> >>>>> + }
> >>>>>
> >>>>> f2fs_bug_on(sbi, set->entry_cnt);
> >>>>>
> >>>>> @@ -2303,7 +2408,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >>>>> /*
> >>>>> * This function is called during the checkpointing process.
> >>>>> */
> >>>>> -void flush_nat_entries(struct f2fs_sb_info *sbi)
> >>>>> +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >>>>> {
> >>>>> struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> >>>>> @@ -2324,7 +2429,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> >>>>> * entries, remove all entries from journal and merge them
> >>>>> * into nat entry set.
> >>>>> */
> >>>>> - if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> >>>>> + if (cpc->reason == CP_UMOUNT ||
> >>>>
> >>>> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
> >>>>
> >>>>> + !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> >>>>> remove_nats_in_journal(sbi);
> >>>>>
> >>>>> while ((found = __gang_lookup_nat_set(nm_i,
> >>>>> @@ -2338,27 +2444,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> >>>>>
> >>>>> /* flush dirty nats in nat entry set */
> >>>>> list_for_each_entry_safe(set, tmp, &sets, set_list)
> >>>>> - __flush_nat_entry_set(sbi, set);
> >>>>> + __flush_nat_entry_set(sbi, set, cpc);
> >>>>>
> >>>>> up_write(&nm_i->nat_tree_lock);
> >>>>>
> >>>>> f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
> >>>>> }
> >>>>>
> >>>>> +static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
> >>>>> +{
> >>>>> + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
> >>>>> + struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>>> + unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
> >>>>> + unsigned int i;
> >>>>> + __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
> >>>>
> >>>> __u64 cp_ver = cur_cp_version(ckpt);
> >>>>
> >>>> Thanks,
> >>>>
> >>>>> + size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
> >>>>> + __u64 crc = le32_to_cpu(*((__le32 *)
> >>>>> + ((unsigned char *)ckpt + crc_offset)));
> >>>>> + block_t nat_bits_addr;
> >>>>> +
> >>>>> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> >>>>> + return 0;
> >>>>> +
> >>>>> + nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
> >>>>> + F2FS_BLKSIZE - 1);
> >>>>> + nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
> >>>>> + GFP_KERNEL);
> >>>>> + if (!nm_i->nat_bits)
> >>>>> + return -ENOMEM;
> >>>>> +
> >>>>> + nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
> >>>>> + nm_i->nat_bits_blocks;
> >>>>> + for (i = 0; i < nm_i->nat_bits_blocks; i++) {
> >>>>> + struct page *page = get_meta_page(sbi, nat_bits_addr++);
> >>>>> +
> >>>>> + memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
> >>>>> + page_address(page), F2FS_BLKSIZE);
> >>>>> + f2fs_put_page(page, 1);
> >>>>> + }
> >>>>> +
> >>>>> + cp_ver |= (crc << 32);
> >>>>> + if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
> >>>>> + disable_nat_bits(sbi, true);
> >>>>> + return 0;
> >>>>> + }
> >>>>> +
> >>>>> + nm_i->full_nat_bits = nm_i->nat_bits + 8;
> >>>>> + nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
> >>>>> +
> >>>>> + f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
> >>>>> + return 0;
> >>>>> +}
> >>>>> +
> >>>>> static int init_node_manager(struct f2fs_sb_info *sbi)
> >>>>> {
> >>>>> struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
> >>>>> struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>>> unsigned char *version_bitmap;
> >>>>> - unsigned int nat_segs, nat_blocks;
> >>>>> + unsigned int nat_segs;
> >>>>> + int err;
> >>>>>
> >>>>> nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
> >>>>>
> >>>>> /* segment_count_nat includes pair segment so divide to 2. */
> >>>>> nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
> >>>>> - nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
> >>>>> -
> >>>>> - nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
> >>>>> + nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
> >>>>> + nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
> >>>>>
> >>>>> /* not used nids: 0, node, meta, (and root counted as valid node) */
> >>>>> nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
> >>>>> @@ -2392,6 +2543,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
> >>>>> if (!nm_i->nat_bitmap)
> >>>>> return -ENOMEM;
> >>>>>
> >>>>> + err = __get_nat_bitmaps(sbi);
> >>>>> + if (err)
> >>>>> + return err;
> >>>>> +
> >>>>> #ifdef CONFIG_F2FS_CHECK_FS
> >>>>> nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
> >>>>> GFP_KERNEL);
> >>>>> @@ -2414,7 +2569,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
> >>>>> if (err)
> >>>>> return err;
> >>>>>
> >>>>> - build_free_nids(sbi, true);
> >>>>> + build_free_nids(sbi, true, true);
> >>>>> return 0;
> >>>>> }
> >>>>>
> >>>>> @@ -2473,6 +2628,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
> >>>>> up_write(&nm_i->nat_tree_lock);
> >>>>>
> >>>>> kfree(nm_i->nat_bitmap);
> >>>>> + kfree(nm_i->nat_bits);
> >>>>> #ifdef CONFIG_F2FS_CHECK_FS
> >>>>> kfree(nm_i->nat_bitmap_mir);
> >>>>> #endif
> >>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>>>> index df2ff5cfe8f4..8e1ec248c653 100644
> >>>>> --- a/fs/f2fs/segment.c
> >>>>> +++ b/fs/f2fs/segment.c
> >>>>> @@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
> >>>>> if (!available_free_memory(sbi, FREE_NIDS))
> >>>>> try_to_free_nids(sbi, MAX_FREE_NIDS);
> >>>>> else
> >>>>> - build_free_nids(sbi, false);
> >>>>> + build_free_nids(sbi, false, false);
> >>>>>
> >>>>> if (!is_idle(sbi))
> >>>>> return;
> >>>>> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
> >>>>> index f0748524ca8c..1c92ace2e8f8 100644
> >>>>> --- a/include/linux/f2fs_fs.h
> >>>>> +++ b/include/linux/f2fs_fs.h
> >>>>> @@ -114,6 +114,7 @@ struct f2fs_super_block {
> >>>>> /*
> >>>>> * For checkpoint
> >>>>> */
> >>>>> +#define CP_NAT_BITS_FLAG 0x00000080
> >>>>> #define CP_CRC_RECOVERY_FLAG 0x00000040
> >>>>> #define CP_FASTBOOT_FLAG 0x00000020
> >>>>> #define CP_FSCK_FLAG 0x00000010
> >>>>>
> >>>>
> >>>>
> >>>> ------------------------------------------------------------------------------
> >>>> Check out the vibrant tech community on one of the world's most
> >>>> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> >>>> _______________________________________________
> >>>> Linux-f2fs-devel mailing list
> >>>> Linux-f2fs-devel@lists.sourceforge.net
> >>>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> >>>
> >>> .
> >>>
> >>
> >>
> >> ------------------------------------------------------------------------------
> >> Check out the vibrant tech community on one of the world's most
> >> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> >> _______________________________________________
> >> Linux-f2fs-devel mailing list
> >> Linux-f2fs-devel@lists.sourceforge.net
> >> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> >
> > .
> >
>
>
> ------------------------------------------------------------------------------
> Check out the vibrant tech community on one of the world's most
> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH 2/3 v2] f2fs: add bitmaps for empty or full NAT blocks
2017-02-23 22:58 ` [PATCH 2/3 v2] " Jaegeuk Kim
@ 2017-02-28 3:34 ` Chao Yu
2017-02-28 5:33 ` Jaegeuk Kim
0 siblings, 1 reply; 16+ messages in thread
From: Chao Yu @ 2017-02-28 3:34 UTC (permalink / raw)
To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel
On 2017/2/24 6:58, Jaegeuk Kim wrote:
> @@ -2326,7 +2431,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> * entries, remove all entries from journal and merge them
> * into nat entry set.
> */
> - if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> + if (cpc->reason == CP_UMOUNT ||
enabled_nat_bits(sbi, cpc)
Thanks,
> + !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> remove_nats_in_journal(sbi);
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH 2/3 v2] f2fs: add bitmaps for empty or full NAT blocks
2017-02-28 3:34 ` [f2fs-dev] " Chao Yu
@ 2017-02-28 5:33 ` Jaegeuk Kim
2017-02-28 10:52 ` Chao Yu
0 siblings, 1 reply; 16+ messages in thread
From: Jaegeuk Kim @ 2017-02-28 5:33 UTC (permalink / raw)
To: Chao Yu; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel
On 02/28, Chao Yu wrote:
> On 2017/2/24 6:58, Jaegeuk Kim wrote:
> > @@ -2326,7 +2431,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> > * entries, remove all entries from journal and merge them
> > * into nat entry set.
> > */
> > - if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> > + if (cpc->reason == CP_UMOUNT ||
>
> enabled_nat_bits(sbi, cpc)
Thanks.
I added another patch to fix this.
>From b15f7112c3daced4c7f70c4d5fcba9013743cb6b Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Mon, 27 Feb 2017 21:28:53 -0800
Subject: [PATCH] f2fs: avoid to flush nat journal entries
This patch adds a missing condition which flushes nat journal entries
unnecessarily introduced by:
f2fs: add bitmaps for empty or full NAT blocks
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
fs/f2fs/node.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b3aead4c5a0f..94967171dee8 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -2513,7 +2513,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
* entries, remove all entries from journal and merge them
* into nat entry set.
*/
- if (cpc->reason == CP_UMOUNT ||
+ if (enabled_nat_bits(sbi, cpc) ||
!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
remove_nats_in_journal(sbi);
--
2.11.0
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks
2017-02-27 22:19 ` Jaegeuk Kim
@ 2017-02-28 10:38 ` Chao Yu
0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2017-02-28 10:38 UTC (permalink / raw)
To: Jaegeuk Kim; +Cc: linux-fsdevel, linux-kernel, linux-f2fs-devel
On 2017/2/28 6:19, Jaegeuk Kim wrote:
> On 02/27, Chao Yu wrote:
>> On 2017/2/26 2:34, Jaegeuk Kim wrote:
>>> On 02/25, Chao Yu wrote:
>>>> On 2017/2/24 6:54, Jaegeuk Kim wrote:
>>>>> On 02/23, Chao Yu wrote:
>>>>>> On 2017/2/14 10:06, Jaegeuk Kim wrote:
>>> ...
>>>>>
>>>>>>> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
>>>>>>> +{
>>>>>>> + struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>>>> + struct page *page;
>>>>>>> + unsigned int i = 0;
>>>>>>> + nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
>>>>>>> + nid_t nid;
>>>>>>> +
>>>>>>> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>>>>>> + return -EAGAIN;
>>>>>>> +
>>>>>>> + down_read(&nm_i->nat_tree_lock);
>>>>>>> +check_empty:
>>>>>>> + i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
>>>>>>> + if (i >= nm_i->nat_blocks) {
>>>>>>> + i = 0;
>>>>>>> + goto check_partial;
>>>>>>> + }
>>>>>>> +
>>>>>>> + for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
>>>>>>> + nid++) {
>>>>>>> + if (unlikely(nid >= nm_i->max_nid))
>>>>>>> + break;
>>>>>>> + add_free_nid(sbi, nid, true);
>>>>>>> + }
>>>>>>> +
>>>>>>> + if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
>>>>>>> + goto out;
>>>>>>> + i++;
>>>>>>> + goto check_empty;
>>>>>>> +
>>>>>>> +check_partial:
>>>>>>> + i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
>>>>>>> + if (i >= nm_i->nat_blocks) {
>>>>>>> + disable_nat_bits(sbi, true);
>>>>>>
>>>>>> Can this happen in real world? Should be a bug in somewhere?
>>>>>
>>>>> It happens, since current design handles full_nat_bits optionally in order
>>>>> to avoid scanning a whole NAT page to set it back as 1 from 0.
>>>>
>>>> All 0 value in full_nat_bits means the NAT block has at least one free nid,
>>>> right? so if we traverse all such NAT blocks, and still haven't collect enough
>>>> *target* number of free nids, we don't have to disable nat bit cache, we can
>>>> just break out here.
>>>
>>> No, I'm seeing this case:
>>> 1. mount with full=0, empty=0, which indicates some free nids.
>>> 2. allocate all there-in free nids, but still full=0, empty=0.
>>> 3. allocate more free nids.
>>> ...
>>> ... checkpoint makes full=1, empty=0
>>>
>>> If there is no checkpoint and it consumes all the free nids, we can see all 0
>>> value in full_nat_bits which is quite impossible. In that case, I'd prefer
>>> to stop nat_bits and give fsck.f2fs to revive it back.
>>
>> Yeah, I can understand that, but what I concern is when there is few free nids
>> (< target), we still try to load nids of 8 NAT blocks until ending the loop of
>> caching free nids, so it will be very easy to enter the case of disabling
>> nid_bits cache here, so how about doing more check?
>>
>> if (i >= nm_i->nat_blocks &&
>
> This indicates full_nat_bits consists of all zeros, which means, whatever in
> the next time, we need to return -EINVAL. If we do not disable it here, there is
> no way to turn any bit into zero.
We can expect late checkpoint will refresh full_nat_bits, __update_nat_bits will
turn bit to one or zero there.
>
> BTW, I'm trying to freeze all the patches for a pull request, so let me have
> another patch for any pending issues which are not urgent for now as I think.
Got it. I will send you patch if I find explicit bug. :)
Thanks,
>
> Thanks,
>
>> nm_i->nid_cnt[FREE_NID_LIST] != nm_i->available_nids)
>> disable_nat_bits
>>
>> Thanks,
>>
>>>
>>> Thanks,
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>>>
>>>>>>> + return -EINVAL;
>>>>>>> + }
>>>>>>> +
>>>>>>> + nid = i * NAT_ENTRY_PER_BLOCK;
>>>>>>> + page = get_current_nat_page(sbi, nid);
>>>>>>> + scan_nat_page(sbi, page, nid);
>>>>>>> + f2fs_put_page(page, 1);
>>>>>>> +
>>>>>>> + if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
>>>>>>> + i++;
>>>>>>> + goto check_partial;
>>>>>>> + }
>>>>>>> +out:
>>>>>>> + up_read(&nm_i->nat_tree_lock);
>>>>>>> + return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>>>>>>> {
>>>>>>> struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>>>>>> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>>>>>>> if (!sync && !available_free_memory(sbi, FREE_NIDS))
>>>>>>> return;
>>>>>>>
>>>>>>> + /* try to find free nids with nat_bits */
>>>>>>> + if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
>>>>>>> + return;
>>>>>>> +
>>>>>>> + /* find next valid candidate */
>>>>>>
>>>>>> This is just for mount case?
>>>>>
>>>>> Yup, it reuses free nids in dirty NAT blocks, so that we can make them as full
>>>>> NAT pages.
>>>>>
>>>>> Thanks,
>>>>>
>>>>>>
>>>>>>> + if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
>>>>>>> + int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
>>>>>>> + nm_i->nat_blocks, 0);
>>>>>>> + if (idx >= nm_i->nat_blocks)
>>>>>>> + set_sbi_flag(sbi, SBI_NEED_FSCK);
>>>>>>> + else
>>>>>>> + nid = idx * NAT_ENTRY_PER_BLOCK;
>>>>>>> + }
>>>>>>> +
>>>>>>> /* readahead nat pages to be scanned */
>>>>>>> ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
>>>>>>> META_NAT, true);
>>>>>>> @@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>>>>>>> nm_i->ra_nid_pages, META_NAT, false);
>>>>>>> }
>>>>>>>
>>>>>>> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>>>>>>> +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>>>>>>> {
>>>>>>> mutex_lock(&NM_I(sbi)->build_lock);
>>>>>>> - __build_free_nids(sbi, sync);
>>>>>>> + __build_free_nids(sbi, sync, mount);
>>>>>>> mutex_unlock(&NM_I(sbi)->build_lock);
>>>>>>> }
>>>>>>>
>>>>>>> @@ -1941,7 +2012,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
>>>>>>> spin_unlock(&nm_i->nid_list_lock);
>>>>>>>
>>>>>>> /* Let's scan nat pages and its caches to get free nids */
>>>>>>> - build_free_nids(sbi, true);
>>>>>>> + build_free_nids(sbi, true, false);
>>>>>>> goto retry;
>>>>>>> }
>>>>>>>
>>>>>>> @@ -2233,8 +2304,39 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes,
>>>>>>> list_add_tail(&nes->set_list, head);
>>>>>>> }
>>>>>>>
>>>>>>> +void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
>>>>>>> + struct page *page)
>>>>>>> +{
>>>>>>> + struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>>>> + unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
>>>>>>> + struct f2fs_nat_block *nat_blk = page_address(page);
>>>>>>> + int valid = 0;
>>>>>>> + int i;
>>>>>>> +
>>>>>>> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>>>>>> + return;
>>>>>>> +
>>>>>>> + for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
>>>>>>> + if (start_nid == 0 && i == 0)
>>>>>>> + valid++;
>>>>>>> + if (nat_blk->entries[i].block_addr)
>>>>>>> + valid++;
>>>>>>> + }
>>>>>>> + if (valid == 0) {
>>>>>>> + test_and_set_bit_le(nat_index, nm_i->empty_nat_bits);
>>>>>>> + test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
>>>>>>
>>>>>> set_bit_le/clear_bit_le
>>>>>>
>>>>>>> + return;
>>>>>>> + }
>>>>>>> +
>>>>>>> + test_and_clear_bit_le(nat_index, nm_i->empty_nat_bits);
>>>>>>
>>>>>> ditto
>>>>>>
>>>>>>> + if (valid == NAT_ENTRY_PER_BLOCK)
>>>>>>> + test_and_set_bit_le(nat_index, nm_i->full_nat_bits);
>>>>>>> + else
>>>>>>> + test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
>>>>>>
>>>>>> ditto
>>>>>>
>>>>>>> +}
>>>>>>> +
>>>>>>> static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>>>>>> - struct nat_entry_set *set)
>>>>>>> + struct nat_entry_set *set, struct cp_control *cpc)
>>>>>>> {
>>>>>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>>>>>> struct f2fs_journal *journal = curseg->journal;
>>>>>>> @@ -2249,7 +2351,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>>>>>> * #1, flush nat entries to journal in current hot data summary block.
>>>>>>> * #2, flush nat entries to nat page.
>>>>>>> */
>>>>>>> - if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
>>>>>>> + if (cpc->reason == CP_UMOUNT ||
>>>>>>
>>>>>> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
>>>>>>
>>>>>>> + !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
>>>>>>> to_journal = false;
>>>>>>>
>>>>>>> if (to_journal) {
>>>>>>> @@ -2289,10 +2392,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>>>>>> }
>>>>>>> }
>>>>>>>
>>>>>>> - if (to_journal)
>>>>>>> + if (to_journal) {
>>>>>>> up_write(&curseg->journal_rwsem);
>>>>>>> - else
>>>>>>> + } else {
>>>>>>> + __update_nat_bits(sbi, start_nid, page);
>>>>>>> f2fs_put_page(page, 1);
>>>>>>> + }
>>>>>>>
>>>>>>> f2fs_bug_on(sbi, set->entry_cnt);
>>>>>>>
>>>>>>> @@ -2303,7 +2408,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>>>>>> /*
>>>>>>> * This function is called during the checkpointing process.
>>>>>>> */
>>>>>>> -void flush_nat_entries(struct f2fs_sb_info *sbi)
>>>>>>> +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>>>>> {
>>>>>>> struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>>>>>> @@ -2324,7 +2429,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>>>>>>> * entries, remove all entries from journal and merge them
>>>>>>> * into nat entry set.
>>>>>>> */
>>>>>>> - if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
>>>>>>> + if (cpc->reason == CP_UMOUNT ||
>>>>>>
>>>>>> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
>>>>>>
>>>>>>> + !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
>>>>>>> remove_nats_in_journal(sbi);
>>>>>>>
>>>>>>> while ((found = __gang_lookup_nat_set(nm_i,
>>>>>>> @@ -2338,27 +2444,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>>>>>>>
>>>>>>> /* flush dirty nats in nat entry set */
>>>>>>> list_for_each_entry_safe(set, tmp, &sets, set_list)
>>>>>>> - __flush_nat_entry_set(sbi, set);
>>>>>>> + __flush_nat_entry_set(sbi, set, cpc);
>>>>>>>
>>>>>>> up_write(&nm_i->nat_tree_lock);
>>>>>>>
>>>>>>> f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
>>>>>>> }
>>>>>>>
>>>>>>> +static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
>>>>>>> +{
>>>>>>> + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
>>>>>>> + struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>>>> + unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
>>>>>>> + unsigned int i;
>>>>>>> + __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
>>>>>>
>>>>>> __u64 cp_ver = cur_cp_version(ckpt);
>>>>>>
>>>>>> Thanks,
>>>>>>
>>>>>>> + size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
>>>>>>> + __u64 crc = le32_to_cpu(*((__le32 *)
>>>>>>> + ((unsigned char *)ckpt + crc_offset)));
>>>>>>> + block_t nat_bits_addr;
>>>>>>> +
>>>>>>> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>>>>>> + return 0;
>>>>>>> +
>>>>>>> + nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
>>>>>>> + F2FS_BLKSIZE - 1);
>>>>>>> + nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
>>>>>>> + GFP_KERNEL);
>>>>>>> + if (!nm_i->nat_bits)
>>>>>>> + return -ENOMEM;
>>>>>>> +
>>>>>>> + nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
>>>>>>> + nm_i->nat_bits_blocks;
>>>>>>> + for (i = 0; i < nm_i->nat_bits_blocks; i++) {
>>>>>>> + struct page *page = get_meta_page(sbi, nat_bits_addr++);
>>>>>>> +
>>>>>>> + memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
>>>>>>> + page_address(page), F2FS_BLKSIZE);
>>>>>>> + f2fs_put_page(page, 1);
>>>>>>> + }
>>>>>>> +
>>>>>>> + cp_ver |= (crc << 32);
>>>>>>> + if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
>>>>>>> + disable_nat_bits(sbi, true);
>>>>>>> + return 0;
>>>>>>> + }
>>>>>>> +
>>>>>>> + nm_i->full_nat_bits = nm_i->nat_bits + 8;
>>>>>>> + nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
>>>>>>> +
>>>>>>> + f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
>>>>>>> + return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> static int init_node_manager(struct f2fs_sb_info *sbi)
>>>>>>> {
>>>>>>> struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
>>>>>>> struct f2fs_nm_info *nm_i = NM_I(sbi);
>>>>>>> unsigned char *version_bitmap;
>>>>>>> - unsigned int nat_segs, nat_blocks;
>>>>>>> + unsigned int nat_segs;
>>>>>>> + int err;
>>>>>>>
>>>>>>> nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
>>>>>>>
>>>>>>> /* segment_count_nat includes pair segment so divide to 2. */
>>>>>>> nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
>>>>>>> - nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
>>>>>>> -
>>>>>>> - nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
>>>>>>> + nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
>>>>>>> + nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
>>>>>>>
>>>>>>> /* not used nids: 0, node, meta, (and root counted as valid node) */
>>>>>>> nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
>>>>>>> @@ -2392,6 +2543,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
>>>>>>> if (!nm_i->nat_bitmap)
>>>>>>> return -ENOMEM;
>>>>>>>
>>>>>>> + err = __get_nat_bitmaps(sbi);
>>>>>>> + if (err)
>>>>>>> + return err;
>>>>>>> +
>>>>>>> #ifdef CONFIG_F2FS_CHECK_FS
>>>>>>> nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
>>>>>>> GFP_KERNEL);
>>>>>>> @@ -2414,7 +2569,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
>>>>>>> if (err)
>>>>>>> return err;
>>>>>>>
>>>>>>> - build_free_nids(sbi, true);
>>>>>>> + build_free_nids(sbi, true, true);
>>>>>>> return 0;
>>>>>>> }
>>>>>>>
>>>>>>> @@ -2473,6 +2628,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
>>>>>>> up_write(&nm_i->nat_tree_lock);
>>>>>>>
>>>>>>> kfree(nm_i->nat_bitmap);
>>>>>>> + kfree(nm_i->nat_bits);
>>>>>>> #ifdef CONFIG_F2FS_CHECK_FS
>>>>>>> kfree(nm_i->nat_bitmap_mir);
>>>>>>> #endif
>>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>>>> index df2ff5cfe8f4..8e1ec248c653 100644
>>>>>>> --- a/fs/f2fs/segment.c
>>>>>>> +++ b/fs/f2fs/segment.c
>>>>>>> @@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
>>>>>>> if (!available_free_memory(sbi, FREE_NIDS))
>>>>>>> try_to_free_nids(sbi, MAX_FREE_NIDS);
>>>>>>> else
>>>>>>> - build_free_nids(sbi, false);
>>>>>>> + build_free_nids(sbi, false, false);
>>>>>>>
>>>>>>> if (!is_idle(sbi))
>>>>>>> return;
>>>>>>> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
>>>>>>> index f0748524ca8c..1c92ace2e8f8 100644
>>>>>>> --- a/include/linux/f2fs_fs.h
>>>>>>> +++ b/include/linux/f2fs_fs.h
>>>>>>> @@ -114,6 +114,7 @@ struct f2fs_super_block {
>>>>>>> /*
>>>>>>> * For checkpoint
>>>>>>> */
>>>>>>> +#define CP_NAT_BITS_FLAG 0x00000080
>>>>>>> #define CP_CRC_RECOVERY_FLAG 0x00000040
>>>>>>> #define CP_FASTBOOT_FLAG 0x00000020
>>>>>>> #define CP_FSCK_FLAG 0x00000010
>>>>>>>
>>>>>>
>>>>>>
>>>>>> ------------------------------------------------------------------------------
>>>>>> Check out the vibrant tech community on one of the world's most
>>>>>> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
>>>>>> _______________________________________________
>>>>>> Linux-f2fs-devel mailing list
>>>>>> Linux-f2fs-devel@lists.sourceforge.net
>>>>>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>>>>>
>>>>> .
>>>>>
>>>>
>>>>
>>>> ------------------------------------------------------------------------------
>>>> Check out the vibrant tech community on one of the world's most
>>>> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
>>>> _______________________________________________
>>>> Linux-f2fs-devel mailing list
>>>> Linux-f2fs-devel@lists.sourceforge.net
>>>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>>>
>>> .
>>>
>>
>>
>> ------------------------------------------------------------------------------
>> Check out the vibrant tech community on one of the world's most
>> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
>> _______________________________________________
>> Linux-f2fs-devel mailing list
>> Linux-f2fs-devel@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>
> .
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH 2/3 v2] f2fs: add bitmaps for empty or full NAT blocks
2017-02-28 5:33 ` Jaegeuk Kim
@ 2017-02-28 10:52 ` Chao Yu
0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2017-02-28 10:52 UTC (permalink / raw)
To: Jaegeuk Kim; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel
On 2017/2/28 13:33, Jaegeuk Kim wrote:
> On 02/28, Chao Yu wrote:
>> On 2017/2/24 6:58, Jaegeuk Kim wrote:
>>> @@ -2326,7 +2431,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>>> * entries, remove all entries from journal and merge them
>>> * into nat entry set.
>>> */
>>> - if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
>>> + if (cpc->reason == CP_UMOUNT ||
>>
>> enabled_nat_bits(sbi, cpc)
>
> Thanks.
> I added another patch to fix this.
I'm OK with this change. Thanks, :)
Thanks,
>
>>From b15f7112c3daced4c7f70c4d5fcba9013743cb6b Mon Sep 17 00:00:00 2001
> From: Jaegeuk Kim <jaegeuk@kernel.org>
> Date: Mon, 27 Feb 2017 21:28:53 -0800
> Subject: [PATCH] f2fs: avoid to flush nat journal entries
>
> This patch adds a missing condition which flushes nat journal entries
> unnecessarily introduced by:
>
> f2fs: add bitmaps for empty or full NAT blocks
>
> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
> fs/f2fs/node.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index b3aead4c5a0f..94967171dee8 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -2513,7 +2513,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> * entries, remove all entries from journal and merge them
> * into nat entry set.
> */
> - if (cpc->reason == CP_UMOUNT ||
> + if (enabled_nat_bits(sbi, cpc) ||
> !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> remove_nats_in_journal(sbi);
>
>
^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2017-02-28 10:58 UTC | newest]
Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-14 2:06 [PATCH 1/3] f2fs: remove build_free_nids() during checkpoint Jaegeuk Kim
2017-02-14 2:06 ` [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks Jaegeuk Kim
2017-02-23 11:41 ` [f2fs-dev] " Chao Yu
2017-02-23 22:54 ` Jaegeuk Kim
2017-02-25 3:26 ` Chao Yu
2017-02-25 18:34 ` Jaegeuk Kim
2017-02-27 3:03 ` Chao Yu
2017-02-27 22:19 ` Jaegeuk Kim
2017-02-28 10:38 ` Chao Yu
2017-02-23 22:58 ` [PATCH 2/3 v2] " Jaegeuk Kim
2017-02-28 3:34 ` [f2fs-dev] " Chao Yu
2017-02-28 5:33 ` Jaegeuk Kim
2017-02-28 10:52 ` Chao Yu
2017-02-14 2:06 ` [PATCH 3/3] f2fs: avoid reading NAT page by get_node_info Jaegeuk Kim
2017-02-23 11:47 ` Chao Yu
2017-02-23 18:20 ` [f2fs-dev] " Jaegeuk Kim
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).