linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH V2] f2fs: introduce free nid bitmap
@ 2017-02-23  2:53 Chao Yu
  2017-02-25  6:25 ` Chao Yu
  2017-03-01 19:35 ` Jaegeuk Kim
  0 siblings, 2 replies; 7+ messages in thread
From: Chao Yu @ 2017-02-23  2:53 UTC (permalink / raw)
  To: jaegeuk; +Cc: linux-f2fs-devel, linux-kernel, chao, Chao Yu

In scenario of intensively node allocation, free nids will be ran out
soon, then it needs to stop to load free nids by traversing NAT blocks,
in worse case, if NAT blocks does not be cached in memory, it generates
IOs which slows down our foreground operations.

In order to speed up node allocation, in this patch we introduce a new
free_nid_bitmap array, so there is an bitmap table for each NAT block,
Once the NAT block is loaded, related bitmap cache will be switched on,
and bitmap will be set during traversing nat entries in NAT block, later
we can query and update nid usage status in memory completely.

With such implementation, I expect performance of node allocation can be
improved in the long-term after filesystem image is mounted.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
---
 fs/f2fs/debug.c         |   2 +
 fs/f2fs/f2fs.h          |   2 +
 fs/f2fs/node.c          | 109 ++++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/f2fs_fs.h |   1 +
 4 files changed, 111 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 015ad2b73a92..a77df377e2e8 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -194,6 +194,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
 	si->base_mem += sizeof(struct f2fs_nm_info);
 	si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
 	si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
+	si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
+	si->base_mem += NM_I(sbi)->nat_blocks / 8;
 
 get_cache:
 	si->cache_mem = 0;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 1c9f0cc8f027..6a48e649bfef 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -556,6 +556,8 @@ struct f2fs_nm_info {
 	unsigned int nid_cnt[MAX_NID_LIST];	/* the number of free node id */
 	spinlock_t nid_list_lock;	/* protect nid lists ops */
 	struct mutex build_lock;	/* lock for build free nids */
+	unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
+	unsigned char *nat_block_bitmap;
 
 	/* for checkpoint */
 	char *nat_bitmap;		/* NAT bitmap pointer */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b63bdb85ad66..f0f036c2d2fc 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1821,14 +1821,32 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
 		kmem_cache_free(free_nid_slab, i);
 }
 
+void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
+	unsigned int nid_ofs = nid - START_NID(nid);
+
+	if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
+		return;
+
+	if (set)
+		set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+	else
+		clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+}
+
 static void scan_nat_page(struct f2fs_sb_info *sbi,
 			struct page *nat_page, nid_t start_nid)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct f2fs_nat_block *nat_blk = page_address(nat_page);
 	block_t blk_addr;
+	unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
 	int i;
 
+	set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
+
 	i = start_nid % NAT_ENTRY_PER_BLOCK;
 
 	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
@@ -1840,7 +1858,51 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
 		f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
 		if (blk_addr == NULL_ADDR)
 			add_free_nid(sbi, start_nid, true);
+		update_free_nid_bitmap(sbi, start_nid, blk_addr == NULL_ADDR);
+	}
+}
+
+static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
+	struct f2fs_journal *journal = curseg->journal;
+	unsigned int i, idx;
+	unsigned int target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
+
+	down_read(&nm_i->nat_tree_lock);
+
+	for (i = 0; i < nm_i->nat_blocks; i++) {
+		if (!test_bit_le(i, nm_i->nat_block_bitmap))
+			continue;
+		for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
+			nid_t nid;
+
+			if (!test_bit_le(idx, nm_i->free_nid_bitmap[i]))
+				continue;
+
+			nid = i * NAT_ENTRY_PER_BLOCK + idx;
+			add_free_nid(sbi, nid, true);
+
+			if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
+				goto out;
+		}
+	}
+out:
+	down_read(&curseg->journal_rwsem);
+	for (i = 0; i < nats_in_cursum(journal); i++) {
+		block_t addr;
+		nid_t nid;
+
+		addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
+		nid = le32_to_cpu(nid_in_journal(journal, i));
+		if (addr == NULL_ADDR)
+			add_free_nid(sbi, nid, true);
+		else
+			remove_free_nid(sbi, nid);
 	}
+	up_read(&curseg->journal_rwsem);
+	up_read(&nm_i->nat_tree_lock);
 }
 
 static int scan_nat_bits(struct f2fs_sb_info *sbi)
@@ -1910,9 +1972,17 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
 	if (!sync && !available_free_memory(sbi, FREE_NIDS))
 		return;
 
-	/* try to find free nids with nat_bits */
-	if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
-		return;
+	if (!mount) {
+		/* try to find free nids in free_nid_bitmap */
+		scan_free_nid_bits(sbi);
+
+		if (nm_i->nid_cnt[FREE_NID_LIST])
+			return;
+
+		/* try to find free nids with nat_bits */
+		if (!scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
+			return;
+	}
 
 	/* find next valid candidate */
 	if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
@@ -2005,6 +2075,9 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
 		i->state = NID_ALLOC;
 		__insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
 		nm_i->available_nids--;
+
+		update_free_nid_bitmap(sbi, *nid, false);
+
 		spin_unlock(&nm_i->nid_list_lock);
 		return true;
 	}
@@ -2059,6 +2132,8 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
 
 	nm_i->available_nids++;
 
+	update_free_nid_bitmap(sbi, nid, true);
+
 	spin_unlock(&nm_i->nid_list_lock);
 
 	if (need_free)
@@ -2387,6 +2462,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 			add_free_nid(sbi, nid, false);
 			spin_lock(&NM_I(sbi)->nid_list_lock);
 			NM_I(sbi)->available_nids++;
+			update_free_nid_bitmap(sbi, nid, true);
+			spin_unlock(&NM_I(sbi)->nid_list_lock);
+		} else {
+			spin_lock(&NM_I(sbi)->nid_list_lock);
+			update_free_nid_bitmap(sbi, nid, false);
 			spin_unlock(&NM_I(sbi)->nid_list_lock);
 		}
 	}
@@ -2556,6 +2636,22 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
 	return 0;
 }
 
+int init_free_nid_cache(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+
+	nm_i->free_nid_bitmap = f2fs_kvzalloc(nm_i->nat_blocks *
+					NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL);
+	if (!nm_i->free_nid_bitmap)
+		return -ENOMEM;
+
+	nm_i->nat_block_bitmap = f2fs_kvzalloc(nm_i->nat_blocks / 8,
+								GFP_KERNEL);
+	if (!nm_i->nat_block_bitmap)
+		return -ENOMEM;
+	return 0;
+}
+
 int build_node_manager(struct f2fs_sb_info *sbi)
 {
 	int err;
@@ -2568,6 +2664,10 @@ int build_node_manager(struct f2fs_sb_info *sbi)
 	if (err)
 		return err;
 
+	err = init_free_nid_cache(sbi);
+	if (err)
+		return err;
+
 	build_free_nids(sbi, true, true);
 	return 0;
 }
@@ -2626,6 +2726,9 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
 	}
 	up_write(&nm_i->nat_tree_lock);
 
+	kvfree(nm_i->nat_block_bitmap);
+	kvfree(nm_i->free_nid_bitmap);
+
 	kfree(nm_i->nat_bitmap);
 	kfree(nm_i->nat_bits);
 #ifdef CONFIG_F2FS_CHECK_FS
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 1c92ace2e8f8..e2d239ed4c60 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -279,6 +279,7 @@ struct f2fs_node {
  * For NAT entries
  */
 #define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
+#define NAT_ENTRY_BITMAP_SIZE	((NAT_ENTRY_PER_BLOCK + 7) / 8)
 
 struct f2fs_nat_entry {
 	__u8 version;		/* latest version of cached nat entry */
-- 
2.8.2.295.g3f1c1d0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH V2] f2fs: introduce free nid bitmap
  2017-02-23  2:53 [PATCH V2] f2fs: introduce free nid bitmap Chao Yu
@ 2017-02-25  6:25 ` Chao Yu
  2017-02-25 19:02   ` Jaegeuk Kim
  2017-03-01 19:35 ` Jaegeuk Kim
  1 sibling, 1 reply; 7+ messages in thread
From: Chao Yu @ 2017-02-25  6:25 UTC (permalink / raw)
  To: jaegeuk; +Cc: linux-f2fs-devel, linux-kernel, chao

Hi Jaegeuk,

I added below diff code into this patch in order to fix incorrectly nid status
updating to reduce large latency of generic/251 in fstest, could you help to
review code below?

Latency:    before    after
generic/251 1483s ... 184s

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 52db02396878..d25dcadf901a 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1774,23 +1774,24 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t
nid, bool build)

        /* 0 nid should not be used */
        if (unlikely(nid == 0))
-               return 0;
+               return 1;

        if (build) {
                /* do not add allocated nids */
                ne = __lookup_nat_cache(nm_i, nid);
                if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
                                nat_get_blkaddr(ne) != NULL_ADDR))
-                       return 0;
+                       return 1;
        }

        i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
        i->nid = nid;
        i->state = NID_NEW;

-       if (radix_tree_preload(GFP_NOFS)) {
+       err = radix_tree_preload(GFP_NOFS);
+       if (err) {
                kmem_cache_free(free_nid_slab, i);
-               return 0;
+               return err;
        }

        spin_lock(&nm_i->nid_list_lock);
@@ -1799,9 +1800,9 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t
nid, bool build)
        radix_tree_preload_end();
        if (err) {
                kmem_cache_free(free_nid_slab, i);
-               return 0;
+               return err;
        }
-       return 1;
+       return 0;
 }

 static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
@@ -1844,7 +1845,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
        struct f2fs_nat_block *nat_blk = page_address(nat_page);
        block_t blk_addr;
        unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
-       int i;
+       int i, ret;

        set_bit_le(nat_ofs, nm_i->nat_block_bitmap);

@@ -1857,9 +1858,12 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,

                blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
                f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
-               if (blk_addr == NULL_ADDR)
-                       add_free_nid(sbi, start_nid, true);
-               update_free_nid_bitmap(sbi, start_nid, blk_addr == NULL_ADDR);
+               if (blk_addr == NULL_ADDR) {
+                       ret = add_free_nid(sbi, start_nid, true);
+                       update_free_nid_bitmap(sbi, start_nid, ret <= 0);
+               } else {
+                       update_free_nid_bitmap(sbi, start_nid, false);
+               }
        }
 }


On 2017/2/23 10:53, Chao Yu wrote:
> In scenario of intensively node allocation, free nids will be ran out
> soon, then it needs to stop to load free nids by traversing NAT blocks,
> in worse case, if NAT blocks does not be cached in memory, it generates
> IOs which slows down our foreground operations.
> 
> In order to speed up node allocation, in this patch we introduce a new
> free_nid_bitmap array, so there is an bitmap table for each NAT block,
> Once the NAT block is loaded, related bitmap cache will be switched on,
> and bitmap will be set during traversing nat entries in NAT block, later
> we can query and update nid usage status in memory completely.
> 
> With such implementation, I expect performance of node allocation can be
> improved in the long-term after filesystem image is mounted.
> 
> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> ---
>  fs/f2fs/debug.c         |   2 +
>  fs/f2fs/f2fs.h          |   2 +
>  fs/f2fs/node.c          | 109 ++++++++++++++++++++++++++++++++++++++++++++++--
>  include/linux/f2fs_fs.h |   1 +
>  4 files changed, 111 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index 015ad2b73a92..a77df377e2e8 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -194,6 +194,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
>  	si->base_mem += sizeof(struct f2fs_nm_info);
>  	si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
>  	si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
> +	si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
> +	si->base_mem += NM_I(sbi)->nat_blocks / 8;
>  
>  get_cache:
>  	si->cache_mem = 0;
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 1c9f0cc8f027..6a48e649bfef 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -556,6 +556,8 @@ struct f2fs_nm_info {
>  	unsigned int nid_cnt[MAX_NID_LIST];	/* the number of free node id */
>  	spinlock_t nid_list_lock;	/* protect nid lists ops */
>  	struct mutex build_lock;	/* lock for build free nids */
> +	unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
> +	unsigned char *nat_block_bitmap;
>  
>  	/* for checkpoint */
>  	char *nat_bitmap;		/* NAT bitmap pointer */
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index b63bdb85ad66..f0f036c2d2fc 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -1821,14 +1821,32 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
>  		kmem_cache_free(free_nid_slab, i);
>  }
>  
> +void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set)
> +{
> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> +	unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
> +	unsigned int nid_ofs = nid - START_NID(nid);
> +
> +	if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
> +		return;
> +
> +	if (set)
> +		set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
> +	else
> +		clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
> +}
> +
>  static void scan_nat_page(struct f2fs_sb_info *sbi,
>  			struct page *nat_page, nid_t start_nid)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  	struct f2fs_nat_block *nat_blk = page_address(nat_page);
>  	block_t blk_addr;
> +	unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
>  	int i;
>  
> +	set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
> +
>  	i = start_nid % NAT_ENTRY_PER_BLOCK;
>  
>  	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
> @@ -1840,7 +1858,51 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
>  		f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
>  		if (blk_addr == NULL_ADDR)
>  			add_free_nid(sbi, start_nid, true);
> +		update_free_nid_bitmap(sbi, start_nid, blk_addr == NULL_ADDR);
> +	}
> +}
> +
> +static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
> +{
> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> +	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> +	struct f2fs_journal *journal = curseg->journal;
> +	unsigned int i, idx;
> +	unsigned int target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
> +
> +	down_read(&nm_i->nat_tree_lock);
> +
> +	for (i = 0; i < nm_i->nat_blocks; i++) {
> +		if (!test_bit_le(i, nm_i->nat_block_bitmap))
> +			continue;
> +		for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
> +			nid_t nid;
> +
> +			if (!test_bit_le(idx, nm_i->free_nid_bitmap[i]))
> +				continue;
> +
> +			nid = i * NAT_ENTRY_PER_BLOCK + idx;
> +			add_free_nid(sbi, nid, true);
> +
> +			if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
> +				goto out;
> +		}
> +	}
> +out:
> +	down_read(&curseg->journal_rwsem);
> +	for (i = 0; i < nats_in_cursum(journal); i++) {
> +		block_t addr;
> +		nid_t nid;
> +
> +		addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
> +		nid = le32_to_cpu(nid_in_journal(journal, i));
> +		if (addr == NULL_ADDR)
> +			add_free_nid(sbi, nid, true);
> +		else
> +			remove_free_nid(sbi, nid);
>  	}
> +	up_read(&curseg->journal_rwsem);
> +	up_read(&nm_i->nat_tree_lock);
>  }
>  
>  static int scan_nat_bits(struct f2fs_sb_info *sbi)
> @@ -1910,9 +1972,17 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>  	if (!sync && !available_free_memory(sbi, FREE_NIDS))
>  		return;
>  
> -	/* try to find free nids with nat_bits */
> -	if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
> -		return;
> +	if (!mount) {
> +		/* try to find free nids in free_nid_bitmap */
> +		scan_free_nid_bits(sbi);
> +
> +		if (nm_i->nid_cnt[FREE_NID_LIST])
> +			return;
> +
> +		/* try to find free nids with nat_bits */
> +		if (!scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
> +			return;
> +	}
>  
>  	/* find next valid candidate */
>  	if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> @@ -2005,6 +2075,9 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
>  		i->state = NID_ALLOC;
>  		__insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
>  		nm_i->available_nids--;
> +
> +		update_free_nid_bitmap(sbi, *nid, false);
> +
>  		spin_unlock(&nm_i->nid_list_lock);
>  		return true;
>  	}
> @@ -2059,6 +2132,8 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
>  
>  	nm_i->available_nids++;
>  
> +	update_free_nid_bitmap(sbi, nid, true);
> +
>  	spin_unlock(&nm_i->nid_list_lock);
>  
>  	if (need_free)
> @@ -2387,6 +2462,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>  			add_free_nid(sbi, nid, false);
>  			spin_lock(&NM_I(sbi)->nid_list_lock);
>  			NM_I(sbi)->available_nids++;
> +			update_free_nid_bitmap(sbi, nid, true);
> +			spin_unlock(&NM_I(sbi)->nid_list_lock);
> +		} else {
> +			spin_lock(&NM_I(sbi)->nid_list_lock);
> +			update_free_nid_bitmap(sbi, nid, false);
>  			spin_unlock(&NM_I(sbi)->nid_list_lock);
>  		}
>  	}
> @@ -2556,6 +2636,22 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
>  	return 0;
>  }
>  
> +int init_free_nid_cache(struct f2fs_sb_info *sbi)
> +{
> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> +
> +	nm_i->free_nid_bitmap = f2fs_kvzalloc(nm_i->nat_blocks *
> +					NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL);
> +	if (!nm_i->free_nid_bitmap)
> +		return -ENOMEM;
> +
> +	nm_i->nat_block_bitmap = f2fs_kvzalloc(nm_i->nat_blocks / 8,
> +								GFP_KERNEL);
> +	if (!nm_i->nat_block_bitmap)
> +		return -ENOMEM;
> +	return 0;
> +}
> +
>  int build_node_manager(struct f2fs_sb_info *sbi)
>  {
>  	int err;
> @@ -2568,6 +2664,10 @@ int build_node_manager(struct f2fs_sb_info *sbi)
>  	if (err)
>  		return err;
>  
> +	err = init_free_nid_cache(sbi);
> +	if (err)
> +		return err;
> +
>  	build_free_nids(sbi, true, true);
>  	return 0;
>  }
> @@ -2626,6 +2726,9 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
>  	}
>  	up_write(&nm_i->nat_tree_lock);
>  
> +	kvfree(nm_i->nat_block_bitmap);
> +	kvfree(nm_i->free_nid_bitmap);
> +
>  	kfree(nm_i->nat_bitmap);
>  	kfree(nm_i->nat_bits);
>  #ifdef CONFIG_F2FS_CHECK_FS
> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
> index 1c92ace2e8f8..e2d239ed4c60 100644
> --- a/include/linux/f2fs_fs.h
> +++ b/include/linux/f2fs_fs.h
> @@ -279,6 +279,7 @@ struct f2fs_node {
>   * For NAT entries
>   */
>  #define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
> +#define NAT_ENTRY_BITMAP_SIZE	((NAT_ENTRY_PER_BLOCK + 7) / 8)
>  
>  struct f2fs_nat_entry {
>  	__u8 version;		/* latest version of cached nat entry */
> 

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH V2] f2fs: introduce free nid bitmap
  2017-02-25  6:25 ` Chao Yu
@ 2017-02-25 19:02   ` Jaegeuk Kim
  2017-02-27  3:11     ` Chao Yu
  0 siblings, 1 reply; 7+ messages in thread
From: Jaegeuk Kim @ 2017-02-25 19:02 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-f2fs-devel, linux-kernel, chao

On 02/25, Chao Yu wrote:
> Hi Jaegeuk,
> 
> I added below diff code into this patch in order to fix incorrectly nid status
> updating to reduce large latency of generic/251 in fstest, could you help to
> review code below?

Understand the problem, and how about this?

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 52db02396878..83b305689913 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1765,7 +1765,7 @@ static void __remove_nid_from_list(struct f2fs_sb_info *sbi,
 		radix_tree_delete(&nm_i->free_nid_root, i->nid);
 }
 
-static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
+static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct free_nid *i;
@@ -1774,14 +1774,14 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
 
 	/* 0 nid should not be used */
 	if (unlikely(nid == 0))
-		return 0;
+		return false;
 
 	if (build) {
 		/* do not add allocated nids */
 		ne = __lookup_nat_cache(nm_i, nid);
 		if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
 				nat_get_blkaddr(ne) != NULL_ADDR))
-			return 0;
+			return false;
 	}
 
 	i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
@@ -1790,7 +1790,7 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
 
 	if (radix_tree_preload(GFP_NOFS)) {
 		kmem_cache_free(free_nid_slab, i);
-		return 0;
+		return false;
 	}
 
 	spin_lock(&nm_i->nid_list_lock);
@@ -1799,9 +1799,9 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
 	radix_tree_preload_end();
 	if (err) {
 		kmem_cache_free(free_nid_slab, i);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
@@ -1851,6 +1851,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
 	i = start_nid % NAT_ENTRY_PER_BLOCK;
 
 	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
+		bool freed = false;
 
 		if (unlikely(start_nid >= nm_i->max_nid))
 			break;
@@ -1858,8 +1859,8 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
 		blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
 		f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
 		if (blk_addr == NULL_ADDR)
-			add_free_nid(sbi, start_nid, true);
-		update_free_nid_bitmap(sbi, start_nid, blk_addr == NULL_ADDR);
+			freed = add_free_nid(sbi, start_nid, true);
+		update_free_nid_bitmap(sbi, start_nid, freed);
 	}
 }
 
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH V2] f2fs: introduce free nid bitmap
  2017-02-25 19:02   ` Jaegeuk Kim
@ 2017-02-27  3:11     ` Chao Yu
  2017-02-27 22:21       ` Jaegeuk Kim
  0 siblings, 1 reply; 7+ messages in thread
From: Chao Yu @ 2017-02-27  3:11 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-f2fs-devel, linux-kernel, chao

On 2017/2/26 3:02, Jaegeuk Kim wrote:
> On 02/25, Chao Yu wrote:
>> Hi Jaegeuk,
>>
>> I added below diff code into this patch in order to fix incorrectly nid status
>> updating to reduce large latency of generic/251 in fstest, could you help to
>> review code below?
> 
> Understand the problem, and how about this?
> 
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 52db02396878..83b305689913 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -1765,7 +1765,7 @@ static void __remove_nid_from_list(struct f2fs_sb_info *sbi,
>  		radix_tree_delete(&nm_i->free_nid_root, i->nid);
>  }
>  
> -static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
> +static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  	struct free_nid *i;
> @@ -1774,14 +1774,14 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
>  
>  	/* 0 nid should not be used */
>  	if (unlikely(nid == 0))
> -		return 0;
> +		return false;
>  
>  	if (build) {
>  		/* do not add allocated nids */
>  		ne = __lookup_nat_cache(nm_i, nid);
>  		if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
>  				nat_get_blkaddr(ne) != NULL_ADDR))
> -			return 0;
> +			return false;
>  	}
>  
>  	i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
> @@ -1790,7 +1790,7 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
>  
>  	if (radix_tree_preload(GFP_NOFS)) {
>  		kmem_cache_free(free_nid_slab, i);
> -		return 0;
> +		return false;

If there is no memory, actually current free nid is still available, we need to
return true (or other value -1?), then caller can set free_nid_bitmap correctly.

>  	}
>  
>  	spin_lock(&nm_i->nid_list_lock);
> @@ -1799,9 +1799,9 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
>  	radix_tree_preload_end();
>  	if (err) {
>  		kmem_cache_free(free_nid_slab, i);
> -		return 0;
> +		return false;

ditto.

Thanks,

>  	}
> -	return 1;
> +	return true;
>  }
>  
>  static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
> @@ -1851,6 +1851,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
>  	i = start_nid % NAT_ENTRY_PER_BLOCK;
>  
>  	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
> +		bool freed = false;
>  
>  		if (unlikely(start_nid >= nm_i->max_nid))
>  			break;
> @@ -1858,8 +1859,8 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
>  		blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
>  		f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
>  		if (blk_addr == NULL_ADDR)
> -			add_free_nid(sbi, start_nid, true);
> -		update_free_nid_bitmap(sbi, start_nid, blk_addr == NULL_ADDR);
> +			freed = add_free_nid(sbi, start_nid, true);
> +		update_free_nid_bitmap(sbi, start_nid, freed);
>  	}
>  }
>  
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V2] f2fs: introduce free nid bitmap
  2017-02-27  3:11     ` Chao Yu
@ 2017-02-27 22:21       ` Jaegeuk Kim
  0 siblings, 0 replies; 7+ messages in thread
From: Jaegeuk Kim @ 2017-02-27 22:21 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-f2fs-devel, linux-kernel, chao

On 02/27, Chao Yu wrote:
> On 2017/2/26 3:02, Jaegeuk Kim wrote:
> > On 02/25, Chao Yu wrote:
> >> Hi Jaegeuk,
> >>
> >> I added below diff code into this patch in order to fix incorrectly nid status
> >> updating to reduce large latency of generic/251 in fstest, could you help to
> >> review code below?
> > 
> > Understand the problem, and how about this?
> > 
> > diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> > index 52db02396878..83b305689913 100644
> > --- a/fs/f2fs/node.c
> > +++ b/fs/f2fs/node.c
> > @@ -1765,7 +1765,7 @@ static void __remove_nid_from_list(struct f2fs_sb_info *sbi,
> >  		radix_tree_delete(&nm_i->free_nid_root, i->nid);
> >  }
> >  
> > -static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
> > +static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
> >  {
> >  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> >  	struct free_nid *i;
> > @@ -1774,14 +1774,14 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
> >  
> >  	/* 0 nid should not be used */
> >  	if (unlikely(nid == 0))
> > -		return 0;
> > +		return false;
> >  
> >  	if (build) {
> >  		/* do not add allocated nids */
> >  		ne = __lookup_nat_cache(nm_i, nid);
> >  		if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
> >  				nat_get_blkaddr(ne) != NULL_ADDR))
> > -			return 0;
> > +			return false;
> >  	}
> >  
> >  	i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
> > @@ -1790,7 +1790,7 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
> >  
> >  	if (radix_tree_preload(GFP_NOFS)) {
> >  		kmem_cache_free(free_nid_slab, i);
> > -		return 0;
> > +		return false;
> 
> If there is no memory, actually current free nid is still available, we need to
> return true (or other value -1?), then caller can set free_nid_bitmap correctly.
> 
> >  	}
> >  
> >  	spin_lock(&nm_i->nid_list_lock);
> > @@ -1799,9 +1799,9 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
> >  	radix_tree_preload_end();
> >  	if (err) {
> >  		kmem_cache_free(free_nid_slab, i);
> > -		return 0;
> > +		return false;
> 
> ditto.

Yup, applied.

Thanks,

> 
> Thanks,
> 
> >  	}
> > -	return 1;
> > +	return true;
> >  }
> >  
> >  static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
> > @@ -1851,6 +1851,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
> >  	i = start_nid % NAT_ENTRY_PER_BLOCK;
> >  
> >  	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
> > +		bool freed = false;
> >  
> >  		if (unlikely(start_nid >= nm_i->max_nid))
> >  			break;
> > @@ -1858,8 +1859,8 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
> >  		blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
> >  		f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
> >  		if (blk_addr == NULL_ADDR)
> > -			add_free_nid(sbi, start_nid, true);
> > -		update_free_nid_bitmap(sbi, start_nid, blk_addr == NULL_ADDR);
> > +			freed = add_free_nid(sbi, start_nid, true);
> > +		update_free_nid_bitmap(sbi, start_nid, freed);
> >  	}
> >  }
> >  
> > 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V2] f2fs: introduce free nid bitmap
  2017-02-23  2:53 [PATCH V2] f2fs: introduce free nid bitmap Chao Yu
  2017-02-25  6:25 ` Chao Yu
@ 2017-03-01 19:35 ` Jaegeuk Kim
  2017-03-02  1:30   ` Chao Yu
  1 sibling, 1 reply; 7+ messages in thread
From: Jaegeuk Kim @ 2017-03-01 19:35 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-f2fs-devel, linux-kernel, chao

Hi Chao,

BTW, we need to add a shrinker for this as well, right?

Thanks,

On 02/23, Chao Yu wrote:
> In scenario of intensively node allocation, free nids will be ran out
> soon, then it needs to stop to load free nids by traversing NAT blocks,
> in worse case, if NAT blocks does not be cached in memory, it generates
> IOs which slows down our foreground operations.
> 
> In order to speed up node allocation, in this patch we introduce a new
> free_nid_bitmap array, so there is an bitmap table for each NAT block,
> Once the NAT block is loaded, related bitmap cache will be switched on,
> and bitmap will be set during traversing nat entries in NAT block, later
> we can query and update nid usage status in memory completely.
> 
> With such implementation, I expect performance of node allocation can be
> improved in the long-term after filesystem image is mounted.
> 
> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> ---
>  fs/f2fs/debug.c         |   2 +
>  fs/f2fs/f2fs.h          |   2 +
>  fs/f2fs/node.c          | 109 ++++++++++++++++++++++++++++++++++++++++++++++--
>  include/linux/f2fs_fs.h |   1 +
>  4 files changed, 111 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index 015ad2b73a92..a77df377e2e8 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -194,6 +194,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
>  	si->base_mem += sizeof(struct f2fs_nm_info);
>  	si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
>  	si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
> +	si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
> +	si->base_mem += NM_I(sbi)->nat_blocks / 8;
>  
>  get_cache:
>  	si->cache_mem = 0;
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 1c9f0cc8f027..6a48e649bfef 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -556,6 +556,8 @@ struct f2fs_nm_info {
>  	unsigned int nid_cnt[MAX_NID_LIST];	/* the number of free node id */
>  	spinlock_t nid_list_lock;	/* protect nid lists ops */
>  	struct mutex build_lock;	/* lock for build free nids */
> +	unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
> +	unsigned char *nat_block_bitmap;
>  
>  	/* for checkpoint */
>  	char *nat_bitmap;		/* NAT bitmap pointer */
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index b63bdb85ad66..f0f036c2d2fc 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -1821,14 +1821,32 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
>  		kmem_cache_free(free_nid_slab, i);
>  }
>  
> +void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set)
> +{
> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> +	unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
> +	unsigned int nid_ofs = nid - START_NID(nid);
> +
> +	if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
> +		return;
> +
> +	if (set)
> +		set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
> +	else
> +		clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
> +}
> +
>  static void scan_nat_page(struct f2fs_sb_info *sbi,
>  			struct page *nat_page, nid_t start_nid)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  	struct f2fs_nat_block *nat_blk = page_address(nat_page);
>  	block_t blk_addr;
> +	unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
>  	int i;
>  
> +	set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
> +
>  	i = start_nid % NAT_ENTRY_PER_BLOCK;
>  
>  	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
> @@ -1840,7 +1858,51 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
>  		f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
>  		if (blk_addr == NULL_ADDR)
>  			add_free_nid(sbi, start_nid, true);
> +		update_free_nid_bitmap(sbi, start_nid, blk_addr == NULL_ADDR);
> +	}
> +}
> +
> +static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
> +{
> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> +	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> +	struct f2fs_journal *journal = curseg->journal;
> +	unsigned int i, idx;
> +	unsigned int target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
> +
> +	down_read(&nm_i->nat_tree_lock);
> +
> +	for (i = 0; i < nm_i->nat_blocks; i++) {
> +		if (!test_bit_le(i, nm_i->nat_block_bitmap))
> +			continue;
> +		for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
> +			nid_t nid;
> +
> +			if (!test_bit_le(idx, nm_i->free_nid_bitmap[i]))
> +				continue;
> +
> +			nid = i * NAT_ENTRY_PER_BLOCK + idx;
> +			add_free_nid(sbi, nid, true);
> +
> +			if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
> +				goto out;
> +		}
> +	}
> +out:
> +	down_read(&curseg->journal_rwsem);
> +	for (i = 0; i < nats_in_cursum(journal); i++) {
> +		block_t addr;
> +		nid_t nid;
> +
> +		addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
> +		nid = le32_to_cpu(nid_in_journal(journal, i));
> +		if (addr == NULL_ADDR)
> +			add_free_nid(sbi, nid, true);
> +		else
> +			remove_free_nid(sbi, nid);
>  	}
> +	up_read(&curseg->journal_rwsem);
> +	up_read(&nm_i->nat_tree_lock);
>  }
>  
>  static int scan_nat_bits(struct f2fs_sb_info *sbi)
> @@ -1910,9 +1972,17 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>  	if (!sync && !available_free_memory(sbi, FREE_NIDS))
>  		return;
>  
> -	/* try to find free nids with nat_bits */
> -	if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
> -		return;
> +	if (!mount) {
> +		/* try to find free nids in free_nid_bitmap */
> +		scan_free_nid_bits(sbi);
> +
> +		if (nm_i->nid_cnt[FREE_NID_LIST])
> +			return;
> +
> +		/* try to find free nids with nat_bits */
> +		if (!scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
> +			return;
> +	}
>  
>  	/* find next valid candidate */
>  	if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> @@ -2005,6 +2075,9 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
>  		i->state = NID_ALLOC;
>  		__insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
>  		nm_i->available_nids--;
> +
> +		update_free_nid_bitmap(sbi, *nid, false);
> +
>  		spin_unlock(&nm_i->nid_list_lock);
>  		return true;
>  	}
> @@ -2059,6 +2132,8 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
>  
>  	nm_i->available_nids++;
>  
> +	update_free_nid_bitmap(sbi, nid, true);
> +
>  	spin_unlock(&nm_i->nid_list_lock);
>  
>  	if (need_free)
> @@ -2387,6 +2462,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>  			add_free_nid(sbi, nid, false);
>  			spin_lock(&NM_I(sbi)->nid_list_lock);
>  			NM_I(sbi)->available_nids++;
> +			update_free_nid_bitmap(sbi, nid, true);
> +			spin_unlock(&NM_I(sbi)->nid_list_lock);
> +		} else {
> +			spin_lock(&NM_I(sbi)->nid_list_lock);
> +			update_free_nid_bitmap(sbi, nid, false);
>  			spin_unlock(&NM_I(sbi)->nid_list_lock);
>  		}
>  	}
> @@ -2556,6 +2636,22 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
>  	return 0;
>  }
>  
> +int init_free_nid_cache(struct f2fs_sb_info *sbi)
> +{
> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> +
> +	nm_i->free_nid_bitmap = f2fs_kvzalloc(nm_i->nat_blocks *
> +					NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL);
> +	if (!nm_i->free_nid_bitmap)
> +		return -ENOMEM;
> +
> +	nm_i->nat_block_bitmap = f2fs_kvzalloc(nm_i->nat_blocks / 8,
> +								GFP_KERNEL);
> +	if (!nm_i->nat_block_bitmap)
> +		return -ENOMEM;
> +	return 0;
> +}
> +
>  int build_node_manager(struct f2fs_sb_info *sbi)
>  {
>  	int err;
> @@ -2568,6 +2664,10 @@ int build_node_manager(struct f2fs_sb_info *sbi)
>  	if (err)
>  		return err;
>  
> +	err = init_free_nid_cache(sbi);
> +	if (err)
> +		return err;
> +
>  	build_free_nids(sbi, true, true);
>  	return 0;
>  }
> @@ -2626,6 +2726,9 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
>  	}
>  	up_write(&nm_i->nat_tree_lock);
>  
> +	kvfree(nm_i->nat_block_bitmap);
> +	kvfree(nm_i->free_nid_bitmap);
> +
>  	kfree(nm_i->nat_bitmap);
>  	kfree(nm_i->nat_bits);
>  #ifdef CONFIG_F2FS_CHECK_FS
> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
> index 1c92ace2e8f8..e2d239ed4c60 100644
> --- a/include/linux/f2fs_fs.h
> +++ b/include/linux/f2fs_fs.h
> @@ -279,6 +279,7 @@ struct f2fs_node {
>   * For NAT entries
>   */
>  #define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
> +#define NAT_ENTRY_BITMAP_SIZE	((NAT_ENTRY_PER_BLOCK + 7) / 8)
>  
>  struct f2fs_nat_entry {
>  	__u8 version;		/* latest version of cached nat entry */
> -- 
> 2.8.2.295.g3f1c1d0

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V2] f2fs: introduce free nid bitmap
  2017-03-01 19:35 ` Jaegeuk Kim
@ 2017-03-02  1:30   ` Chao Yu
  0 siblings, 0 replies; 7+ messages in thread
From: Chao Yu @ 2017-03-02  1:30 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-f2fs-devel, linux-kernel, chao

Hi Jaegeuk,

On 2017/3/2 3:35, Jaegeuk Kim wrote:
> Hi Chao,
> 
> BTW, we need to add a shrinker for this as well, right?

Hmm...do we really have to?

As at most we will have 3900 NAT blocks, so
a. free_nid_bitmap:	3900 * 57 = ~217 KB
b. nat_block_bitmap:	3900 / 8 = ~487 BYTE
c. free_nid_count:	3900 * 2 = ~7 KB

Thanks,

> 
> Thanks,
> 
> On 02/23, Chao Yu wrote:
>> In scenario of intensively node allocation, free nids will be ran out
>> soon, then it needs to stop to load free nids by traversing NAT blocks,
>> in worse case, if NAT blocks does not be cached in memory, it generates
>> IOs which slows down our foreground operations.
>>
>> In order to speed up node allocation, in this patch we introduce a new
>> free_nid_bitmap array, so there is an bitmap table for each NAT block,
>> Once the NAT block is loaded, related bitmap cache will be switched on,
>> and bitmap will be set during traversing nat entries in NAT block, later
>> we can query and update nid usage status in memory completely.
>>
>> With such implementation, I expect performance of node allocation can be
>> improved in the long-term after filesystem image is mounted.
>>
>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>> ---
>>  fs/f2fs/debug.c         |   2 +
>>  fs/f2fs/f2fs.h          |   2 +
>>  fs/f2fs/node.c          | 109 ++++++++++++++++++++++++++++++++++++++++++++++--
>>  include/linux/f2fs_fs.h |   1 +
>>  4 files changed, 111 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
>> index 015ad2b73a92..a77df377e2e8 100644
>> --- a/fs/f2fs/debug.c
>> +++ b/fs/f2fs/debug.c
>> @@ -194,6 +194,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
>>  	si->base_mem += sizeof(struct f2fs_nm_info);
>>  	si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
>>  	si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
>> +	si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
>> +	si->base_mem += NM_I(sbi)->nat_blocks / 8;
>>  
>>  get_cache:
>>  	si->cache_mem = 0;
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index 1c9f0cc8f027..6a48e649bfef 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -556,6 +556,8 @@ struct f2fs_nm_info {
>>  	unsigned int nid_cnt[MAX_NID_LIST];	/* the number of free node id */
>>  	spinlock_t nid_list_lock;	/* protect nid lists ops */
>>  	struct mutex build_lock;	/* lock for build free nids */
>> +	unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
>> +	unsigned char *nat_block_bitmap;
>>  
>>  	/* for checkpoint */
>>  	char *nat_bitmap;		/* NAT bitmap pointer */
>> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
>> index b63bdb85ad66..f0f036c2d2fc 100644
>> --- a/fs/f2fs/node.c
>> +++ b/fs/f2fs/node.c
>> @@ -1821,14 +1821,32 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
>>  		kmem_cache_free(free_nid_slab, i);
>>  }
>>  
>> +void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set)
>> +{
>> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
>> +	unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
>> +	unsigned int nid_ofs = nid - START_NID(nid);
>> +
>> +	if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
>> +		return;
>> +
>> +	if (set)
>> +		set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
>> +	else
>> +		clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
>> +}
>> +
>>  static void scan_nat_page(struct f2fs_sb_info *sbi,
>>  			struct page *nat_page, nid_t start_nid)
>>  {
>>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>>  	struct f2fs_nat_block *nat_blk = page_address(nat_page);
>>  	block_t blk_addr;
>> +	unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
>>  	int i;
>>  
>> +	set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
>> +
>>  	i = start_nid % NAT_ENTRY_PER_BLOCK;
>>  
>>  	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
>> @@ -1840,7 +1858,51 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
>>  		f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
>>  		if (blk_addr == NULL_ADDR)
>>  			add_free_nid(sbi, start_nid, true);
>> +		update_free_nid_bitmap(sbi, start_nid, blk_addr == NULL_ADDR);
>> +	}
>> +}
>> +
>> +static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
>> +{
>> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
>> +	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>> +	struct f2fs_journal *journal = curseg->journal;
>> +	unsigned int i, idx;
>> +	unsigned int target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
>> +
>> +	down_read(&nm_i->nat_tree_lock);
>> +
>> +	for (i = 0; i < nm_i->nat_blocks; i++) {
>> +		if (!test_bit_le(i, nm_i->nat_block_bitmap))
>> +			continue;
>> +		for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
>> +			nid_t nid;
>> +
>> +			if (!test_bit_le(idx, nm_i->free_nid_bitmap[i]))
>> +				continue;
>> +
>> +			nid = i * NAT_ENTRY_PER_BLOCK + idx;
>> +			add_free_nid(sbi, nid, true);
>> +
>> +			if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
>> +				goto out;
>> +		}
>> +	}
>> +out:
>> +	down_read(&curseg->journal_rwsem);
>> +	for (i = 0; i < nats_in_cursum(journal); i++) {
>> +		block_t addr;
>> +		nid_t nid;
>> +
>> +		addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
>> +		nid = le32_to_cpu(nid_in_journal(journal, i));
>> +		if (addr == NULL_ADDR)
>> +			add_free_nid(sbi, nid, true);
>> +		else
>> +			remove_free_nid(sbi, nid);
>>  	}
>> +	up_read(&curseg->journal_rwsem);
>> +	up_read(&nm_i->nat_tree_lock);
>>  }
>>  
>>  static int scan_nat_bits(struct f2fs_sb_info *sbi)
>> @@ -1910,9 +1972,17 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>>  	if (!sync && !available_free_memory(sbi, FREE_NIDS))
>>  		return;
>>  
>> -	/* try to find free nids with nat_bits */
>> -	if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
>> -		return;
>> +	if (!mount) {
>> +		/* try to find free nids in free_nid_bitmap */
>> +		scan_free_nid_bits(sbi);
>> +
>> +		if (nm_i->nid_cnt[FREE_NID_LIST])
>> +			return;
>> +
>> +		/* try to find free nids with nat_bits */
>> +		if (!scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
>> +			return;
>> +	}
>>  
>>  	/* find next valid candidate */
>>  	if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
>> @@ -2005,6 +2075,9 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
>>  		i->state = NID_ALLOC;
>>  		__insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
>>  		nm_i->available_nids--;
>> +
>> +		update_free_nid_bitmap(sbi, *nid, false);
>> +
>>  		spin_unlock(&nm_i->nid_list_lock);
>>  		return true;
>>  	}
>> @@ -2059,6 +2132,8 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
>>  
>>  	nm_i->available_nids++;
>>  
>> +	update_free_nid_bitmap(sbi, nid, true);
>> +
>>  	spin_unlock(&nm_i->nid_list_lock);
>>  
>>  	if (need_free)
>> @@ -2387,6 +2462,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>>  			add_free_nid(sbi, nid, false);
>>  			spin_lock(&NM_I(sbi)->nid_list_lock);
>>  			NM_I(sbi)->available_nids++;
>> +			update_free_nid_bitmap(sbi, nid, true);
>> +			spin_unlock(&NM_I(sbi)->nid_list_lock);
>> +		} else {
>> +			spin_lock(&NM_I(sbi)->nid_list_lock);
>> +			update_free_nid_bitmap(sbi, nid, false);
>>  			spin_unlock(&NM_I(sbi)->nid_list_lock);
>>  		}
>>  	}
>> @@ -2556,6 +2636,22 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
>>  	return 0;
>>  }
>>  
>> +int init_free_nid_cache(struct f2fs_sb_info *sbi)
>> +{
>> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
>> +
>> +	nm_i->free_nid_bitmap = f2fs_kvzalloc(nm_i->nat_blocks *
>> +					NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL);
>> +	if (!nm_i->free_nid_bitmap)
>> +		return -ENOMEM;
>> +
>> +	nm_i->nat_block_bitmap = f2fs_kvzalloc(nm_i->nat_blocks / 8,
>> +								GFP_KERNEL);
>> +	if (!nm_i->nat_block_bitmap)
>> +		return -ENOMEM;
>> +	return 0;
>> +}
>> +
>>  int build_node_manager(struct f2fs_sb_info *sbi)
>>  {
>>  	int err;
>> @@ -2568,6 +2664,10 @@ int build_node_manager(struct f2fs_sb_info *sbi)
>>  	if (err)
>>  		return err;
>>  
>> +	err = init_free_nid_cache(sbi);
>> +	if (err)
>> +		return err;
>> +
>>  	build_free_nids(sbi, true, true);
>>  	return 0;
>>  }
>> @@ -2626,6 +2726,9 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
>>  	}
>>  	up_write(&nm_i->nat_tree_lock);
>>  
>> +	kvfree(nm_i->nat_block_bitmap);
>> +	kvfree(nm_i->free_nid_bitmap);
>> +
>>  	kfree(nm_i->nat_bitmap);
>>  	kfree(nm_i->nat_bits);
>>  #ifdef CONFIG_F2FS_CHECK_FS
>> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
>> index 1c92ace2e8f8..e2d239ed4c60 100644
>> --- a/include/linux/f2fs_fs.h
>> +++ b/include/linux/f2fs_fs.h
>> @@ -279,6 +279,7 @@ struct f2fs_node {
>>   * For NAT entries
>>   */
>>  #define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
>> +#define NAT_ENTRY_BITMAP_SIZE	((NAT_ENTRY_PER_BLOCK + 7) / 8)
>>  
>>  struct f2fs_nat_entry {
>>  	__u8 version;		/* latest version of cached nat entry */
>> -- 
>> 2.8.2.295.g3f1c1d0
> 
> .
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2017-03-02  1:41 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-23  2:53 [PATCH V2] f2fs: introduce free nid bitmap Chao Yu
2017-02-25  6:25 ` Chao Yu
2017-02-25 19:02   ` Jaegeuk Kim
2017-02-27  3:11     ` Chao Yu
2017-02-27 22:21       ` Jaegeuk Kim
2017-03-01 19:35 ` Jaegeuk Kim
2017-03-02  1:30   ` Chao Yu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).