* [PATCH] f2fs: introduce nid cache
@ 2017-01-22  9:53 ` Chao Yu
  0 siblings, 0 replies; 15+ messages in thread
From: Chao Yu @ 2017-01-22  9:53 UTC (permalink / raw)
  To: jaegeuk; +Cc: linux-f2fs-devel, linux-kernel, chao, Chao Yu

In scenarios of intensive node allocation, free nids run out quickly,
and then we have to stop and reload free nids by traversing NAT blocks.
In the worst case, if the NAT blocks are not cached in memory, this
generates IO which slows down our foreground operations.

In order to speed up node allocation, this patch introduces a new mount
option named "nid_cache". When this option is turned on, all nat entries
in the NAT blocks are loaded at mount time and all free nids are
organized in a bitmap. Any operation related to free nids then queries
and updates this prebuilt bitmap instead of reading and looking up NAT
blocks, so the performance of node allocation can be improved.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
---
 Documentation/filesystems/f2fs.txt |   3 ++
 fs/f2fs/f2fs.h                     |   2 +
 fs/f2fs/node.c                     | 107 +++++++++++++++++++++++++++++++++++++
 fs/f2fs/super.c                    |   5 ++
 4 files changed, 117 insertions(+)

diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index d99faced79cb..3320a4976c12 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -159,6 +159,9 @@ mode=%s                Control block allocation mode which supports "adaptive"
                        writes towards main area.
 io_bits=%u             Set the bit size of write IO requests. It should be set
                        with "mode=lfs".
+nid_cache              Enable free nid bitmap cache which records all nid usage
+                       status, it can improve performance of continuously node
+                       allocating.
 
 ================================================================================
 DEBUGFS ENTRIES
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 620f6be060a5..c696b5eee1f0 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -83,6 +83,7 @@ extern char *fault_name[FAULT_MAX];
 #define F2FS_MOUNT_FAULT_INJECTION	0x00010000
 #define F2FS_MOUNT_ADAPTIVE		0x00020000
 #define F2FS_MOUNT_LFS			0x00040000
+#define F2FS_MOUNT_NID_CACHE		0x00080000
 
 #define clear_opt(sbi, option)	(sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
 #define set_opt(sbi, option)	(sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -551,6 +552,7 @@ struct f2fs_nm_info {
 	unsigned int nid_cnt[MAX_NID_LIST];	/* the number of free node id */
 	spinlock_t nid_list_lock;	/* protect nid lists ops */
 	struct mutex build_lock;	/* lock for build free nids */
+	unsigned long *free_nid_bitmap;	/* indicate nid is free or not */
 
 	/* for checkpoint */
 	char *nat_bitmap;		/* NAT bitmap pointer */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 69c38a0022e7..5cdf05919e9f 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1819,6 +1819,24 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
 	}
 }
 
+void load_nids_from_bitmap(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	nid_t nid = 0;
+	unsigned int count = 0;
+
+	down_read(&nm_i->nat_tree_lock);
+	do {
+		nid = find_next_bit(nm_i->free_nid_bitmap, nm_i->max_nid, nid);
+		if (nid < nm_i->max_nid)
+			count += add_free_nid(sbi, nid, true);
+
+		if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK)
+			break;
+	} while (++nid < nm_i->max_nid);
+	up_read(&nm_i->nat_tree_lock);
+}
+
 static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1834,6 +1852,9 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
 	if (!sync && !available_free_memory(sbi, FREE_NIDS))
 		return;
 
+	if (test_opt(sbi, NID_CACHE))
+		return load_nids_from_bitmap(sbi);
+
 	/* readahead nat pages to be scanned */
 	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
 							META_NAT, true);
@@ -1915,6 +1936,11 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
 		i->state = NID_ALLOC;
 		__insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
 		nm_i->available_nids--;
+
+		if (test_opt(sbi, NID_CACHE)) {
+			f2fs_bug_on(sbi, !test_bit(*nid, nm_i->free_nid_bitmap));
+			clear_bit(*nid, nm_i->free_nid_bitmap);
+		}
 		spin_unlock(&nm_i->nid_list_lock);
 		return true;
 	}
@@ -1969,6 +1995,9 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
 
 	nm_i->available_nids++;
 
+	if (test_opt(sbi, NID_CACHE))
+		set_bit(nid, nm_i->free_nid_bitmap);
+
 	spin_unlock(&nm_i->nid_list_lock);
 
 	if (need_free)
@@ -2257,6 +2286,13 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 			add_free_nid(sbi, nid, false);
 			spin_lock(&NM_I(sbi)->nid_list_lock);
 			NM_I(sbi)->available_nids++;
+			if (test_opt(sbi, NID_CACHE))
+				set_bit(nid, NM_I(sbi)->free_nid_bitmap);
+			spin_unlock(&NM_I(sbi)->nid_list_lock);
+		} else {
+			spin_lock(&NM_I(sbi)->nid_list_lock);
+			if (test_opt(sbi, NID_CACHE))
+				clear_bit(nid, NM_I(sbi)->free_nid_bitmap);
 			spin_unlock(&NM_I(sbi)->nid_list_lock);
 		}
 	}
@@ -2374,6 +2410,71 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
 	return 0;
 }
 
+int init_free_nid_cache(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
+	struct f2fs_journal *journal = curseg->journal;
+	struct f2fs_nat_entry *nat;
+	unsigned int i, readed, start_blk = 0, end_blk;
+	unsigned int max_blk = NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid);
+	nid_t nid = 0;
+
+	if (!test_opt(sbi, NID_CACHE))
+		return 0;
+
+	nm_i->free_nid_bitmap = f2fs_kvzalloc(nm_i->max_nid / 8, GFP_KERNEL);
+	if (!nm_i->free_nid_bitmap)
+		return -ENOMEM;
+
+	do {
+		readed = ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid),
+					FREE_NID_PAGES * 8, META_NAT, true);
+
+		end_blk = start_blk + readed;
+
+		for (; start_blk < end_blk; start_blk++) {
+			struct f2fs_nat_block *nat_blk;
+			struct page *page;
+			nid_t start_nid = START_NID(nid);
+
+			page = get_current_nat_page(sbi, nid);
+			nat_blk = (struct f2fs_nat_block *)page_address(page);
+
+			for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++, nid++) {
+				if (unlikely(nid >= nm_i->max_nid))
+					break;
+
+				if (unlikely(nid == 0))
+					continue;
+
+				nat = &nat_blk->entries[nid - start_nid];
+				if (le32_to_cpu(nat->block_addr) == NULL_ADDR)
+					set_bit(nid, nm_i->free_nid_bitmap);
+				else
+					clear_bit(nid, nm_i->free_nid_bitmap);
+			}
+
+			f2fs_put_page(page, 1);
+
+		}
+	} while (start_blk < max_blk);
+
+	down_read(&curseg->journal_rwsem);
+	for (i = 0; i < nats_in_cursum(journal); i++) {
+		nid_t nid = le32_to_cpu(nid_in_journal(journal, i));
+
+		nat = &nat_in_journal(journal, i);
+		if (le32_to_cpu(nat->block_addr) == NULL_ADDR)
+			set_bit(nid, nm_i->free_nid_bitmap);
+		else
+			clear_bit(nid, nm_i->free_nid_bitmap);
+	}
+	up_read(&curseg->journal_rwsem);
+
+	return 0;
+}
+
 int build_node_manager(struct f2fs_sb_info *sbi)
 {
 	int err;
@@ -2386,6 +2487,10 @@ int build_node_manager(struct f2fs_sb_info *sbi)
 	if (err)
 		return err;
 
+	err = init_free_nid_cache(sbi);
+	if (err)
+		return err;
+
 	build_free_nids(sbi, true);
 	return 0;
 }
@@ -2444,6 +2549,8 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
 	}
 	up_write(&nm_i->nat_tree_lock);
 
+	kfree(nm_i->free_nid_bitmap);
+
 	kfree(nm_i->nat_bitmap);
 #ifdef CONFIG_F2FS_CHECK_FS
 	kfree(nm_i->nat_bitmap_mir);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 2ddd2dc50b08..3c84c27d026a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -105,6 +105,7 @@ enum {
 	Opt_fault_injection,
 	Opt_lazytime,
 	Opt_nolazytime,
+	Opt_nid_cache,
 	Opt_err,
 };
 
@@ -138,6 +139,7 @@ static match_table_t f2fs_tokens = {
 	{Opt_fault_injection, "fault_injection=%u"},
 	{Opt_lazytime, "lazytime"},
 	{Opt_nolazytime, "nolazytime"},
+	{Opt_nid_cache, "nid_cache"},
 	{Opt_err, NULL},
 };
 
@@ -565,6 +567,9 @@ static int parse_options(struct super_block *sb, char *options)
 		case Opt_nolazytime:
 			sb->s_flags &= ~MS_LAZYTIME;
 			break;
+		case Opt_nid_cache:
+			set_opt(sbi, NID_CACHE);
+			break;
 		default:
 			f2fs_msg(sb, KERN_ERR,
 				"Unrecognized mount option \"%s\" or missing value",
-- 
2.8.2.295.g3f1c1d0

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] f2fs: introduce nid cache
  2017-01-22  9:53 ` Chao Yu
@ 2017-01-24  4:35   ` Jaegeuk Kim
  -1 siblings, 0 replies; 15+ messages in thread
From: Jaegeuk Kim @ 2017-01-24  4:35 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-f2fs-devel, linux-kernel, chao

Hi Chao,

On 01/22, Chao Yu wrote:
> In scenario of intensively node allocation, free nids will be ran out
> soon, then it needs to stop to load free nids by traversing NAT blocks,
> in worse case, if NAT blocks does not be cached in memory, it generates
> IOs which slows down our foreground operations.
> 
> In order to speed up node allocation, in this patch we introduce a new
> option named "nid cache", when turns on this option, it will load all
> nat entries in NAT blocks when doing mount, and organize all free nids
> in a bitmap, for any operations related to free nid, we will query and
> set the new prebuilded bitmap instead of reading and lookuping NAT
> blocks, so performance of node allocation can be improved.
> 

How does this affect mount time and memory consumption? IMO, if those do not
raise huge concerns, we would be able to consider just replacing current free
nid list with this bitmap.

Thanks,

> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> ---
>  Documentation/filesystems/f2fs.txt |   3 ++
>  fs/f2fs/f2fs.h                     |   2 +
>  fs/f2fs/node.c                     | 107 +++++++++++++++++++++++++++++++++++++
>  fs/f2fs/super.c                    |   5 ++
>  4 files changed, 117 insertions(+)
> 
> diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
> index d99faced79cb..3320a4976c12 100644
> --- a/Documentation/filesystems/f2fs.txt
> +++ b/Documentation/filesystems/f2fs.txt
> @@ -159,6 +159,9 @@ mode=%s                Control block allocation mode which supports "adaptive"
>                         writes towards main area.
>  io_bits=%u             Set the bit size of write IO requests. It should be set
>                         with "mode=lfs".
> +nid_cache              Enable free nid bitmap cache which records all nid usage
> +                       status, it can improve performance of continuously node
> +                       allocating.
>  
>  ================================================================================
>  DEBUGFS ENTRIES
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 620f6be060a5..c696b5eee1f0 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -83,6 +83,7 @@ extern char *fault_name[FAULT_MAX];
>  #define F2FS_MOUNT_FAULT_INJECTION	0x00010000
>  #define F2FS_MOUNT_ADAPTIVE		0x00020000
>  #define F2FS_MOUNT_LFS			0x00040000
> +#define F2FS_MOUNT_NID_CACHE		0x00080000
>  
>  #define clear_opt(sbi, option)	(sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
>  #define set_opt(sbi, option)	(sbi->mount_opt.opt |= F2FS_MOUNT_##option)
> @@ -551,6 +552,7 @@ struct f2fs_nm_info {
>  	unsigned int nid_cnt[MAX_NID_LIST];	/* the number of free node id */
>  	spinlock_t nid_list_lock;	/* protect nid lists ops */
>  	struct mutex build_lock;	/* lock for build free nids */
> +	unsigned long *free_nid_bitmap;	/* indicate nid is free or not */
>  
>  	/* for checkpoint */
>  	char *nat_bitmap;		/* NAT bitmap pointer */
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 69c38a0022e7..5cdf05919e9f 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -1819,6 +1819,24 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
>  	}
>  }
>  
> +void load_nids_from_bitmap(struct f2fs_sb_info *sbi)
> +{
> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> +	nid_t nid = 0;
> +	unsigned int count = 0;
> +
> +	down_read(&nm_i->nat_tree_lock);
> +	do {
> +		nid = find_next_bit(nm_i->free_nid_bitmap, nm_i->max_nid, nid);
> +		if (nid < nm_i->max_nid)
> +			count += add_free_nid(sbi, nid, true);
> +
> +		if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK)
> +			break;
> +	} while (++nid < nm_i->max_nid);
> +	up_read(&nm_i->nat_tree_lock);
> +}
> +
>  static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> @@ -1834,6 +1852,9 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
>  	if (!sync && !available_free_memory(sbi, FREE_NIDS))
>  		return;
>  
> +	if (test_opt(sbi, NID_CACHE))
> +		return load_nids_from_bitmap(sbi);
> +
>  	/* readahead nat pages to be scanned */
>  	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
>  							META_NAT, true);
> @@ -1915,6 +1936,11 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
>  		i->state = NID_ALLOC;
>  		__insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
>  		nm_i->available_nids--;
> +
> +		if (test_opt(sbi, NID_CACHE)) {
> +			f2fs_bug_on(sbi, !test_bit(*nid, nm_i->free_nid_bitmap));
> +			clear_bit(*nid, nm_i->free_nid_bitmap);
> +		}
>  		spin_unlock(&nm_i->nid_list_lock);
>  		return true;
>  	}
> @@ -1969,6 +1995,9 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
>  
>  	nm_i->available_nids++;
>  
> +	if (test_opt(sbi, NID_CACHE))
> +		set_bit(nid, nm_i->free_nid_bitmap);
> +
>  	spin_unlock(&nm_i->nid_list_lock);
>  
>  	if (need_free)
> @@ -2257,6 +2286,13 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>  			add_free_nid(sbi, nid, false);
>  			spin_lock(&NM_I(sbi)->nid_list_lock);
>  			NM_I(sbi)->available_nids++;
> +			if (test_opt(sbi, NID_CACHE))
> +				set_bit(nid, NM_I(sbi)->free_nid_bitmap);
> +			spin_unlock(&NM_I(sbi)->nid_list_lock);
> +		} else {
> +			spin_lock(&NM_I(sbi)->nid_list_lock);
> +			if (test_opt(sbi, NID_CACHE))
> +				clear_bit(nid, NM_I(sbi)->free_nid_bitmap);
>  			spin_unlock(&NM_I(sbi)->nid_list_lock);
>  		}
>  	}
> @@ -2374,6 +2410,71 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
>  	return 0;
>  }
>  
> +int init_free_nid_cache(struct f2fs_sb_info *sbi)
> +{
> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> +	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> +	struct f2fs_journal *journal = curseg->journal;
> +	struct f2fs_nat_entry *nat;
> +	unsigned int i, readed, start_blk = 0, end_blk;
> +	unsigned int max_blk = NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid);
> +	nid_t nid = 0;
> +
> +	if (!test_opt(sbi, NID_CACHE))
> +		return 0;
> +
> +	nm_i->free_nid_bitmap = f2fs_kvzalloc(nm_i->max_nid / 8, GFP_KERNEL);
> +	if (!nm_i->free_nid_bitmap)
> +		return -ENOMEM;
> +
> +	do {
> +		readed = ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid),
> +					FREE_NID_PAGES * 8, META_NAT, true);
> +
> +		end_blk = start_blk + readed;
> +
> +		for (; start_blk < end_blk; start_blk++) {
> +			struct f2fs_nat_block *nat_blk;
> +			struct page *page;
> +			nid_t start_nid = START_NID(nid);
> +
> +			page = get_current_nat_page(sbi, nid);
> +			nat_blk = (struct f2fs_nat_block *)page_address(page);
> +
> +			for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++, nid++) {
> +				if (unlikely(nid >= nm_i->max_nid))
> +					break;
> +
> +				if (unlikely(nid == 0))
> +					continue;
> +
> +				nat = &nat_blk->entries[nid - start_nid];
> +				if (le32_to_cpu(nat->block_addr) == NULL_ADDR)
> +					set_bit(nid, nm_i->free_nid_bitmap);
> +				else
> +					clear_bit(nid, nm_i->free_nid_bitmap);
> +			}
> +
> +			f2fs_put_page(page, 1);
> +
> +		}
> +	} while (start_blk < max_blk);
> +
> +	down_read(&curseg->journal_rwsem);
> +	for (i = 0; i < nats_in_cursum(journal); i++) {
> +		nid_t nid = le32_to_cpu(nid_in_journal(journal, i));
> +
> +		nat = &nat_in_journal(journal, i);
> +		if (le32_to_cpu(nat->block_addr) == NULL_ADDR)
> +			set_bit(nid, nm_i->free_nid_bitmap);
> +		else
> +			clear_bit(nid, nm_i->free_nid_bitmap);
> +	}
> +	up_read(&curseg->journal_rwsem);
> +
> +	return 0;
> +}
> +
>  int build_node_manager(struct f2fs_sb_info *sbi)
>  {
>  	int err;
> @@ -2386,6 +2487,10 @@ int build_node_manager(struct f2fs_sb_info *sbi)
>  	if (err)
>  		return err;
>  
> +	err = init_free_nid_cache(sbi);
> +	if (err)
> +		return err;
> +
>  	build_free_nids(sbi, true);
>  	return 0;
>  }
> @@ -2444,6 +2549,8 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
>  	}
>  	up_write(&nm_i->nat_tree_lock);
>  
> +	kfree(nm_i->free_nid_bitmap);
> +
>  	kfree(nm_i->nat_bitmap);
>  #ifdef CONFIG_F2FS_CHECK_FS
>  	kfree(nm_i->nat_bitmap_mir);
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 2ddd2dc50b08..3c84c27d026a 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -105,6 +105,7 @@ enum {
>  	Opt_fault_injection,
>  	Opt_lazytime,
>  	Opt_nolazytime,
> +	Opt_nid_cache,
>  	Opt_err,
>  };
>  
> @@ -138,6 +139,7 @@ static match_table_t f2fs_tokens = {
>  	{Opt_fault_injection, "fault_injection=%u"},
>  	{Opt_lazytime, "lazytime"},
>  	{Opt_nolazytime, "nolazytime"},
> +	{Opt_nid_cache, "nid_cache"},
>  	{Opt_err, NULL},
>  };
>  
> @@ -565,6 +567,9 @@ static int parse_options(struct super_block *sb, char *options)
>  		case Opt_nolazytime:
>  			sb->s_flags &= ~MS_LAZYTIME;
>  			break;
> +		case Opt_nid_cache:
> +			set_opt(sbi, NID_CACHE);
> +			break;
>  		default:
>  			f2fs_msg(sb, KERN_ERR,
>  				"Unrecognized mount option \"%s\" or missing value",
> -- 
> 2.8.2.295.g3f1c1d0

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] f2fs: introduce nid cache
  2017-01-24  4:35   ` Jaegeuk Kim
@ 2017-02-07  7:24     ` Chao Yu
  -1 siblings, 0 replies; 15+ messages in thread
From: Chao Yu @ 2017-02-07  7:24 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-f2fs-devel, linux-kernel, chao

Hi Jaegeuk,

Happy Chinese New Year! :)

On 2017/1/24 12:35, Jaegeuk Kim wrote:
> Hi Chao,
> 
> On 01/22, Chao Yu wrote:
>> In scenario of intensively node allocation, free nids will be ran out
>> soon, then it needs to stop to load free nids by traversing NAT blocks,
>> in worse case, if NAT blocks does not be cached in memory, it generates
>> IOs which slows down our foreground operations.
>>
>> In order to speed up node allocation, in this patch we introduce a new
>> option named "nid cache", when turns on this option, it will load all
>> nat entries in NAT blocks when doing mount, and organize all free nids
>> in a bitmap, for any operations related to free nid, we will query and
>> set the new prebuilded bitmap instead of reading and lookuping NAT
>> blocks, so performance of node allocation can be improved.
>>
> 
> How does this affect mount time and memory consumption?

Sorry for the delay.

Let me figure out some numbers later.

> IMO, if those do not
> raise huge concerns, we would be able to consider just replacing current free
> nid list with this bitmap.

Yup, I agree with you.

Thanks,

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] f2fs: introduce nid cache
  2017-02-07  7:24     ` Chao Yu
@ 2017-02-08 15:25       ` Chao Yu
  -1 siblings, 0 replies; 15+ messages in thread
From: Chao Yu @ 2017-02-08 15:25 UTC (permalink / raw)
  To: Chao Yu, Jaegeuk Kim; +Cc: linux-f2fs-devel, linux-kernel

On 2017/2/7 15:24, Chao Yu wrote:
> Hi Jaegeuk,
> 
> Happy Chinese New Year! :)
> 
> On 2017/1/24 12:35, Jaegeuk Kim wrote:
>> Hi Chao,
>>
>> On 01/22, Chao Yu wrote:
>>> In scenario of intensively node allocation, free nids will be ran out
>>> soon, then it needs to stop to load free nids by traversing NAT blocks,
>>> in worse case, if NAT blocks does not be cached in memory, it generates
>>> IOs which slows down our foreground operations.
>>>
>>> In order to speed up node allocation, in this patch we introduce a new
>>> option named "nid cache", when turns on this option, it will load all
>>> nat entries in NAT blocks when doing mount, and organize all free nids
>>> in a bitmap, for any operations related to free nid, we will query and
>>> set the new prebuilded bitmap instead of reading and lookuping NAT
>>> blocks, so performance of node allocation can be improved.
>>>
>>
>> How does this affect mount time and memory consumption?
> 
> Sorry for the delay.
> 
> Let me figure out some numbers later.

a. mount time

I chose a slow device (a Kingston 16GB SD card) to see how this option affects
mount time when there is not much bandwidth at the low level.

Before the test, I changed the readahead window size of NAT pages from
FREE_NID_PAGES * 8 to sbi->blocks_per_seg for better readahead performance, so
the result is:

time mount -t f2fs -o nid_cache /dev/sde /mnt/f2fs/

before:
real	0m0.204s
user	0m0.004s
sys	0m0.020s

after:
real	0m3.792s
user	0m0.000s
sys	0m0.140s

b. memory consumption

For a 16GB image there are 34 NAT pages in total, so the memory footprint is:
34 / 2 * 512 * 455 / 8 = 495040 bytes = 483.4 KB

The memory footprint grows linearly with the total valid user blocks in the
image, and at most it will consume 3900 * 8 * 455 / 8 = 1774500 bytes = 1732.9 KB
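
For reference, a sketch of where the first number comes from, assuming the
34 / 2 * 512 * 455 product above is max_nid for this image; the allocation
itself is the one the patch already does in init_free_nid_cache():

	/*
	 * One bit per nid, so for the 16GB image above:
	 *   max_nid      = 34 / 2 * 512 * 455 = 3960320 nids
	 *   bitmap bytes = max_nid / 8        = 495040  (~483.4 KB)
	 */
	nm_i->free_nid_bitmap = f2fs_kvzalloc(nm_i->max_nid / 8, GFP_KERNEL);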

Thanks,

> 
>> IMO, if those do not
>> raise huge concerns, we would be able to consider just replacing current free
>> nid list with this bitmap.
> 
> Yup, I agree with you.
> 
> Thanks,
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] f2fs: introduce nid cache
  2017-02-08 15:25       ` Chao Yu
@ 2017-02-09  1:28         ` Jaegeuk Kim
  -1 siblings, 0 replies; 15+ messages in thread
From: Jaegeuk Kim @ 2017-02-09  1:28 UTC (permalink / raw)
  To: Chao Yu; +Cc: Chao Yu, linux-f2fs-devel, linux-kernel

On 02/08, Chao Yu wrote:
> On 2017/2/7 15:24, Chao Yu wrote:
> > Hi Jaegeuk,
> > 
> > Happy Chinese New Year! :)
> > 
> > On 2017/1/24 12:35, Jaegeuk Kim wrote:
> >> Hi Chao,
> >>
> >> On 01/22, Chao Yu wrote:
> >>> In scenario of intensively node allocation, free nids will be ran out
> >>> soon, then it needs to stop to load free nids by traversing NAT blocks,
> >>> in worse case, if NAT blocks does not be cached in memory, it generates
> >>> IOs which slows down our foreground operations.
> >>>
> >>> In order to speed up node allocation, in this patch we introduce a new
> >>> option named "nid cache", when turns on this option, it will load all
> >>> nat entries in NAT blocks when doing mount, and organize all free nids
> >>> in a bitmap, for any operations related to free nid, we will query and
> >>> set the new prebuilded bitmap instead of reading and lookuping NAT
> >>> blocks, so performance of node allocation can be improved.
> >>>
> >>
> >> How does this affect mount time and memory consumption?
> > 
> > Sorry for the delay.
> > 
> > Let me figure out some numbers later.
> 
> a. mount time
> 
> I choose slow device (Kingston 16GB SD card) to see how this option affect mount
> time when there is not enough bandwidth in low level,
> 
> Before the test, I change readahead window size of NAT pages from FREE_NID_PAGES
> * 8 to sbi->blocks_per_seg for better ra performance, so the result is:
> 
> time mount -t f2fs -o nid_cache /dev/sde /mnt/f2fs/
> 
> before:
> real	0m0.204s
> user	0m0.004s
> sys	0m0.020s
> 
> after:
> real	0m3.792s

Oops, we can't accept this even for just 16GB, right? :(

> user	0m0.000s
> sys	0m0.140s
> 
> b. memory consumption
> 
> For 16GB size image, there is total 34 NAT pages, so memory footprint is:
> 34 / 2 * 512 * 455 / 8 = 495040 bytes = 483.4 KB
> 
> Increasing of memory footprint is liner with total user valid blocks in image,
> and at most it will eat 3900 * 8 * 455 / 8 = 1774500 bytes = 1732.9 KB

How about adding two bitmaps covering all the NAT pages and storing them in the
checkpoint pack, which needs at most two additional blocks?

1. a full-assigned NAT bitmap, where 1 means the block has no free nids.
2. an empty NAT bitmap, where 1 means all the nids in that block are free.

With these bitmaps, build_free_nids() can scan from the 0th NAT block like this:

	if (full-assigned NAT)
		skip;
	else if (empty NAT)
		add_free_nid(all);
	else
		read NAT page and add_free_nid();

flush_nat_entries() has to update these bitmaps accordingly.

With this approach, I expect we can reuse nids as much as possible while
getting cached NAT pages more effectively.
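
For illustration, a minimal sketch of such a scan loop; scan_free_nids(),
full_nat_bits and empty_nat_bits are hypothetical names (not from any posted
patch), the existing scan_nat_page() path is assumed for the mixed case, and
locking plus the free-nid count limit are omitted:

	static void scan_free_nids(struct f2fs_sb_info *sbi)
	{
		struct f2fs_nm_info *nm_i = NM_I(sbi);
		unsigned int nat_blk, max_blk = NAT_BLOCK_OFFSET(nm_i->max_nid);

		for (nat_blk = 0; nat_blk < max_blk; nat_blk++) {
			nid_t nid, start_nid = nat_blk * NAT_ENTRY_PER_BLOCK;
			struct page *page;

			/* full-assigned NAT: no free nids in this block */
			if (test_bit(nat_blk, nm_i->full_nat_bits))
				continue;

			/* empty NAT: every nid in this block is free */
			if (test_bit(nat_blk, nm_i->empty_nat_bits)) {
				for (nid = start_nid;
				     nid < start_nid + NAT_ENTRY_PER_BLOCK; nid++)
					add_free_nid(sbi, nid, true);
				continue;
			}

			/* partially used block: read the NAT page as before */
			page = get_current_nat_page(sbi, start_nid);
			scan_nat_page(sbi, page, start_nid);
			f2fs_put_page(page, 1);
		}
	}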

Thanks,

> 
> Thanks,
> 
> > 
> >> IMO, if those do not
> >> raise huge concerns, we would be able to consider just replacing current free
> >> nid list with this bitmap.
> > 
> > Yup, I agree with you.
> > 
> > Thanks,
> > 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] f2fs: introduce nid cache
  2017-02-09  1:28         ` Jaegeuk Kim
@ 2017-02-11  6:17           ` Chao Yu
  -1 siblings, 0 replies; 15+ messages in thread
From: Chao Yu @ 2017-02-11  6:17 UTC (permalink / raw)
  To: Jaegeuk Kim, Chao Yu; +Cc: linux-f2fs-devel, linux-kernel, houpengyang

On 2017/2/9 9:28, Jaegeuk Kim wrote:
> On 02/08, Chao Yu wrote:
>> On 2017/2/7 15:24, Chao Yu wrote:
>>> Hi Jaegeuk,
>>>
>>> Happy Chinese New Year! :)
>>>
>>> On 2017/1/24 12:35, Jaegeuk Kim wrote:
>>>> Hi Chao,
>>>>
>>>> On 01/22, Chao Yu wrote:
>>>>> In scenario of intensively node allocation, free nids will be ran out
>>>>> soon, then it needs to stop to load free nids by traversing NAT blocks,
>>>>> in worse case, if NAT blocks does not be cached in memory, it generates
>>>>> IOs which slows down our foreground operations.
>>>>>
>>>>> In order to speed up node allocation, in this patch we introduce a new
>>>>> option named "nid cache", when turns on this option, it will load all
>>>>> nat entries in NAT blocks when doing mount, and organize all free nids
>>>>> in a bitmap, for any operations related to free nid, we will query and
>>>>> set the new prebuilded bitmap instead of reading and lookuping NAT
>>>>> blocks, so performance of node allocation can be improved.
>>>>>
>>>>
>>>> How does this affect mount time and memory consumption?
>>>
>>> Sorry for the delay.
>>>
>>> Let me figure out some numbers later.
>>
>> a. mount time
>>
>> I choose slow device (Kingston 16GB SD card) to see how this option affect mount
>> time when there is not enough bandwidth in low level,
>>
>> Before the test, I change readahead window size of NAT pages from FREE_NID_PAGES
>> * 8 to sbi->blocks_per_seg for better ra performance, so the result is:
>>
>> time mount -t f2fs -o nid_cache /dev/sde /mnt/f2fs/
>>
>> before:
>> real	0m0.204s
>> user	0m0.004s
>> sys	0m0.020s
>>
>> after:
>> real	0m3.792s
> 
> Oops, we can't accept this even only for 16GB, right? :(

Pengyang Hou helped test this patch on a 64GB UFS device; the mount time
results are:

Before:	110 ms
After:	770 ms

So these test results show that we'd better not enable the nid_cache option by
default in upstream, since it obviously slows down the mount procedure, but
users can still decide whether or not to use it depending on their
requirements, e.g.:
a. For the read-only case, this option is not needed at all.
b. For batch node allocation/deletion cases, this option is recommended.

> 
>> user	0m0.000s
>> sys	0m0.140s
>>
>> b. memory consumption
>>
>> For 16GB size image, there is total 34 NAT pages, so memory footprint is:
>> 34 / 2 * 512 * 455 / 8 = 495040 bytes = 483.4 KB
>>
>> Increasing of memory footprint is liner with total user valid blocks in image,
>> and at most it will eat 3900 * 8 * 455 / 8 = 1774500 bytes = 1732.9 KB
> 
> How about adding two bitmaps for whole NAT pages and storing the bitmaps in
> checkpoint pack, which needs at most two blocks additionally?
> 
> 1. full-assigned NAT bitmap, where 1 means there is no free nids.
> 2. empty NAT bitmap, where 1 means whole there-in nids are free.
> 
> With these bitmaps, build_free_nids() can scan from 0'th NAT block by:
> 
> 	if (full-assigned NAT)
> 		skip;
> 	else if (empty NAT)
> 		add_free_nid(all);
> 	else
> 		read NAT page and add_free_nid();
> 
> The flush_nat_entries() has to change its bitmaps accordingly.
> 
> With this approach, I expect we can reuse nids as much as possible while
> getting cached NAT pages more effectively.

Good idea! :)

And there is another approach which does not need any change to the disk layout:

We can allocate a free_nid_bitmap[NAT_BLOCKS_COUNT][455] array, where each
bitmap indicates the usage of free nids in one NAT block, and introduce another
nat_block_bitmap[NAT_BLOCKS_COUNT] to indicate whether each NAT block has been
loaded or not; if it has been loaded, we can do the lookup in the corresponding
free_nid_bitmap. So I expect that we will load each NAT block from disk at most
once (a rough sketch follows below), which will:
- not increase mount latency
- build the bitmap in memory after a NAT block is loaded from disk, so that
later lookups are served from memory
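
A sketch of that lazily-built scheme, with made-up field and helper names, and
assuming scan_nat_page() is extended to fill the per-block bitmap as well:

	/* Sketch only: locking and the free-nid count limit are omitted. */
	static void scan_one_nat_block(struct f2fs_sb_info *sbi,
						unsigned int nat_blk)
	{
		struct f2fs_nm_info *nm_i = NM_I(sbi);
		nid_t start_nid = nat_blk * NAT_ENTRY_PER_BLOCK;
		struct page *page;
		unsigned int i;

		if (test_bit(nat_blk, nm_i->nat_block_bitmap)) {
			/* already loaded once: serve the lookup from memory */
			for_each_set_bit(i, nm_i->free_nid_bitmap[nat_blk],
							NAT_ENTRY_PER_BLOCK)
				add_free_nid(sbi, start_nid + i, true);
			return;
		}

		/* first touch: read the NAT page from disk exactly once */
		page = get_current_nat_page(sbi, start_nid);
		/* scan_nat_page() also fills free_nid_bitmap[nat_blk] here */
		scan_nat_page(sbi, page, start_nid);
		f2fs_put_page(page, 1);

		set_bit(nat_blk, nm_i->nat_block_bitmap);
	}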

Thoughts? Which one is preferred?

Thanks,

> 
> Thanks,
> 
>>
>> Thanks,
>>
>>>
>>>> IMO, if those do not
>>>> raise huge concerns, we would be able to consider just replacing current free
>>>> nid list with this bitmap.
>>>
>>> Yup, I agree with you.
>>>
>>> Thanks,
>>>
> 
> .
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] f2fs: introduce nid cache
@ 2017-02-11  6:17           ` Chao Yu
  0 siblings, 0 replies; 15+ messages in thread
From: Chao Yu @ 2017-02-11  6:17 UTC (permalink / raw)
  To: Jaegeuk Kim, Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 2017/2/9 9:28, Jaegeuk Kim wrote:
> On 02/08, Chao Yu wrote:
>> On 2017/2/7 15:24, Chao Yu wrote:
>>> Hi Jaegeuk,
>>>
>>> Happy Chinese New Year! :)
>>>
>>> On 2017/1/24 12:35, Jaegeuk Kim wrote:
>>>> Hi Chao,
>>>>
>>>> On 01/22, Chao Yu wrote:
>>>>> In scenario of intensively node allocation, free nids will be ran out
>>>>> soon, then it needs to stop to load free nids by traversing NAT blocks,
>>>>> in worse case, if NAT blocks does not be cached in memory, it generates
>>>>> IOs which slows down our foreground operations.
>>>>>
>>>>> In order to speed up node allocation, in this patch we introduce a new
>>>>> option named "nid cache", when turns on this option, it will load all
>>>>> nat entries in NAT blocks when doing mount, and organize all free nids
>>>>> in a bitmap, for any operations related to free nid, we will query and
>>>>> set the new prebuilded bitmap instead of reading and lookuping NAT
>>>>> blocks, so performance of node allocation can be improved.
>>>>>
>>>>
>>>> How does this affect mount time and memory consumption?
>>>
>>> Sorry for the delay.
>>>
>>> Let me figure out some numbers later.
>>
>> a. mount time
>>
>> I choose slow device (Kingston 16GB SD card) to see how this option affect mount
>> time when there is not enough bandwidth in low level,
>>
>> Before the test, I change readahead window size of NAT pages from FREE_NID_PAGES
>> * 8 to sbi->blocks_per_seg for better ra performance, so the result is:
>>
>> time mount -t f2fs -o nid_cache /dev/sde /mnt/f2fs/
>>
>> before:
>> real	0m0.204s
>> user	0m0.004s
>> sys	0m0.020s
>>
>> after:
>> real	0m3.792s
> 
> Oops, we can't accept this even only for 16GB, right? :(

Pengyang Hou help testing this patch in 64GB UFS, the result of mount time is:

Before:	110 ms
After:	770 ms

So these test results shows that we'd better not set nid_cache option by default
in upstream since anyway it slows down mount procedure obviously, but still
users can decide whether use it or not depending on their requirement. e.g.:
a. For readonly case, this option is complete no needed.
b. For in batch node allocation/deletion case, this option is recommended.

> 
>> user	0m0.000s
>> sys	0m0.140s
>>
>> b. memory consumption
>>
>> For 16GB size image, there is total 34 NAT pages, so memory footprint is:
>> 34 / 2 * 512 * 455 / 8 = 495040 bytes = 483.4 KB
>>
>> Increasing of memory footprint is liner with total user valid blocks in image,
>> and at most it will eat 3900 * 8 * 455 / 8 = 1774500 bytes = 1732.9 KB
> 
> How about adding two bitmaps for whole NAT pages and storing the bitmaps in
> checkpoint pack, which needs at most two blocks additionally?
> 
> 1. full-assigned NAT bitmap, where 1 means there is no free nids.
> 2. empty NAT bitmap, where 1 means whole there-in nids are free.
> 
> With these bitmaps, build_free_nids() can scan from 0'th NAT block by:
> 
> 	if (full-assigned NAT)
> 		skip;
> 	else if (empty NAT)
> 		add_free_nid(all);
> 	else
> 		read NAT page and add_free_nid();
> 
> The flush_nat_entries() has to change its bitmaps accordingly.
> 
> With this approach, I expect we can reuse nids as much as possible while
> getting cached NAT pages more effectively.

Good idea! :)

And there is another approach which does not need any disk layout change:

We can allocate a free_nid_bitmap[NAT_BLOCKS_COUNT][455] array, where each bitmap
indicates the usage of free nids in one NAT block, and introduce another
nat_block_bitmap[NAT_BLOCKS_COUNT] to indicate whether each NAT block has been
loaded; once a block is loaded, we can do the lookup in the corresponding
free_nid_bitmap. So I expect we will load each NAT block from disk at most once,
which will:
- not increase mount latency
- after a NAT block is loaded from disk, build its bitmap in memory to reduce
lookup time on subsequent scans
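
To make this concrete, below is a minimal userspace sketch of the two structures
and the lookup path (the constants and helper names are illustrative only, not
from any posted patch; in the kernel this would reuse the existing
test_bit()/set_bit() helpers and be protected by nm_i->nat_tree_lock):

/* Userspace sketch of the proposed free nid bitmaps (illustrative only). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NAT_ENTRY_PER_BLOCK	455	/* nat entries in one 4KB NAT block */
#define NAT_BLOCKS_COUNT	34	/* e.g. a 16GB image */

/* one bit per nid inside a NAT block: 1 = nid is free */
static uint8_t free_nid_bitmap[NAT_BLOCKS_COUNT][(NAT_ENTRY_PER_BLOCK + 7) / 8];
/* one bit per NAT block: 1 = this NAT block has been loaded from disk */
static uint8_t nat_block_bitmap[(NAT_BLOCKS_COUNT + 7) / 8];

static bool test_bit_u8(const uint8_t *map, unsigned int nr)
{
	return map[nr / 8] & (1u << (nr % 8));
}

static void set_bit_u8(uint8_t *map, unsigned int nr)
{
	map[nr / 8] |= 1u << (nr % 8);
}

/* Stand-in for reading one NAT block from disk and recording its free nids. */
static void load_nat_block(unsigned int nat_blk)
{
	/*
	 * In the kernel this would read the NAT page and set a bit for each
	 * nid whose block address is NULL_ADDR; here we simply pretend every
	 * nid in the block is free.
	 */
	memset(free_nid_bitmap[nat_blk], 0xff, sizeof(free_nid_bitmap[nat_blk]));
	set_bit_u8(nat_block_bitmap, nat_blk);
}

/*
 * Lookup path: answer from the in-memory bitmap if the NAT block was loaded
 * before, otherwise load it from disk exactly once.
 */
static bool nid_is_free(unsigned int nid)
{
	unsigned int nat_blk = nid / NAT_ENTRY_PER_BLOCK;
	unsigned int off = nid % NAT_ENTRY_PER_BLOCK;

	if (!test_bit_u8(nat_block_bitmap, nat_blk))
		load_nat_block(nat_blk);
	return test_bit_u8(free_nid_bitmap[nat_blk], off);
}

int main(void)
{
	printf("nid 1000 free? %d\n", nid_is_free(1000));
	/* second lookup hits the cached bitmap, no disk access */
	printf("nid 1000 free? %d\n", nid_is_free(1000));
	return 0;
}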

Thoughts? Which one is preferred?

Thanks,

> 
> Thanks,
> 
>>
>> Thanks,
>>
>>>
>>>> IMO, if those do not
>>>> raise huge concerns, we would be able to consider just replacing current free
>>>> nid list with this bitmap.
>>>
>>> Yup, I agree with you.
>>>
>>> Thanks,
>>>
> 
> .
> 




* Re: [PATCH] f2fs: introduce nid cache
  2017-02-11  6:17           ` Chao Yu
  (?)
@ 2017-02-14  0:25           ` Jaegeuk Kim
  2017-02-15  2:25               ` Chao Yu
  -1 siblings, 1 reply; 15+ messages in thread
From: Jaegeuk Kim @ 2017-02-14  0:25 UTC (permalink / raw)
  To: Chao Yu; +Cc: Chao Yu, linux-f2fs-devel, linux-kernel, houpengyang

On 02/11, Chao Yu wrote:
> On 2017/2/9 9:28, Jaegeuk Kim wrote:
> > On 02/08, Chao Yu wrote:
> >> On 2017/2/7 15:24, Chao Yu wrote:
> >>> Hi Jaegeuk,
> >>>
> >>> Happy Chinese New Year! :)
> >>>
> >>> On 2017/1/24 12:35, Jaegeuk Kim wrote:
> >>>> Hi Chao,
> >>>>
> >>>> On 01/22, Chao Yu wrote:
> >>>>> In scenario of intensively node allocation, free nids will be ran out
> >>>>> soon, then it needs to stop to load free nids by traversing NAT blocks,
> >>>>> in worse case, if NAT blocks does not be cached in memory, it generates
> >>>>> IOs which slows down our foreground operations.
> >>>>>
> >>>>> In order to speed up node allocation, in this patch we introduce a new
> >>>>> option named "nid cache", when turns on this option, it will load all
> >>>>> nat entries in NAT blocks when doing mount, and organize all free nids
> >>>>> in a bitmap, for any operations related to free nid, we will query and
> >>>>> set the new prebuilded bitmap instead of reading and lookuping NAT
> >>>>> blocks, so performance of node allocation can be improved.
> >>>>>
> >>>>
> >>>> How does this affect mount time and memory consumption?
> >>>
> >>> Sorry for the delay.
> >>>
> >>> Let me figure out some numbers later.
> >>
> >> a. mount time
> >>
> >> I choose slow device (Kingston 16GB SD card) to see how this option affect mount
> >> time when there is not enough bandwidth in low level,
> >>
> >> Before the test, I change readahead window size of NAT pages from FREE_NID_PAGES
> >> * 8 to sbi->blocks_per_seg for better ra performance, so the result is:
> >>
> >> time mount -t f2fs -o nid_cache /dev/sde /mnt/f2fs/
> >>
> >> before:
> >> real	0m0.204s
> >> user	0m0.004s
> >> sys	0m0.020s
> >>
> >> after:
> >> real	0m3.792s
> > 
> > Oops, we can't accept this even only for 16GB, right? :(
> 
> Pengyang Hou help testing this patch in 64GB UFS, the result of mount time is:
> 
> Before:	110 ms
> After:	770 ms
> 
> So these test results shows that we'd better not set nid_cache option by default
> in upstream since anyway it slows down mount procedure obviously, but still
> users can decide whether use it or not depending on their requirement. e.g.:
> a. For readonly case, this option is complete no needed.
> b. For in batch node allocation/deletion case, this option is recommended.
> 
> > 
> >> user	0m0.000s
> >> sys	0m0.140s
> >>
> >> b. memory consumption
> >>
> >> For 16GB size image, there is total 34 NAT pages, so memory footprint is:
> >> 34 / 2 * 512 * 455 / 8 = 495040 bytes = 483.4 KB
> >>
> >> Increasing of memory footprint is liner with total user valid blocks in image,
> >> and at most it will eat 3900 * 8 * 455 / 8 = 1774500 bytes = 1732.9 KB
> > 
> > How about adding two bitmaps for whole NAT pages and storing the bitmaps in
> > checkpoint pack, which needs at most two blocks additionally?
> > 
> > 1. full-assigned NAT bitmap, where 1 means there is no free nids.
> > 2. empty NAT bitmap, where 1 means whole there-in nids are free.
> > 
> > With these bitmaps, build_free_nids() can scan from 0'th NAT block by:
> > 
> > 	if (full-assigned NAT)
> > 		skip;
> > 	else if (empty NAT)
> > 		add_free_nid(all);
> > 	else
> > 		read NAT page and add_free_nid();
> > 
> > The flush_nat_entries() has to change its bitmaps accordingly.
> > 
> > With this approach, I expect we can reuse nids as much as possible while
> > getting cached NAT pages more effectively.
> 
> Good idea! :)
> 
> And there is another approach which do not need to change disk layout is:
> 
> We can allocate free_nid_bitmap[NAT_BLOCKS_COUNT][455] array, each bitmap
> indicates usage of free nids in one NAT block, and we introduce another
> nat_block_bitmap[NAT_BLOCKS_COUNT] to indicate each NAT block is loaded or not,
> if it is loaded and we can do lookup in free_nid_bitmap correspondingly. So I
> expect that we will load one NAT block from disk one time at most, it will:
> - not increase mount latency
> - after loading NAT blocks from disk, we will build its bitmap inside memory to
> reduce lookup time for second time

Yup, I think both of them are doable together. Meanwhile, I've written patches
which support the bitmaps for NAT pages and started to test them. Could you
write a patch to introduce free_nid_bitmap[][] as well?

Thanks,

> 
> Thoughts? Which one is preferred?
> 
> Thanks,
> 
> > 
> > Thanks,
> > 
> >>
> >> Thanks,
> >>
> >>>
> >>>> IMO, if those do not
> >>>> raise huge concerns, we would be able to consider just replacing current free
> >>>> nid list with this bitmap.
> >>>
> >>> Yup, I agree with you.
> >>>
> >>> Thanks,
> >>>
> > 
> > .
> > 


* Re: [PATCH] f2fs: introduce nid cache
  2017-02-14  0:25           ` Jaegeuk Kim
@ 2017-02-15  2:25               ` Chao Yu
  0 siblings, 0 replies; 15+ messages in thread
From: Chao Yu @ 2017-02-15  2:25 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: Chao Yu, linux-f2fs-devel, linux-kernel, houpengyang

On 2017/2/14 8:25, Jaegeuk Kim wrote:
> On 02/11, Chao Yu wrote:
>> On 2017/2/9 9:28, Jaegeuk Kim wrote:
>>> On 02/08, Chao Yu wrote:
>>>> On 2017/2/7 15:24, Chao Yu wrote:
>>>>> Hi Jaegeuk,
>>>>>
>>>>> Happy Chinese New Year! :)
>>>>>
>>>>> On 2017/1/24 12:35, Jaegeuk Kim wrote:
>>>>>> Hi Chao,
>>>>>>
>>>>>> On 01/22, Chao Yu wrote:
>>>>>>> In scenario of intensively node allocation, free nids will be ran out
>>>>>>> soon, then it needs to stop to load free nids by traversing NAT blocks,
>>>>>>> in worse case, if NAT blocks does not be cached in memory, it generates
>>>>>>> IOs which slows down our foreground operations.
>>>>>>>
>>>>>>> In order to speed up node allocation, in this patch we introduce a new
>>>>>>> option named "nid cache", when turns on this option, it will load all
>>>>>>> nat entries in NAT blocks when doing mount, and organize all free nids
>>>>>>> in a bitmap, for any operations related to free nid, we will query and
>>>>>>> set the new prebuilded bitmap instead of reading and lookuping NAT
>>>>>>> blocks, so performance of node allocation can be improved.
>>>>>>>
>>>>>>
>>>>>> How does this affect mount time and memory consumption?
>>>>>
>>>>> Sorry for the delay.
>>>>>
>>>>> Let me figure out some numbers later.
>>>>
>>>> a. mount time
>>>>
>>>> I choose slow device (Kingston 16GB SD card) to see how this option affect mount
>>>> time when there is not enough bandwidth in low level,
>>>>
>>>> Before the test, I change readahead window size of NAT pages from FREE_NID_PAGES
>>>> * 8 to sbi->blocks_per_seg for better ra performance, so the result is:
>>>>
>>>> time mount -t f2fs -o nid_cache /dev/sde /mnt/f2fs/
>>>>
>>>> before:
>>>> real	0m0.204s
>>>> user	0m0.004s
>>>> sys	0m0.020s
>>>>
>>>> after:
>>>> real	0m3.792s
>>>
>>> Oops, we can't accept this even only for 16GB, right? :(
>>
>> Pengyang Hou help testing this patch in 64GB UFS, the result of mount time is:
>>
>> Before:	110 ms
>> After:	770 ms
>>
>> So these test results shows that we'd better not set nid_cache option by default
>> in upstream since anyway it slows down mount procedure obviously, but still
>> users can decide whether use it or not depending on their requirement. e.g.:
>> a. For readonly case, this option is complete no needed.
>> b. For in batch node allocation/deletion case, this option is recommended.
>>
>>>
>>>> user	0m0.000s
>>>> sys	0m0.140s
>>>>
>>>> b. memory consumption
>>>>
>>>> For 16GB size image, there is total 34 NAT pages, so memory footprint is:
>>>> 34 / 2 * 512 * 455 / 8 = 495040 bytes = 483.4 KB
>>>>
>>>> Increasing of memory footprint is liner with total user valid blocks in image,
>>>> and at most it will eat 3900 * 8 * 455 / 8 = 1774500 bytes = 1732.9 KB
>>>
>>> How about adding two bitmaps for whole NAT pages and storing the bitmaps in
>>> checkpoint pack, which needs at most two blocks additionally?
>>>
>>> 1. full-assigned NAT bitmap, where 1 means there is no free nids.
>>> 2. empty NAT bitmap, where 1 means whole there-in nids are free.
>>>
>>> With these bitmaps, build_free_nids() can scan from 0'th NAT block by:
>>>
>>> 	if (full-assigned NAT)
>>> 		skip;
>>> 	else if (empty NAT)
>>> 		add_free_nid(all);
>>> 	else
>>> 		read NAT page and add_free_nid();
>>>
>>> The flush_nat_entries() has to change its bitmaps accordingly.
>>>
>>> With this approach, I expect we can reuse nids as much as possible while
>>> getting cached NAT pages more effectively.
>>
>> Good idea! :)
>>
>> And there is another approach which do not need to change disk layout is:
>>
>> We can allocate free_nid_bitmap[NAT_BLOCKS_COUNT][455] array, each bitmap
>> indicates usage of free nids in one NAT block, and we introduce another
>> nat_block_bitmap[NAT_BLOCKS_COUNT] to indicate each NAT block is loaded or not,
>> if it is loaded and we can do lookup in free_nid_bitmap correspondingly. So I
>> expect that we will load one NAT block from disk one time at most, it will:
>> - not increase mount latency
>> - after loading NAT blocks from disk, we will build its bitmap inside memory to
>> reduce lookup time for second time
> 
> Yup, I think both of them are doable together. Meanwhile, I've written patches
> which support the bitmaps for NAT pages and started to test them. Could you
> write a patch to introduce free_nid_bitmap[][] as well?

No problem. :)

Thanks,

> 
> Thanks,
> 
>>
>> Thoughts? Which one is preferred?
>>
>> Thanks,
>>
>>>
>>> Thanks,
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>>> IMO, if those do not
>>>>>> raise huge concerns, we would be able to consider just replacing current free
>>>>>> nid list with this bitmap.
>>>>>
>>>>> Yup, I agree with you.
>>>>>
>>>>> Thanks,
>>>>>
>>>
>>> .
>>>
> 
> .
> 



