Linux-f2fs-devel Archive on lore.kernel.org
 help / color / Atom feed
* [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg
@ 2020-06-30 10:04 Chao Yu
  2020-06-30 10:04 ` [f2fs-dev] [PATCH RFC 2/5] f2fs: record average update time of segment Chao Yu
                   ` (4 more replies)
  0 siblings, 5 replies; 19+ messages in thread
From: Chao Yu @ 2020-06-30 10:04 UTC (permalink / raw)
  To: jaegeuk; +Cc: linux-kernel, linux-f2fs-devel

Previous implementation of aligned pinfile allocation will:
- allocate new segment on cold data log no matter whether last used
segment is partially used or not, it makes IOs more random;
- force concurrent cold data/GCed IO going into warm data area, it
can make a bad effect on hot/cold data separation;

In this patch, we introduce a new type of log named 'inmem curseg',
the differences from normal curseg are:
- it reuses existing segment type (CURSEG_XXX_NODE/DATA);
- it only exists in memory, its segno, blkofs, summary will not be
 persisted into checkpoint area;

With this new feature, we can enhance scalability of log, special
allocators can be created for purposes:
- pure lfs allocator for aligned pinfile allocation or file
defragmentation
- pure ssr allocator for later feature

So, let's update aligned pinfile allocation to use this new
inmem curseg framework.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
---
 fs/f2fs/checkpoint.c |   7 ++-
 fs/f2fs/debug.c      |   6 ++-
 fs/f2fs/f2fs.h       |  12 +++--
 fs/f2fs/file.c       |   3 +-
 fs/f2fs/gc.c         |   2 +-
 fs/f2fs/segment.c    | 107 ++++++++++++++++++++++++++++++-------------
 fs/f2fs/segment.h    |  17 ++++---
 fs/f2fs/super.c      |   9 ++--
 8 files changed, 112 insertions(+), 51 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 1bb8278a1c4a..644a914af25a 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1623,11 +1623,16 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	f2fs_flush_sit_entries(sbi, cpc);
 
+	/* save inmem log status */
+	f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
+
 	err = do_checkpoint(sbi, cpc);
 	if (err)
 		f2fs_release_discard_addrs(sbi);
 	else
 		f2fs_clear_prefree_segments(sbi, cpc);
+
+	f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
 stop:
 	unblock_operations(sbi);
 	stat_inc_cp_count(sbi->stat_info);
@@ -1658,7 +1663,7 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
 	}
 
 	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
-			NR_CURSEG_TYPE - __cp_payload(sbi)) *
+			NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
 				F2FS_ORPHANS_PER_BLOCK;
 }
 
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 4276c0f79beb..41a91aa8c262 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -164,7 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
 		* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
 		/ 2;
 	si->util_invalid = 50 - si->util_free - si->util_valid;
-	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
+	for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
 		struct curseg_info *curseg = CURSEG_I(sbi, i);
 		si->curseg[i] = curseg->segno;
 		si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
@@ -393,6 +393,10 @@ static int stat_show(struct seq_file *s, void *v)
 			   si->dirty_seg[CURSEG_COLD_NODE],
 			   si->full_seg[CURSEG_COLD_NODE],
 			   si->valid_blks[CURSEG_COLD_NODE]);
+		seq_printf(s, "  - Pinned file: %8d %8d %8d\n",
+			   si->curseg[CURSEG_COLD_DATA_PINNED],
+			   si->cursec[CURSEG_COLD_DATA_PINNED],
+			   si->curzone[CURSEG_COLD_DATA_PINNED]);
 		seq_printf(s, "\n  - Valid: %d\n  - Dirty: %d\n",
 			   si->main_area_segs - si->dirty_count -
 			   si->prefree_count - si->free_segs,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7d6c5f8ce16b..f06c77066284 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -996,7 +996,9 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
  */
 #define	NR_CURSEG_DATA_TYPE	(3)
 #define NR_CURSEG_NODE_TYPE	(3)
-#define NR_CURSEG_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
+#define NR_CURSEG_INMEM_TYPE	(1)
+#define NR_CURSEG_PERSIST_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
+#define NR_CURSEG_TYPE		(NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE)
 
 enum {
 	CURSEG_HOT_DATA	= 0,	/* directory entry blocks */
@@ -1005,8 +1007,10 @@ enum {
 	CURSEG_HOT_NODE,	/* direct node blocks of directory files */
 	CURSEG_WARM_NODE,	/* direct node blocks of normal files */
 	CURSEG_COLD_NODE,	/* indirect node blocks */
-	NO_CHECK_TYPE,
-	CURSEG_COLD_DATA_PINNED,/* cold data for pinned file */
+	NR_PERSISTENT_LOG,	/* number of persistent log */
+	CURSEG_COLD_DATA_PINNED = NR_PERSISTENT_LOG,
+				/* pinned file that needs consecutive block address */
+	NO_CHECK_TYPE,		/* number of persistent & inmem log */
 };
 
 struct flush_cmd {
@@ -3359,6 +3363,8 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
 int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
+void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type);
+void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type);
 void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
 					unsigned int start, unsigned int end);
 void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index c10e82806c2a..8611ade06018 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1656,13 +1656,14 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
 		}
 
 		down_write(&sbi->pin_sem);
-		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
 
 		f2fs_lock_op(sbi);
 		f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA);
 		f2fs_unlock_op(sbi);
 
+		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
 		err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
+
 		up_write(&sbi->pin_sem);
 
 		done += map.m_len;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 3b718da69910..84807abe4e00 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1448,7 +1448,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
 	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
 
 	/* Move out cursegs from the target range */
-	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
+	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++)
 		f2fs_allocate_segment_for_resize(sbi, type, start, end);
 
 	/* do GC to move out valid blocks in the range */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 5924b3965ae4..863ec6f1fb87 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1958,7 +1958,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
 
 	mutex_lock(&dirty_i->seglist_lock);
 	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
-		__set_test_and_free(sbi, segno);
+		__set_test_and_free(sbi, segno, false);
 	mutex_unlock(&dirty_i->seglist_lock);
 }
 
@@ -2496,6 +2496,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
 	struct summary_footer *sum_footer;
 
+	curseg->inited = true;
 	curseg->segno = curseg->next_segno;
 	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
 	curseg->next_blkoff = 0;
@@ -2503,24 +2504,31 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
 
 	sum_footer = &(curseg->sum_blk->footer);
 	memset(sum_footer, 0, sizeof(struct summary_footer));
-	if (IS_DATASEG(type))
+	if (IS_DATASEG(curseg->seg_type))
 		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
-	if (IS_NODESEG(type))
+	if (IS_NODESEG(curseg->seg_type))
 		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
-	__set_sit_entry_type(sbi, type, curseg->segno, modified);
+	__set_sit_entry_type(sbi, curseg->seg_type, curseg->segno, modified);
 }
 
 static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
 {
+	struct curseg_info *curseg = CURSEG_I(sbi, type);
+
 	/* if segs_per_sec is large than 1, we need to keep original policy. */
 	if (__is_large_section(sbi))
-		return CURSEG_I(sbi, type)->segno;
+		return curseg->segno;
+
+	/* inmem log may not locate on any segment after mount */
+	if (!curseg->inited)
+		return 0;
 
 	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
 		return 0;
 
 	if (test_opt(sbi, NOHEAP) &&
-		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
+		(curseg->seg_type == CURSEG_HOT_DATA ||
+		IS_NODESEG(curseg->seg_type)))
 		return 0;
 
 	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
@@ -2530,7 +2538,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
 	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
 		return 0;
 
-	return CURSEG_I(sbi, type)->segno;
+	return curseg->segno;
 }
 
 /*
@@ -2540,12 +2548,14 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
 {
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
+	unsigned short seg_type = curseg->seg_type;
 	unsigned int segno = curseg->segno;
 	int dir = ALLOC_LEFT;
 
-	write_sum_page(sbi, curseg->sum_blk,
+	if (curseg->inited)
+		write_sum_page(sbi, curseg->sum_blk,
 				GET_SUM_BLOCK(sbi, segno));
-	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
+	if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
 		dir = ALLOC_RIGHT;
 
 	if (test_opt(sbi, NOHEAP))
@@ -2622,6 +2632,43 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
 	f2fs_put_page(sum_page, 1);
 }
 
+void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
+{
+	struct curseg_info *curseg = CURSEG_I(sbi, type);
+
+	mutex_lock(&curseg->curseg_mutex);
+	if (!curseg->inited)
+		goto out;
+
+	if (get_valid_blocks(sbi, curseg->segno, false)) {
+		write_sum_page(sbi, curseg->sum_blk,
+				GET_SUM_BLOCK(sbi, curseg->segno));
+	} else {
+		mutex_lock(&DIRTY_I(sbi)->seglist_lock);
+		__set_test_and_free(sbi, curseg->segno, true);
+		mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
+	}
+out:
+	mutex_unlock(&curseg->curseg_mutex);
+}
+
+void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
+{
+	struct curseg_info *curseg = CURSEG_I(sbi, type);
+
+	mutex_lock(&curseg->curseg_mutex);
+	if (!curseg->inited)
+		goto out;
+	if (get_valid_blocks(sbi, curseg->segno, false))
+		goto out;
+
+	mutex_lock(&DIRTY_I(sbi)->seglist_lock);
+	__set_test_and_inuse(sbi, curseg->segno);
+	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
+out:
+	mutex_unlock(&curseg->curseg_mutex);
+}
+
 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
 {
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -2738,11 +2785,15 @@ void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
 	unsigned int old_segno;
 
+	if (!curseg->inited)
+		goto alloc;
+
 	if (!curseg->next_blkoff &&
 		!get_valid_blocks(sbi, curseg->segno, false) &&
 		!get_ckpt_valid_blocks(sbi, curseg->segno))
 		return;
 
+alloc:
 	old_segno = curseg->segno;
 	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
 	locate_dirty_segment(sbi, old_segno);
@@ -3126,19 +3177,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 {
 	struct sit_info *sit_i = SIT_I(sbi);
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
-	bool put_pin_sem = false;
-
-	if (type == CURSEG_COLD_DATA) {
-		/* GC during CURSEG_COLD_DATA_PINNED allocation */
-		if (down_read_trylock(&sbi->pin_sem)) {
-			put_pin_sem = true;
-		} else {
-			type = CURSEG_WARM_DATA;
-			curseg = CURSEG_I(sbi, type);
-		}
-	} else if (type == CURSEG_COLD_DATA_PINNED) {
-		type = CURSEG_COLD_DATA;
-	}
 
 	down_read(&SM_I(sbi)->curseg_lock);
 
@@ -3204,9 +3242,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 	mutex_unlock(&curseg->curseg_mutex);
 
 	up_read(&SM_I(sbi)->curseg_lock);
-
-	if (put_pin_sem)
-		up_read(&sbi->pin_sem);
 }
 
 static void update_device_state(struct f2fs_io_info *fio)
@@ -3574,7 +3609,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
 		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
 							CURSEG_HOT_DATA]);
 		if (__exist_node_summaries(sbi))
-			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
+			blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
 		else
 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
 	} else {
@@ -3652,8 +3687,9 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
 	}
 
 	if (__exist_node_summaries(sbi))
-		f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
-					NR_CURSEG_TYPE - type, META_CP, true);
+		f2fs_ra_meta_pages(sbi,
+				sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
+				NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
 
 	for (; type <= CURSEG_COLD_NODE; type++) {
 		err = read_normal_summaries(sbi, type);
@@ -4155,14 +4191,14 @@ static int build_curseg(struct f2fs_sb_info *sbi)
 	struct curseg_info *array;
 	int i;
 
-	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
-			     GFP_KERNEL);
+	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
+					sizeof(*array)), GFP_KERNEL);
 	if (!array)
 		return -ENOMEM;
 
 	SM_I(sbi)->curseg_array = array;
 
-	for (i = 0; i < NR_CURSEG_TYPE; i++) {
+	for (i = 0; i < NO_CHECK_TYPE; i++) {
 		mutex_init(&array[i].curseg_mutex);
 		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
 		if (!array[i].sum_blk)
@@ -4172,8 +4208,13 @@ static int build_curseg(struct f2fs_sb_info *sbi)
 				sizeof(struct f2fs_journal), GFP_KERNEL);
 		if (!array[i].journal)
 			return -ENOMEM;
+		if (i < NR_PERSISTENT_LOG)
+			array[i].seg_type = CURSEG_HOT_DATA + i;
+		else if (i == CURSEG_COLD_DATA_PINNED)
+			array[i].seg_type = CURSEG_COLD_DATA;
 		array[i].segno = NULL_SEGNO;
 		array[i].next_blkoff = 0;
+		array[i].inited = false;
 	}
 	return restore_curseg_summaries(sbi);
 }
@@ -4408,7 +4449,7 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
 	 * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
 	 * In LFS curseg, all blkaddr after .next_blkoff should be unused.
 	 */
-	for (i = 0; i < NO_CHECK_TYPE; i++) {
+	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
 		struct curseg_info *curseg = CURSEG_I(sbi, i);
 		struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
 		unsigned int blkofs = curseg->next_blkoff;
@@ -4637,7 +4678,7 @@ int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
 {
 	int i, ret;
 
-	for (i = 0; i < NO_CHECK_TYPE; i++) {
+	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
 		ret = fix_curseg_write_pointer(sbi, i);
 		if (ret)
 			return ret;
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index f261e3e6a69b..8ff261550cbb 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -22,7 +22,7 @@
 #define GET_R2L_SEGNO(free_i, segno)	((segno) + (free_i)->start_segno)
 
 #define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
-#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE)
+#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
 
 #define IS_HOT(t)	((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
 #define IS_WARM(t)	((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
@@ -34,7 +34,8 @@
 	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) ||	\
 	 ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) ||	\
 	 ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) ||	\
-	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
+	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) ||	\
+	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno))
 
 #define IS_CURSEC(sbi, secno)						\
 	(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno /		\
@@ -48,7 +49,9 @@
 	 ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno /		\
 	  (sbi)->segs_per_sec) ||	\
 	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno /		\
-	  (sbi)->segs_per_sec))	\
+	  (sbi)->segs_per_sec) ||	\
+	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno /	\
+	  (sbi)->segs_per_sec))
 
 #define MAIN_BLKADDR(sbi)						\
 	(SM_I(sbi) ? SM_I(sbi)->main_blkaddr : 				\
@@ -288,10 +291,12 @@ struct curseg_info {
 	struct rw_semaphore journal_rwsem;	/* protect journal area */
 	struct f2fs_journal *journal;		/* cached journal info */
 	unsigned char alloc_type;		/* current allocation type */
+	unsigned short seg_type;		/* segment type like CURSEG_XXX_TYPE */
 	unsigned int segno;			/* current segment number */
 	unsigned short next_blkoff;		/* next block offset to write */
 	unsigned int zone;			/* current zone number */
 	unsigned int next_segno;		/* preallocated segment */
+	bool inited;				/* indicate inmem log is inited */
 };
 
 struct sit_entry_set {
@@ -305,8 +310,6 @@ struct sit_entry_set {
  */
 static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
 {
-	if (type == CURSEG_COLD_DATA_PINNED)
-		type = CURSEG_COLD_DATA;
 	return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
 }
 
@@ -438,7 +441,7 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi,
 }
 
 static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
-		unsigned int segno)
+		unsigned int segno, bool inmem)
 {
 	struct free_segmap_info *free_i = FREE_I(sbi);
 	unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
@@ -449,7 +452,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
 	if (test_and_clear_bit(segno, free_i->free_segmap)) {
 		free_i->free_segments++;
 
-		if (IS_CURSEC(sbi, secno))
+		if (!inmem && IS_CURSEC(sbi, secno))
 			goto skip_free;
 		next = find_next_bit(free_i->free_segmap,
 				start_segno + sbi->segs_per_sec, start_segno);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 80cb7cd358f8..0fefa130585f 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -575,7 +575,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
 		case Opt_active_logs:
 			if (args->from && match_int(args, &arg))
 				return -EINVAL;
-			if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
+			if (arg != 2 && arg != 4 &&
+				arg != NR_CURSEG_PERSIST_TYPE)
 				return -EINVAL;
 			F2FS_OPTION(sbi).active_logs = arg;
 			break;
@@ -981,7 +982,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
 	}
 
 	/* Not pass down write hints if the number of active logs is lesser
-	 * than NR_CURSEG_TYPE.
+	 * than NR_CURSEG_PERSIST_TYPE.
 	 */
 	if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE)
 		F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
@@ -1614,7 +1615,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 static void default_options(struct f2fs_sb_info *sbi)
 {
 	/* init some FS parameters */
-	F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE;
+	F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE;
 	F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
 	F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
 	F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
@@ -2946,7 +2947,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
 	cp_payload = __cp_payload(sbi);
 	if (cp_pack_start_sum < cp_payload + 1 ||
 		cp_pack_start_sum > blocks_per_seg - 1 -
-			NR_CURSEG_TYPE) {
+			NR_CURSEG_PERSIST_TYPE) {
 		f2fs_err(sbi, "Wrong cp_pack_start_sum: %u",
 			 cp_pack_start_sum);
 		return 1;
-- 
2.26.2



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [f2fs-dev] [PATCH RFC 2/5] f2fs: record average update time of segment
  2020-06-30 10:04 [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg Chao Yu
@ 2020-06-30 10:04 ` Chao Yu
  2020-07-01 16:19   ` Jaegeuk Kim
  2020-06-30 10:04 ` [f2fs-dev] [PATCH RFC 3/5] f2fs: inherit mtime of original block during GC Chao Yu
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 19+ messages in thread
From: Chao Yu @ 2020-06-30 10:04 UTC (permalink / raw)
  To: jaegeuk; +Cc: linux-kernel, linux-f2fs-devel

Previously, once we updated one block in a segment, we would update the
mtime of the segment to the latest time, making an aged segment become
the freshest one, with the result that GC with the cost-benefit algorithm
misses such a segment. So this patch changes to record mtime as the
average block update time instead of the last update time.

There is no need to reset mtime for a prefree segment: as se->valid_blocks
is zero, the old se->mtime carries no weight in the calculation below:

	se->mtime = (se->mtime * se->valid_blocks + mtime) /
				(se->valid_blocks + 1);

Signed-off-by: Chao Yu <yuchao0@huawei.com>
---
 fs/f2fs/segment.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 863ec6f1fb87..906c313835ad 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2150,6 +2150,22 @@ static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
 		__mark_sit_entry_dirty(sbi, segno);
 }
 
+static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr)
+{
+	unsigned int segno = GET_SEGNO(sbi, blkaddr);
+	struct seg_entry *se = get_seg_entry(sbi, segno);
+	unsigned long long mtime = get_mtime(sbi, false);
+
+	if (!se->mtime) {
+		se->mtime = mtime;
+	} else {
+		se->mtime = (se->mtime * se->valid_blocks + mtime) /
+						(se->valid_blocks + 1);
+	}
+	if (mtime > SIT_I(sbi)->max_mtime)
+		SIT_I(sbi)->max_mtime = mtime;
+}
+
 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
 {
 	struct seg_entry *se;
@@ -2169,10 +2185,9 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
 	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
 				(new_vblocks > sbi->blocks_per_seg)));
 
+	update_segment_mtime(sbi, blkaddr);
+
 	se->valid_blocks = new_vblocks;
-	se->mtime = get_mtime(sbi, false);
-	if (se->mtime > SIT_I(sbi)->max_mtime)
-		SIT_I(sbi)->max_mtime = se->mtime;
 
 	/* Update valid block bitmap */
 	if (del > 0) {
-- 
2.26.2



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [f2fs-dev] [PATCH RFC 3/5] f2fs: inherit mtime of original block during GC
  2020-06-30 10:04 [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg Chao Yu
  2020-06-30 10:04 ` [f2fs-dev] [PATCH RFC 2/5] f2fs: record average update time of segment Chao Yu
@ 2020-06-30 10:04 ` Chao Yu
  2020-06-30 10:04 ` [f2fs-dev] [PATCH RFC 4/5] f2fs: support 64-bits key in f2fs rb-tree node entry Chao Yu
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 19+ messages in thread
From: Chao Yu @ 2020-06-30 10:04 UTC (permalink / raw)
  To: jaegeuk; +Cc: linux-kernel, linux-f2fs-devel

Don't let f2fs inner GC ruin the original aging degree of a segment.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
---
 fs/f2fs/data.c    |  2 +-
 fs/f2fs/f2fs.h    |  5 +++--
 fs/f2fs/gc.c      |  4 ++--
 fs/f2fs/segment.c | 55 ++++++++++++++++++++++++++++++++++++-----------
 4 files changed, 49 insertions(+), 17 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index d742f38b1445..062d585fa080 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1366,7 +1366,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
 	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
 	old_blkaddr = dn->data_blkaddr;
 	f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
-					&sum, seg_type, NULL);
+				&sum, seg_type, NULL, false);
 	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
 		invalidate_mapping_pages(META_MAPPING(sbi),
 					old_blkaddr, old_blkaddr);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index f06c77066284..a90702a725b6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3383,7 +3383,8 @@ void f2fs_outplace_write_data(struct dnode_of_data *dn,
 int f2fs_inplace_write_data(struct f2fs_io_info *fio);
 void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
 			block_t old_blkaddr, block_t new_blkaddr,
-			bool recover_curseg, bool recover_newaddr);
+			bool recover_curseg, bool recover_newaddr,
+			bool from_gc);
 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
 			block_t old_addr, block_t new_addr,
 			unsigned char version, bool recover_curseg,
@@ -3391,7 +3392,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 			block_t old_blkaddr, block_t *new_blkaddr,
 			struct f2fs_summary *sum, int type,
-			struct f2fs_io_info *fio);
+			struct f2fs_io_info *fio, bool from_gc);
 void f2fs_wait_on_page_writeback(struct page *page,
 			enum page_type type, bool ordered, bool locked);
 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 84807abe4e00..e720886ef9bf 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -875,7 +875,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
 	}
 
 	f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
-					&sum, CURSEG_COLD_DATA, NULL);
+				&sum, CURSEG_COLD_DATA, NULL, true);
 
 	fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
 				newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
@@ -925,7 +925,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
 recover_block:
 	if (err)
 		f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr,
-								true, true);
+							true, true, true);
 up_out:
 	if (lfs_mode)
 		up_write(&fio.sbi->io_order_lock);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 906c313835ad..0fde06b904c5 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2149,12 +2149,28 @@ static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
 	if (modified)
 		__mark_sit_entry_dirty(sbi, segno);
 }
+static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi,
+								block_t blkaddr)
+{
+	unsigned int segno = GET_SEGNO(sbi, blkaddr);
+
+	if (segno == NULL_SEGNO)
+		return 0;
+	return get_seg_entry(sbi, segno)->mtime;
+}
 
-static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr)
+static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr,
+						unsigned long long old_mtime)
 {
+	struct seg_entry *se;
 	unsigned int segno = GET_SEGNO(sbi, blkaddr);
-	struct seg_entry *se = get_seg_entry(sbi, segno);
-	unsigned long long mtime = get_mtime(sbi, false);
+	unsigned long long ctime = get_mtime(sbi, false);
+	unsigned long long mtime = old_mtime ? old_mtime : ctime;
+
+	if (segno == NULL_SEGNO)
+		return;
+
+	se = get_seg_entry(sbi, segno);
 
 	if (!se->mtime) {
 		se->mtime = mtime;
@@ -2162,8 +2178,8 @@ static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr)
 		se->mtime = (se->mtime * se->valid_blocks + mtime) /
 						(se->valid_blocks + 1);
 	}
-	if (mtime > SIT_I(sbi)->max_mtime)
-		SIT_I(sbi)->max_mtime = mtime;
+	if (ctime > SIT_I(sbi)->max_mtime)
+		SIT_I(sbi)->max_mtime = ctime;
 }
 
 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
@@ -2185,8 +2201,6 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
 	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
 				(new_vblocks > sbi->blocks_per_seg)));
 
-	update_segment_mtime(sbi, blkaddr);
-
 	se->valid_blocks = new_vblocks;
 
 	/* Update valid block bitmap */
@@ -2280,6 +2294,7 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
 	/* add it into sit main buffer */
 	down_write(&sit_i->sentry_lock);
 
+	update_segment_mtime(sbi, addr, 0);
 	update_sit_entry(sbi, addr, -1);
 
 	/* add it into dirty seglist */
@@ -3188,10 +3203,11 @@ static int __get_segment_type(struct f2fs_io_info *fio)
 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 		block_t old_blkaddr, block_t *new_blkaddr,
 		struct f2fs_summary *sum, int type,
-		struct f2fs_io_info *fio)
+		struct f2fs_io_info *fio, bool from_gc)
 {
 	struct sit_info *sit_i = SIT_I(sbi);
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
+	unsigned long long old_mtime;
 
 	down_read(&SM_I(sbi)->curseg_lock);
 
@@ -3213,6 +3229,14 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 
 	stat_inc_block_count(sbi, curseg);
 
+	if (from_gc) {
+		old_mtime = get_segment_mtime(sbi, old_blkaddr);
+	} else {
+		update_segment_mtime(sbi, old_blkaddr, 0);
+		old_mtime = 0;
+	}
+	update_segment_mtime(sbi, *new_blkaddr, old_mtime);
+
 	/*
 	 * SIT information should be updated before segment allocation,
 	 * since SSR needs latest valid block information.
@@ -3289,7 +3313,8 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
 		down_read(&fio->sbi->io_order_lock);
 reallocate:
 	f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
-			&fio->new_blkaddr, sum, type, fio);
+			&fio->new_blkaddr, sum, type, fio,
+			is_cold_data(fio->page));
 	if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
 		invalidate_mapping_pages(META_MAPPING(fio->sbi),
 					fio->old_blkaddr, fio->old_blkaddr);
@@ -3405,7 +3430,8 @@ static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
 
 void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
 				block_t old_blkaddr, block_t new_blkaddr,
-				bool recover_curseg, bool recover_newaddr)
+				bool recover_curseg, bool recover_newaddr,
+				bool from_gc)
 {
 	struct sit_info *sit_i = SIT_I(sbi);
 	struct curseg_info *curseg;
@@ -3456,11 +3482,16 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
 	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
 	__add_sum_entry(sbi, type, sum);
 
-	if (!recover_curseg || recover_newaddr)
+	if (!recover_curseg || recover_newaddr) {
+		if (!from_gc)
+			update_segment_mtime(sbi, new_blkaddr, 0);
 		update_sit_entry(sbi, new_blkaddr, 1);
+	}
 	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
 		invalidate_mapping_pages(META_MAPPING(sbi),
 					old_blkaddr, old_blkaddr);
+		if (!from_gc)
+			update_segment_mtime(sbi, old_blkaddr, 0);
 		update_sit_entry(sbi, old_blkaddr, -1);
 	}
 
@@ -3492,7 +3523,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
 	set_summary(&sum, dn->nid, dn->ofs_in_node, version);
 
 	f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
-					recover_curseg, recover_newaddr);
+					recover_curseg, recover_newaddr, false);
 
 	f2fs_update_data_blkaddr(dn, new_addr);
 }
-- 
2.26.2



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [f2fs-dev] [PATCH RFC 4/5] f2fs: support 64-bits key in f2fs rb-tree node entry
  2020-06-30 10:04 [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg Chao Yu
  2020-06-30 10:04 ` [f2fs-dev] [PATCH RFC 2/5] f2fs: record average update time of segment Chao Yu
  2020-06-30 10:04 ` [f2fs-dev] [PATCH RFC 3/5] f2fs: inherit mtime of original block during GC Chao Yu
@ 2020-06-30 10:04 ` Chao Yu
  2020-06-30 10:04 ` [f2fs-dev] [PATCH RFC 5/5] f2fs: support age threshold based garbage collection Chao Yu
  2020-07-07  3:21 ` [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg Jaegeuk Kim
  4 siblings, 0 replies; 19+ messages in thread
From: Chao Yu @ 2020-06-30 10:04 UTC (permalink / raw)
  To: jaegeuk; +Cc: linux-kernel, linux-f2fs-devel

With this change, we can add a specified entry into the rb-tree with a
64-bit segment time as the key.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
---
 fs/f2fs/extent_cache.c | 37 +++++++++++++++++++++++++++++++++++--
 fs/f2fs/f2fs.h         | 15 ++++++++++++---
 fs/f2fs/segment.c      |  4 ++--
 3 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 686c68b98610..3ebf976a682d 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -58,6 +58,29 @@ struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
 	return re;
 }
 
+struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi,
+					struct rb_root_cached *root,
+					struct rb_node **parent,
+					unsigned long long key, bool *leftmost)
+{
+	struct rb_node **p = &root->rb_root.rb_node;
+	struct rb_entry *re;
+
+	while (*p) {
+		*parent = *p;
+		re = rb_entry(*parent, struct rb_entry, rb_node);
+
+		if (key < re->key) {
+			p = &(*p)->rb_left;
+		} else {
+			p = &(*p)->rb_right;
+			*leftmost = false;
+		}
+	}
+
+	return p;
+}
+
 struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
 				struct rb_root_cached *root,
 				struct rb_node **parent,
@@ -166,7 +189,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
 }
 
 bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
-						struct rb_root_cached *root)
+				struct rb_root_cached *root, bool check_key)
 {
 #ifdef CONFIG_F2FS_CHECK_FS
 	struct rb_node *cur = rb_first_cached(root), *next;
@@ -183,13 +206,23 @@ bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
 		cur_re = rb_entry(cur, struct rb_entry, rb_node);
 		next_re = rb_entry(next, struct rb_entry, rb_node);
 
+		if (check_key) {
+			if (cur_re->key > next_re->key) {
+				f2fs_info(sbi, "inconsistent rbtree, "
+					"cur(%llu) next(%llu)",
+					cur_re->key, next_re->key);
+				return false;
+			}
+			goto next;
+		}
+
 		if (cur_re->ofs + cur_re->len > next_re->ofs) {
 			f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)",
 				  cur_re->ofs, cur_re->len,
 				  next_re->ofs, next_re->len);
 			return false;
 		}
-
+next:
 		cur = next;
 	}
 #endif
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index a90702a725b6..8b60c0644123 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -635,8 +635,13 @@ enum {
 
 struct rb_entry {
 	struct rb_node rb_node;		/* rb node located in rb-tree */
-	unsigned int ofs;		/* start offset of the entry */
-	unsigned int len;		/* length of the entry */
+	union {
+		struct {
+			unsigned int ofs;	/* start offset of the entry */
+			unsigned int len;	/* length of the entry */
+		};
+		unsigned long long key;		/* 64-bits key */
+	};
 };
 
 struct extent_info {
@@ -3828,6 +3833,10 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
  */
 struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
 				struct rb_entry *cached_re, unsigned int ofs);
+struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi,
+				struct rb_root_cached *root,
+				struct rb_node **parent,
+				unsigned long long key, bool *left_most);
 struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
 				struct rb_root_cached *root,
 				struct rb_node **parent,
@@ -3838,7 +3847,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
 		struct rb_node ***insert_p, struct rb_node **insert_parent,
 		bool force, bool *leftmost);
 bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
-						struct rb_root_cached *root);
+				struct rb_root_cached *root, bool check_key);
 unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
 void f2fs_init_extent_tree(struct inode *inode, struct page *ipage);
 void f2fs_drop_extent_tree(struct inode *inode);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 0fde06b904c5..50670354fb9a 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1521,7 +1521,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
 			goto next;
 		if (unlikely(dcc->rbtree_check))
 			f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
-								&dcc->root));
+							&dcc->root, false));
 		blk_start_plug(&plug);
 		list_for_each_entry_safe(dc, tmp, pend_list, list) {
 			f2fs_bug_on(sbi, dc->state != D_PREP);
@@ -2887,7 +2887,7 @@ static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
 	mutex_lock(&dcc->cmd_lock);
 	if (unlikely(dcc->rbtree_check))
 		f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
-								&dcc->root));
+							&dcc->root, false));
 
 	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
 					NULL, start,
-- 
2.26.2



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [f2fs-dev] [PATCH RFC 5/5] f2fs: support age threshold based garbage collection
  2020-06-30 10:04 [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg Chao Yu
                   ` (2 preceding siblings ...)
  2020-06-30 10:04 ` [f2fs-dev] [PATCH RFC 4/5] f2fs: support 64-bits key in f2fs rb-tree node entry Chao Yu
@ 2020-06-30 10:04 ` Chao Yu
  2020-07-06  8:25   ` Chao Yu
  2020-07-07  3:21 ` [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg Jaegeuk Kim
  4 siblings, 1 reply; 19+ messages in thread
From: Chao Yu @ 2020-06-30 10:04 UTC (permalink / raw)
  To: jaegeuk; +Cc: linux-kernel, linux-f2fs-devel

There are several issues in the current background GC algorithm:
- the valid block count is one of the key factors in the cost
calculation, so if a segment has few valid blocks, the CB algorithm
will still choose it as victim even if its age is young or it is
located in the hot area, which is not appropriate.
- GCed data/node will go to existing logs, no matter whether the update
frequency of the data already there is the same or not, so it may mix
hot and cold data again.
- the GC allocator mainly uses LFS type segments, so it consumes free
segments more quickly.

This patch introduces a new algorithm named age threshold based
garbage collection to solve above issues, there are three steps
mainly:

1. select a source victim:
- set an age threshold, and select candidates based on the threshold:
e.g.
 0 means youngest, 100 means oldest; if we set the age threshold to 80,
 then select dirty segments whose age is in the range [80, 100] as
 candidates;
- set a candidate_ratio threshold, and select candidates based on the
ratio, so that we can shrink the candidates to the oldest segments;
- select target segment with fewest valid blocks in order to
migrate blocks with minimum cost;

2. select a target victim:
- select candidates based on the age threshold;
- set a candidate_radius threshold, and search for candidates whose age
is around the source victim's; the searching radius should be less than
the radius threshold.
- select target segment with most valid blocks in order to avoid
migrating current target segment.

3. merge valid blocks from source victim into target victim with
SSR allocator.

Test steps:
- create 160 dirty segments:
 * half of them have 128 valid blocks per segment
 * the rest of them have 384 valid blocks per segment
- run background GC

Benefit: GC count and block movement count both decrease obviously:

- Before:
  - Valid: 86
  - Dirty: 1
  - Prefree: 11
  - Free: 6001 (6001)

GC calls: 162 (BG: 220)
  - data segments : 160 (160)
  - node segments : 2 (2)
Try to move 41454 blocks (BG: 41454)
  - data blocks : 40960 (40960)
  - node blocks : 494 (494)

IPU: 0 blocks
SSR: 0 blocks in 0 segments
LFS: 41364 blocks in 81 segments

- After:

  - Valid: 87
  - Dirty: 0
  - Prefree: 4
  - Free: 6008 (6008)

GC calls: 75 (BG: 76)
  - data segments : 74 (74)
  - node segments : 1 (1)
Try to move 12813 blocks (BG: 12813)
  - data blocks : 12544 (12544)
  - node blocks : 269 (269)

IPU: 0 blocks
SSR: 12032 blocks in 77 segments
LFS: 855 blocks in 2 segments

Signed-off-by: Chao Yu <yuchao0@huawei.com>
---
 fs/f2fs/checkpoint.c        |   4 +-
 fs/f2fs/data.c              |   2 +-
 fs/f2fs/debug.c             |   4 +
 fs/f2fs/f2fs.h              |  28 ++-
 fs/f2fs/gc.c                | 371 +++++++++++++++++++++++++++++++++++-
 fs/f2fs/gc.h                |  25 +++
 fs/f2fs/segment.c           | 184 ++++++++++++++----
 fs/f2fs/segment.h           |  25 ++-
 fs/f2fs/super.c             |  10 +-
 include/trace/events/f2fs.h |   8 +-
 10 files changed, 602 insertions(+), 59 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 644a914af25a..470907ec7c3d 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1624,7 +1624,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	f2fs_flush_sit_entries(sbi, cpc);
 
 	/* save inmem log status */
-	f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
+	f2fs_save_inmem_curseg(sbi);
 
 	err = do_checkpoint(sbi, cpc);
 	if (err)
@@ -1632,7 +1632,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	else
 		f2fs_clear_prefree_segments(sbi, cpc);
 
-	f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
+	f2fs_restore_inmem_curseg(sbi);
 stop:
 	unblock_operations(sbi);
 	stat_inc_cp_count(sbi->stat_info);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 062d585fa080..f4c7901c99b0 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1366,7 +1366,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
 	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
 	old_blkaddr = dn->data_blkaddr;
 	f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
-				&sum, seg_type, NULL, false);
+				&sum, seg_type, NULL);
 	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
 		invalidate_mapping_pages(META_MAPPING(sbi),
 					old_blkaddr, old_blkaddr);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 41a91aa8c262..cb679561f44d 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -397,6 +397,10 @@ static int stat_show(struct seq_file *s, void *v)
 			   si->curseg[CURSEG_COLD_DATA_PINNED],
 			   si->cursec[CURSEG_COLD_DATA_PINNED],
 			   si->curzone[CURSEG_COLD_DATA_PINNED]);
+		seq_printf(s, "  - ATGC   data: %8d %8d %8d\n",
+			   si->curseg[CURSEG_ALL_DATA_ATGC],
+			   si->cursec[CURSEG_ALL_DATA_ATGC],
+			   si->curzone[CURSEG_ALL_DATA_ATGC]);
 		seq_printf(s, "\n  - Valid: %d\n  - Dirty: %d\n",
 			   si->main_area_segs - si->dirty_count -
 			   si->prefree_count - si->free_segs,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 8b60c0644123..b3905cbbf731 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1001,7 +1001,7 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
  */
 #define	NR_CURSEG_DATA_TYPE	(3)
 #define NR_CURSEG_NODE_TYPE	(3)
-#define NR_CURSEG_INMEM_TYPE	(1)
+#define NR_CURSEG_INMEM_TYPE	(2)
 #define NR_CURSEG_PERSIST_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
 #define NR_CURSEG_TYPE		(NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE)
 
@@ -1015,6 +1015,7 @@ enum {
 	NR_PERSISTENT_LOG,	/* number of persistent log */
 	CURSEG_COLD_DATA_PINNED = NR_PERSISTENT_LOG,
 				/* pinned file that needs consecutive block address */
+	CURSEG_ALL_DATA_ATGC,	/* SSR alloctor in hot/warm/cold data area */
 	NO_CHECK_TYPE,		/* number of persistent & inmem log */
 };
 
@@ -1260,6 +1261,18 @@ struct inode_management {
 	unsigned long ino_num;			/* number of entries */
 };
 
+/* for GC_AT */
+struct atgc_management {
+	bool atgc_enabled;			/* ATGC is enabled or not */
+	struct rb_root_cached root;		/* root of victim rb-tree */
+	struct list_head victim_list;		/* linked with all victim entries */
+	unsigned int victim_count;		/* victim count in rb-tree */
+	unsigned int candidate_ratio;		/* candidate ratio */
+	unsigned int max_candidate_count;	/* max candidate count */
+	unsigned int age_weight;		/* age weight, vblock_weight = 100 - age_weight */
+	unsigned long long age_threshold;	/* age threshold */
+};
+
 /* For s_flag in struct f2fs_sb_info */
 enum {
 	SBI_IS_DIRTY,				/* dirty flag for checkpoint */
@@ -1292,6 +1305,7 @@ enum {
 	GC_NORMAL,
 	GC_IDLE_CB,
 	GC_IDLE_GREEDY,
+	GC_IDLE_AT,
 	GC_URGENT_HIGH,
 	GC_URGENT_LOW,
 };
@@ -1549,6 +1563,7 @@ struct f2fs_sb_info {
 						 * race between GC and GC or CP
 						 */
 	struct f2fs_gc_kthread	*gc_thread;	/* GC thread */
+	struct atgc_management am;		/* atgc management */
 	unsigned int cur_victim_sec;		/* current victim section num */
 	unsigned int gc_mode;			/* current GC state */
 	unsigned int next_victim_seg[2];	/* next segment in victim section */
@@ -3368,8 +3383,11 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
 int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
-void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type);
-void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type);
+void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi);
+void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi);
+void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi);
+void f2fs_get_new_segment(struct f2fs_sb_info *sbi,
+			unsigned int *newseg, bool new_sec, int dir);
 void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
 					unsigned int start, unsigned int end);
 void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type);
@@ -3397,7 +3415,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 			block_t old_blkaddr, block_t *new_blkaddr,
 			struct f2fs_summary *sum, int type,
-			struct f2fs_io_info *fio, bool from_gc);
+			struct f2fs_io_info *fio);
 void f2fs_wait_on_page_writeback(struct page *page,
 			enum page_type type, bool ordered, bool locked);
 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
@@ -3532,6 +3550,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
 			unsigned int segno);
 void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
 int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count);
+int __init f2fs_create_garbage_collection_cache(void);
+void f2fs_destroy_garbage_collection_cache(void);
 
 /*
  * recovery.c
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index e720886ef9bf..cf0d2b02e264 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -21,6 +21,8 @@
 #include "gc.h"
 #include <trace/events/f2fs.h>
 
+static struct kmem_cache *victim_entry_slab;
+
 static unsigned int count_bits(const unsigned long *addr,
 				unsigned int offset, unsigned int len);
 
@@ -169,7 +171,16 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
 
 static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
 {
-	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
+	int gc_mode;
+
+	if (gc_type == BG_GC) {
+		if (sbi->am.atgc_enabled)
+			gc_mode = GC_AT;
+		else
+			gc_mode = GC_CB;
+	} else {
+		gc_mode = GC_GREEDY;
+	}
 
 	switch (sbi->gc_mode) {
 	case GC_IDLE_CB:
@@ -179,7 +190,11 @@ static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
 	case GC_URGENT_HIGH:
 		gc_mode = GC_GREEDY;
 		break;
+	case GC_IDLE_AT:
+		gc_mode = GC_AT;
+		break;
 	}
+
 	return gc_mode;
 }
 
@@ -193,6 +208,11 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
 		p->dirty_bitmap = dirty_i->dirty_segmap[type];
 		p->max_search = dirty_i->nr_dirty[type];
 		p->ofs_unit = 1;
+	} else if (p->alloc_mode == AT_SSR) {
+		p->gc_mode = GC_GREEDY;
+		p->dirty_bitmap = dirty_i->dirty_segmap[type];
+		p->max_search = dirty_i->nr_dirty[type];
+		p->ofs_unit = 1;
 	} else {
 		p->gc_mode = select_gc_type(sbi, gc_type);
 		p->ofs_unit = sbi->segs_per_sec;
@@ -212,6 +232,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
 	 */
 	if (gc_type != FG_GC &&
 			(sbi->gc_mode != GC_URGENT_HIGH) &&
+			(p->gc_mode != GC_AT && p->alloc_mode != AT_SSR) &&
 			p->max_search > sbi->max_victim_search)
 		p->max_search = sbi->max_victim_search;
 
@@ -229,10 +250,16 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
 	/* SSR allocates in a segment unit */
 	if (p->alloc_mode == SSR)
 		return sbi->blocks_per_seg;
+	else if (p->alloc_mode == AT_SSR)
+		return UINT_MAX;
+
+	/* LFS */
 	if (p->gc_mode == GC_GREEDY)
 		return 2 * sbi->blocks_per_seg * p->ofs_unit;
 	else if (p->gc_mode == GC_CB)
 		return UINT_MAX;
+	else if (p->gc_mode == GC_AT)
+		return UINT_MAX;
 	else /* No other gc_mode */
 		return 0;
 }
@@ -297,8 +324,10 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
 	/* alloc_mode == LFS */
 	if (p->gc_mode == GC_GREEDY)
 		return get_valid_blocks(sbi, segno, true);
-	else
+	else if (p->gc_mode == GC_CB)
 		return get_cb_cost(sbi, segno);
+	else
+		f2fs_bug_on(sbi, 1);
 }
 
 static unsigned int count_bits(const unsigned long *addr,
@@ -313,6 +342,266 @@ static unsigned int count_bits(const unsigned long *addr,
 	return sum;
 }
 
+static struct victim_entry *attach_victim_entry(struct f2fs_sb_info *sbi,
+				unsigned long long mtime, unsigned int segno,
+				struct rb_node *parent, struct rb_node **p,
+				bool left_most)
+{
+	struct atgc_management *am = &sbi->am;
+	struct victim_entry *ve;
+
+	ve =  f2fs_kmem_cache_alloc(victim_entry_slab, GFP_NOFS);
+
+	ve->mtime = mtime;
+	ve->segno = segno;
+
+	rb_link_node(&ve->rb_node, parent, p);
+	rb_insert_color_cached(&ve->rb_node, &am->root, left_most);
+
+	list_add_tail(&ve->list, &am->victim_list);
+
+	am->victim_count++;
+
+	return ve;
+}
+
+static void insert_victim_entry(struct f2fs_sb_info *sbi,
+				unsigned long long mtime, unsigned int segno)
+{
+	struct atgc_management *am = &sbi->am;
+	struct rb_node **p;
+	struct rb_node *parent = NULL;
+	struct victim_entry *ve = NULL;
+	bool left_most = true;
+
+	p = f2fs_lookup_rb_tree_ext(sbi, &am->root, &parent, mtime, &left_most);
+	ve = attach_victim_entry(sbi, mtime, segno, parent, p, left_most);
+}
+
+static void add_victim_entry(struct f2fs_sb_info *sbi,
+				struct victim_sel_policy *p, unsigned int segno)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
+	unsigned int start = GET_SEG_FROM_SEC(sbi, secno);
+	unsigned long long mtime = 0;
+	unsigned int i;
+
+	for (i = 0; i < sbi->segs_per_sec; i++)
+		mtime += get_seg_entry(sbi, start + i)->mtime;
+	mtime = div_u64(mtime, sbi->segs_per_sec);
+
+	/* Handle if the system time has changed by the user */
+	if (mtime < sit_i->min_mtime)
+		sit_i->min_mtime = mtime;
+	if (mtime > sit_i->max_mtime)
+		sit_i->max_mtime = mtime;
+	if (mtime < sit_i->dirty_min_mtime)
+		sit_i->dirty_min_mtime = mtime;
+	if (mtime > sit_i->dirty_max_mtime)
+		sit_i->dirty_max_mtime = mtime;
+
+	/* don't choose young section as candidate */
+	if (sit_i->dirty_max_mtime - mtime < p->age_threshold)
+		return;
+
+	insert_victim_entry(sbi, mtime, segno);
+}
+
+static struct rb_node *lookup_central_victim(struct f2fs_sb_info *sbi,
+						struct victim_sel_policy *p)
+{
+	struct atgc_management *am = &sbi->am;
+	struct rb_node *parent = NULL;
+	bool left_most;
+
+	f2fs_lookup_rb_tree_ext(sbi, &am->root, &parent, p->age, &left_most);
+
+	return parent;
+}
+
+static void atgc_lookup_victim(struct f2fs_sb_info *sbi,
+						struct victim_sel_policy *p)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	struct atgc_management *am = &sbi->am;
+	struct rb_root_cached *root = &am->root;
+	struct rb_node *node;
+	struct rb_entry *re;
+	struct victim_entry *ve;
+	unsigned long long total_time;
+	unsigned long long age, u, accu;
+	unsigned long long max_mtime = sit_i->dirty_max_mtime;
+	unsigned long long min_mtime = sit_i->dirty_min_mtime;
+	unsigned int sec_blocks = BLKS_PER_SEC(sbi);
+	unsigned int vblocks;
+	unsigned int dirty_threshold = max(am->max_candidate_count,
+					am->candidate_ratio *
+					am->victim_count / 100);
+	unsigned int age_weight = am->age_weight;
+	unsigned int cost;
+	unsigned int iter = 0;
+
+	if (max_mtime < min_mtime)
+		return;
+
+	max_mtime += 1;
+	total_time = max_mtime - min_mtime;
+
+	accu = min_t(unsigned long long,
+			ULLONG_MAX / total_time / 100,
+			DEFAULT_ACCURACY_CLASS);
+
+	node = rb_first_cached(root);
+next:
+	re = rb_entry_safe(node, struct rb_entry, rb_node);
+	if (!re)
+		return;
+
+	ve = (struct victim_entry *)re;
+
+	if (ve->mtime >= max_mtime || ve->mtime < min_mtime)
+		goto skip;
+
+	/* age = 10000 * x% * 60 */
+	age = div64_u64(accu * (max_mtime - ve->mtime), total_time) *
+								age_weight;
+
+	vblocks = get_valid_blocks(sbi, ve->segno, true);
+	f2fs_bug_on(sbi, !vblocks || vblocks == sec_blocks);
+
+	/* u = 10000 * x% * 40 */
+	u = div64_u64(accu * (sec_blocks - vblocks), sec_blocks) *
+							(100 - age_weight);
+
+	f2fs_bug_on(sbi, age + u >= UINT_MAX);
+
+	cost = UINT_MAX - (age + u);
+	iter++;
+
+	if (cost < p->min_cost ||
+			(cost == p->min_cost && age > p->oldest_age)) {
+		p->min_cost = cost;
+		p->oldest_age = age;
+		p->min_segno = ve->segno;
+	}
+skip:
+	if (iter < dirty_threshold) {
+		node = rb_next(node);
+		goto next;
+	}
+}
+
+/*
+ * select candidates around source section in range of
+ * [target - dirty_threshold, target + dirty_threshold]
+ */
+static void atssr_lookup_victim(struct f2fs_sb_info *sbi,
+						struct victim_sel_policy *p)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	struct atgc_management *am = &sbi->am;
+	struct rb_node *node;
+	struct rb_entry *re;
+	struct victim_entry *ve;
+	unsigned long long total_time;
+	unsigned long long age;
+	unsigned long long max_mtime = sit_i->dirty_max_mtime;
+	unsigned long long min_mtime = sit_i->dirty_min_mtime;
+	unsigned int seg_blocks = sbi->blocks_per_seg;
+	unsigned int vblocks;
+	unsigned int dirty_threshold = max(am->max_candidate_count,
+					am->candidate_ratio *
+					am->victim_count / 100);
+	unsigned int cost;
+	unsigned int iter = 0;
+	int stage = 0;
+
+	if (max_mtime < min_mtime)
+		return;
+	max_mtime += 1;
+	total_time = max_mtime - min_mtime;
+next_stage:
+	node = lookup_central_victim(sbi, p);
+next_node:
+	re = rb_entry_safe(node, struct rb_entry, rb_node);
+	if (!re) {
+		if (stage == 0)
+			goto skip_stage;
+		return;
+	}
+
+	ve = (struct victim_entry *)re;
+
+	if (ve->mtime >= max_mtime || ve->mtime < min_mtime)
+		goto skip_node;
+
+	age = max_mtime - ve->mtime;
+
+	vblocks = get_seg_entry(sbi, ve->segno)->ckpt_valid_blocks;
+	f2fs_bug_on(sbi, !vblocks);
+
+	/* rare case */
+	if (vblocks == seg_blocks)
+		goto skip_node;
+
+	iter++;
+
+	age = max_mtime - abs(p->age - age);
+	cost = UINT_MAX - vblocks;
+
+	if (cost < p->min_cost ||
+			(cost == p->min_cost && age > p->oldest_age)) {
+		p->min_cost = cost;
+		p->oldest_age = age;
+		p->min_segno = ve->segno;
+	}
+skip_node:
+	if (iter < dirty_threshold) {
+		if (stage == 0)
+			node = rb_prev(node);
+		else if (stage == 1)
+			node = rb_next(node);
+		goto next_node;
+	}
+skip_stage:
+	if (stage < 1) {
+		stage++;
+		iter = 0;
+		goto next_stage;
+	}
+}
+static void lookup_victim_by_age(struct f2fs_sb_info *sbi,
+						struct victim_sel_policy *p)
+{
+	f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
+						&sbi->am.root, true));
+
+	if (p->gc_mode == GC_AT)
+		atgc_lookup_victim(sbi, p);
+	else if (p->alloc_mode == AT_SSR)
+		atssr_lookup_victim(sbi, p);
+	else
+		f2fs_bug_on(sbi, 1);
+}
+
+void release_victim_entry(struct f2fs_sb_info *sbi)
+{
+	struct atgc_management *am = &sbi->am;
+	struct victim_entry *ve, *tmp;
+
+	list_for_each_entry_safe(ve, tmp, &am->victim_list, list) {
+		list_del(&ve->list);
+		kmem_cache_free(victim_entry_slab, ve);
+		am->victim_count--;
+	}
+
+	am->root = RB_ROOT_CACHED;
+
+	f2fs_bug_on(sbi, am->victim_count);
+	f2fs_bug_on(sbi, !list_empty(&am->victim_list));
+}
+
 /*
  * This function is called from two paths.
  * One is garbage collection and the other is SSR segment selection.
@@ -322,25 +611,37 @@ static unsigned int count_bits(const unsigned long *addr,
  * which has minimum valid blocks and removes it from dirty seglist.
  */
 static int get_victim_by_default(struct f2fs_sb_info *sbi,
-		unsigned int *result, int gc_type, int type, char alloc_mode)
+			unsigned int *result, int gc_type, int type,
+			char alloc_mode, unsigned long long age)
 {
 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
 	struct sit_info *sm = SIT_I(sbi);
 	struct victim_sel_policy p;
 	unsigned int secno, last_victim;
 	unsigned int last_segment;
-	unsigned int nsearched = 0;
+	unsigned int nsearched;
+	bool is_atgc;
 	int ret = 0;
 
 	mutex_lock(&dirty_i->seglist_lock);
 	last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec;
 
 	p.alloc_mode = alloc_mode;
-	select_policy(sbi, gc_type, type, &p);
+	p.age = age;
+	p.age_threshold = sbi->am.age_threshold;
 
+retry:
+	select_policy(sbi, gc_type, type, &p);
 	p.min_segno = NULL_SEGNO;
+	p.oldest_age = 0;
 	p.min_cost = get_max_cost(sbi, &p);
 
+	is_atgc = (p.gc_mode == GC_AT || p.alloc_mode == AT_SSR);
+	nsearched = 0;
+
+	if (is_atgc)
+		SIT_I(sbi)->dirty_min_mtime = ULLONG_MAX;
+
 	if (*result != NULL_SEGNO) {
 		if (!get_valid_blocks(sbi, *result, false)) {
 			ret = -ENODATA;
@@ -426,6 +727,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
 		if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
 			goto next;
 
+		if (is_atgc) {
+			add_victim_entry(sbi, &p, segno);
+			goto next;
+		}
+
 		cost = get_gc_cost(sbi, segno, &p);
 
 		if (p.min_cost > cost) {
@@ -444,6 +750,19 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
 			break;
 		}
 	}
+
+	/* get victim for GC_AT/AT_SSR */
+	if (is_atgc) {
+		lookup_victim_by_age(sbi, &p);
+		release_victim_entry(sbi);
+	}
+
+	if (is_atgc && p.min_segno == NULL_SEGNO &&
+			sm->elapsed_time < p.age_threshold) {
+		p.age_threshold = 0;
+		goto retry;
+	}
+
 	if (p.min_segno != NULL_SEGNO) {
 got_it:
 		*result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
@@ -791,6 +1110,8 @@ static int move_data_block(struct inode *inode, block_t bidx,
 	block_t newaddr;
 	int err = 0;
 	bool lfs_mode = f2fs_lfs_mode(fio.sbi);
+	int type = fio.sbi->am.atgc_enabled ?
+				CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA;
 
 	/* do not read out */
 	page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
@@ -875,7 +1196,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
 	}
 
 	f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
-				&sum, CURSEG_COLD_DATA, NULL, true);
+				&sum, type, NULL);
 
 	fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
 				newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
@@ -1180,7 +1501,7 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
 
 	down_write(&sit_i->sentry_lock);
 	ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type,
-					      NO_CHECK_TYPE, LFS);
+					      NO_CHECK_TYPE, LFS, 0);
 	up_write(&sit_i->sentry_lock);
 	return ret;
 }
@@ -1202,6 +1523,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
 	if (__is_large_section(sbi))
 		end_segno = rounddown(end_segno, sbi->segs_per_sec);
 
+	sanity_check_seg_type(sbi, get_seg_entry(sbi, segno)->type);
+
 	/* readahead multi ssa blocks those have contiguous address */
 	if (__is_large_section(sbi))
 		f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
@@ -1411,6 +1734,38 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
 	return ret;
 }
 
+int __init f2fs_create_garbage_collection_cache(void)
+{
+	victim_entry_slab = f2fs_kmem_cache_create("f2fs_victim_entry",
+					sizeof(struct victim_entry));
+	if (!victim_entry_slab)
+		return -ENOMEM;
+	return 0;
+}
+
+void f2fs_destroy_garbage_collection_cache(void)
+{
+	kmem_cache_destroy(victim_entry_slab);
+}
+
+static void init_atgc_management(struct f2fs_sb_info *sbi)
+{
+	struct atgc_management *am = &sbi->am;
+
+	if (SIT_I(sbi)->elapsed_time < (4 * DEF_GC_THREAD_AGE_THRESHOLD))
+		am->atgc_enabled = false;
+	else
+		am->atgc_enabled = true;
+
+	am->root = RB_ROOT_CACHED;
+	INIT_LIST_HEAD(&am->victim_list);
+	am->victim_count = 0;
+
+	am->candidate_ratio = DEF_GC_THREAD_CANDIDATE_RATIO;
+	am->max_candidate_count = DEF_GC_THREAD_MAX_CANDIDATE_COUNT;
+	am->age_weight = DEF_GC_THREAD_AGE_WEIGHT;
+}
+
 void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
 {
 	DIRTY_I(sbi)->v_ops = &default_v_ops;
@@ -1421,6 +1776,8 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
 	if (f2fs_is_multi_device(sbi) && !__is_large_section(sbi))
 		SIT_I(sbi)->last_victim[ALLOC_NEXT] =
 				GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
+
+	init_atgc_management(sbi);
 }
 
 static int free_segment_range(struct f2fs_sb_info *sbi,
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index db3c61046aa4..853287b2b45c 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -14,6 +14,14 @@
 #define DEF_GC_THREAD_MIN_SLEEP_TIME	30000	/* milliseconds */
 #define DEF_GC_THREAD_MAX_SLEEP_TIME	60000
 #define DEF_GC_THREAD_NOGC_SLEEP_TIME	300000	/* wait 5 min */
+
+/* choose candidates from sections which has age of more than 7 days */
+#define DEF_GC_THREAD_AGE_THRESHOLD		(60 * 60 * 24 * 7)
+#define DEF_GC_THREAD_CANDIDATE_RATIO		20	/* select 20% oldest sections as candidates */
+#define DEF_GC_THREAD_MAX_CANDIDATE_COUNT	10	/* select at most 10 sections as candidates */
+#define DEF_GC_THREAD_AGE_WEIGHT		60	/* age weight */
+#define DEFAULT_ACCURACY_CLASS			10000	/* accuracy class */
+
 #define LIMIT_INVALID_BLOCK	40 /* percentage over total user space */
 #define LIMIT_FREE_BLOCK	40 /* percentage over invalid + free space */
 
@@ -41,6 +49,23 @@ struct gc_inode_list {
 	struct radix_tree_root iroot;
 };
 
+struct victim_info {
+	unsigned long long mtime;	/* mtime of section */
+	unsigned int segno;		/* section No. */
+};
+
+struct victim_entry {
+	struct rb_node rb_node;		/* rb node located in rb-tree */
+	union {
+		struct {
+			unsigned long long mtime;	/* mtime of section */
+			unsigned int segno;		/* segment No. */
+		};
+		struct victim_info vi;	/* victim info */
+	};
+	struct list_head list;
+};
+
 /*
  * inline functions
  */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 50670354fb9a..028fd20aa004 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2419,9 +2419,9 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
 	f2fs_put_page(page, 1);
 }
 
-static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
+static int is_next_segment_free(struct f2fs_sb_info *sbi,
+				struct curseg_info *curseg, int type)
 {
-	struct curseg_info *curseg = CURSEG_I(sbi, type);
 	unsigned int segno = curseg->segno + 1;
 	struct free_segmap_info *free_i = FREE_I(sbi);
 
@@ -2434,7 +2434,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
  * Find a new segment from the free segments bitmap to right order
  * This function should be returned with success, otherwise BUG
  */
-static void get_new_segment(struct f2fs_sb_info *sbi,
+void get_new_segment(struct f2fs_sb_info *sbi,
 			unsigned int *newseg, bool new_sec, int dir)
 {
 	struct free_segmap_info *free_i = FREE_I(sbi);
@@ -2525,6 +2525,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
 {
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
 	struct summary_footer *sum_footer;
+	unsigned short seg_type = curseg->seg_type;
 
 	curseg->inited = true;
 	curseg->segno = curseg->next_segno;
@@ -2534,16 +2535,22 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
 
 	sum_footer = &(curseg->sum_blk->footer);
 	memset(sum_footer, 0, sizeof(struct summary_footer));
-	if (IS_DATASEG(curseg->seg_type))
+
+	sanity_check_seg_type(sbi, seg_type);
+
+	if (IS_DATASEG(seg_type))
 		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
-	if (IS_NODESEG(curseg->seg_type))
+	if (IS_NODESEG(seg_type))
 		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
-	__set_sit_entry_type(sbi, curseg->seg_type, curseg->segno, modified);
+	__set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
 }
 
 static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
 {
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
+	unsigned short seg_type = curseg->seg_type;
+
+	sanity_check_seg_type(sbi, seg_type);
 
 	/* if segs_per_sec is large than 1, we need to keep original policy. */
 	if (__is_large_section(sbi))
@@ -2557,8 +2564,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
 		return 0;
 
 	if (test_opt(sbi, NOHEAP) &&
-		(curseg->seg_type == CURSEG_HOT_DATA ||
-		IS_NODESEG(curseg->seg_type)))
+		(seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)))
 		return 0;
 
 	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
@@ -2634,7 +2640,7 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
  * This function always allocates a used segment(from dirty seglist) by SSR
  * manner, so it should recover the existing segment information of valid blocks
  */
-static void change_curseg(struct f2fs_sb_info *sbi, int type)
+static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush)
 {
 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -2642,8 +2648,10 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
 	struct f2fs_summary_block *sum_node;
 	struct page *sum_page;
 
-	write_sum_page(sbi, curseg->sum_blk,
-				GET_SUM_BLOCK(sbi, curseg->segno));
+	if (flush)
+		write_sum_page(sbi, curseg->sum_blk,
+					GET_SUM_BLOCK(sbi, curseg->segno));
+
 	__set_test_and_inuse(sbi, new_segno);
 
 	mutex_lock(&dirty_i->seglist_lock);
@@ -2662,10 +2670,62 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
 	f2fs_put_page(sum_page, 1);
 }
 
-void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
+static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
+				int alloc_mode, unsigned long long age);
+
+static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
+					int target_type, int alloc_mode,
+					unsigned long long age)
 {
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
 
+	curseg->seg_type = target_type;
+
+	if (get_ssr_segment(sbi, type, alloc_mode, age)) {
+		struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
+
+		curseg->seg_type = se->type;
+		change_curseg(sbi, type, true);
+	} else {
+		/* allocate cold segment by default */
+		curseg->seg_type = CURSEG_COLD_DATA;
+		new_curseg(sbi, type, true);
+	}
+	stat_inc_seg_type(sbi, curseg);
+}
+
+static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
+{
+	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
+
+	if (!sbi->am.atgc_enabled)
+		return;
+
+	down_read(&SM_I(sbi)->curseg_lock);
+
+	mutex_lock(&curseg->curseg_mutex);
+	down_write(&SIT_I(sbi)->sentry_lock);
+
+	get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0);
+
+	up_write(&SIT_I(sbi)->sentry_lock);
+	mutex_unlock(&curseg->curseg_mutex);
+
+	up_read(&SM_I(sbi)->curseg_lock);
+
+}
+void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
+{
+	__f2fs_init_atgc_curseg(sbi);
+}
+
+void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
+{
+	struct curseg_info *curseg = CURSEG_I(sbi, type);
+
+	if (!sbi->am.atgc_enabled)
+		return;
+
 	mutex_lock(&curseg->curseg_mutex);
 	if (!curseg->inited)
 		goto out;
@@ -2682,10 +2742,21 @@ void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
 	mutex_unlock(&curseg->curseg_mutex);
 }
 
-void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
+void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi)
+{
+	__f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
+
+	if (sbi->am.atgc_enabled)
+		__f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
+}
+
+void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
 {
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
 
+	if (!sbi->am.atgc_enabled)
+		return;
+
 	mutex_lock(&curseg->curseg_mutex);
 	if (!curseg->inited)
 		goto out;
@@ -2699,23 +2770,35 @@ void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
 	mutex_unlock(&curseg->curseg_mutex);
 }
 
-static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
+void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi)
+{
+	__f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
+
+	if (sbi->am.atgc_enabled)
+		__f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
+}
+
+static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
+				int alloc_mode, unsigned long long age)
 {
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
 	const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
 	unsigned segno = NULL_SEGNO;
+	unsigned short seg_type = curseg->seg_type;
 	int i, cnt;
 	bool reversed = false;
 
+	sanity_check_seg_type(sbi, seg_type);
+
 	/* f2fs_need_SSR() already forces to do this */
-	if (!v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
+	if (!v_ops->get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) {
 		curseg->next_segno = segno;
 		return 1;
 	}
 
 	/* For node segments, let's do SSR more intensively */
-	if (IS_NODESEG(type)) {
-		if (type >= CURSEG_WARM_NODE) {
+	if (IS_NODESEG(seg_type)) {
+		if (seg_type >= CURSEG_WARM_NODE) {
 			reversed = true;
 			i = CURSEG_COLD_NODE;
 		} else {
@@ -2723,7 +2806,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
 		}
 		cnt = NR_CURSEG_NODE_TYPE;
 	} else {
-		if (type >= CURSEG_WARM_DATA) {
+		if (seg_type >= CURSEG_WARM_DATA) {
 			reversed = true;
 			i = CURSEG_COLD_DATA;
 		} else {
@@ -2733,9 +2816,9 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
 	}
 
 	for (; cnt-- > 0; reversed ? i-- : i++) {
-		if (i == type)
+		if (i == seg_type)
 			continue;
-		if (!v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
+		if (!v_ops->get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) {
 			curseg->next_segno = segno;
 			return 1;
 		}
@@ -2764,13 +2847,15 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
 	if (force)
 		new_curseg(sbi, type, true);
 	else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
-					type == CURSEG_WARM_NODE)
+					curseg->seg_type == CURSEG_WARM_NODE)
 		new_curseg(sbi, type, false);
-	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
+	else if (curseg->alloc_type == LFS &&
+			is_next_segment_free(sbi, curseg, type) &&
 			likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
 		new_curseg(sbi, type, false);
-	else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
-		change_curseg(sbi, type);
+	else if (f2fs_need_SSR(sbi) &&
+			get_ssr_segment(sbi, type, SSR, 0))
+		change_curseg(sbi, type, true);
 	else
 		new_curseg(sbi, type, false);
 
@@ -2791,8 +2876,8 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
 	if (segno < start || segno > end)
 		goto unlock;
 
-	if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
-		change_curseg(sbi, type);
+	if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
+		change_curseg(sbi, type, true);
 	else
 		new_curseg(sbi, type, true);
 
@@ -3011,9 +3096,9 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
 	return err;
 }
 
-static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
+static bool __has_curseg_space(struct f2fs_sb_info *sbi,
+					struct curseg_info *curseg)
 {
-	struct curseg_info *curseg = CURSEG_I(sbi, type);
 	if (curseg->next_blkoff < sbi->blocks_per_seg)
 		return true;
 	return false;
@@ -3156,8 +3241,13 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
 	if (fio->type == DATA) {
 		struct inode *inode = fio->page->mapping->host;
 
-		if (is_cold_data(fio->page) || file_is_cold(inode) ||
-				f2fs_compressed_file(inode))
+		if (is_cold_data(fio->page)) {
+			if (fio->sbi->am.atgc_enabled)
+				return CURSEG_ALL_DATA_ATGC;
+			else
+				return CURSEG_COLD_DATA;
+		}
+		if (file_is_cold(inode) || f2fs_compressed_file(inode))
 			return CURSEG_COLD_DATA;
 		if (file_is_hot(inode) ||
 				is_inode_flag_set(inode, FI_HOT_DATA) ||
@@ -3203,19 +3293,29 @@ static int __get_segment_type(struct f2fs_io_info *fio)
 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 		block_t old_blkaddr, block_t *new_blkaddr,
 		struct f2fs_summary *sum, int type,
-		struct f2fs_io_info *fio, bool from_gc)
+		struct f2fs_io_info *fio)
 {
 	struct sit_info *sit_i = SIT_I(sbi);
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
 	unsigned long long old_mtime;
+	bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
+	struct seg_entry *se;
 
 	down_read(&SM_I(sbi)->curseg_lock);
 
 	mutex_lock(&curseg->curseg_mutex);
 	down_write(&sit_i->sentry_lock);
 
+	if (from_gc) {
+		f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
+		se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
+		sanity_check_seg_type(sbi, se->type);
+		f2fs_bug_on(sbi, IS_NODESEG(se->type));
+	}
 	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
 
+	f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg);
+
 	f2fs_wait_discard_bio(sbi, *new_blkaddr);
 
 	/*
@@ -3245,9 +3345,13 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
 		update_sit_entry(sbi, old_blkaddr, -1);
 
-	if (!__has_curseg_space(sbi, type))
-		sit_i->s_ops->allocate_segment(sbi, type, false);
-
+	if (!__has_curseg_space(sbi, curseg)) {
+		if (from_gc)
+			get_atssr_segment(sbi, type, se->type,
+						AT_SSR, se->mtime);
+		else
+			sit_i->s_ops->allocate_segment(sbi, type, false);
+	}
 	/*
 	 * segment dirty status should be updated after segment allocation,
 	 * so we just need to update status only one time after previous
@@ -3313,8 +3417,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
 		down_read(&fio->sbi->io_order_lock);
 reallocate:
 	f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
-			&fio->new_blkaddr, sum, type, fio,
-			is_cold_data(fio->page));
+			&fio->new_blkaddr, sum, type, fio);
 	if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
 		invalidate_mapping_pages(META_MAPPING(fio->sbi),
 					fio->old_blkaddr, fio->old_blkaddr);
@@ -3476,7 +3579,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
 	/* change the current segment */
 	if (segno != curseg->segno) {
 		curseg->next_segno = segno;
-		change_curseg(sbi, type);
+		change_curseg(sbi, type, true);
 	}
 
 	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
@@ -3503,7 +3606,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
 	if (recover_curseg) {
 		if (old_cursegno != curseg->segno) {
 			curseg->next_segno = old_cursegno;
-			change_curseg(sbi, type);
+			change_curseg(sbi, type, true);
 		}
 		curseg->next_blkoff = old_blkoff;
 	}
@@ -4258,6 +4361,8 @@ static int build_curseg(struct f2fs_sb_info *sbi)
 			array[i].seg_type = CURSEG_HOT_DATA + i;
 		else if (i == CURSEG_COLD_DATA_PINNED)
 			array[i].seg_type = CURSEG_COLD_DATA;
+		else if (i == CURSEG_ALL_DATA_ATGC)
+			array[i].seg_type = CURSEG_COLD_DATA;
 		array[i].segno = NULL_SEGNO;
 		array[i].next_blkoff = 0;
 		array[i].inited = false;
@@ -4500,6 +4605,8 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
 		struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
 		unsigned int blkofs = curseg->next_blkoff;
 
+		sanity_check_seg_type(sbi, curseg->seg_type);
+
 		if (f2fs_test_bit(blkofs, se->cur_valid_map))
 			goto out;
 
@@ -4802,6 +4909,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
 			sit_i->min_mtime = mtime;
 	}
 	sit_i->max_mtime = get_mtime(sbi, false);
+	sit_i->dirty_max_mtime = 0;
 	up_write(&sit_i->sentry_lock);
 }
 
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 8ff261550cbb..eab79b832544 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -24,6 +24,12 @@
 #define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
 #define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
 
+static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
+						unsigned short seg_type)
+{
+	f2fs_bug_on(sbi, seg_type >= NR_PERSISTENT_LOG);
+}
+
 #define IS_HOT(t)	((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
 #define IS_WARM(t)	((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
 #define IS_COLD(t)	((t) == CURSEG_COLD_NODE || (t) == CURSEG_COLD_DATA)
@@ -35,7 +41,8 @@
 	 ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) ||	\
 	 ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) ||	\
 	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) ||	\
-	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno))
+	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno) ||	\
+	 ((seg) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno))
 
 #define IS_CURSEC(sbi, secno)						\
 	(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno /		\
@@ -51,6 +58,8 @@
 	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno /		\
 	  (sbi)->segs_per_sec) ||	\
 	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno /	\
+	  (sbi)->segs_per_sec) ||	\
+	 ((secno) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno /	\
 	  (sbi)->segs_per_sec))
 
 #define MAIN_BLKADDR(sbi)						\
@@ -135,20 +144,25 @@ enum {
  * In the victim_sel_policy->alloc_mode, there are two block allocation modes.
  * LFS writes data sequentially with cleaning operations.
  * SSR (Slack Space Recycle) reuses obsolete space without cleaning operations.
+ * AT_SSR (Age Threshold based Slack Space Recycle) merges fragments into
+ * fragmented segment which has similar aging degree.
  */
 enum {
 	LFS = 0,
-	SSR
+	SSR,
+	AT_SSR,
 };
 
 /*
  * In the victim_sel_policy->gc_mode, there are two gc, aka cleaning, modes.
  * GC_CB is based on cost-benefit algorithm.
  * GC_GREEDY is based on greedy algorithm.
+ * GC_AT is based on age-threshold algorithm.
  */
 enum {
 	GC_CB = 0,
 	GC_GREEDY,
+	GC_AT,
 	ALLOC_NEXT,
 	FLUSH_DEVICE,
 	MAX_GC_POLICY,
@@ -177,7 +191,10 @@ struct victim_sel_policy {
 	unsigned int offset;		/* last scanned bitmap offset */
 	unsigned int ofs_unit;		/* bitmap search unit */
 	unsigned int min_cost;		/* minimum cost */
+	unsigned long long oldest_age;	/* oldest age of segments having the same min cost */
 	unsigned int min_segno;		/* segment # having min. cost */
+	unsigned long long age;		/* mtime of GCed section*/
+	unsigned long long age_threshold;/* age threshold */
 };
 
 struct seg_entry {
@@ -243,6 +260,8 @@ struct sit_info {
 	unsigned long long mounted_time;	/* mount time */
 	unsigned long long min_mtime;		/* min. modification time */
 	unsigned long long max_mtime;		/* max. modification time */
+	unsigned long long dirty_min_mtime;	/* rerange candidates in GC_AT */
+	unsigned long long dirty_max_mtime;	/* rerange candidates in GC_AT */
 
 	unsigned int last_victim[MAX_GC_POLICY]; /* last victim segment # */
 };
@@ -281,7 +300,7 @@ struct dirty_seglist_info {
 /* victim selection function for cleaning and SSR */
 struct victim_selection {
 	int (*get_victim)(struct f2fs_sb_info *, unsigned int *,
-							int, int, char);
+					int, int, char, unsigned long long);
 };
 
 /* for active log information */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 0fefa130585f..d86fa9197236 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3714,6 +3714,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 reset_checkpoint:
+	f2fs_init_inmem_curseg(sbi);
+
 	/* f2fs_recover_fsync_data() cleared this already */
 	clear_sbi_flag(sbi, SBI_POR_DOING);
 
@@ -3919,9 +3921,12 @@ static int __init init_f2fs_fs(void)
 	err = f2fs_create_extent_cache();
 	if (err)
 		goto free_checkpoint_caches;
-	err = f2fs_init_sysfs();
+	err = f2fs_create_garbage_collection_cache();
 	if (err)
 		goto free_extent_cache;
+	err = f2fs_init_sysfs();
+	if (err)
+		goto free_garbage_collection_cache;
 	err = register_shrinker(&f2fs_shrinker_info);
 	if (err)
 		goto free_sysfs;
@@ -3955,6 +3960,8 @@ static int __init init_f2fs_fs(void)
 	unregister_shrinker(&f2fs_shrinker_info);
 free_sysfs:
 	f2fs_exit_sysfs();
+free_garbage_collection_cache:
+	f2fs_destroy_garbage_collection_cache();
 free_extent_cache:
 	f2fs_destroy_extent_cache();
 free_checkpoint_caches:
@@ -3979,6 +3986,7 @@ static void __exit exit_f2fs_fs(void)
 	unregister_filesystem(&f2fs_fs_type);
 	unregister_shrinker(&f2fs_shrinker_info);
 	f2fs_exit_sysfs();
+	f2fs_destroy_garbage_collection_cache();
 	f2fs_destroy_extent_cache();
 	f2fs_destroy_checkpoint_caches();
 	f2fs_destroy_segment_manager_caches();
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 8a1c1311acac..06c73f3f6dd2 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -111,13 +111,15 @@ TRACE_DEFINE_ENUM(CP_RESIZE);
 
 #define show_alloc_mode(type)						\
 	__print_symbolic(type,						\
-		{ LFS,	"LFS-mode" },					\
-		{ SSR,	"SSR-mode" })
+		{ LFS,		"LFS-mode" },				\
+		{ SSR,		"SSR-mode" },				\
+		{ AT_SSR,	"AT_SSR-mode" })
 
 #define show_victim_policy(type)					\
 	__print_symbolic(type,						\
 		{ GC_GREEDY,	"Greedy" },				\
-		{ GC_CB,	"Cost-Benefit" })
+		{ GC_CB,	"Cost-Benefit" },			\
+		{ GC_AT,	"Age-threshold" })
 
 #define show_cpreason(type)						\
 	__print_flags(type, "|",					\
-- 
2.26.2



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [f2fs-dev] [PATCH RFC 2/5] f2fs: record average update time of segment
  2020-06-30 10:04 ` [f2fs-dev] [PATCH RFC 2/5] f2fs: record average update time of segment Chao Yu
@ 2020-07-01 16:19   ` Jaegeuk Kim
  2020-07-03  2:13     ` Chao Yu
  0 siblings, 1 reply; 19+ messages in thread
From: Jaegeuk Kim @ 2020-07-01 16:19 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 06/30, Chao Yu wrote:
> Previously, whenever we updated one block in a segment, we set the mtime of
> the segment to the latest time, making an aged segment look like the
> freshest one, so that GC with the cost-benefit algorithm would miss such a
> segment. So this patch changes to record mtime as the average block update
> time instead of the last update time.
> 
> It's not needed to reset mtime for prefree segment, as se->valid_blocks
> is zero, then old se->mtime won't take any weight with below calculation:
> 
> 	se->mtime = (se->mtime * se->valid_blocks + mtime) /
> 				(se->valid_blocks + 1);
> 
> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> ---
>  fs/f2fs/segment.c | 21 ++++++++++++++++++---
>  1 file changed, 18 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 863ec6f1fb87..906c313835ad 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -2150,6 +2150,22 @@ static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
>  		__mark_sit_entry_dirty(sbi, segno);
>  }
>  
> +static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr)
> +{
> +	unsigned int segno = GET_SEGNO(sbi, blkaddr);
> +	struct seg_entry *se = get_seg_entry(sbi, segno);
> +	unsigned long long mtime = get_mtime(sbi, false);
> +
> +	if (!se->mtime) {

Don't need {}.

> +		se->mtime = mtime;
> +	} else {
> +		se->mtime = (se->mtime * se->valid_blocks + mtime) /
> +						(se->valid_blocks + 1);
> +	}
> +	if (mtime > SIT_I(sbi)->max_mtime)
> +		SIT_I(sbi)->max_mtime = mtime;
> +}
> +
>  static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
>  {
>  	struct seg_entry *se;
> @@ -2169,10 +2185,9 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
>  	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
>  				(new_vblocks > sbi->blocks_per_seg)));
>  
> +	update_segment_mtime(sbi, blkaddr);
> +
>  	se->valid_blocks = new_vblocks;
> -	se->mtime = get_mtime(sbi, false);
> -	if (se->mtime > SIT_I(sbi)->max_mtime)
> -		SIT_I(sbi)->max_mtime = se->mtime;
>  
>  	/* Update valid block bitmap */
>  	if (del > 0) {
> -- 
> 2.26.2


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [f2fs-dev] [PATCH RFC 2/5] f2fs: record average update time of segment
  2020-07-01 16:19   ` Jaegeuk Kim
@ 2020-07-03  2:13     ` Chao Yu
  0 siblings, 0 replies; 19+ messages in thread
From: Chao Yu @ 2020-07-03  2:13 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2020/7/2 0:19, Jaegeuk Kim wrote:
> On 06/30, Chao Yu wrote:
>> Previously, whenever we updated one block in a segment, we set the mtime of
>> the segment to the latest time, making an aged segment look like the
>> freshest one, so that GC with the cost-benefit algorithm would miss such a
>> segment. So this patch changes to record mtime as the average block update
>> time instead of the last update time.
>>
>> It's not needed to reset mtime for prefree segment, as se->valid_blocks
>> is zero, then old se->mtime won't take any weight with below calculation:
>>
>> 	se->mtime = (se->mtime * se->valid_blocks + mtime) /
>> 				(se->valid_blocks + 1);
>>
>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>> ---
>>  fs/f2fs/segment.c | 21 ++++++++++++++++++---
>>  1 file changed, 18 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>> index 863ec6f1fb87..906c313835ad 100644
>> --- a/fs/f2fs/segment.c
>> +++ b/fs/f2fs/segment.c
>> @@ -2150,6 +2150,22 @@ static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
>>  		__mark_sit_entry_dirty(sbi, segno);
>>  }
>>  
>> +static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr)
>> +{
>> +	unsigned int segno = GET_SEGNO(sbi, blkaddr);
>> +	struct seg_entry *se = get_seg_entry(sbi, segno);
>> +	unsigned long long mtime = get_mtime(sbi, false);
>> +
>> +	if (!se->mtime) {
> 
> Don't need {}.

Updated,

BTW, have fixed below compile error:

   m68k-linux-ld: fs/f2fs/segment.o: in function `update_segment_mtime':
   fs/f2fs/segment.c:2162: undefined reference to `__udivdi3'

Thanks,

> 
>> +		se->mtime = mtime;
>> +	} else {
>> +		se->mtime = (se->mtime * se->valid_blocks + mtime) /
>> +						(se->valid_blocks + 1);
>> +	}
>> +	if (mtime > SIT_I(sbi)->max_mtime)
>> +		SIT_I(sbi)->max_mtime = mtime;
>> +}
>> +
>>  static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
>>  {
>>  	struct seg_entry *se;
>> @@ -2169,10 +2185,9 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
>>  	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
>>  				(new_vblocks > sbi->blocks_per_seg)));
>>  
>> +	update_segment_mtime(sbi, blkaddr);
>> +
>>  	se->valid_blocks = new_vblocks;
>> -	se->mtime = get_mtime(sbi, false);
>> -	if (se->mtime > SIT_I(sbi)->max_mtime)
>> -		SIT_I(sbi)->max_mtime = se->mtime;
>>  
>>  	/* Update valid block bitmap */
>>  	if (del > 0) {
>> -- 
>> 2.26.2
> .
> 


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [f2fs-dev] [PATCH RFC 5/5] f2fs: support age threshold based garbage collection
  2020-06-30 10:04 ` [f2fs-dev] [PATCH RFC 5/5] f2fs: support age threshold based garbage collection Chao Yu
@ 2020-07-06  8:25   ` Chao Yu
  0 siblings, 0 replies; 19+ messages in thread
From: Chao Yu @ 2020-07-06  8:25 UTC (permalink / raw)
  To: jaegeuk; +Cc: linux-kernel, linux-f2fs-devel

Jaegeuk,

This has been a commercialized feature in Huawei products for years, and I
think it's time to try to upstream it. Could you please take a look at this
idea, to see whether it could be a formal feature of f2fs?

On 2020/6/30 18:04, Chao Yu wrote:
> There are several issues in the current background GC algorithm:
> - the number of valid blocks is one of the key factors in the cost
> calculation, so if a segment has few valid blocks, the CB algorithm will
> still choose it as a victim even if it is young or is located in a hot
> segment, which is not appropriate.
> - GCed data/node blocks go to existing logs, no matter whether the update
> frequency of the data already there is the same or not, so hot and cold
> data may get mixed again.
> - the GC allocator mainly uses LFS type segments, which consumes free
> segments more quickly.
> 
> This patch introduces a new algorithm named age threshold based
> garbage collection to solve above issues, there are three steps
> mainly:
> 
> 1. select a source victim:
> - set an age threshold, and select candidates based on the threshold:
> e.g.
>  0 means youngest, 100 means oldest; if we set the age threshold to 80,
>  then select dirty segments whose age is in the range of [80, 100] as
>  candidates;
> - set candidate_ratio threshold, and select candidates based on the
> ratio, so that we can shrink candidates to those oldest segments;
> - select target segment with fewest valid blocks in order to
> migrate blocks with minimum cost;
> 
> 2. select a target victim:
> - select candidates based on the age threshold;
> - set candidate_radius threshold, search candidates whose age is
> around the source victim's; the searching radius should be less than the
> radius threshold.
> - select target segment with most valid blocks in order to avoid
> migrating current target segment.
> 
> 3. merge valid blocks from source victim into target victim with
> SSR allocator.
> 
> Test steps:
> - create 160 dirty segments:
>  * half of them have 128 valid blocks per segment
>  * the rest of them have 384 valid blocks per segment
> - run background GC
> 
> Benefit: GC count and block movement count both decrease obviously:
> 
> - Before:
>   - Valid: 86
>   - Dirty: 1
>   - Prefree: 11
>   - Free: 6001 (6001)
> 
> GC calls: 162 (BG: 220)
>   - data segments : 160 (160)
>   - node segments : 2 (2)
> Try to move 41454 blocks (BG: 41454)
>   - data blocks : 40960 (40960)
>   - node blocks : 494 (494)
> 
> IPU: 0 blocks
> SSR: 0 blocks in 0 segments
> LFS: 41364 blocks in 81 segments
> 
> - After:
> 
>   - Valid: 87
>   - Dirty: 0
>   - Prefree: 4
>   - Free: 6008 (6008)
> 
> GC calls: 75 (BG: 76)
>   - data segments : 74 (74)
>   - node segments : 1 (1)
> Try to move 12813 blocks (BG: 12813)
>   - data blocks : 12544 (12544)
>   - node blocks : 269 (269)
> 
> IPU: 0 blocks
> SSR: 12032 blocks in 77 segments
> LFS: 855 blocks in 2 segments


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg
  2020-06-30 10:04 [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg Chao Yu
                   ` (3 preceding siblings ...)
  2020-06-30 10:04 ` [f2fs-dev] [PATCH RFC 5/5] f2fs: support age threshold based garbage collection Chao Yu
@ 2020-07-07  3:21 ` Jaegeuk Kim
  2020-07-07  3:37   ` Chao Yu
  4 siblings, 1 reply; 19+ messages in thread
From: Jaegeuk Kim @ 2020-07-07  3:21 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

Hi Chao,

Do you have any brief design doc to present the idea?

Thanks,

On 06/30, Chao Yu wrote:
> Previous implementation of aligned pinfile allocation will:
> - allocate new segment on cold data log no matter whether last used
> segment is partially used or not, it makes IOs more random;
> - force concurrent cold data/GCed IO going into warm data area, it
> can make a bad effect on hot/cold data separation;
> 
> In this patch, we introduce a new type of log named 'inmem curseg',
> the differences from normal curseg are:
> - it reuses existing segment type (CURSEG_XXX_NODE/DATA);
> - it only exists in memory, its segno, blkofs, summary will not be
>  persisted into checkpoint area;
> 
> With this new feature, we can enhance scalability of log, special
> allocators can be created for purposes:
> - pure lfs allocator for aligned pinfile allocation or file
> defragmentation
> - pure ssr allocator for later feature
> 
> So that, let's update aligned pinfile allocation to use this new
> inmem curseg fwk.
> 
> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> ---
>  fs/f2fs/checkpoint.c |   7 ++-
>  fs/f2fs/debug.c      |   6 ++-
>  fs/f2fs/f2fs.h       |  12 +++--
>  fs/f2fs/file.c       |   3 +-
>  fs/f2fs/gc.c         |   2 +-
>  fs/f2fs/segment.c    | 107 ++++++++++++++++++++++++++++++-------------
>  fs/f2fs/segment.h    |  17 ++++---
>  fs/f2fs/super.c      |   9 ++--
>  8 files changed, 112 insertions(+), 51 deletions(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 1bb8278a1c4a..644a914af25a 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1623,11 +1623,16 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  
>  	f2fs_flush_sit_entries(sbi, cpc);
>  
> +	/* save inmem log status */
> +	f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
> +
>  	err = do_checkpoint(sbi, cpc);
>  	if (err)
>  		f2fs_release_discard_addrs(sbi);
>  	else
>  		f2fs_clear_prefree_segments(sbi, cpc);
> +
> +	f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
>  stop:
>  	unblock_operations(sbi);
>  	stat_inc_cp_count(sbi->stat_info);
> @@ -1658,7 +1663,7 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
>  	}
>  
>  	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
> -			NR_CURSEG_TYPE - __cp_payload(sbi)) *
> +			NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
>  				F2FS_ORPHANS_PER_BLOCK;
>  }
>  
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index 4276c0f79beb..41a91aa8c262 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -164,7 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>  		* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
>  		/ 2;
>  	si->util_invalid = 50 - si->util_free - si->util_valid;
> -	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
> +	for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
>  		struct curseg_info *curseg = CURSEG_I(sbi, i);
>  		si->curseg[i] = curseg->segno;
>  		si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
> @@ -393,6 +393,10 @@ static int stat_show(struct seq_file *s, void *v)
>  			   si->dirty_seg[CURSEG_COLD_NODE],
>  			   si->full_seg[CURSEG_COLD_NODE],
>  			   si->valid_blks[CURSEG_COLD_NODE]);
> +		seq_printf(s, "  - Pinned file: %8d %8d %8d\n",
> +			   si->curseg[CURSEG_COLD_DATA_PINNED],
> +			   si->cursec[CURSEG_COLD_DATA_PINNED],
> +			   si->curzone[CURSEG_COLD_DATA_PINNED]);
>  		seq_printf(s, "\n  - Valid: %d\n  - Dirty: %d\n",
>  			   si->main_area_segs - si->dirty_count -
>  			   si->prefree_count - si->free_segs,
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 7d6c5f8ce16b..f06c77066284 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -996,7 +996,9 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
>   */
>  #define	NR_CURSEG_DATA_TYPE	(3)
>  #define NR_CURSEG_NODE_TYPE	(3)
> -#define NR_CURSEG_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
> +#define NR_CURSEG_INMEM_TYPE	(1)
> +#define NR_CURSEG_PERSIST_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
> +#define NR_CURSEG_TYPE		(NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE)
>  
>  enum {
>  	CURSEG_HOT_DATA	= 0,	/* directory entry blocks */
> @@ -1005,8 +1007,10 @@ enum {
>  	CURSEG_HOT_NODE,	/* direct node blocks of directory files */
>  	CURSEG_WARM_NODE,	/* direct node blocks of normal files */
>  	CURSEG_COLD_NODE,	/* indirect node blocks */
> -	NO_CHECK_TYPE,
> -	CURSEG_COLD_DATA_PINNED,/* cold data for pinned file */
> +	NR_PERSISTENT_LOG,	/* number of persistent log */
> +	CURSEG_COLD_DATA_PINNED = NR_PERSISTENT_LOG,
> +				/* pinned file that needs consecutive block address */
> +	NO_CHECK_TYPE,		/* number of persistent & inmem log */
>  };
>  
>  struct flush_cmd {
> @@ -3359,6 +3363,8 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
>  int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
>  void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
>  int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
> +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type);
> +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type);
>  void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
>  					unsigned int start, unsigned int end);
>  void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type);
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index c10e82806c2a..8611ade06018 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -1656,13 +1656,14 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
>  		}
>  
>  		down_write(&sbi->pin_sem);
> -		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
>  
>  		f2fs_lock_op(sbi);
>  		f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA);
>  		f2fs_unlock_op(sbi);
>  
> +		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
>  		err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
> +
>  		up_write(&sbi->pin_sem);
>  
>  		done += map.m_len;
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index 3b718da69910..84807abe4e00 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -1448,7 +1448,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
>  	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
>  
>  	/* Move out cursegs from the target range */
> -	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
> +	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++)
>  		f2fs_allocate_segment_for_resize(sbi, type, start, end);
>  
>  	/* do GC to move out valid blocks in the range */
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 5924b3965ae4..863ec6f1fb87 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -1958,7 +1958,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
>  
>  	mutex_lock(&dirty_i->seglist_lock);
>  	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
> -		__set_test_and_free(sbi, segno);
> +		__set_test_and_free(sbi, segno, false);
>  	mutex_unlock(&dirty_i->seglist_lock);
>  }
>  
> @@ -2496,6 +2496,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
>  	struct summary_footer *sum_footer;
>  
> +	curseg->inited = true;
>  	curseg->segno = curseg->next_segno;
>  	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
>  	curseg->next_blkoff = 0;
> @@ -2503,24 +2504,31 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
>  
>  	sum_footer = &(curseg->sum_blk->footer);
>  	memset(sum_footer, 0, sizeof(struct summary_footer));
> -	if (IS_DATASEG(type))
> +	if (IS_DATASEG(curseg->seg_type))
>  		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
> -	if (IS_NODESEG(type))
> +	if (IS_NODESEG(curseg->seg_type))
>  		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
> -	__set_sit_entry_type(sbi, type, curseg->segno, modified);
> +	__set_sit_entry_type(sbi, curseg->seg_type, curseg->segno, modified);
>  }
>  
>  static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>  {
> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
> +
>  	/* if segs_per_sec is large than 1, we need to keep original policy. */
>  	if (__is_large_section(sbi))
> -		return CURSEG_I(sbi, type)->segno;
> +		return curseg->segno;
> +
> +	/* inmem log may not locate on any segment after mount */
> +	if (!curseg->inited)
> +		return 0;
>  
>  	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
>  		return 0;
>  
>  	if (test_opt(sbi, NOHEAP) &&
> -		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
> +		(curseg->seg_type == CURSEG_HOT_DATA ||
> +		IS_NODESEG(curseg->seg_type)))
>  		return 0;
>  
>  	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
> @@ -2530,7 +2538,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>  	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
>  		return 0;
>  
> -	return CURSEG_I(sbi, type)->segno;
> +	return curseg->segno;
>  }
>  
>  /*
> @@ -2540,12 +2548,14 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>  static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
>  {
>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
> +	unsigned short seg_type = curseg->seg_type;
>  	unsigned int segno = curseg->segno;
>  	int dir = ALLOC_LEFT;
>  
> -	write_sum_page(sbi, curseg->sum_blk,
> +	if (curseg->inited)
> +		write_sum_page(sbi, curseg->sum_blk,
>  				GET_SUM_BLOCK(sbi, segno));
> -	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
> +	if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
>  		dir = ALLOC_RIGHT;
>  
>  	if (test_opt(sbi, NOHEAP))
> @@ -2622,6 +2632,43 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
>  	f2fs_put_page(sum_page, 1);
>  }
>  
> +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
> +{
> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
> +
> +	mutex_lock(&curseg->curseg_mutex);
> +	if (!curseg->inited)
> +		goto out;
> +
> +	if (get_valid_blocks(sbi, curseg->segno, false)) {
> +		write_sum_page(sbi, curseg->sum_blk,
> +				GET_SUM_BLOCK(sbi, curseg->segno));
> +	} else {
> +		mutex_lock(&DIRTY_I(sbi)->seglist_lock);
> +		__set_test_and_free(sbi, curseg->segno, true);
> +		mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
> +	}
> +out:
> +	mutex_unlock(&curseg->curseg_mutex);
> +}
> +
> +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
> +{
> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
> +
> +	mutex_lock(&curseg->curseg_mutex);
> +	if (!curseg->inited)
> +		goto out;
> +	if (get_valid_blocks(sbi, curseg->segno, false))
> +		goto out;
> +
> +	mutex_lock(&DIRTY_I(sbi)->seglist_lock);
> +	__set_test_and_inuse(sbi, curseg->segno);
> +	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
> +out:
> +	mutex_unlock(&curseg->curseg_mutex);
> +}
> +
>  static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
>  {
>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
> @@ -2738,11 +2785,15 @@ void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
>  	unsigned int old_segno;
>  
> +	if (!curseg->inited)
> +		goto alloc;
> +
>  	if (!curseg->next_blkoff &&
>  		!get_valid_blocks(sbi, curseg->segno, false) &&
>  		!get_ckpt_valid_blocks(sbi, curseg->segno))
>  		return;
>  
> +alloc:
>  	old_segno = curseg->segno;
>  	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
>  	locate_dirty_segment(sbi, old_segno);
> @@ -3126,19 +3177,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>  {
>  	struct sit_info *sit_i = SIT_I(sbi);
>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
> -	bool put_pin_sem = false;
> -
> -	if (type == CURSEG_COLD_DATA) {
> -		/* GC during CURSEG_COLD_DATA_PINNED allocation */
> -		if (down_read_trylock(&sbi->pin_sem)) {
> -			put_pin_sem = true;
> -		} else {
> -			type = CURSEG_WARM_DATA;
> -			curseg = CURSEG_I(sbi, type);
> -		}
> -	} else if (type == CURSEG_COLD_DATA_PINNED) {
> -		type = CURSEG_COLD_DATA;
> -	}
>  
>  	down_read(&SM_I(sbi)->curseg_lock);
>  
> @@ -3204,9 +3242,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>  	mutex_unlock(&curseg->curseg_mutex);
>  
>  	up_read(&SM_I(sbi)->curseg_lock);
> -
> -	if (put_pin_sem)
> -		up_read(&sbi->pin_sem);
>  }
>  
>  static void update_device_state(struct f2fs_io_info *fio)
> @@ -3574,7 +3609,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
>  		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
>  							CURSEG_HOT_DATA]);
>  		if (__exist_node_summaries(sbi))
> -			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
> +			blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
>  		else
>  			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
>  	} else {
> @@ -3652,8 +3687,9 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
>  	}
>  
>  	if (__exist_node_summaries(sbi))
> -		f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
> -					NR_CURSEG_TYPE - type, META_CP, true);
> +		f2fs_ra_meta_pages(sbi,
> +				sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
> +				NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
>  
>  	for (; type <= CURSEG_COLD_NODE; type++) {
>  		err = read_normal_summaries(sbi, type);
> @@ -4155,14 +4191,14 @@ static int build_curseg(struct f2fs_sb_info *sbi)
>  	struct curseg_info *array;
>  	int i;
>  
> -	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
> -			     GFP_KERNEL);
> +	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
> +					sizeof(*array)), GFP_KERNEL);
>  	if (!array)
>  		return -ENOMEM;
>  
>  	SM_I(sbi)->curseg_array = array;
>  
> -	for (i = 0; i < NR_CURSEG_TYPE; i++) {
> +	for (i = 0; i < NO_CHECK_TYPE; i++) {
>  		mutex_init(&array[i].curseg_mutex);
>  		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
>  		if (!array[i].sum_blk)
> @@ -4172,8 +4208,13 @@ static int build_curseg(struct f2fs_sb_info *sbi)
>  				sizeof(struct f2fs_journal), GFP_KERNEL);
>  		if (!array[i].journal)
>  			return -ENOMEM;
> +		if (i < NR_PERSISTENT_LOG)
> +			array[i].seg_type = CURSEG_HOT_DATA + i;
> +		else if (i == CURSEG_COLD_DATA_PINNED)
> +			array[i].seg_type = CURSEG_COLD_DATA;
>  		array[i].segno = NULL_SEGNO;
>  		array[i].next_blkoff = 0;
> +		array[i].inited = false;
>  	}
>  	return restore_curseg_summaries(sbi);
>  }
> @@ -4408,7 +4449,7 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
>  	 * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
>  	 * In LFS curseg, all blkaddr after .next_blkoff should be unused.
>  	 */
> -	for (i = 0; i < NO_CHECK_TYPE; i++) {
> +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
>  		struct curseg_info *curseg = CURSEG_I(sbi, i);
>  		struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
>  		unsigned int blkofs = curseg->next_blkoff;
> @@ -4637,7 +4678,7 @@ int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
>  {
>  	int i, ret;
>  
> -	for (i = 0; i < NO_CHECK_TYPE; i++) {
> +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
>  		ret = fix_curseg_write_pointer(sbi, i);
>  		if (ret)
>  			return ret;
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index f261e3e6a69b..8ff261550cbb 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -22,7 +22,7 @@
>  #define GET_R2L_SEGNO(free_i, segno)	((segno) + (free_i)->start_segno)
>  
>  #define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
> -#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE)
> +#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
>  
>  #define IS_HOT(t)	((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
>  #define IS_WARM(t)	((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
> @@ -34,7 +34,8 @@
>  	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) ||	\
>  	 ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) ||	\
>  	 ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) ||	\
> -	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
> +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) ||	\
> +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno))
>  
>  #define IS_CURSEC(sbi, secno)						\
>  	(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno /		\
> @@ -48,7 +49,9 @@
>  	 ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno /		\
>  	  (sbi)->segs_per_sec) ||	\
>  	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno /		\
> -	  (sbi)->segs_per_sec))	\
> +	  (sbi)->segs_per_sec) ||	\
> +	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno /	\
> +	  (sbi)->segs_per_sec))
>  
>  #define MAIN_BLKADDR(sbi)						\
>  	(SM_I(sbi) ? SM_I(sbi)->main_blkaddr : 				\
> @@ -288,10 +291,12 @@ struct curseg_info {
>  	struct rw_semaphore journal_rwsem;	/* protect journal area */
>  	struct f2fs_journal *journal;		/* cached journal info */
>  	unsigned char alloc_type;		/* current allocation type */
> +	unsigned short seg_type;		/* segment type like CURSEG_XXX_TYPE */
>  	unsigned int segno;			/* current segment number */
>  	unsigned short next_blkoff;		/* next block offset to write */
>  	unsigned int zone;			/* current zone number */
>  	unsigned int next_segno;		/* preallocated segment */
> +	bool inited;				/* indicate inmem log is inited */
>  };
>  
>  struct sit_entry_set {
> @@ -305,8 +310,6 @@ struct sit_entry_set {
>   */
>  static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
>  {
> -	if (type == CURSEG_COLD_DATA_PINNED)
> -		type = CURSEG_COLD_DATA;
>  	return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
>  }
>  
> @@ -438,7 +441,7 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi,
>  }
>  
>  static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
> -		unsigned int segno)
> +		unsigned int segno, bool inmem)
>  {
>  	struct free_segmap_info *free_i = FREE_I(sbi);
>  	unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
> @@ -449,7 +452,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
>  	if (test_and_clear_bit(segno, free_i->free_segmap)) {
>  		free_i->free_segments++;
>  
> -		if (IS_CURSEC(sbi, secno))
> +		if (!inmem && IS_CURSEC(sbi, secno))
>  			goto skip_free;
>  		next = find_next_bit(free_i->free_segmap,
>  				start_segno + sbi->segs_per_sec, start_segno);
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 80cb7cd358f8..0fefa130585f 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -575,7 +575,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>  		case Opt_active_logs:
>  			if (args->from && match_int(args, &arg))
>  				return -EINVAL;
> -			if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
> +			if (arg != 2 && arg != 4 &&
> +				arg != NR_CURSEG_PERSIST_TYPE)
>  				return -EINVAL;
>  			F2FS_OPTION(sbi).active_logs = arg;
>  			break;
> @@ -981,7 +982,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>  	}
>  
>  	/* Not pass down write hints if the number of active logs is lesser
> -	 * than NR_CURSEG_TYPE.
> +	 * than NR_CURSEG_PERSIST_TYPE.
>  	 */
>  	if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE)
>  		F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
> @@ -1614,7 +1615,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>  static void default_options(struct f2fs_sb_info *sbi)
>  {
>  	/* init some FS parameters */
> -	F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE;
> +	F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE;
>  	F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
>  	F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
>  	F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
> @@ -2946,7 +2947,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
>  	cp_payload = __cp_payload(sbi);
>  	if (cp_pack_start_sum < cp_payload + 1 ||
>  		cp_pack_start_sum > blocks_per_seg - 1 -
> -			NR_CURSEG_TYPE) {
> +			NR_CURSEG_PERSIST_TYPE) {
>  		f2fs_err(sbi, "Wrong cp_pack_start_sum: %u",
>  			 cp_pack_start_sum);
>  		return 1;
> -- 
> 2.26.2


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg
  2020-07-07  3:21 ` [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg Jaegeuk Kim
@ 2020-07-07  3:37   ` Chao Yu
  2020-07-07  3:51     ` Jaegeuk Kim
  0 siblings, 1 reply; 19+ messages in thread
From: Chao Yu @ 2020-07-07  3:37 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2020/7/7 11:21, Jaegeuk Kim wrote:
> Hi Chao,
> 
> Do you have any brief design doc to present the idea?

Hi Jaegeuk,

You mean this whole patchset, right?

I can add a brief design description in patch 0/5.

> 
> Thanks,
> 
> On 06/30, Chao Yu wrote:
>> Previous implementation of aligned pinfile allocation will:
>> - allocate new segment on cold data log no matter whether last used
>> segment is partially used or not, it makes IOs more random;
>> - force concurrent cold data/GCed IO going into warm data area, it
>> can make a bad effect on hot/cold data separation;
>>
>> In this patch, we introduce a new type of log named 'inmem curseg',
>> the differences from normal curseg are:
>> - it reuses existing segment type (CURSEG_XXX_NODE/DATA);
>> - it only exists in memory, its segno, blkofs, summary will not be
>>  persisted into checkpoint area;
>>
>> With this new feature, we can enhance scalability of log, special
>> allocators can be created for purposes:
>> - pure lfs allocator for aligned pinfile allocation or file
>> defragmentation
>> - pure ssr allocator for later feature
>>
>> So that, let's update aligned pinfile allocation to use this new
>> inmem curseg fwk.
>>
>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>> ---
>>  fs/f2fs/checkpoint.c |   7 ++-
>>  fs/f2fs/debug.c      |   6 ++-
>>  fs/f2fs/f2fs.h       |  12 +++--
>>  fs/f2fs/file.c       |   3 +-
>>  fs/f2fs/gc.c         |   2 +-
>>  fs/f2fs/segment.c    | 107 ++++++++++++++++++++++++++++++-------------
>>  fs/f2fs/segment.h    |  17 ++++---
>>  fs/f2fs/super.c      |   9 ++--
>>  8 files changed, 112 insertions(+), 51 deletions(-)
>>
>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>> index 1bb8278a1c4a..644a914af25a 100644
>> --- a/fs/f2fs/checkpoint.c
>> +++ b/fs/f2fs/checkpoint.c
>> @@ -1623,11 +1623,16 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>  
>>  	f2fs_flush_sit_entries(sbi, cpc);
>>  
>> +	/* save inmem log status */
>> +	f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
>> +
>>  	err = do_checkpoint(sbi, cpc);
>>  	if (err)
>>  		f2fs_release_discard_addrs(sbi);
>>  	else
>>  		f2fs_clear_prefree_segments(sbi, cpc);
>> +
>> +	f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
>>  stop:
>>  	unblock_operations(sbi);
>>  	stat_inc_cp_count(sbi->stat_info);
>> @@ -1658,7 +1663,7 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
>>  	}
>>  
>>  	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
>> -			NR_CURSEG_TYPE - __cp_payload(sbi)) *
>> +			NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
>>  				F2FS_ORPHANS_PER_BLOCK;
>>  }
>>  
>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
>> index 4276c0f79beb..41a91aa8c262 100644
>> --- a/fs/f2fs/debug.c
>> +++ b/fs/f2fs/debug.c
>> @@ -164,7 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>>  		* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
>>  		/ 2;
>>  	si->util_invalid = 50 - si->util_free - si->util_valid;
>> -	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
>> +	for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
>>  		struct curseg_info *curseg = CURSEG_I(sbi, i);
>>  		si->curseg[i] = curseg->segno;
>>  		si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
>> @@ -393,6 +393,10 @@ static int stat_show(struct seq_file *s, void *v)
>>  			   si->dirty_seg[CURSEG_COLD_NODE],
>>  			   si->full_seg[CURSEG_COLD_NODE],
>>  			   si->valid_blks[CURSEG_COLD_NODE]);
>> +		seq_printf(s, "  - Pinned file: %8d %8d %8d\n",
>> +			   si->curseg[CURSEG_COLD_DATA_PINNED],
>> +			   si->cursec[CURSEG_COLD_DATA_PINNED],
>> +			   si->curzone[CURSEG_COLD_DATA_PINNED]);
>>  		seq_printf(s, "\n  - Valid: %d\n  - Dirty: %d\n",
>>  			   si->main_area_segs - si->dirty_count -
>>  			   si->prefree_count - si->free_segs,
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index 7d6c5f8ce16b..f06c77066284 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -996,7 +996,9 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
>>   */
>>  #define	NR_CURSEG_DATA_TYPE	(3)
>>  #define NR_CURSEG_NODE_TYPE	(3)
>> -#define NR_CURSEG_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
>> +#define NR_CURSEG_INMEM_TYPE	(1)
>> +#define NR_CURSEG_PERSIST_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
>> +#define NR_CURSEG_TYPE		(NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE)
>>  
>>  enum {
>>  	CURSEG_HOT_DATA	= 0,	/* directory entry blocks */
>> @@ -1005,8 +1007,10 @@ enum {
>>  	CURSEG_HOT_NODE,	/* direct node blocks of directory files */
>>  	CURSEG_WARM_NODE,	/* direct node blocks of normal files */
>>  	CURSEG_COLD_NODE,	/* indirect node blocks */
>> -	NO_CHECK_TYPE,
>> -	CURSEG_COLD_DATA_PINNED,/* cold data for pinned file */
>> +	NR_PERSISTENT_LOG,	/* number of persistent log */
>> +	CURSEG_COLD_DATA_PINNED = NR_PERSISTENT_LOG,
>> +				/* pinned file that needs consecutive block address */
>> +	NO_CHECK_TYPE,		/* number of persistent & inmem log */
>>  };
>>  
>>  struct flush_cmd {
>> @@ -3359,6 +3363,8 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
>>  int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
>>  void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
>>  int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
>> +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type);
>> +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type);
>>  void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
>>  					unsigned int start, unsigned int end);
>>  void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type);
>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>> index c10e82806c2a..8611ade06018 100644
>> --- a/fs/f2fs/file.c
>> +++ b/fs/f2fs/file.c
>> @@ -1656,13 +1656,14 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
>>  		}
>>  
>>  		down_write(&sbi->pin_sem);
>> -		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
>>  
>>  		f2fs_lock_op(sbi);
>>  		f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA);
>>  		f2fs_unlock_op(sbi);
>>  
>> +		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
>>  		err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
>> +
>>  		up_write(&sbi->pin_sem);
>>  
>>  		done += map.m_len;
>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>> index 3b718da69910..84807abe4e00 100644
>> --- a/fs/f2fs/gc.c
>> +++ b/fs/f2fs/gc.c
>> @@ -1448,7 +1448,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
>>  	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
>>  
>>  	/* Move out cursegs from the target range */
>> -	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
>> +	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++)
>>  		f2fs_allocate_segment_for_resize(sbi, type, start, end);
>>  
>>  	/* do GC to move out valid blocks in the range */
>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>> index 5924b3965ae4..863ec6f1fb87 100644
>> --- a/fs/f2fs/segment.c
>> +++ b/fs/f2fs/segment.c
>> @@ -1958,7 +1958,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
>>  
>>  	mutex_lock(&dirty_i->seglist_lock);
>>  	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
>> -		__set_test_and_free(sbi, segno);
>> +		__set_test_and_free(sbi, segno, false);
>>  	mutex_unlock(&dirty_i->seglist_lock);
>>  }
>>  
>> @@ -2496,6 +2496,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
>>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>  	struct summary_footer *sum_footer;
>>  
>> +	curseg->inited = true;
>>  	curseg->segno = curseg->next_segno;
>>  	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
>>  	curseg->next_blkoff = 0;
>> @@ -2503,24 +2504,31 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
>>  
>>  	sum_footer = &(curseg->sum_blk->footer);
>>  	memset(sum_footer, 0, sizeof(struct summary_footer));
>> -	if (IS_DATASEG(type))
>> +	if (IS_DATASEG(curseg->seg_type))
>>  		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
>> -	if (IS_NODESEG(type))
>> +	if (IS_NODESEG(curseg->seg_type))
>>  		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
>> -	__set_sit_entry_type(sbi, type, curseg->segno, modified);
>> +	__set_sit_entry_type(sbi, curseg->seg_type, curseg->segno, modified);
>>  }
>>  
>>  static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>>  {
>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
>> +
>>  	/* if segs_per_sec is large than 1, we need to keep original policy. */
>>  	if (__is_large_section(sbi))
>> -		return CURSEG_I(sbi, type)->segno;
>> +		return curseg->segno;
>> +
>> +	/* inmem log may not locate on any segment after mount */
>> +	if (!curseg->inited)
>> +		return 0;
>>  
>>  	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
>>  		return 0;
>>  
>>  	if (test_opt(sbi, NOHEAP) &&
>> -		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
>> +		(curseg->seg_type == CURSEG_HOT_DATA ||
>> +		IS_NODESEG(curseg->seg_type)))
>>  		return 0;
>>  
>>  	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
>> @@ -2530,7 +2538,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>>  	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
>>  		return 0;
>>  
>> -	return CURSEG_I(sbi, type)->segno;
>> +	return curseg->segno;
>>  }
>>  
>>  /*
>> @@ -2540,12 +2548,14 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>>  static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
>>  {
>>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
>> +	unsigned short seg_type = curseg->seg_type;
>>  	unsigned int segno = curseg->segno;
>>  	int dir = ALLOC_LEFT;
>>  
>> -	write_sum_page(sbi, curseg->sum_blk,
>> +	if (curseg->inited)
>> +		write_sum_page(sbi, curseg->sum_blk,
>>  				GET_SUM_BLOCK(sbi, segno));
>> -	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
>> +	if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
>>  		dir = ALLOC_RIGHT;
>>  
>>  	if (test_opt(sbi, NOHEAP))
>> @@ -2622,6 +2632,43 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
>>  	f2fs_put_page(sum_page, 1);
>>  }
>>  
>> +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
>> +{
>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
>> +
>> +	mutex_lock(&curseg->curseg_mutex);
>> +	if (!curseg->inited)
>> +		goto out;
>> +
>> +	if (get_valid_blocks(sbi, curseg->segno, false)) {
>> +		write_sum_page(sbi, curseg->sum_blk,
>> +				GET_SUM_BLOCK(sbi, curseg->segno));
>> +	} else {
>> +		mutex_lock(&DIRTY_I(sbi)->seglist_lock);
>> +		__set_test_and_free(sbi, curseg->segno, true);
>> +		mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
>> +	}
>> +out:
>> +	mutex_unlock(&curseg->curseg_mutex);
>> +}
>> +
>> +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
>> +{
>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
>> +
>> +	mutex_lock(&curseg->curseg_mutex);
>> +	if (!curseg->inited)
>> +		goto out;
>> +	if (get_valid_blocks(sbi, curseg->segno, false))
>> +		goto out;
>> +
>> +	mutex_lock(&DIRTY_I(sbi)->seglist_lock);
>> +	__set_test_and_inuse(sbi, curseg->segno);
>> +	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
>> +out:
>> +	mutex_unlock(&curseg->curseg_mutex);
>> +}
>> +
>>  static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
>>  {
>>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
>> @@ -2738,11 +2785,15 @@ void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
>>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>  	unsigned int old_segno;
>>  
>> +	if (!curseg->inited)
>> +		goto alloc;
>> +
>>  	if (!curseg->next_blkoff &&
>>  		!get_valid_blocks(sbi, curseg->segno, false) &&
>>  		!get_ckpt_valid_blocks(sbi, curseg->segno))
>>  		return;
>>  
>> +alloc:
>>  	old_segno = curseg->segno;
>>  	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
>>  	locate_dirty_segment(sbi, old_segno);
>> @@ -3126,19 +3177,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>>  {
>>  	struct sit_info *sit_i = SIT_I(sbi);
>>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
>> -	bool put_pin_sem = false;
>> -
>> -	if (type == CURSEG_COLD_DATA) {
>> -		/* GC during CURSEG_COLD_DATA_PINNED allocation */
>> -		if (down_read_trylock(&sbi->pin_sem)) {
>> -			put_pin_sem = true;
>> -		} else {
>> -			type = CURSEG_WARM_DATA;
>> -			curseg = CURSEG_I(sbi, type);
>> -		}
>> -	} else if (type == CURSEG_COLD_DATA_PINNED) {
>> -		type = CURSEG_COLD_DATA;
>> -	}
>>  
>>  	down_read(&SM_I(sbi)->curseg_lock);
>>  
>> @@ -3204,9 +3242,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>>  	mutex_unlock(&curseg->curseg_mutex);
>>  
>>  	up_read(&SM_I(sbi)->curseg_lock);
>> -
>> -	if (put_pin_sem)
>> -		up_read(&sbi->pin_sem);
>>  }
>>  
>>  static void update_device_state(struct f2fs_io_info *fio)
>> @@ -3574,7 +3609,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
>>  		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
>>  							CURSEG_HOT_DATA]);
>>  		if (__exist_node_summaries(sbi))
>> -			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
>> +			blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
>>  		else
>>  			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
>>  	} else {
>> @@ -3652,8 +3687,9 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
>>  	}
>>  
>>  	if (__exist_node_summaries(sbi))
>> -		f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
>> -					NR_CURSEG_TYPE - type, META_CP, true);
>> +		f2fs_ra_meta_pages(sbi,
>> +				sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
>> +				NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
>>  
>>  	for (; type <= CURSEG_COLD_NODE; type++) {
>>  		err = read_normal_summaries(sbi, type);
>> @@ -4155,14 +4191,14 @@ static int build_curseg(struct f2fs_sb_info *sbi)
>>  	struct curseg_info *array;
>>  	int i;
>>  
>> -	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
>> -			     GFP_KERNEL);
>> +	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
>> +					sizeof(*array)), GFP_KERNEL);
>>  	if (!array)
>>  		return -ENOMEM;
>>  
>>  	SM_I(sbi)->curseg_array = array;
>>  
>> -	for (i = 0; i < NR_CURSEG_TYPE; i++) {
>> +	for (i = 0; i < NO_CHECK_TYPE; i++) {
>>  		mutex_init(&array[i].curseg_mutex);
>>  		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
>>  		if (!array[i].sum_blk)
>> @@ -4172,8 +4208,13 @@ static int build_curseg(struct f2fs_sb_info *sbi)
>>  				sizeof(struct f2fs_journal), GFP_KERNEL);
>>  		if (!array[i].journal)
>>  			return -ENOMEM;
>> +		if (i < NR_PERSISTENT_LOG)
>> +			array[i].seg_type = CURSEG_HOT_DATA + i;
>> +		else if (i == CURSEG_COLD_DATA_PINNED)
>> +			array[i].seg_type = CURSEG_COLD_DATA;
>>  		array[i].segno = NULL_SEGNO;
>>  		array[i].next_blkoff = 0;
>> +		array[i].inited = false;
>>  	}
>>  	return restore_curseg_summaries(sbi);
>>  }
>> @@ -4408,7 +4449,7 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
>>  	 * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
>>  	 * In LFS curseg, all blkaddr after .next_blkoff should be unused.
>>  	 */
>> -	for (i = 0; i < NO_CHECK_TYPE; i++) {
>> +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
>>  		struct curseg_info *curseg = CURSEG_I(sbi, i);
>>  		struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
>>  		unsigned int blkofs = curseg->next_blkoff;
>> @@ -4637,7 +4678,7 @@ int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
>>  {
>>  	int i, ret;
>>  
>> -	for (i = 0; i < NO_CHECK_TYPE; i++) {
>> +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
>>  		ret = fix_curseg_write_pointer(sbi, i);
>>  		if (ret)
>>  			return ret;
>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>> index f261e3e6a69b..8ff261550cbb 100644
>> --- a/fs/f2fs/segment.h
>> +++ b/fs/f2fs/segment.h
>> @@ -22,7 +22,7 @@
>>  #define GET_R2L_SEGNO(free_i, segno)	((segno) + (free_i)->start_segno)
>>  
>>  #define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
>> -#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE)
>> +#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
>>  
>>  #define IS_HOT(t)	((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
>>  #define IS_WARM(t)	((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
>> @@ -34,7 +34,8 @@
>>  	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) ||	\
>>  	 ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) ||	\
>>  	 ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) ||	\
>> -	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
>> +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) ||	\
>> +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno))
>>  
>>  #define IS_CURSEC(sbi, secno)						\
>>  	(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno /		\
>> @@ -48,7 +49,9 @@
>>  	 ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno /		\
>>  	  (sbi)->segs_per_sec) ||	\
>>  	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno /		\
>> -	  (sbi)->segs_per_sec))	\
>> +	  (sbi)->segs_per_sec) ||	\
>> +	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno /	\
>> +	  (sbi)->segs_per_sec))
>>  
>>  #define MAIN_BLKADDR(sbi)						\
>>  	(SM_I(sbi) ? SM_I(sbi)->main_blkaddr : 				\
>> @@ -288,10 +291,12 @@ struct curseg_info {
>>  	struct rw_semaphore journal_rwsem;	/* protect journal area */
>>  	struct f2fs_journal *journal;		/* cached journal info */
>>  	unsigned char alloc_type;		/* current allocation type */
>> +	unsigned short seg_type;		/* segment type like CURSEG_XXX_TYPE */
>>  	unsigned int segno;			/* current segment number */
>>  	unsigned short next_blkoff;		/* next block offset to write */
>>  	unsigned int zone;			/* current zone number */
>>  	unsigned int next_segno;		/* preallocated segment */
>> +	bool inited;				/* indicate inmem log is inited */
>>  };
>>  
>>  struct sit_entry_set {
>> @@ -305,8 +310,6 @@ struct sit_entry_set {
>>   */
>>  static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
>>  {
>> -	if (type == CURSEG_COLD_DATA_PINNED)
>> -		type = CURSEG_COLD_DATA;
>>  	return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
>>  }
>>  
>> @@ -438,7 +441,7 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi,
>>  }
>>  
>>  static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
>> -		unsigned int segno)
>> +		unsigned int segno, bool inmem)
>>  {
>>  	struct free_segmap_info *free_i = FREE_I(sbi);
>>  	unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
>> @@ -449,7 +452,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
>>  	if (test_and_clear_bit(segno, free_i->free_segmap)) {
>>  		free_i->free_segments++;
>>  
>> -		if (IS_CURSEC(sbi, secno))
>> +		if (!inmem && IS_CURSEC(sbi, secno))
>>  			goto skip_free;
>>  		next = find_next_bit(free_i->free_segmap,
>>  				start_segno + sbi->segs_per_sec, start_segno);
>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>> index 80cb7cd358f8..0fefa130585f 100644
>> --- a/fs/f2fs/super.c
>> +++ b/fs/f2fs/super.c
>> @@ -575,7 +575,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>>  		case Opt_active_logs:
>>  			if (args->from && match_int(args, &arg))
>>  				return -EINVAL;
>> -			if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
>> +			if (arg != 2 && arg != 4 &&
>> +				arg != NR_CURSEG_PERSIST_TYPE)
>>  				return -EINVAL;
>>  			F2FS_OPTION(sbi).active_logs = arg;
>>  			break;
>> @@ -981,7 +982,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>>  	}
>>  
>>  	/* Not pass down write hints if the number of active logs is lesser
>> -	 * than NR_CURSEG_TYPE.
>> +	 * than NR_CURSEG_PERSIST_TYPE.
>>  	 */
>>  	if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE)
>>  		F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
>> @@ -1614,7 +1615,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>>  static void default_options(struct f2fs_sb_info *sbi)
>>  {
>>  	/* init some FS parameters */
>> -	F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE;
>> +	F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE;
>>  	F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
>>  	F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
>>  	F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
>> @@ -2946,7 +2947,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
>>  	cp_payload = __cp_payload(sbi);
>>  	if (cp_pack_start_sum < cp_payload + 1 ||
>>  		cp_pack_start_sum > blocks_per_seg - 1 -
>> -			NR_CURSEG_TYPE) {
>> +			NR_CURSEG_PERSIST_TYPE) {
>>  		f2fs_err(sbi, "Wrong cp_pack_start_sum: %u",
>>  			 cp_pack_start_sum);
>>  		return 1;
>> -- 
>> 2.26.2
> .
> 


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg
  2020-07-07  3:37   ` Chao Yu
@ 2020-07-07  3:51     ` Jaegeuk Kim
  2020-07-15  3:39       ` Chao Yu
  0 siblings, 1 reply; 19+ messages in thread
From: Jaegeuk Kim @ 2020-07-07  3:51 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 07/07, Chao Yu wrote:
> On 2020/7/7 11:21, Jaegeuk Kim wrote:
> > Hi Chao,
> > 
> > Do you have any brief design doc to present the idea?
> 
> Hi Jaegeuk,
> 
> You mean this whole patchset, right?
> 
> I can add a brief design description in patch 0/5.

Yeah, it's a bit hard to understand the whole flow.

Thanks,

> 
> > 
> > Thanks,
> > 
> > On 06/30, Chao Yu wrote:
> >> Previous implementation of aligned pinfile allocation will:
> >> - allocate new segment on cold data log no matter whether last used
> >> segment is partially used or not, it makes IOs more random;
> >> - force concurrent cold data/GCed IO going into warm data area, it
> >> can make a bad effect on hot/cold data separation;
> >>
> >> In this patch, we introduce a new type of log named 'inmem curseg',
> >> the differences from a normal curseg are:
> >> - it reuses an existing segment type (CURSEG_XXX_NODE/DATA);
> >> - it only exists in memory; its segno, blkofs, and summary will not be
> >>  persisted into the checkpoint area;
> >>
> >> With this new feature, we can enhance scalability of log, special
> >> allocators can be created for purposes:
> >> - pure lfs allocator for aligned pinfile allocation or file
> >> defragmentation
> >> - pure ssr allocator for later feature
> >>
> >> So that, let's update aligned pinfile allocation to use this new
> >> inmem curseg fwk.
> >>
> >> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> >> ---
> >>  fs/f2fs/checkpoint.c |   7 ++-
> >>  fs/f2fs/debug.c      |   6 ++-
> >>  fs/f2fs/f2fs.h       |  12 +++--
> >>  fs/f2fs/file.c       |   3 +-
> >>  fs/f2fs/gc.c         |   2 +-
> >>  fs/f2fs/segment.c    | 107 ++++++++++++++++++++++++++++++-------------
> >>  fs/f2fs/segment.h    |  17 ++++---
> >>  fs/f2fs/super.c      |   9 ++--
> >>  8 files changed, 112 insertions(+), 51 deletions(-)
> >>
> >> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> >> index 1bb8278a1c4a..644a914af25a 100644
> >> --- a/fs/f2fs/checkpoint.c
> >> +++ b/fs/f2fs/checkpoint.c
> >> @@ -1623,11 +1623,16 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >>  
> >>  	f2fs_flush_sit_entries(sbi, cpc);
> >>  
> >> +	/* save inmem log status */
> >> +	f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
> >> +
> >>  	err = do_checkpoint(sbi, cpc);
> >>  	if (err)
> >>  		f2fs_release_discard_addrs(sbi);
> >>  	else
> >>  		f2fs_clear_prefree_segments(sbi, cpc);
> >> +
> >> +	f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
> >>  stop:
> >>  	unblock_operations(sbi);
> >>  	stat_inc_cp_count(sbi->stat_info);
> >> @@ -1658,7 +1663,7 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
> >>  	}
> >>  
> >>  	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
> >> -			NR_CURSEG_TYPE - __cp_payload(sbi)) *
> >> +			NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
> >>  				F2FS_ORPHANS_PER_BLOCK;
> >>  }
> >>  
> >> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> >> index 4276c0f79beb..41a91aa8c262 100644
> >> --- a/fs/f2fs/debug.c
> >> +++ b/fs/f2fs/debug.c
> >> @@ -164,7 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
> >>  		* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
> >>  		/ 2;
> >>  	si->util_invalid = 50 - si->util_free - si->util_valid;
> >> -	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
> >> +	for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
> >>  		struct curseg_info *curseg = CURSEG_I(sbi, i);
> >>  		si->curseg[i] = curseg->segno;
> >>  		si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
> >> @@ -393,6 +393,10 @@ static int stat_show(struct seq_file *s, void *v)
> >>  			   si->dirty_seg[CURSEG_COLD_NODE],
> >>  			   si->full_seg[CURSEG_COLD_NODE],
> >>  			   si->valid_blks[CURSEG_COLD_NODE]);
> >> +		seq_printf(s, "  - Pinned file: %8d %8d %8d\n",
> >> +			   si->curseg[CURSEG_COLD_DATA_PINNED],
> >> +			   si->cursec[CURSEG_COLD_DATA_PINNED],
> >> +			   si->curzone[CURSEG_COLD_DATA_PINNED]);
> >>  		seq_printf(s, "\n  - Valid: %d\n  - Dirty: %d\n",
> >>  			   si->main_area_segs - si->dirty_count -
> >>  			   si->prefree_count - si->free_segs,
> >> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >> index 7d6c5f8ce16b..f06c77066284 100644
> >> --- a/fs/f2fs/f2fs.h
> >> +++ b/fs/f2fs/f2fs.h
> >> @@ -996,7 +996,9 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
> >>   */
> >>  #define	NR_CURSEG_DATA_TYPE	(3)
> >>  #define NR_CURSEG_NODE_TYPE	(3)
> >> -#define NR_CURSEG_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
> >> +#define NR_CURSEG_INMEM_TYPE	(1)
> >> +#define NR_CURSEG_PERSIST_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
> >> +#define NR_CURSEG_TYPE		(NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE)
> >>  
> >>  enum {
> >>  	CURSEG_HOT_DATA	= 0,	/* directory entry blocks */
> >> @@ -1005,8 +1007,10 @@ enum {
> >>  	CURSEG_HOT_NODE,	/* direct node blocks of directory files */
> >>  	CURSEG_WARM_NODE,	/* direct node blocks of normal files */
> >>  	CURSEG_COLD_NODE,	/* indirect node blocks */
> >> -	NO_CHECK_TYPE,
> >> -	CURSEG_COLD_DATA_PINNED,/* cold data for pinned file */
> >> +	NR_PERSISTENT_LOG,	/* number of persistent log */
> >> +	CURSEG_COLD_DATA_PINNED = NR_PERSISTENT_LOG,
> >> +				/* pinned file that needs consecutive block address */
> >> +	NO_CHECK_TYPE,		/* number of persistent & inmem log */
> >>  };
> >>  
> >>  struct flush_cmd {
> >> @@ -3359,6 +3363,8 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
> >>  int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
> >>  void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
> >>  int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
> >> +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type);
> >> +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type);
> >>  void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
> >>  					unsigned int start, unsigned int end);
> >>  void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type);
> >> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> >> index c10e82806c2a..8611ade06018 100644
> >> --- a/fs/f2fs/file.c
> >> +++ b/fs/f2fs/file.c
> >> @@ -1656,13 +1656,14 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
> >>  		}
> >>  
> >>  		down_write(&sbi->pin_sem);
> >> -		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
> >>  
> >>  		f2fs_lock_op(sbi);
> >>  		f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA);
> >>  		f2fs_unlock_op(sbi);
> >>  
> >> +		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
> >>  		err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
> >> +
> >>  		up_write(&sbi->pin_sem);
> >>  
> >>  		done += map.m_len;
> >> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> >> index 3b718da69910..84807abe4e00 100644
> >> --- a/fs/f2fs/gc.c
> >> +++ b/fs/f2fs/gc.c
> >> @@ -1448,7 +1448,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
> >>  	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
> >>  
> >>  	/* Move out cursegs from the target range */
> >> -	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
> >> +	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++)
> >>  		f2fs_allocate_segment_for_resize(sbi, type, start, end);
> >>  
> >>  	/* do GC to move out valid blocks in the range */
> >> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >> index 5924b3965ae4..863ec6f1fb87 100644
> >> --- a/fs/f2fs/segment.c
> >> +++ b/fs/f2fs/segment.c
> >> @@ -1958,7 +1958,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
> >>  
> >>  	mutex_lock(&dirty_i->seglist_lock);
> >>  	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
> >> -		__set_test_and_free(sbi, segno);
> >> +		__set_test_and_free(sbi, segno, false);
> >>  	mutex_unlock(&dirty_i->seglist_lock);
> >>  }
> >>  
> >> @@ -2496,6 +2496,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
> >>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
> >>  	struct summary_footer *sum_footer;
> >>  
> >> +	curseg->inited = true;
> >>  	curseg->segno = curseg->next_segno;
> >>  	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
> >>  	curseg->next_blkoff = 0;
> >> @@ -2503,24 +2504,31 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
> >>  
> >>  	sum_footer = &(curseg->sum_blk->footer);
> >>  	memset(sum_footer, 0, sizeof(struct summary_footer));
> >> -	if (IS_DATASEG(type))
> >> +	if (IS_DATASEG(curseg->seg_type))
> >>  		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
> >> -	if (IS_NODESEG(type))
> >> +	if (IS_NODESEG(curseg->seg_type))
> >>  		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
> >> -	__set_sit_entry_type(sbi, type, curseg->segno, modified);
> >> +	__set_sit_entry_type(sbi, curseg->seg_type, curseg->segno, modified);
> >>  }
> >>  
> >>  static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
> >>  {
> >> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
> >> +
> >>  	/* if segs_per_sec is large than 1, we need to keep original policy. */
> >>  	if (__is_large_section(sbi))
> >> -		return CURSEG_I(sbi, type)->segno;
> >> +		return curseg->segno;
> >> +
> >> +	/* inmem log may not locate on any segment after mount */
> >> +	if (!curseg->inited)
> >> +		return 0;
> >>  
> >>  	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
> >>  		return 0;
> >>  
> >>  	if (test_opt(sbi, NOHEAP) &&
> >> -		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
> >> +		(curseg->seg_type == CURSEG_HOT_DATA ||
> >> +		IS_NODESEG(curseg->seg_type)))
> >>  		return 0;
> >>  
> >>  	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
> >> @@ -2530,7 +2538,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
> >>  	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
> >>  		return 0;
> >>  
> >> -	return CURSEG_I(sbi, type)->segno;
> >> +	return curseg->segno;
> >>  }
> >>  
> >>  /*
> >> @@ -2540,12 +2548,14 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
> >>  static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
> >>  {
> >>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
> >> +	unsigned short seg_type = curseg->seg_type;
> >>  	unsigned int segno = curseg->segno;
> >>  	int dir = ALLOC_LEFT;
> >>  
> >> -	write_sum_page(sbi, curseg->sum_blk,
> >> +	if (curseg->inited)
> >> +		write_sum_page(sbi, curseg->sum_blk,
> >>  				GET_SUM_BLOCK(sbi, segno));
> >> -	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
> >> +	if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
> >>  		dir = ALLOC_RIGHT;
> >>  
> >>  	if (test_opt(sbi, NOHEAP))
> >> @@ -2622,6 +2632,43 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
> >>  	f2fs_put_page(sum_page, 1);
> >>  }
> >>  
> >> +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
> >> +{
> >> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
> >> +
> >> +	mutex_lock(&curseg->curseg_mutex);
> >> +	if (!curseg->inited)
> >> +		goto out;
> >> +
> >> +	if (get_valid_blocks(sbi, curseg->segno, false)) {
> >> +		write_sum_page(sbi, curseg->sum_blk,
> >> +				GET_SUM_BLOCK(sbi, curseg->segno));
> >> +	} else {
> >> +		mutex_lock(&DIRTY_I(sbi)->seglist_lock);
> >> +		__set_test_and_free(sbi, curseg->segno, true);
> >> +		mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
> >> +	}
> >> +out:
> >> +	mutex_unlock(&curseg->curseg_mutex);
> >> +}
> >> +
> >> +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
> >> +{
> >> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
> >> +
> >> +	mutex_lock(&curseg->curseg_mutex);
> >> +	if (!curseg->inited)
> >> +		goto out;
> >> +	if (get_valid_blocks(sbi, curseg->segno, false))
> >> +		goto out;
> >> +
> >> +	mutex_lock(&DIRTY_I(sbi)->seglist_lock);
> >> +	__set_test_and_inuse(sbi, curseg->segno);
> >> +	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
> >> +out:
> >> +	mutex_unlock(&curseg->curseg_mutex);
> >> +}
> >> +
> >>  static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
> >>  {
> >>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
> >> @@ -2738,11 +2785,15 @@ void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
> >>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
> >>  	unsigned int old_segno;
> >>  
> >> +	if (!curseg->inited)
> >> +		goto alloc;
> >> +
> >>  	if (!curseg->next_blkoff &&
> >>  		!get_valid_blocks(sbi, curseg->segno, false) &&
> >>  		!get_ckpt_valid_blocks(sbi, curseg->segno))
> >>  		return;
> >>  
> >> +alloc:
> >>  	old_segno = curseg->segno;
> >>  	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
> >>  	locate_dirty_segment(sbi, old_segno);
> >> @@ -3126,19 +3177,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> >>  {
> >>  	struct sit_info *sit_i = SIT_I(sbi);
> >>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
> >> -	bool put_pin_sem = false;
> >> -
> >> -	if (type == CURSEG_COLD_DATA) {
> >> -		/* GC during CURSEG_COLD_DATA_PINNED allocation */
> >> -		if (down_read_trylock(&sbi->pin_sem)) {
> >> -			put_pin_sem = true;
> >> -		} else {
> >> -			type = CURSEG_WARM_DATA;
> >> -			curseg = CURSEG_I(sbi, type);
> >> -		}
> >> -	} else if (type == CURSEG_COLD_DATA_PINNED) {
> >> -		type = CURSEG_COLD_DATA;
> >> -	}
> >>  
> >>  	down_read(&SM_I(sbi)->curseg_lock);
> >>  
> >> @@ -3204,9 +3242,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> >>  	mutex_unlock(&curseg->curseg_mutex);
> >>  
> >>  	up_read(&SM_I(sbi)->curseg_lock);
> >> -
> >> -	if (put_pin_sem)
> >> -		up_read(&sbi->pin_sem);
> >>  }
> >>  
> >>  static void update_device_state(struct f2fs_io_info *fio)
> >> @@ -3574,7 +3609,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
> >>  		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
> >>  							CURSEG_HOT_DATA]);
> >>  		if (__exist_node_summaries(sbi))
> >> -			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
> >> +			blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
> >>  		else
> >>  			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
> >>  	} else {
> >> @@ -3652,8 +3687,9 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
> >>  	}
> >>  
> >>  	if (__exist_node_summaries(sbi))
> >> -		f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
> >> -					NR_CURSEG_TYPE - type, META_CP, true);
> >> +		f2fs_ra_meta_pages(sbi,
> >> +				sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
> >> +				NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
> >>  
> >>  	for (; type <= CURSEG_COLD_NODE; type++) {
> >>  		err = read_normal_summaries(sbi, type);
> >> @@ -4155,14 +4191,14 @@ static int build_curseg(struct f2fs_sb_info *sbi)
> >>  	struct curseg_info *array;
> >>  	int i;
> >>  
> >> -	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
> >> -			     GFP_KERNEL);
> >> +	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
> >> +					sizeof(*array)), GFP_KERNEL);
> >>  	if (!array)
> >>  		return -ENOMEM;
> >>  
> >>  	SM_I(sbi)->curseg_array = array;
> >>  
> >> -	for (i = 0; i < NR_CURSEG_TYPE; i++) {
> >> +	for (i = 0; i < NO_CHECK_TYPE; i++) {
> >>  		mutex_init(&array[i].curseg_mutex);
> >>  		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
> >>  		if (!array[i].sum_blk)
> >> @@ -4172,8 +4208,13 @@ static int build_curseg(struct f2fs_sb_info *sbi)
> >>  				sizeof(struct f2fs_journal), GFP_KERNEL);
> >>  		if (!array[i].journal)
> >>  			return -ENOMEM;
> >> +		if (i < NR_PERSISTENT_LOG)
> >> +			array[i].seg_type = CURSEG_HOT_DATA + i;
> >> +		else if (i == CURSEG_COLD_DATA_PINNED)
> >> +			array[i].seg_type = CURSEG_COLD_DATA;
> >>  		array[i].segno = NULL_SEGNO;
> >>  		array[i].next_blkoff = 0;
> >> +		array[i].inited = false;
> >>  	}
> >>  	return restore_curseg_summaries(sbi);
> >>  }
> >> @@ -4408,7 +4449,7 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
> >>  	 * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
> >>  	 * In LFS curseg, all blkaddr after .next_blkoff should be unused.
> >>  	 */
> >> -	for (i = 0; i < NO_CHECK_TYPE; i++) {
> >> +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
> >>  		struct curseg_info *curseg = CURSEG_I(sbi, i);
> >>  		struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
> >>  		unsigned int blkofs = curseg->next_blkoff;
> >> @@ -4637,7 +4678,7 @@ int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
> >>  {
> >>  	int i, ret;
> >>  
> >> -	for (i = 0; i < NO_CHECK_TYPE; i++) {
> >> +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
> >>  		ret = fix_curseg_write_pointer(sbi, i);
> >>  		if (ret)
> >>  			return ret;
> >> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> >> index f261e3e6a69b..8ff261550cbb 100644
> >> --- a/fs/f2fs/segment.h
> >> +++ b/fs/f2fs/segment.h
> >> @@ -22,7 +22,7 @@
> >>  #define GET_R2L_SEGNO(free_i, segno)	((segno) + (free_i)->start_segno)
> >>  
> >>  #define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
> >> -#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE)
> >> +#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
> >>  
> >>  #define IS_HOT(t)	((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
> >>  #define IS_WARM(t)	((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
> >> @@ -34,7 +34,8 @@
> >>  	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) ||	\
> >>  	 ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) ||	\
> >>  	 ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) ||	\
> >> -	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
> >> +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) ||	\
> >> +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno))
> >>  
> >>  #define IS_CURSEC(sbi, secno)						\
> >>  	(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno /		\
> >> @@ -48,7 +49,9 @@
> >>  	 ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno /		\
> >>  	  (sbi)->segs_per_sec) ||	\
> >>  	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno /		\
> >> -	  (sbi)->segs_per_sec))	\
> >> +	  (sbi)->segs_per_sec) ||	\
> >> +	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno /	\
> >> +	  (sbi)->segs_per_sec))
> >>  
> >>  #define MAIN_BLKADDR(sbi)						\
> >>  	(SM_I(sbi) ? SM_I(sbi)->main_blkaddr : 				\
> >> @@ -288,10 +291,12 @@ struct curseg_info {
> >>  	struct rw_semaphore journal_rwsem;	/* protect journal area */
> >>  	struct f2fs_journal *journal;		/* cached journal info */
> >>  	unsigned char alloc_type;		/* current allocation type */
> >> +	unsigned short seg_type;		/* segment type like CURSEG_XXX_TYPE */
> >>  	unsigned int segno;			/* current segment number */
> >>  	unsigned short next_blkoff;		/* next block offset to write */
> >>  	unsigned int zone;			/* current zone number */
> >>  	unsigned int next_segno;		/* preallocated segment */
> >> +	bool inited;				/* indicate inmem log is inited */
> >>  };
> >>  
> >>  struct sit_entry_set {
> >> @@ -305,8 +310,6 @@ struct sit_entry_set {
> >>   */
> >>  static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
> >>  {
> >> -	if (type == CURSEG_COLD_DATA_PINNED)
> >> -		type = CURSEG_COLD_DATA;
> >>  	return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
> >>  }
> >>  
> >> @@ -438,7 +441,7 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi,
> >>  }
> >>  
> >>  static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
> >> -		unsigned int segno)
> >> +		unsigned int segno, bool inmem)
> >>  {
> >>  	struct free_segmap_info *free_i = FREE_I(sbi);
> >>  	unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
> >> @@ -449,7 +452,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
> >>  	if (test_and_clear_bit(segno, free_i->free_segmap)) {
> >>  		free_i->free_segments++;
> >>  
> >> -		if (IS_CURSEC(sbi, secno))
> >> +		if (!inmem && IS_CURSEC(sbi, secno))
> >>  			goto skip_free;
> >>  		next = find_next_bit(free_i->free_segmap,
> >>  				start_segno + sbi->segs_per_sec, start_segno);
> >> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >> index 80cb7cd358f8..0fefa130585f 100644
> >> --- a/fs/f2fs/super.c
> >> +++ b/fs/f2fs/super.c
> >> @@ -575,7 +575,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
> >>  		case Opt_active_logs:
> >>  			if (args->from && match_int(args, &arg))
> >>  				return -EINVAL;
> >> -			if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
> >> +			if (arg != 2 && arg != 4 &&
> >> +				arg != NR_CURSEG_PERSIST_TYPE)
> >>  				return -EINVAL;
> >>  			F2FS_OPTION(sbi).active_logs = arg;
> >>  			break;
> >> @@ -981,7 +982,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
> >>  	}
> >>  
> >>  	/* Not pass down write hints if the number of active logs is lesser
> >> -	 * than NR_CURSEG_TYPE.
> >> +	 * than NR_CURSEG_PERSIST_TYPE.
> >>  	 */
> >>  	if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE)
> >>  		F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
> >> @@ -1614,7 +1615,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
> >>  static void default_options(struct f2fs_sb_info *sbi)
> >>  {
> >>  	/* init some FS parameters */
> >> -	F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE;
> >> +	F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE;
> >>  	F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
> >>  	F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
> >>  	F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
> >> @@ -2946,7 +2947,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
> >>  	cp_payload = __cp_payload(sbi);
> >>  	if (cp_pack_start_sum < cp_payload + 1 ||
> >>  		cp_pack_start_sum > blocks_per_seg - 1 -
> >> -			NR_CURSEG_TYPE) {
> >> +			NR_CURSEG_PERSIST_TYPE) {
> >>  		f2fs_err(sbi, "Wrong cp_pack_start_sum: %u",
> >>  			 cp_pack_start_sum);
> >>  		return 1;
> >> -- 
> >> 2.26.2
> > .
> > 


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg
  2020-07-07  3:51     ` Jaegeuk Kim
@ 2020-07-15  3:39       ` Chao Yu
  2020-07-15 19:07         ` Jaegeuk Kim
  0 siblings, 1 reply; 19+ messages in thread
From: Chao Yu @ 2020-07-15  3:39 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2020/7/7 11:51, Jaegeuk Kim wrote:
> On 07/07, Chao Yu wrote:
>> On 2020/7/7 11:21, Jaegeuk Kim wrote:
>>> Hi Chao,
>>>
>>> Do you have any brief design doc to present the idea?
>>
>> Hi Jaegeuk,
>>
>> You mean this whole patchset, right?
>>
>> I can add a brief design description in patch 0/5.
> 
> Yeah, it's a bit hard to understand the whole flow.

Jaegeuk,

Do you have time to take a look at this idea summarized in
[PATCH 0/5]'s cover letter?

> 
> Thanks,
> 
>>
>>>
>>> Thanks,
>>>
>>> On 06/30, Chao Yu wrote:
>>>> Previous implementation of aligned pinfile allocation will:
>>>> - allocate new segment on cold data log no matter whether last used
>>>> segment is partially used or not, it makes IOs more random;
>>>> - force concurrent cold data/GCed IO going into warm data area, it
>>>> can make a bad effect on hot/cold data separation;
>>>>
>>>> In this patch, we introduce a new type of log named 'inmem curseg',
>>>> the differences from normal curseg are:
>>>> - it reuses existing segment type (CURSEG_XXX_NODE/DATA);
>>>> - it only exists in memory, its segno, blkofs, summary will not be
>>>>  persisted into checkpoint area;
>>>>
>>>> With this new feature, we can enhance scalability of log, special
>>>> allocators can be created for purposes:
>>>> - pure lfs allocator for aligned pinfile allocation or file
>>>> defragmentation
>>>> - pure ssr allocator for later feature
>>>>
>>>> So that, let's update aligned pinfile allocation to use this new
>>>> inmem curseg fwk.
>>>>
>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>>> ---
>>>>  fs/f2fs/checkpoint.c |   7 ++-
>>>>  fs/f2fs/debug.c      |   6 ++-
>>>>  fs/f2fs/f2fs.h       |  12 +++--
>>>>  fs/f2fs/file.c       |   3 +-
>>>>  fs/f2fs/gc.c         |   2 +-
>>>>  fs/f2fs/segment.c    | 107 ++++++++++++++++++++++++++++++-------------
>>>>  fs/f2fs/segment.h    |  17 ++++---
>>>>  fs/f2fs/super.c      |   9 ++--
>>>>  8 files changed, 112 insertions(+), 51 deletions(-)
>>>>
>>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>>> index 1bb8278a1c4a..644a914af25a 100644
>>>> --- a/fs/f2fs/checkpoint.c
>>>> +++ b/fs/f2fs/checkpoint.c
>>>> @@ -1623,11 +1623,16 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>>  
>>>>  	f2fs_flush_sit_entries(sbi, cpc);
>>>>  
>>>> +	/* save inmem log status */
>>>> +	f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
>>>> +
>>>>  	err = do_checkpoint(sbi, cpc);
>>>>  	if (err)
>>>>  		f2fs_release_discard_addrs(sbi);
>>>>  	else
>>>>  		f2fs_clear_prefree_segments(sbi, cpc);
>>>> +
>>>> +	f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
>>>>  stop:
>>>>  	unblock_operations(sbi);
>>>>  	stat_inc_cp_count(sbi->stat_info);
>>>> @@ -1658,7 +1663,7 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
>>>>  	}
>>>>  
>>>>  	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
>>>> -			NR_CURSEG_TYPE - __cp_payload(sbi)) *
>>>> +			NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
>>>>  				F2FS_ORPHANS_PER_BLOCK;
>>>>  }
>>>>  
>>>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
>>>> index 4276c0f79beb..41a91aa8c262 100644
>>>> --- a/fs/f2fs/debug.c
>>>> +++ b/fs/f2fs/debug.c
>>>> @@ -164,7 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>>>>  		* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
>>>>  		/ 2;
>>>>  	si->util_invalid = 50 - si->util_free - si->util_valid;
>>>> -	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
>>>> +	for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
>>>>  		struct curseg_info *curseg = CURSEG_I(sbi, i);
>>>>  		si->curseg[i] = curseg->segno;
>>>>  		si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
>>>> @@ -393,6 +393,10 @@ static int stat_show(struct seq_file *s, void *v)
>>>>  			   si->dirty_seg[CURSEG_COLD_NODE],
>>>>  			   si->full_seg[CURSEG_COLD_NODE],
>>>>  			   si->valid_blks[CURSEG_COLD_NODE]);
>>>> +		seq_printf(s, "  - Pinned file: %8d %8d %8d\n",
>>>> +			   si->curseg[CURSEG_COLD_DATA_PINNED],
>>>> +			   si->cursec[CURSEG_COLD_DATA_PINNED],
>>>> +			   si->curzone[CURSEG_COLD_DATA_PINNED]);
>>>>  		seq_printf(s, "\n  - Valid: %d\n  - Dirty: %d\n",
>>>>  			   si->main_area_segs - si->dirty_count -
>>>>  			   si->prefree_count - si->free_segs,
>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>> index 7d6c5f8ce16b..f06c77066284 100644
>>>> --- a/fs/f2fs/f2fs.h
>>>> +++ b/fs/f2fs/f2fs.h
>>>> @@ -996,7 +996,9 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
>>>>   */
>>>>  #define	NR_CURSEG_DATA_TYPE	(3)
>>>>  #define NR_CURSEG_NODE_TYPE	(3)
>>>> -#define NR_CURSEG_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
>>>> +#define NR_CURSEG_INMEM_TYPE	(1)
>>>> +#define NR_CURSEG_PERSIST_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
>>>> +#define NR_CURSEG_TYPE		(NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE)
>>>>  
>>>>  enum {
>>>>  	CURSEG_HOT_DATA	= 0,	/* directory entry blocks */
>>>> @@ -1005,8 +1007,10 @@ enum {
>>>>  	CURSEG_HOT_NODE,	/* direct node blocks of directory files */
>>>>  	CURSEG_WARM_NODE,	/* direct node blocks of normal files */
>>>>  	CURSEG_COLD_NODE,	/* indirect node blocks */
>>>> -	NO_CHECK_TYPE,
>>>> -	CURSEG_COLD_DATA_PINNED,/* cold data for pinned file */
>>>> +	NR_PERSISTENT_LOG,	/* number of persistent log */
>>>> +	CURSEG_COLD_DATA_PINNED = NR_PERSISTENT_LOG,
>>>> +				/* pinned file that needs consecutive block address */
>>>> +	NO_CHECK_TYPE,		/* number of persistent & inmem log */
>>>>  };
>>>>  
>>>>  struct flush_cmd {
>>>> @@ -3359,6 +3363,8 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
>>>>  int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
>>>>  void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
>>>>  int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
>>>> +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type);
>>>> +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type);
>>>>  void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
>>>>  					unsigned int start, unsigned int end);
>>>>  void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type);
>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>> index c10e82806c2a..8611ade06018 100644
>>>> --- a/fs/f2fs/file.c
>>>> +++ b/fs/f2fs/file.c
>>>> @@ -1656,13 +1656,14 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
>>>>  		}
>>>>  
>>>>  		down_write(&sbi->pin_sem);
>>>> -		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
>>>>  
>>>>  		f2fs_lock_op(sbi);
>>>>  		f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA);
>>>>  		f2fs_unlock_op(sbi);
>>>>  
>>>> +		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
>>>>  		err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
>>>> +
>>>>  		up_write(&sbi->pin_sem);
>>>>  
>>>>  		done += map.m_len;
>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>> index 3b718da69910..84807abe4e00 100644
>>>> --- a/fs/f2fs/gc.c
>>>> +++ b/fs/f2fs/gc.c
>>>> @@ -1448,7 +1448,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
>>>>  	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
>>>>  
>>>>  	/* Move out cursegs from the target range */
>>>> -	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
>>>> +	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++)
>>>>  		f2fs_allocate_segment_for_resize(sbi, type, start, end);
>>>>  
>>>>  	/* do GC to move out valid blocks in the range */
>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>> index 5924b3965ae4..863ec6f1fb87 100644
>>>> --- a/fs/f2fs/segment.c
>>>> +++ b/fs/f2fs/segment.c
>>>> @@ -1958,7 +1958,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
>>>>  
>>>>  	mutex_lock(&dirty_i->seglist_lock);
>>>>  	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
>>>> -		__set_test_and_free(sbi, segno);
>>>> +		__set_test_and_free(sbi, segno, false);
>>>>  	mutex_unlock(&dirty_i->seglist_lock);
>>>>  }
>>>>  
>>>> @@ -2496,6 +2496,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
>>>>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>  	struct summary_footer *sum_footer;
>>>>  
>>>> +	curseg->inited = true;
>>>>  	curseg->segno = curseg->next_segno;
>>>>  	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
>>>>  	curseg->next_blkoff = 0;
>>>> @@ -2503,24 +2504,31 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
>>>>  
>>>>  	sum_footer = &(curseg->sum_blk->footer);
>>>>  	memset(sum_footer, 0, sizeof(struct summary_footer));
>>>> -	if (IS_DATASEG(type))
>>>> +	if (IS_DATASEG(curseg->seg_type))
>>>>  		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
>>>> -	if (IS_NODESEG(type))
>>>> +	if (IS_NODESEG(curseg->seg_type))
>>>>  		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
>>>> -	__set_sit_entry_type(sbi, type, curseg->segno, modified);
>>>> +	__set_sit_entry_type(sbi, curseg->seg_type, curseg->segno, modified);
>>>>  }
>>>>  
>>>>  static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>>>>  {
>>>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>> +
>>>>  	/* if segs_per_sec is large than 1, we need to keep original policy. */
>>>>  	if (__is_large_section(sbi))
>>>> -		return CURSEG_I(sbi, type)->segno;
>>>> +		return curseg->segno;
>>>> +
>>>> +	/* inmem log may not locate on any segment after mount */
>>>> +	if (!curseg->inited)
>>>> +		return 0;
>>>>  
>>>>  	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
>>>>  		return 0;
>>>>  
>>>>  	if (test_opt(sbi, NOHEAP) &&
>>>> -		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
>>>> +		(curseg->seg_type == CURSEG_HOT_DATA ||
>>>> +		IS_NODESEG(curseg->seg_type)))
>>>>  		return 0;
>>>>  
>>>>  	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
>>>> @@ -2530,7 +2538,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>>>>  	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
>>>>  		return 0;
>>>>  
>>>> -	return CURSEG_I(sbi, type)->segno;
>>>> +	return curseg->segno;
>>>>  }
>>>>  
>>>>  /*
>>>> @@ -2540,12 +2548,14 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>>>>  static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
>>>>  {
>>>>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>> +	unsigned short seg_type = curseg->seg_type;
>>>>  	unsigned int segno = curseg->segno;
>>>>  	int dir = ALLOC_LEFT;
>>>>  
>>>> -	write_sum_page(sbi, curseg->sum_blk,
>>>> +	if (curseg->inited)
>>>> +		write_sum_page(sbi, curseg->sum_blk,
>>>>  				GET_SUM_BLOCK(sbi, segno));
>>>> -	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
>>>> +	if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
>>>>  		dir = ALLOC_RIGHT;
>>>>  
>>>>  	if (test_opt(sbi, NOHEAP))
>>>> @@ -2622,6 +2632,43 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
>>>>  	f2fs_put_page(sum_page, 1);
>>>>  }
>>>>  
>>>> +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
>>>> +{
>>>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>> +
>>>> +	mutex_lock(&curseg->curseg_mutex);
>>>> +	if (!curseg->inited)
>>>> +		goto out;
>>>> +
>>>> +	if (get_valid_blocks(sbi, curseg->segno, false)) {
>>>> +		write_sum_page(sbi, curseg->sum_blk,
>>>> +				GET_SUM_BLOCK(sbi, curseg->segno));
>>>> +	} else {
>>>> +		mutex_lock(&DIRTY_I(sbi)->seglist_lock);
>>>> +		__set_test_and_free(sbi, curseg->segno, true);
>>>> +		mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
>>>> +	}
>>>> +out:
>>>> +	mutex_unlock(&curseg->curseg_mutex);
>>>> +}
>>>> +
>>>> +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
>>>> +{
>>>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>> +
>>>> +	mutex_lock(&curseg->curseg_mutex);
>>>> +	if (!curseg->inited)
>>>> +		goto out;
>>>> +	if (get_valid_blocks(sbi, curseg->segno, false))
>>>> +		goto out;
>>>> +
>>>> +	mutex_lock(&DIRTY_I(sbi)->seglist_lock);
>>>> +	__set_test_and_inuse(sbi, curseg->segno);
>>>> +	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
>>>> +out:
>>>> +	mutex_unlock(&curseg->curseg_mutex);
>>>> +}
>>>> +
>>>>  static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
>>>>  {
>>>>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>> @@ -2738,11 +2785,15 @@ void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
>>>>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>  	unsigned int old_segno;
>>>>  
>>>> +	if (!curseg->inited)
>>>> +		goto alloc;
>>>> +
>>>>  	if (!curseg->next_blkoff &&
>>>>  		!get_valid_blocks(sbi, curseg->segno, false) &&
>>>>  		!get_ckpt_valid_blocks(sbi, curseg->segno))
>>>>  		return;
>>>>  
>>>> +alloc:
>>>>  	old_segno = curseg->segno;
>>>>  	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
>>>>  	locate_dirty_segment(sbi, old_segno);
>>>> @@ -3126,19 +3177,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>>>>  {
>>>>  	struct sit_info *sit_i = SIT_I(sbi);
>>>>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>> -	bool put_pin_sem = false;
>>>> -
>>>> -	if (type == CURSEG_COLD_DATA) {
>>>> -		/* GC during CURSEG_COLD_DATA_PINNED allocation */
>>>> -		if (down_read_trylock(&sbi->pin_sem)) {
>>>> -			put_pin_sem = true;
>>>> -		} else {
>>>> -			type = CURSEG_WARM_DATA;
>>>> -			curseg = CURSEG_I(sbi, type);
>>>> -		}
>>>> -	} else if (type == CURSEG_COLD_DATA_PINNED) {
>>>> -		type = CURSEG_COLD_DATA;
>>>> -	}
>>>>  
>>>>  	down_read(&SM_I(sbi)->curseg_lock);
>>>>  
>>>> @@ -3204,9 +3242,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>>>>  	mutex_unlock(&curseg->curseg_mutex);
>>>>  
>>>>  	up_read(&SM_I(sbi)->curseg_lock);
>>>> -
>>>> -	if (put_pin_sem)
>>>> -		up_read(&sbi->pin_sem);
>>>>  }
>>>>  
>>>>  static void update_device_state(struct f2fs_io_info *fio)
>>>> @@ -3574,7 +3609,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
>>>>  		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
>>>>  							CURSEG_HOT_DATA]);
>>>>  		if (__exist_node_summaries(sbi))
>>>> -			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
>>>> +			blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
>>>>  		else
>>>>  			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
>>>>  	} else {
>>>> @@ -3652,8 +3687,9 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
>>>>  	}
>>>>  
>>>>  	if (__exist_node_summaries(sbi))
>>>> -		f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
>>>> -					NR_CURSEG_TYPE - type, META_CP, true);
>>>> +		f2fs_ra_meta_pages(sbi,
>>>> +				sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
>>>> +				NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
>>>>  
>>>>  	for (; type <= CURSEG_COLD_NODE; type++) {
>>>>  		err = read_normal_summaries(sbi, type);
>>>> @@ -4155,14 +4191,14 @@ static int build_curseg(struct f2fs_sb_info *sbi)
>>>>  	struct curseg_info *array;
>>>>  	int i;
>>>>  
>>>> -	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
>>>> -			     GFP_KERNEL);
>>>> +	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
>>>> +					sizeof(*array)), GFP_KERNEL);
>>>>  	if (!array)
>>>>  		return -ENOMEM;
>>>>  
>>>>  	SM_I(sbi)->curseg_array = array;
>>>>  
>>>> -	for (i = 0; i < NR_CURSEG_TYPE; i++) {
>>>> +	for (i = 0; i < NO_CHECK_TYPE; i++) {
>>>>  		mutex_init(&array[i].curseg_mutex);
>>>>  		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
>>>>  		if (!array[i].sum_blk)
>>>> @@ -4172,8 +4208,13 @@ static int build_curseg(struct f2fs_sb_info *sbi)
>>>>  				sizeof(struct f2fs_journal), GFP_KERNEL);
>>>>  		if (!array[i].journal)
>>>>  			return -ENOMEM;
>>>> +		if (i < NR_PERSISTENT_LOG)
>>>> +			array[i].seg_type = CURSEG_HOT_DATA + i;
>>>> +		else if (i == CURSEG_COLD_DATA_PINNED)
>>>> +			array[i].seg_type = CURSEG_COLD_DATA;
>>>>  		array[i].segno = NULL_SEGNO;
>>>>  		array[i].next_blkoff = 0;
>>>> +		array[i].inited = false;
>>>>  	}
>>>>  	return restore_curseg_summaries(sbi);
>>>>  }
>>>> @@ -4408,7 +4449,7 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
>>>>  	 * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
>>>>  	 * In LFS curseg, all blkaddr after .next_blkoff should be unused.
>>>>  	 */
>>>> -	for (i = 0; i < NO_CHECK_TYPE; i++) {
>>>> +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
>>>>  		struct curseg_info *curseg = CURSEG_I(sbi, i);
>>>>  		struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
>>>>  		unsigned int blkofs = curseg->next_blkoff;
>>>> @@ -4637,7 +4678,7 @@ int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
>>>>  {
>>>>  	int i, ret;
>>>>  
>>>> -	for (i = 0; i < NO_CHECK_TYPE; i++) {
>>>> +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
>>>>  		ret = fix_curseg_write_pointer(sbi, i);
>>>>  		if (ret)
>>>>  			return ret;
>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>>>> index f261e3e6a69b..8ff261550cbb 100644
>>>> --- a/fs/f2fs/segment.h
>>>> +++ b/fs/f2fs/segment.h
>>>> @@ -22,7 +22,7 @@
>>>>  #define GET_R2L_SEGNO(free_i, segno)	((segno) + (free_i)->start_segno)
>>>>  
>>>>  #define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
>>>> -#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE)
>>>> +#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
>>>>  
>>>>  #define IS_HOT(t)	((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
>>>>  #define IS_WARM(t)	((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
>>>> @@ -34,7 +34,8 @@
>>>>  	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) ||	\
>>>>  	 ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) ||	\
>>>>  	 ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) ||	\
>>>> -	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
>>>> +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) ||	\
>>>> +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno))
>>>>  
>>>>  #define IS_CURSEC(sbi, secno)						\
>>>>  	(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno /		\
>>>> @@ -48,7 +49,9 @@
>>>>  	 ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno /		\
>>>>  	  (sbi)->segs_per_sec) ||	\
>>>>  	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno /		\
>>>> -	  (sbi)->segs_per_sec))	\
>>>> +	  (sbi)->segs_per_sec) ||	\
>>>> +	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno /	\
>>>> +	  (sbi)->segs_per_sec))
>>>>  
>>>>  #define MAIN_BLKADDR(sbi)						\
>>>>  	(SM_I(sbi) ? SM_I(sbi)->main_blkaddr : 				\
>>>> @@ -288,10 +291,12 @@ struct curseg_info {
>>>>  	struct rw_semaphore journal_rwsem;	/* protect journal area */
>>>>  	struct f2fs_journal *journal;		/* cached journal info */
>>>>  	unsigned char alloc_type;		/* current allocation type */
>>>> +	unsigned short seg_type;		/* segment type like CURSEG_XXX_TYPE */
>>>>  	unsigned int segno;			/* current segment number */
>>>>  	unsigned short next_blkoff;		/* next block offset to write */
>>>>  	unsigned int zone;			/* current zone number */
>>>>  	unsigned int next_segno;		/* preallocated segment */
>>>> +	bool inited;				/* indicate inmem log is inited */
>>>>  };
>>>>  
>>>>  struct sit_entry_set {
>>>> @@ -305,8 +310,6 @@ struct sit_entry_set {
>>>>   */
>>>>  static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
>>>>  {
>>>> -	if (type == CURSEG_COLD_DATA_PINNED)
>>>> -		type = CURSEG_COLD_DATA;
>>>>  	return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
>>>>  }
>>>>  
>>>> @@ -438,7 +441,7 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi,
>>>>  }
>>>>  
>>>>  static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
>>>> -		unsigned int segno)
>>>> +		unsigned int segno, bool inmem)
>>>>  {
>>>>  	struct free_segmap_info *free_i = FREE_I(sbi);
>>>>  	unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
>>>> @@ -449,7 +452,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
>>>>  	if (test_and_clear_bit(segno, free_i->free_segmap)) {
>>>>  		free_i->free_segments++;
>>>>  
>>>> -		if (IS_CURSEC(sbi, secno))
>>>> +		if (!inmem && IS_CURSEC(sbi, secno))
>>>>  			goto skip_free;
>>>>  		next = find_next_bit(free_i->free_segmap,
>>>>  				start_segno + sbi->segs_per_sec, start_segno);
>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>> index 80cb7cd358f8..0fefa130585f 100644
>>>> --- a/fs/f2fs/super.c
>>>> +++ b/fs/f2fs/super.c
>>>> @@ -575,7 +575,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>>>>  		case Opt_active_logs:
>>>>  			if (args->from && match_int(args, &arg))
>>>>  				return -EINVAL;
>>>> -			if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
>>>> +			if (arg != 2 && arg != 4 &&
>>>> +				arg != NR_CURSEG_PERSIST_TYPE)
>>>>  				return -EINVAL;
>>>>  			F2FS_OPTION(sbi).active_logs = arg;
>>>>  			break;
>>>> @@ -981,7 +982,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>>>>  	}
>>>>  
>>>>  	/* Not pass down write hints if the number of active logs is lesser
>>>> -	 * than NR_CURSEG_TYPE.
>>>> +	 * than NR_CURSEG_PERSIST_TYPE.
>>>>  	 */
>>>>  	if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE)
>>>>  		F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
>>>> @@ -1614,7 +1615,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>>>>  static void default_options(struct f2fs_sb_info *sbi)
>>>>  {
>>>>  	/* init some FS parameters */
>>>> -	F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE;
>>>> +	F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE;
>>>>  	F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
>>>>  	F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
>>>>  	F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
>>>> @@ -2946,7 +2947,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
>>>>  	cp_payload = __cp_payload(sbi);
>>>>  	if (cp_pack_start_sum < cp_payload + 1 ||
>>>>  		cp_pack_start_sum > blocks_per_seg - 1 -
>>>> -			NR_CURSEG_TYPE) {
>>>> +			NR_CURSEG_PERSIST_TYPE) {
>>>>  		f2fs_err(sbi, "Wrong cp_pack_start_sum: %u",
>>>>  			 cp_pack_start_sum);
>>>>  		return 1;
>>>> -- 
>>>> 2.26.2
>>> .
>>>
> .
> 


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg
  2020-07-15  3:39       ` Chao Yu
@ 2020-07-15 19:07         ` Jaegeuk Kim
  2020-07-16  1:24           ` Chao Yu
  0 siblings, 1 reply; 19+ messages in thread
From: Jaegeuk Kim @ 2020-07-15 19:07 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 07/15, Chao Yu wrote:
> On 2020/7/7 11:51, Jaegeuk Kim wrote:
> > On 07/07, Chao Yu wrote:
> >> On 2020/7/7 11:21, Jaegeuk Kim wrote:
> >>> Hi Chao,
> >>>
> >>> Do you have any brief design doc to present the idea?
> >>
> >> Hi Jaegeuk,
> >>
> >> You mean this whole patchset, right?
> >>
> >> I can add a brief design description in patch 0/5.
> > 
> > Yeah, it's a bit hard to understand the whole flow.
> 
> Jaegeuk,
> 
> Do you have time to take a look at this idea summarized in
> [PATCH 0/5]'s cover letter?

Sorry, I haven't been able to sit down and review the design yet.
Let me give it a try soon.

> 
> > 
> > Thanks,
> > 
> >>
> >>>
> >>> Thanks,
> >>>
> >>> On 06/30, Chao Yu wrote:
> >>>> Previous implementation of aligned pinfile allocation will:
> >>>> - allocate new segment on cold data log no matter whether last used
> >>>> segment is partially used or not, it makes IOs more random;
> >>>> - force concurrent cold data/GCed IO going into warm data area, it
> >>>> can make a bad effect on hot/cold data separation;
> >>>>
> >>>> In this patch, we introduce a new type of log named 'inmem curseg',
> >>>> the differences from normal curseg are:
> >>>> - it reuses existing segment type (CURSEG_XXX_NODE/DATA);
> >>>> - it only exists in memory, its segno, blkofs, summary will not be
> >>>>  persisted into checkpoint area;
> >>>>
> >>>> With this new feature, we can enhance scalability of log, special
> >>>> allocators can be created for purposes:
> >>>> - pure lfs allocator for aligned pinfile allocation or file
> >>>> defragmentation
> >>>> - pure ssr allocator for later feature
> >>>>
> >>>> So that, let's update aligned pinfile allocation to use this new
> >>>> inmem curseg fwk.
> >>>>
> >>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> >>>> ---
> >>>>  fs/f2fs/checkpoint.c |   7 ++-
> >>>>  fs/f2fs/debug.c      |   6 ++-
> >>>>  fs/f2fs/f2fs.h       |  12 +++--
> >>>>  fs/f2fs/file.c       |   3 +-
> >>>>  fs/f2fs/gc.c         |   2 +-
> >>>>  fs/f2fs/segment.c    | 107 ++++++++++++++++++++++++++++++-------------
> >>>>  fs/f2fs/segment.h    |  17 ++++---
> >>>>  fs/f2fs/super.c      |   9 ++--
> >>>>  8 files changed, 112 insertions(+), 51 deletions(-)
> >>>>
> >>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> >>>> index 1bb8278a1c4a..644a914af25a 100644
> >>>> --- a/fs/f2fs/checkpoint.c
> >>>> +++ b/fs/f2fs/checkpoint.c
> >>>> @@ -1623,11 +1623,16 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >>>>  
> >>>>  	f2fs_flush_sit_entries(sbi, cpc);
> >>>>  
> >>>> +	/* save inmem log status */
> >>>> +	f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
> >>>> +
> >>>>  	err = do_checkpoint(sbi, cpc);
> >>>>  	if (err)
> >>>>  		f2fs_release_discard_addrs(sbi);
> >>>>  	else
> >>>>  		f2fs_clear_prefree_segments(sbi, cpc);
> >>>> +
> >>>> +	f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
> >>>>  stop:
> >>>>  	unblock_operations(sbi);
> >>>>  	stat_inc_cp_count(sbi->stat_info);
> >>>> @@ -1658,7 +1663,7 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
> >>>>  	}
> >>>>  
> >>>>  	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
> >>>> -			NR_CURSEG_TYPE - __cp_payload(sbi)) *
> >>>> +			NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
> >>>>  				F2FS_ORPHANS_PER_BLOCK;
> >>>>  }
> >>>>  
> >>>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> >>>> index 4276c0f79beb..41a91aa8c262 100644
> >>>> --- a/fs/f2fs/debug.c
> >>>> +++ b/fs/f2fs/debug.c
> >>>> @@ -164,7 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
> >>>>  		* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
> >>>>  		/ 2;
> >>>>  	si->util_invalid = 50 - si->util_free - si->util_valid;
> >>>> -	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
> >>>> +	for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
> >>>>  		struct curseg_info *curseg = CURSEG_I(sbi, i);
> >>>>  		si->curseg[i] = curseg->segno;
> >>>>  		si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
> >>>> @@ -393,6 +393,10 @@ static int stat_show(struct seq_file *s, void *v)
> >>>>  			   si->dirty_seg[CURSEG_COLD_NODE],
> >>>>  			   si->full_seg[CURSEG_COLD_NODE],
> >>>>  			   si->valid_blks[CURSEG_COLD_NODE]);
> >>>> +		seq_printf(s, "  - Pinned file: %8d %8d %8d\n",
> >>>> +			   si->curseg[CURSEG_COLD_DATA_PINNED],
> >>>> +			   si->cursec[CURSEG_COLD_DATA_PINNED],
> >>>> +			   si->curzone[CURSEG_COLD_DATA_PINNED]);
> >>>>  		seq_printf(s, "\n  - Valid: %d\n  - Dirty: %d\n",
> >>>>  			   si->main_area_segs - si->dirty_count -
> >>>>  			   si->prefree_count - si->free_segs,
> >>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>>> index 7d6c5f8ce16b..f06c77066284 100644
> >>>> --- a/fs/f2fs/f2fs.h
> >>>> +++ b/fs/f2fs/f2fs.h
> >>>> @@ -996,7 +996,9 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
> >>>>   */
> >>>>  #define	NR_CURSEG_DATA_TYPE	(3)
> >>>>  #define NR_CURSEG_NODE_TYPE	(3)
> >>>> -#define NR_CURSEG_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
> >>>> +#define NR_CURSEG_INMEM_TYPE	(1)
> >>>> +#define NR_CURSEG_PERSIST_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
> >>>> +#define NR_CURSEG_TYPE		(NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE)
> >>>>  
> >>>>  enum {
> >>>>  	CURSEG_HOT_DATA	= 0,	/* directory entry blocks */
> >>>> @@ -1005,8 +1007,10 @@ enum {
> >>>>  	CURSEG_HOT_NODE,	/* direct node blocks of directory files */
> >>>>  	CURSEG_WARM_NODE,	/* direct node blocks of normal files */
> >>>>  	CURSEG_COLD_NODE,	/* indirect node blocks */
> >>>> -	NO_CHECK_TYPE,
> >>>> -	CURSEG_COLD_DATA_PINNED,/* cold data for pinned file */
> >>>> +	NR_PERSISTENT_LOG,	/* number of persistent log */
> >>>> +	CURSEG_COLD_DATA_PINNED = NR_PERSISTENT_LOG,
> >>>> +				/* pinned file that needs consecutive block address */
> >>>> +	NO_CHECK_TYPE,		/* number of persistent & inmem log */
> >>>>  };
> >>>>  
> >>>>  struct flush_cmd {
> >>>> @@ -3359,6 +3363,8 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
> >>>>  int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
> >>>>  void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
> >>>>  int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
> >>>> +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type);
> >>>> +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type);
> >>>>  void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
> >>>>  					unsigned int start, unsigned int end);
> >>>>  void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type);
> >>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> >>>> index c10e82806c2a..8611ade06018 100644
> >>>> --- a/fs/f2fs/file.c
> >>>> +++ b/fs/f2fs/file.c
> >>>> @@ -1656,13 +1656,14 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
> >>>>  		}
> >>>>  
> >>>>  		down_write(&sbi->pin_sem);
> >>>> -		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
> >>>>  
> >>>>  		f2fs_lock_op(sbi);
> >>>>  		f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA);
> >>>>  		f2fs_unlock_op(sbi);
> >>>>  
> >>>> +		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
> >>>>  		err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
> >>>> +
> >>>>  		up_write(&sbi->pin_sem);
> >>>>  
> >>>>  		done += map.m_len;
> >>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> >>>> index 3b718da69910..84807abe4e00 100644
> >>>> --- a/fs/f2fs/gc.c
> >>>> +++ b/fs/f2fs/gc.c
> >>>> @@ -1448,7 +1448,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
> >>>>  	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
> >>>>  
> >>>>  	/* Move out cursegs from the target range */
> >>>> -	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
> >>>> +	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++)
> >>>>  		f2fs_allocate_segment_for_resize(sbi, type, start, end);
> >>>>  
> >>>>  	/* do GC to move out valid blocks in the range */
> >>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>>> index 5924b3965ae4..863ec6f1fb87 100644
> >>>> --- a/fs/f2fs/segment.c
> >>>> +++ b/fs/f2fs/segment.c
> >>>> @@ -1958,7 +1958,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
> >>>>  
> >>>>  	mutex_lock(&dirty_i->seglist_lock);
> >>>>  	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
> >>>> -		__set_test_and_free(sbi, segno);
> >>>> +		__set_test_and_free(sbi, segno, false);
> >>>>  	mutex_unlock(&dirty_i->seglist_lock);
> >>>>  }
> >>>>  
> >>>> @@ -2496,6 +2496,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
> >>>>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
> >>>>  	struct summary_footer *sum_footer;
> >>>>  
> >>>> +	curseg->inited = true;
> >>>>  	curseg->segno = curseg->next_segno;
> >>>>  	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
> >>>>  	curseg->next_blkoff = 0;
> >>>> @@ -2503,24 +2504,31 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
> >>>>  
> >>>>  	sum_footer = &(curseg->sum_blk->footer);
> >>>>  	memset(sum_footer, 0, sizeof(struct summary_footer));
> >>>> -	if (IS_DATASEG(type))
> >>>> +	if (IS_DATASEG(curseg->seg_type))
> >>>>  		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
> >>>> -	if (IS_NODESEG(type))
> >>>> +	if (IS_NODESEG(curseg->seg_type))
> >>>>  		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
> >>>> -	__set_sit_entry_type(sbi, type, curseg->segno, modified);
> >>>> +	__set_sit_entry_type(sbi, curseg->seg_type, curseg->segno, modified);
> >>>>  }
> >>>>  
> >>>>  static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
> >>>>  {
> >>>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
> >>>> +
> >>>>  	/* if segs_per_sec is large than 1, we need to keep original policy. */
> >>>>  	if (__is_large_section(sbi))
> >>>> -		return CURSEG_I(sbi, type)->segno;
> >>>> +		return curseg->segno;
> >>>> +
> >>>> +	/* inmem log may not locate on any segment after mount */
> >>>> +	if (!curseg->inited)
> >>>> +		return 0;
> >>>>  
> >>>>  	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
> >>>>  		return 0;
> >>>>  
> >>>>  	if (test_opt(sbi, NOHEAP) &&
> >>>> -		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
> >>>> +		(curseg->seg_type == CURSEG_HOT_DATA ||
> >>>> +		IS_NODESEG(curseg->seg_type)))
> >>>>  		return 0;
> >>>>  
> >>>>  	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
> >>>> @@ -2530,7 +2538,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
> >>>>  	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
> >>>>  		return 0;
> >>>>  
> >>>> -	return CURSEG_I(sbi, type)->segno;
> >>>> +	return curseg->segno;
> >>>>  }
> >>>>  
> >>>>  /*
> >>>> @@ -2540,12 +2548,14 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
> >>>>  static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
> >>>>  {
> >>>>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
> >>>> +	unsigned short seg_type = curseg->seg_type;
> >>>>  	unsigned int segno = curseg->segno;
> >>>>  	int dir = ALLOC_LEFT;
> >>>>  
> >>>> -	write_sum_page(sbi, curseg->sum_blk,
> >>>> +	if (curseg->inited)
> >>>> +		write_sum_page(sbi, curseg->sum_blk,
> >>>>  				GET_SUM_BLOCK(sbi, segno));
> >>>> -	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
> >>>> +	if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
> >>>>  		dir = ALLOC_RIGHT;
> >>>>  
> >>>>  	if (test_opt(sbi, NOHEAP))
> >>>> @@ -2622,6 +2632,43 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
> >>>>  	f2fs_put_page(sum_page, 1);
> >>>>  }
> >>>>  
> >>>> +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
> >>>> +{
> >>>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
> >>>> +
> >>>> +	mutex_lock(&curseg->curseg_mutex);
> >>>> +	if (!curseg->inited)
> >>>> +		goto out;
> >>>> +
> >>>> +	if (get_valid_blocks(sbi, curseg->segno, false)) {
> >>>> +		write_sum_page(sbi, curseg->sum_blk,
> >>>> +				GET_SUM_BLOCK(sbi, curseg->segno));
> >>>> +	} else {
> >>>> +		mutex_lock(&DIRTY_I(sbi)->seglist_lock);
> >>>> +		__set_test_and_free(sbi, curseg->segno, true);
> >>>> +		mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
> >>>> +	}
> >>>> +out:
> >>>> +	mutex_unlock(&curseg->curseg_mutex);
> >>>> +}
> >>>> +
> >>>> +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
> >>>> +{
> >>>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
> >>>> +
> >>>> +	mutex_lock(&curseg->curseg_mutex);
> >>>> +	if (!curseg->inited)
> >>>> +		goto out;
> >>>> +	if (get_valid_blocks(sbi, curseg->segno, false))
> >>>> +		goto out;
> >>>> +
> >>>> +	mutex_lock(&DIRTY_I(sbi)->seglist_lock);
> >>>> +	__set_test_and_inuse(sbi, curseg->segno);
> >>>> +	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
> >>>> +out:
> >>>> +	mutex_unlock(&curseg->curseg_mutex);
> >>>> +}
> >>>> +
> >>>>  static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
> >>>>  {
> >>>>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
> >>>> @@ -2738,11 +2785,15 @@ void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
> >>>>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
> >>>>  	unsigned int old_segno;
> >>>>  
> >>>> +	if (!curseg->inited)
> >>>> +		goto alloc;
> >>>> +
> >>>>  	if (!curseg->next_blkoff &&
> >>>>  		!get_valid_blocks(sbi, curseg->segno, false) &&
> >>>>  		!get_ckpt_valid_blocks(sbi, curseg->segno))
> >>>>  		return;
> >>>>  
> >>>> +alloc:
> >>>>  	old_segno = curseg->segno;
> >>>>  	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
> >>>>  	locate_dirty_segment(sbi, old_segno);
> >>>> @@ -3126,19 +3177,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> >>>>  {
> >>>>  	struct sit_info *sit_i = SIT_I(sbi);
> >>>>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
> >>>> -	bool put_pin_sem = false;
> >>>> -
> >>>> -	if (type == CURSEG_COLD_DATA) {
> >>>> -		/* GC during CURSEG_COLD_DATA_PINNED allocation */
> >>>> -		if (down_read_trylock(&sbi->pin_sem)) {
> >>>> -			put_pin_sem = true;
> >>>> -		} else {
> >>>> -			type = CURSEG_WARM_DATA;
> >>>> -			curseg = CURSEG_I(sbi, type);
> >>>> -		}
> >>>> -	} else if (type == CURSEG_COLD_DATA_PINNED) {
> >>>> -		type = CURSEG_COLD_DATA;
> >>>> -	}
> >>>>  
> >>>>  	down_read(&SM_I(sbi)->curseg_lock);
> >>>>  
> >>>> @@ -3204,9 +3242,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> >>>>  	mutex_unlock(&curseg->curseg_mutex);
> >>>>  
> >>>>  	up_read(&SM_I(sbi)->curseg_lock);
> >>>> -
> >>>> -	if (put_pin_sem)
> >>>> -		up_read(&sbi->pin_sem);
> >>>>  }
> >>>>  
> >>>>  static void update_device_state(struct f2fs_io_info *fio)
> >>>> @@ -3574,7 +3609,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
> >>>>  		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
> >>>>  							CURSEG_HOT_DATA]);
> >>>>  		if (__exist_node_summaries(sbi))
> >>>> -			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
> >>>> +			blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
> >>>>  		else
> >>>>  			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
> >>>>  	} else {
> >>>> @@ -3652,8 +3687,9 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
> >>>>  	}
> >>>>  
> >>>>  	if (__exist_node_summaries(sbi))
> >>>> -		f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
> >>>> -					NR_CURSEG_TYPE - type, META_CP, true);
> >>>> +		f2fs_ra_meta_pages(sbi,
> >>>> +				sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
> >>>> +				NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
> >>>>  
> >>>>  	for (; type <= CURSEG_COLD_NODE; type++) {
> >>>>  		err = read_normal_summaries(sbi, type);
> >>>> @@ -4155,14 +4191,14 @@ static int build_curseg(struct f2fs_sb_info *sbi)
> >>>>  	struct curseg_info *array;
> >>>>  	int i;
> >>>>  
> >>>> -	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
> >>>> -			     GFP_KERNEL);
> >>>> +	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
> >>>> +					sizeof(*array)), GFP_KERNEL);
> >>>>  	if (!array)
> >>>>  		return -ENOMEM;
> >>>>  
> >>>>  	SM_I(sbi)->curseg_array = array;
> >>>>  
> >>>> -	for (i = 0; i < NR_CURSEG_TYPE; i++) {
> >>>> +	for (i = 0; i < NO_CHECK_TYPE; i++) {
> >>>>  		mutex_init(&array[i].curseg_mutex);
> >>>>  		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
> >>>>  		if (!array[i].sum_blk)
> >>>> @@ -4172,8 +4208,13 @@ static int build_curseg(struct f2fs_sb_info *sbi)
> >>>>  				sizeof(struct f2fs_journal), GFP_KERNEL);
> >>>>  		if (!array[i].journal)
> >>>>  			return -ENOMEM;
> >>>> +		if (i < NR_PERSISTENT_LOG)
> >>>> +			array[i].seg_type = CURSEG_HOT_DATA + i;
> >>>> +		else if (i == CURSEG_COLD_DATA_PINNED)
> >>>> +			array[i].seg_type = CURSEG_COLD_DATA;
> >>>>  		array[i].segno = NULL_SEGNO;
> >>>>  		array[i].next_blkoff = 0;
> >>>> +		array[i].inited = false;
> >>>>  	}
> >>>>  	return restore_curseg_summaries(sbi);
> >>>>  }
> >>>> @@ -4408,7 +4449,7 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
> >>>>  	 * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
> >>>>  	 * In LFS curseg, all blkaddr after .next_blkoff should be unused.
> >>>>  	 */
> >>>> -	for (i = 0; i < NO_CHECK_TYPE; i++) {
> >>>> +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
> >>>>  		struct curseg_info *curseg = CURSEG_I(sbi, i);
> >>>>  		struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
> >>>>  		unsigned int blkofs = curseg->next_blkoff;
> >>>> @@ -4637,7 +4678,7 @@ int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
> >>>>  {
> >>>>  	int i, ret;
> >>>>  
> >>>> -	for (i = 0; i < NO_CHECK_TYPE; i++) {
> >>>> +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
> >>>>  		ret = fix_curseg_write_pointer(sbi, i);
> >>>>  		if (ret)
> >>>>  			return ret;
> >>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> >>>> index f261e3e6a69b..8ff261550cbb 100644
> >>>> --- a/fs/f2fs/segment.h
> >>>> +++ b/fs/f2fs/segment.h
> >>>> @@ -22,7 +22,7 @@
> >>>>  #define GET_R2L_SEGNO(free_i, segno)	((segno) + (free_i)->start_segno)
> >>>>  
> >>>>  #define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
> >>>> -#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE)
> >>>> +#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
> >>>>  
> >>>>  #define IS_HOT(t)	((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
> >>>>  #define IS_WARM(t)	((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
> >>>> @@ -34,7 +34,8 @@
> >>>>  	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) ||	\
> >>>>  	 ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) ||	\
> >>>>  	 ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) ||	\
> >>>> -	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
> >>>> +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) ||	\
> >>>> +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno))
> >>>>  
> >>>>  #define IS_CURSEC(sbi, secno)						\
> >>>>  	(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno /		\
> >>>> @@ -48,7 +49,9 @@
> >>>>  	 ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno /		\
> >>>>  	  (sbi)->segs_per_sec) ||	\
> >>>>  	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno /		\
> >>>> -	  (sbi)->segs_per_sec))	\
> >>>> +	  (sbi)->segs_per_sec) ||	\
> >>>> +	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno /	\
> >>>> +	  (sbi)->segs_per_sec))
> >>>>  
> >>>>  #define MAIN_BLKADDR(sbi)						\
> >>>>  	(SM_I(sbi) ? SM_I(sbi)->main_blkaddr : 				\
> >>>> @@ -288,10 +291,12 @@ struct curseg_info {
> >>>>  	struct rw_semaphore journal_rwsem;	/* protect journal area */
> >>>>  	struct f2fs_journal *journal;		/* cached journal info */
> >>>>  	unsigned char alloc_type;		/* current allocation type */
> >>>> +	unsigned short seg_type;		/* segment type like CURSEG_XXX_TYPE */
> >>>>  	unsigned int segno;			/* current segment number */
> >>>>  	unsigned short next_blkoff;		/* next block offset to write */
> >>>>  	unsigned int zone;			/* current zone number */
> >>>>  	unsigned int next_segno;		/* preallocated segment */
> >>>> +	bool inited;				/* indicate inmem log is inited */
> >>>>  };
> >>>>  
> >>>>  struct sit_entry_set {
> >>>> @@ -305,8 +310,6 @@ struct sit_entry_set {
> >>>>   */
> >>>>  static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
> >>>>  {
> >>>> -	if (type == CURSEG_COLD_DATA_PINNED)
> >>>> -		type = CURSEG_COLD_DATA;
> >>>>  	return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
> >>>>  }
> >>>>  
> >>>> @@ -438,7 +441,7 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi,
> >>>>  }
> >>>>  
> >>>>  static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
> >>>> -		unsigned int segno)
> >>>> +		unsigned int segno, bool inmem)
> >>>>  {
> >>>>  	struct free_segmap_info *free_i = FREE_I(sbi);
> >>>>  	unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
> >>>> @@ -449,7 +452,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
> >>>>  	if (test_and_clear_bit(segno, free_i->free_segmap)) {
> >>>>  		free_i->free_segments++;
> >>>>  
> >>>> -		if (IS_CURSEC(sbi, secno))
> >>>> +		if (!inmem && IS_CURSEC(sbi, secno))
> >>>>  			goto skip_free;
> >>>>  		next = find_next_bit(free_i->free_segmap,
> >>>>  				start_segno + sbi->segs_per_sec, start_segno);
> >>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >>>> index 80cb7cd358f8..0fefa130585f 100644
> >>>> --- a/fs/f2fs/super.c
> >>>> +++ b/fs/f2fs/super.c
> >>>> @@ -575,7 +575,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
> >>>>  		case Opt_active_logs:
> >>>>  			if (args->from && match_int(args, &arg))
> >>>>  				return -EINVAL;
> >>>> -			if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
> >>>> +			if (arg != 2 && arg != 4 &&
> >>>> +				arg != NR_CURSEG_PERSIST_TYPE)
> >>>>  				return -EINVAL;
> >>>>  			F2FS_OPTION(sbi).active_logs = arg;
> >>>>  			break;
> >>>> @@ -981,7 +982,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
> >>>>  	}
> >>>>  
> >>>>  	/* Not pass down write hints if the number of active logs is lesser
> >>>> -	 * than NR_CURSEG_TYPE.
> >>>> +	 * than NR_CURSEG_PERSIST_TYPE.
> >>>>  	 */
> >>>>  	if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE)
> >>>>  		F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
> >>>> @@ -1614,7 +1615,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
> >>>>  static void default_options(struct f2fs_sb_info *sbi)
> >>>>  {
> >>>>  	/* init some FS parameters */
> >>>> -	F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE;
> >>>> +	F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE;
> >>>>  	F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
> >>>>  	F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
> >>>>  	F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
> >>>> @@ -2946,7 +2947,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
> >>>>  	cp_payload = __cp_payload(sbi);
> >>>>  	if (cp_pack_start_sum < cp_payload + 1 ||
> >>>>  		cp_pack_start_sum > blocks_per_seg - 1 -
> >>>> -			NR_CURSEG_TYPE) {
> >>>> +			NR_CURSEG_PERSIST_TYPE) {
> >>>>  		f2fs_err(sbi, "Wrong cp_pack_start_sum: %u",
> >>>>  			 cp_pack_start_sum);
> >>>>  		return 1;
> >>>> -- 
> >>>> 2.26.2
> >>> .
> >>>
> > .
> > 


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg
  2020-07-15 19:07         ` Jaegeuk Kim
@ 2020-07-16  1:24           ` Chao Yu
  2020-07-25  8:42             ` Chao Yu
  0 siblings, 1 reply; 19+ messages in thread
From: Chao Yu @ 2020-07-16  1:24 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2020/7/16 3:07, Jaegeuk Kim wrote:
> On 07/15, Chao Yu wrote:
>> On 2020/7/7 11:51, Jaegeuk Kim wrote:
>>> On 07/07, Chao Yu wrote:
>>>> On 2020/7/7 11:21, Jaegeuk Kim wrote:
>>>>> Hi Chao,
>>>>>
>>>>> Do you have any brief design doc to present the idea?
>>>>
>>>> Hi Jaegeuk,
>>>>
>>>> You mean this whole patchset, right?
>>>>
>>>> I can add a brief design description in patch 0/5.
>>>
>>> Yeah, it's a bit hard to understand the whole flow.
>>
>> Jaegeuk,
>>
>> Do you have time to take a look at this idea summarized in
>> [PATCH 0/5]'s cover letter?
> 
> Sorry, I couldn't afford to sit down to review the design.
> Let me give it a try soon.

Alright, let me know if you have any questions about the idea.

> 
>>
>>>
>>> Thanks,
>>>
>>>>
>>>>>
>>>>> Thanks,
>>>>>
>>>>> On 06/30, Chao Yu wrote:
>>>>>> Previous implementation of aligned pinfile allocation will:
>>>>>> - allocate new segment on cold data log no matter whether last used
>>>>>> segment is partially used or not, it makes IOs more random;
>>>>>> - force concurrent cold data/GCed IO going into warm data area, it
>>>>>> can make a bad effect on hot/cold data separation;
>>>>>>
>>>>>> In this patch, we introduce a new type of log named 'inmem curseg',
>>>>>> the differences from normal curseg are:
>>>>>> - it reuses existing segment type (CURSEG_XXX_NODE/DATA);
>>>>>> - it only exists in memory, its segno, blkofs, summary will not be
>>>>>>  persisted into checkpoint area;
>>>>>>
>>>>>> With this new feature, we can enhance scalability of log, special
>>>>>> allocators can be created for purposes:
>>>>>> - pure lfs allocator for aligned pinfile allocation or file
>>>>>> defragmentation
>>>>>> - pure ssr allocator for later feature
>>>>>>
>>>>>> So, let's update aligned pinfile allocation to use this new
>>>>>> inmem curseg framework.
>>>>>>
>>>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>>>>> ---
>>>>>>  fs/f2fs/checkpoint.c |   7 ++-
>>>>>>  fs/f2fs/debug.c      |   6 ++-
>>>>>>  fs/f2fs/f2fs.h       |  12 +++--
>>>>>>  fs/f2fs/file.c       |   3 +-
>>>>>>  fs/f2fs/gc.c         |   2 +-
>>>>>>  fs/f2fs/segment.c    | 107 ++++++++++++++++++++++++++++++-------------
>>>>>>  fs/f2fs/segment.h    |  17 ++++---
>>>>>>  fs/f2fs/super.c      |   9 ++--
>>>>>>  8 files changed, 112 insertions(+), 51 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>>>>> index 1bb8278a1c4a..644a914af25a 100644
>>>>>> --- a/fs/f2fs/checkpoint.c
>>>>>> +++ b/fs/f2fs/checkpoint.c
>>>>>> @@ -1623,11 +1623,16 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>>>>  
>>>>>>  	f2fs_flush_sit_entries(sbi, cpc);
>>>>>>  
>>>>>> +	/* save inmem log status */
>>>>>> +	f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
>>>>>> +
>>>>>>  	err = do_checkpoint(sbi, cpc);
>>>>>>  	if (err)
>>>>>>  		f2fs_release_discard_addrs(sbi);
>>>>>>  	else
>>>>>>  		f2fs_clear_prefree_segments(sbi, cpc);
>>>>>> +
>>>>>> +	f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
>>>>>>  stop:
>>>>>>  	unblock_operations(sbi);
>>>>>>  	stat_inc_cp_count(sbi->stat_info);
>>>>>> @@ -1658,7 +1663,7 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
>>>>>>  	}
>>>>>>  
>>>>>>  	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
>>>>>> -			NR_CURSEG_TYPE - __cp_payload(sbi)) *
>>>>>> +			NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
>>>>>>  				F2FS_ORPHANS_PER_BLOCK;
>>>>>>  }
>>>>>>  
>>>>>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
>>>>>> index 4276c0f79beb..41a91aa8c262 100644
>>>>>> --- a/fs/f2fs/debug.c
>>>>>> +++ b/fs/f2fs/debug.c
>>>>>> @@ -164,7 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>>>>>>  		* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
>>>>>>  		/ 2;
>>>>>>  	si->util_invalid = 50 - si->util_free - si->util_valid;
>>>>>> -	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
>>>>>> +	for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
>>>>>>  		struct curseg_info *curseg = CURSEG_I(sbi, i);
>>>>>>  		si->curseg[i] = curseg->segno;
>>>>>>  		si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
>>>>>> @@ -393,6 +393,10 @@ static int stat_show(struct seq_file *s, void *v)
>>>>>>  			   si->dirty_seg[CURSEG_COLD_NODE],
>>>>>>  			   si->full_seg[CURSEG_COLD_NODE],
>>>>>>  			   si->valid_blks[CURSEG_COLD_NODE]);
>>>>>> +		seq_printf(s, "  - Pinned file: %8d %8d %8d\n",
>>>>>> +			   si->curseg[CURSEG_COLD_DATA_PINNED],
>>>>>> +			   si->cursec[CURSEG_COLD_DATA_PINNED],
>>>>>> +			   si->curzone[CURSEG_COLD_DATA_PINNED]);
>>>>>>  		seq_printf(s, "\n  - Valid: %d\n  - Dirty: %d\n",
>>>>>>  			   si->main_area_segs - si->dirty_count -
>>>>>>  			   si->prefree_count - si->free_segs,
>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>> index 7d6c5f8ce16b..f06c77066284 100644
>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>> @@ -996,7 +996,9 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
>>>>>>   */
>>>>>>  #define	NR_CURSEG_DATA_TYPE	(3)
>>>>>>  #define NR_CURSEG_NODE_TYPE	(3)
>>>>>> -#define NR_CURSEG_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
>>>>>> +#define NR_CURSEG_INMEM_TYPE	(1)
>>>>>> +#define NR_CURSEG_PERSIST_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
>>>>>> +#define NR_CURSEG_TYPE		(NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE)
>>>>>>  
>>>>>>  enum {
>>>>>>  	CURSEG_HOT_DATA	= 0,	/* directory entry blocks */
>>>>>> @@ -1005,8 +1007,10 @@ enum {
>>>>>>  	CURSEG_HOT_NODE,	/* direct node blocks of directory files */
>>>>>>  	CURSEG_WARM_NODE,	/* direct node blocks of normal files */
>>>>>>  	CURSEG_COLD_NODE,	/* indirect node blocks */
>>>>>> -	NO_CHECK_TYPE,
>>>>>> -	CURSEG_COLD_DATA_PINNED,/* cold data for pinned file */
>>>>>> +	NR_PERSISTENT_LOG,	/* number of persistent log */
>>>>>> +	CURSEG_COLD_DATA_PINNED = NR_PERSISTENT_LOG,
>>>>>> +				/* pinned file that needs consecutive block address */
>>>>>> +	NO_CHECK_TYPE,		/* number of persistent & inmem log */
>>>>>>  };
>>>>>>  
>>>>>>  struct flush_cmd {
>>>>>> @@ -3359,6 +3363,8 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
>>>>>>  int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
>>>>>>  void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
>>>>>>  int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
>>>>>> +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type);
>>>>>> +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type);
>>>>>>  void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
>>>>>>  					unsigned int start, unsigned int end);
>>>>>>  void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type);
>>>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>>>> index c10e82806c2a..8611ade06018 100644
>>>>>> --- a/fs/f2fs/file.c
>>>>>> +++ b/fs/f2fs/file.c
>>>>>> @@ -1656,13 +1656,14 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
>>>>>>  		}
>>>>>>  
>>>>>>  		down_write(&sbi->pin_sem);
>>>>>> -		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
>>>>>>  
>>>>>>  		f2fs_lock_op(sbi);
>>>>>>  		f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA);
>>>>>>  		f2fs_unlock_op(sbi);
>>>>>>  
>>>>>> +		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
>>>>>>  		err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
>>>>>> +
>>>>>>  		up_write(&sbi->pin_sem);
>>>>>>  
>>>>>>  		done += map.m_len;
>>>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>>>> index 3b718da69910..84807abe4e00 100644
>>>>>> --- a/fs/f2fs/gc.c
>>>>>> +++ b/fs/f2fs/gc.c
>>>>>> @@ -1448,7 +1448,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
>>>>>>  	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
>>>>>>  
>>>>>>  	/* Move out cursegs from the target range */
>>>>>> -	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
>>>>>> +	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++)
>>>>>>  		f2fs_allocate_segment_for_resize(sbi, type, start, end);
>>>>>>  
>>>>>>  	/* do GC to move out valid blocks in the range */
>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>>> index 5924b3965ae4..863ec6f1fb87 100644
>>>>>> --- a/fs/f2fs/segment.c
>>>>>> +++ b/fs/f2fs/segment.c
>>>>>> @@ -1958,7 +1958,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
>>>>>>  
>>>>>>  	mutex_lock(&dirty_i->seglist_lock);
>>>>>>  	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
>>>>>> -		__set_test_and_free(sbi, segno);
>>>>>> +		__set_test_and_free(sbi, segno, false);
>>>>>>  	mutex_unlock(&dirty_i->seglist_lock);
>>>>>>  }
>>>>>>  
>>>>>> @@ -2496,6 +2496,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
>>>>>>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>  	struct summary_footer *sum_footer;
>>>>>>  
>>>>>> +	curseg->inited = true;
>>>>>>  	curseg->segno = curseg->next_segno;
>>>>>>  	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
>>>>>>  	curseg->next_blkoff = 0;
>>>>>> @@ -2503,24 +2504,31 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
>>>>>>  
>>>>>>  	sum_footer = &(curseg->sum_blk->footer);
>>>>>>  	memset(sum_footer, 0, sizeof(struct summary_footer));
>>>>>> -	if (IS_DATASEG(type))
>>>>>> +	if (IS_DATASEG(curseg->seg_type))
>>>>>>  		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
>>>>>> -	if (IS_NODESEG(type))
>>>>>> +	if (IS_NODESEG(curseg->seg_type))
>>>>>>  		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
>>>>>> -	__set_sit_entry_type(sbi, type, curseg->segno, modified);
>>>>>> +	__set_sit_entry_type(sbi, curseg->seg_type, curseg->segno, modified);
>>>>>>  }
>>>>>>  
>>>>>>  static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>>>>>>  {
>>>>>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>> +
>>>>>>  	/* if segs_per_sec is large than 1, we need to keep original policy. */
>>>>>>  	if (__is_large_section(sbi))
>>>>>> -		return CURSEG_I(sbi, type)->segno;
>>>>>> +		return curseg->segno;
>>>>>> +
>>>>>> +	/* inmem log may not locate on any segment after mount */
>>>>>> +	if (!curseg->inited)
>>>>>> +		return 0;
>>>>>>  
>>>>>>  	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
>>>>>>  		return 0;
>>>>>>  
>>>>>>  	if (test_opt(sbi, NOHEAP) &&
>>>>>> -		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
>>>>>> +		(curseg->seg_type == CURSEG_HOT_DATA ||
>>>>>> +		IS_NODESEG(curseg->seg_type)))
>>>>>>  		return 0;
>>>>>>  
>>>>>>  	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
>>>>>> @@ -2530,7 +2538,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>>>>>>  	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
>>>>>>  		return 0;
>>>>>>  
>>>>>> -	return CURSEG_I(sbi, type)->segno;
>>>>>> +	return curseg->segno;
>>>>>>  }
>>>>>>  
>>>>>>  /*
>>>>>> @@ -2540,12 +2548,14 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>>>>>>  static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
>>>>>>  {
>>>>>>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>> +	unsigned short seg_type = curseg->seg_type;
>>>>>>  	unsigned int segno = curseg->segno;
>>>>>>  	int dir = ALLOC_LEFT;
>>>>>>  
>>>>>> -	write_sum_page(sbi, curseg->sum_blk,
>>>>>> +	if (curseg->inited)
>>>>>> +		write_sum_page(sbi, curseg->sum_blk,
>>>>>>  				GET_SUM_BLOCK(sbi, segno));
>>>>>> -	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
>>>>>> +	if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
>>>>>>  		dir = ALLOC_RIGHT;
>>>>>>  
>>>>>>  	if (test_opt(sbi, NOHEAP))
>>>>>> @@ -2622,6 +2632,43 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
>>>>>>  	f2fs_put_page(sum_page, 1);
>>>>>>  }
>>>>>>  
>>>>>> +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
>>>>>> +{
>>>>>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>> +
>>>>>> +	mutex_lock(&curseg->curseg_mutex);
>>>>>> +	if (!curseg->inited)
>>>>>> +		goto out;
>>>>>> +
>>>>>> +	if (get_valid_blocks(sbi, curseg->segno, false)) {
>>>>>> +		write_sum_page(sbi, curseg->sum_blk,
>>>>>> +				GET_SUM_BLOCK(sbi, curseg->segno));
>>>>>> +	} else {
>>>>>> +		mutex_lock(&DIRTY_I(sbi)->seglist_lock);
>>>>>> +		__set_test_and_free(sbi, curseg->segno, true);
>>>>>> +		mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
>>>>>> +	}
>>>>>> +out:
>>>>>> +	mutex_unlock(&curseg->curseg_mutex);
>>>>>> +}
>>>>>> +
>>>>>> +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
>>>>>> +{
>>>>>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>> +
>>>>>> +	mutex_lock(&curseg->curseg_mutex);
>>>>>> +	if (!curseg->inited)
>>>>>> +		goto out;
>>>>>> +	if (get_valid_blocks(sbi, curseg->segno, false))
>>>>>> +		goto out;
>>>>>> +
>>>>>> +	mutex_lock(&DIRTY_I(sbi)->seglist_lock);
>>>>>> +	__set_test_and_inuse(sbi, curseg->segno);
>>>>>> +	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
>>>>>> +out:
>>>>>> +	mutex_unlock(&curseg->curseg_mutex);
>>>>>> +}
>>>>>> +
>>>>>>  static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
>>>>>>  {
>>>>>>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>> @@ -2738,11 +2785,15 @@ void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
>>>>>>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>  	unsigned int old_segno;
>>>>>>  
>>>>>> +	if (!curseg->inited)
>>>>>> +		goto alloc;
>>>>>> +
>>>>>>  	if (!curseg->next_blkoff &&
>>>>>>  		!get_valid_blocks(sbi, curseg->segno, false) &&
>>>>>>  		!get_ckpt_valid_blocks(sbi, curseg->segno))
>>>>>>  		return;
>>>>>>  
>>>>>> +alloc:
>>>>>>  	old_segno = curseg->segno;
>>>>>>  	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
>>>>>>  	locate_dirty_segment(sbi, old_segno);
>>>>>> @@ -3126,19 +3177,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>>>>>>  {
>>>>>>  	struct sit_info *sit_i = SIT_I(sbi);
>>>>>>  	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>> -	bool put_pin_sem = false;
>>>>>> -
>>>>>> -	if (type == CURSEG_COLD_DATA) {
>>>>>> -		/* GC during CURSEG_COLD_DATA_PINNED allocation */
>>>>>> -		if (down_read_trylock(&sbi->pin_sem)) {
>>>>>> -			put_pin_sem = true;
>>>>>> -		} else {
>>>>>> -			type = CURSEG_WARM_DATA;
>>>>>> -			curseg = CURSEG_I(sbi, type);
>>>>>> -		}
>>>>>> -	} else if (type == CURSEG_COLD_DATA_PINNED) {
>>>>>> -		type = CURSEG_COLD_DATA;
>>>>>> -	}
>>>>>>  
>>>>>>  	down_read(&SM_I(sbi)->curseg_lock);
>>>>>>  
>>>>>> @@ -3204,9 +3242,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>>>>>>  	mutex_unlock(&curseg->curseg_mutex);
>>>>>>  
>>>>>>  	up_read(&SM_I(sbi)->curseg_lock);
>>>>>> -
>>>>>> -	if (put_pin_sem)
>>>>>> -		up_read(&sbi->pin_sem);
>>>>>>  }
>>>>>>  
>>>>>>  static void update_device_state(struct f2fs_io_info *fio)
>>>>>> @@ -3574,7 +3609,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
>>>>>>  		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
>>>>>>  							CURSEG_HOT_DATA]);
>>>>>>  		if (__exist_node_summaries(sbi))
>>>>>> -			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
>>>>>> +			blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
>>>>>>  		else
>>>>>>  			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
>>>>>>  	} else {
>>>>>> @@ -3652,8 +3687,9 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
>>>>>>  	}
>>>>>>  
>>>>>>  	if (__exist_node_summaries(sbi))
>>>>>> -		f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
>>>>>> -					NR_CURSEG_TYPE - type, META_CP, true);
>>>>>> +		f2fs_ra_meta_pages(sbi,
>>>>>> +				sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
>>>>>> +				NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
>>>>>>  
>>>>>>  	for (; type <= CURSEG_COLD_NODE; type++) {
>>>>>>  		err = read_normal_summaries(sbi, type);
>>>>>> @@ -4155,14 +4191,14 @@ static int build_curseg(struct f2fs_sb_info *sbi)
>>>>>>  	struct curseg_info *array;
>>>>>>  	int i;
>>>>>>  
>>>>>> -	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
>>>>>> -			     GFP_KERNEL);
>>>>>> +	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
>>>>>> +					sizeof(*array)), GFP_KERNEL);
>>>>>>  	if (!array)
>>>>>>  		return -ENOMEM;
>>>>>>  
>>>>>>  	SM_I(sbi)->curseg_array = array;
>>>>>>  
>>>>>> -	for (i = 0; i < NR_CURSEG_TYPE; i++) {
>>>>>> +	for (i = 0; i < NO_CHECK_TYPE; i++) {
>>>>>>  		mutex_init(&array[i].curseg_mutex);
>>>>>>  		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
>>>>>>  		if (!array[i].sum_blk)
>>>>>> @@ -4172,8 +4208,13 @@ static int build_curseg(struct f2fs_sb_info *sbi)
>>>>>>  				sizeof(struct f2fs_journal), GFP_KERNEL);
>>>>>>  		if (!array[i].journal)
>>>>>>  			return -ENOMEM;
>>>>>> +		if (i < NR_PERSISTENT_LOG)
>>>>>> +			array[i].seg_type = CURSEG_HOT_DATA + i;
>>>>>> +		else if (i == CURSEG_COLD_DATA_PINNED)
>>>>>> +			array[i].seg_type = CURSEG_COLD_DATA;
>>>>>>  		array[i].segno = NULL_SEGNO;
>>>>>>  		array[i].next_blkoff = 0;
>>>>>> +		array[i].inited = false;
>>>>>>  	}
>>>>>>  	return restore_curseg_summaries(sbi);
>>>>>>  }
>>>>>> @@ -4408,7 +4449,7 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
>>>>>>  	 * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
>>>>>>  	 * In LFS curseg, all blkaddr after .next_blkoff should be unused.
>>>>>>  	 */
>>>>>> -	for (i = 0; i < NO_CHECK_TYPE; i++) {
>>>>>> +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
>>>>>>  		struct curseg_info *curseg = CURSEG_I(sbi, i);
>>>>>>  		struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
>>>>>>  		unsigned int blkofs = curseg->next_blkoff;
>>>>>> @@ -4637,7 +4678,7 @@ int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
>>>>>>  {
>>>>>>  	int i, ret;
>>>>>>  
>>>>>> -	for (i = 0; i < NO_CHECK_TYPE; i++) {
>>>>>> +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
>>>>>>  		ret = fix_curseg_write_pointer(sbi, i);
>>>>>>  		if (ret)
>>>>>>  			return ret;
>>>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>>>>>> index f261e3e6a69b..8ff261550cbb 100644
>>>>>> --- a/fs/f2fs/segment.h
>>>>>> +++ b/fs/f2fs/segment.h
>>>>>> @@ -22,7 +22,7 @@
>>>>>>  #define GET_R2L_SEGNO(free_i, segno)	((segno) + (free_i)->start_segno)
>>>>>>  
>>>>>>  #define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
>>>>>> -#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE)
>>>>>> +#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
>>>>>>  
>>>>>>  #define IS_HOT(t)	((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
>>>>>>  #define IS_WARM(t)	((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
>>>>>> @@ -34,7 +34,8 @@
>>>>>>  	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) ||	\
>>>>>>  	 ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) ||	\
>>>>>>  	 ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) ||	\
>>>>>> -	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
>>>>>> +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) ||	\
>>>>>> +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno))
>>>>>>  
>>>>>>  #define IS_CURSEC(sbi, secno)						\
>>>>>>  	(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno /		\
>>>>>> @@ -48,7 +49,9 @@
>>>>>>  	 ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno /		\
>>>>>>  	  (sbi)->segs_per_sec) ||	\
>>>>>>  	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno /		\
>>>>>> -	  (sbi)->segs_per_sec))	\
>>>>>> +	  (sbi)->segs_per_sec) ||	\
>>>>>> +	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno /	\
>>>>>> +	  (sbi)->segs_per_sec))
>>>>>>  
>>>>>>  #define MAIN_BLKADDR(sbi)						\
>>>>>>  	(SM_I(sbi) ? SM_I(sbi)->main_blkaddr : 				\
>>>>>> @@ -288,10 +291,12 @@ struct curseg_info {
>>>>>>  	struct rw_semaphore journal_rwsem;	/* protect journal area */
>>>>>>  	struct f2fs_journal *journal;		/* cached journal info */
>>>>>>  	unsigned char alloc_type;		/* current allocation type */
>>>>>> +	unsigned short seg_type;		/* segment type like CURSEG_XXX_TYPE */
>>>>>>  	unsigned int segno;			/* current segment number */
>>>>>>  	unsigned short next_blkoff;		/* next block offset to write */
>>>>>>  	unsigned int zone;			/* current zone number */
>>>>>>  	unsigned int next_segno;		/* preallocated segment */
>>>>>> +	bool inited;				/* indicate inmem log is inited */
>>>>>>  };
>>>>>>  
>>>>>>  struct sit_entry_set {
>>>>>> @@ -305,8 +310,6 @@ struct sit_entry_set {
>>>>>>   */
>>>>>>  static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
>>>>>>  {
>>>>>> -	if (type == CURSEG_COLD_DATA_PINNED)
>>>>>> -		type = CURSEG_COLD_DATA;
>>>>>>  	return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
>>>>>>  }
>>>>>>  
>>>>>> @@ -438,7 +441,7 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi,
>>>>>>  }
>>>>>>  
>>>>>>  static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
>>>>>> -		unsigned int segno)
>>>>>> +		unsigned int segno, bool inmem)
>>>>>>  {
>>>>>>  	struct free_segmap_info *free_i = FREE_I(sbi);
>>>>>>  	unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
>>>>>> @@ -449,7 +452,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
>>>>>>  	if (test_and_clear_bit(segno, free_i->free_segmap)) {
>>>>>>  		free_i->free_segments++;
>>>>>>  
>>>>>> -		if (IS_CURSEC(sbi, secno))
>>>>>> +		if (!inmem && IS_CURSEC(sbi, secno))
>>>>>>  			goto skip_free;
>>>>>>  		next = find_next_bit(free_i->free_segmap,
>>>>>>  				start_segno + sbi->segs_per_sec, start_segno);
>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>> index 80cb7cd358f8..0fefa130585f 100644
>>>>>> --- a/fs/f2fs/super.c
>>>>>> +++ b/fs/f2fs/super.c
>>>>>> @@ -575,7 +575,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>>>>>>  		case Opt_active_logs:
>>>>>>  			if (args->from && match_int(args, &arg))
>>>>>>  				return -EINVAL;
>>>>>> -			if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
>>>>>> +			if (arg != 2 && arg != 4 &&
>>>>>> +				arg != NR_CURSEG_PERSIST_TYPE)
>>>>>>  				return -EINVAL;
>>>>>>  			F2FS_OPTION(sbi).active_logs = arg;
>>>>>>  			break;
>>>>>> @@ -981,7 +982,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>>>>>>  	}
>>>>>>  
>>>>>>  	/* Not pass down write hints if the number of active logs is lesser
>>>>>> -	 * than NR_CURSEG_TYPE.
>>>>>> +	 * than NR_CURSEG_PERSIST_TYPE.
>>>>>>  	 */
>>>>>>  	if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE)
>>>>>>  		F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
>>>>>> @@ -1614,7 +1615,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>>>>>>  static void default_options(struct f2fs_sb_info *sbi)
>>>>>>  {
>>>>>>  	/* init some FS parameters */
>>>>>> -	F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE;
>>>>>> +	F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE;
>>>>>>  	F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
>>>>>>  	F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
>>>>>>  	F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
>>>>>> @@ -2946,7 +2947,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
>>>>>>  	cp_payload = __cp_payload(sbi);
>>>>>>  	if (cp_pack_start_sum < cp_payload + 1 ||
>>>>>>  		cp_pack_start_sum > blocks_per_seg - 1 -
>>>>>> -			NR_CURSEG_TYPE) {
>>>>>> +			NR_CURSEG_PERSIST_TYPE) {
>>>>>>  		f2fs_err(sbi, "Wrong cp_pack_start_sum: %u",
>>>>>>  			 cp_pack_start_sum);
>>>>>>  		return 1;
>>>>>> -- 
>>>>>> 2.26.2
>>>>> .
>>>>>
>>> .
>>>
> .
> 


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg
  2020-07-16  1:24           ` Chao Yu
@ 2020-07-25  8:42             ` Chao Yu
  2020-08-04  1:49               ` Chao Yu
  0 siblings, 1 reply; 19+ messages in thread
From: Chao Yu @ 2020-07-25  8:42 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2020/7/16 9:24, Chao Yu wrote:
> On 2020/7/16 3:07, Jaegeuk Kim wrote:
>> On 07/15, Chao Yu wrote:
>>> On 2020/7/7 11:51, Jaegeuk Kim wrote:
>>>> On 07/07, Chao Yu wrote:
>>>>> On 2020/7/7 11:21, Jaegeuk Kim wrote:
>>>>>> Hi Chao,
>>>>>>
>>>>>> Do you have any brief design doc to present the idea?
>>>>>
>>>>> Hi Jaegeuk,
>>>>>
>>>>> You mean this whole patchset, right?
>>>>>
>>>>> I can add a brief design description in patch 0/5.
>>>>
>>>> Yeah, it's a bit hard to understand the whole flow.
>>>
>>> Jaegeuk,
>>>
>>> Do you have time to take a look at this idea summarized in
>>> [PATCH 0/5]'s cover letter?
>>
>> Sorry, I couldn't afford to sit down to review the design.
>> Let me give it a try soon.
> 
> Alright, let me know if you have any question about the idea.

Ping,

Jaegeuk, are you still too busy...? :P

Thanks,

> 
>>
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>>>
>>>>>> Thanks,
>>>>>>
>>>>>> On 06/30, Chao Yu wrote:
>>>>>>> Previous implementation of aligned pinfile allocation will:
>>>>>>> - allocate new segment on cold data log no matter whether last used
>>>>>>> segment is partially used or not, it makes IOs more random;
>>>>>>> - force concurrent cold data/GCed IO going into warm data area, it
>>>>>>> can make a bad effect on hot/cold data separation;
>>>>>>>
>>>>>>> In this patch, we introduce a new type of log named 'inmem curseg',
>>>>>>> the differences from normal curseg are:
>>>>>>> - it reuses existing segment types (CURSEG_XXX_NODE/DATA);
>>>>>>> - it only exists in memory, its segno, blkofs, summary will not be
>>>>>>>   persisted into checkpoint area;
>>>>>>>
>>>>>>> With this new feature, we can enhance scalability of log, special
>>>>>>> allocators can be created for purposes:
>>>>>>> - pure lfs allocator for aligned pinfile allocation or file
>>>>>>> defragmentation
>>>>>>> - pure ssr allocator for later feature
>>>>>>>
>>>>>>> So, let's update aligned pinfile allocation to use this new
>>>>>>> inmem curseg framework.
>>>>>>>
>>>>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>>>>>> ---
>>>>>>>   fs/f2fs/checkpoint.c |   7 ++-
>>>>>>>   fs/f2fs/debug.c      |   6 ++-
>>>>>>>   fs/f2fs/f2fs.h       |  12 +++--
>>>>>>>   fs/f2fs/file.c       |   3 +-
>>>>>>>   fs/f2fs/gc.c         |   2 +-
>>>>>>>   fs/f2fs/segment.c    | 107 ++++++++++++++++++++++++++++++-------------
>>>>>>>   fs/f2fs/segment.h    |  17 ++++---
>>>>>>>   fs/f2fs/super.c      |   9 ++--
>>>>>>>   8 files changed, 112 insertions(+), 51 deletions(-)
>>>>>>>
>>>>>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>>>>>> index 1bb8278a1c4a..644a914af25a 100644
>>>>>>> --- a/fs/f2fs/checkpoint.c
>>>>>>> +++ b/fs/f2fs/checkpoint.c
>>>>>>> @@ -1623,11 +1623,16 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>>>>>   
>>>>>>>   	f2fs_flush_sit_entries(sbi, cpc);
>>>>>>>   
>>>>>>> +	/* save inmem log status */
>>>>>>> +	f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
>>>>>>> +
>>>>>>>   	err = do_checkpoint(sbi, cpc);
>>>>>>>   	if (err)
>>>>>>>   		f2fs_release_discard_addrs(sbi);
>>>>>>>   	else
>>>>>>>   		f2fs_clear_prefree_segments(sbi, cpc);
>>>>>>> +
>>>>>>> +	f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
>>>>>>>   stop:
>>>>>>>   	unblock_operations(sbi);
>>>>>>>   	stat_inc_cp_count(sbi->stat_info);
>>>>>>> @@ -1658,7 +1663,7 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
>>>>>>>   	}
>>>>>>>   
>>>>>>>   	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
>>>>>>> -			NR_CURSEG_TYPE - __cp_payload(sbi)) *
>>>>>>> +			NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
>>>>>>>   				F2FS_ORPHANS_PER_BLOCK;
>>>>>>>   }
>>>>>>>   
>>>>>>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
>>>>>>> index 4276c0f79beb..41a91aa8c262 100644
>>>>>>> --- a/fs/f2fs/debug.c
>>>>>>> +++ b/fs/f2fs/debug.c
>>>>>>> @@ -164,7 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>>>>>>>   		* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
>>>>>>>   		/ 2;
>>>>>>>   	si->util_invalid = 50 - si->util_free - si->util_valid;
>>>>>>> -	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
>>>>>>> +	for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
>>>>>>>   		struct curseg_info *curseg = CURSEG_I(sbi, i);
>>>>>>>   		si->curseg[i] = curseg->segno;
>>>>>>>   		si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
>>>>>>> @@ -393,6 +393,10 @@ static int stat_show(struct seq_file *s, void *v)
>>>>>>>   			   si->dirty_seg[CURSEG_COLD_NODE],
>>>>>>>   			   si->full_seg[CURSEG_COLD_NODE],
>>>>>>>   			   si->valid_blks[CURSEG_COLD_NODE]);
>>>>>>> +		seq_printf(s, "  - Pinned file: %8d %8d %8d\n",
>>>>>>> +			   si->curseg[CURSEG_COLD_DATA_PINNED],
>>>>>>> +			   si->cursec[CURSEG_COLD_DATA_PINNED],
>>>>>>> +			   si->curzone[CURSEG_COLD_DATA_PINNED]);
>>>>>>>   		seq_printf(s, "\n  - Valid: %d\n  - Dirty: %d\n",
>>>>>>>   			   si->main_area_segs - si->dirty_count -
>>>>>>>   			   si->prefree_count - si->free_segs,
>>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>>> index 7d6c5f8ce16b..f06c77066284 100644
>>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>>> @@ -996,7 +996,9 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
>>>>>>>    */
>>>>>>>   #define	NR_CURSEG_DATA_TYPE	(3)
>>>>>>>   #define NR_CURSEG_NODE_TYPE	(3)
>>>>>>> -#define NR_CURSEG_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
>>>>>>> +#define NR_CURSEG_INMEM_TYPE	(1)
>>>>>>> +#define NR_CURSEG_PERSIST_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
>>>>>>> +#define NR_CURSEG_TYPE		(NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE)
>>>>>>>   
>>>>>>>   enum {
>>>>>>>   	CURSEG_HOT_DATA	= 0,	/* directory entry blocks */
>>>>>>> @@ -1005,8 +1007,10 @@ enum {
>>>>>>>   	CURSEG_HOT_NODE,	/* direct node blocks of directory files */
>>>>>>>   	CURSEG_WARM_NODE,	/* direct node blocks of normal files */
>>>>>>>   	CURSEG_COLD_NODE,	/* indirect node blocks */
>>>>>>> -	NO_CHECK_TYPE,
>>>>>>> -	CURSEG_COLD_DATA_PINNED,/* cold data for pinned file */
>>>>>>> +	NR_PERSISTENT_LOG,	/* number of persistent log */
>>>>>>> +	CURSEG_COLD_DATA_PINNED = NR_PERSISTENT_LOG,
>>>>>>> +				/* pinned file that needs consecutive block address */
>>>>>>> +	NO_CHECK_TYPE,		/* number of persistent & inmem log */
>>>>>>>   };
>>>>>>>   
>>>>>>>   struct flush_cmd {
>>>>>>> @@ -3359,6 +3363,8 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
>>>>>>>   int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
>>>>>>>   void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
>>>>>>>   int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
>>>>>>> +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type);
>>>>>>> +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type);
>>>>>>>   void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
>>>>>>>   					unsigned int start, unsigned int end);
>>>>>>>   void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type);
>>>>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>>>>> index c10e82806c2a..8611ade06018 100644
>>>>>>> --- a/fs/f2fs/file.c
>>>>>>> +++ b/fs/f2fs/file.c
>>>>>>> @@ -1656,13 +1656,14 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
>>>>>>>   		}
>>>>>>>   
>>>>>>>   		down_write(&sbi->pin_sem);
>>>>>>> -		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
>>>>>>>   
>>>>>>>   		f2fs_lock_op(sbi);
>>>>>>>   		f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA);
>>>>>>>   		f2fs_unlock_op(sbi);
>>>>>>>   
>>>>>>> +		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
>>>>>>>   		err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
>>>>>>> +
>>>>>>>   		up_write(&sbi->pin_sem);
>>>>>>>   
>>>>>>>   		done += map.m_len;
>>>>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>>>>> index 3b718da69910..84807abe4e00 100644
>>>>>>> --- a/fs/f2fs/gc.c
>>>>>>> +++ b/fs/f2fs/gc.c
>>>>>>> @@ -1448,7 +1448,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
>>>>>>>   	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
>>>>>>>   
>>>>>>>   	/* Move out cursegs from the target range */
>>>>>>> -	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
>>>>>>> +	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++)
>>>>>>>   		f2fs_allocate_segment_for_resize(sbi, type, start, end);
>>>>>>>   
>>>>>>>   	/* do GC to move out valid blocks in the range */
>>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>>>> index 5924b3965ae4..863ec6f1fb87 100644
>>>>>>> --- a/fs/f2fs/segment.c
>>>>>>> +++ b/fs/f2fs/segment.c
>>>>>>> @@ -1958,7 +1958,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
>>>>>>>   
>>>>>>>   	mutex_lock(&dirty_i->seglist_lock);
>>>>>>>   	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
>>>>>>> -		__set_test_and_free(sbi, segno);
>>>>>>> +		__set_test_and_free(sbi, segno, false);
>>>>>>>   	mutex_unlock(&dirty_i->seglist_lock);
>>>>>>>   }
>>>>>>>   
>>>>>>> @@ -2496,6 +2496,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
>>>>>>>   	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>>   	struct summary_footer *sum_footer;
>>>>>>>   
>>>>>>> +	curseg->inited = true;
>>>>>>>   	curseg->segno = curseg->next_segno;
>>>>>>>   	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
>>>>>>>   	curseg->next_blkoff = 0;
>>>>>>> @@ -2503,24 +2504,31 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
>>>>>>>   
>>>>>>>   	sum_footer = &(curseg->sum_blk->footer);
>>>>>>>   	memset(sum_footer, 0, sizeof(struct summary_footer));
>>>>>>> -	if (IS_DATASEG(type))
>>>>>>> +	if (IS_DATASEG(curseg->seg_type))
>>>>>>>   		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
>>>>>>> -	if (IS_NODESEG(type))
>>>>>>> +	if (IS_NODESEG(curseg->seg_type))
>>>>>>>   		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
>>>>>>> -	__set_sit_entry_type(sbi, type, curseg->segno, modified);
>>>>>>> +	__set_sit_entry_type(sbi, curseg->seg_type, curseg->segno, modified);
>>>>>>>   }
>>>>>>>   
>>>>>>>   static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>>>>>>>   {
>>>>>>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>> +
>>>>>>>   	/* if segs_per_sec is large than 1, we need to keep original policy. */
>>>>>>>   	if (__is_large_section(sbi))
>>>>>>> -		return CURSEG_I(sbi, type)->segno;
>>>>>>> +		return curseg->segno;
>>>>>>> +
>>>>>>> +	/* inmem log may not locate on any segment after mount */
>>>>>>> +	if (!curseg->inited)
>>>>>>> +		return 0;
>>>>>>>   
>>>>>>>   	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
>>>>>>>   		return 0;
>>>>>>>   
>>>>>>>   	if (test_opt(sbi, NOHEAP) &&
>>>>>>> -		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
>>>>>>> +		(curseg->seg_type == CURSEG_HOT_DATA ||
>>>>>>> +		IS_NODESEG(curseg->seg_type)))
>>>>>>>   		return 0;
>>>>>>>   
>>>>>>>   	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
>>>>>>> @@ -2530,7 +2538,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>>>>>>>   	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
>>>>>>>   		return 0;
>>>>>>>   
>>>>>>> -	return CURSEG_I(sbi, type)->segno;
>>>>>>> +	return curseg->segno;
>>>>>>>   }
>>>>>>>   
>>>>>>>   /*
>>>>>>> @@ -2540,12 +2548,14 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>>>>>>>   static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
>>>>>>>   {
>>>>>>>   	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>> +	unsigned short seg_type = curseg->seg_type;
>>>>>>>   	unsigned int segno = curseg->segno;
>>>>>>>   	int dir = ALLOC_LEFT;
>>>>>>>   
>>>>>>> -	write_sum_page(sbi, curseg->sum_blk,
>>>>>>> +	if (curseg->inited)
>>>>>>> +		write_sum_page(sbi, curseg->sum_blk,
>>>>>>>   				GET_SUM_BLOCK(sbi, segno));
>>>>>>> -	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
>>>>>>> +	if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
>>>>>>>   		dir = ALLOC_RIGHT;
>>>>>>>   
>>>>>>>   	if (test_opt(sbi, NOHEAP))
>>>>>>> @@ -2622,6 +2632,43 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
>>>>>>>   	f2fs_put_page(sum_page, 1);
>>>>>>>   }
>>>>>>>   
>>>>>>> +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
>>>>>>> +{
>>>>>>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>> +
>>>>>>> +	mutex_lock(&curseg->curseg_mutex);
>>>>>>> +	if (!curseg->inited)
>>>>>>> +		goto out;
>>>>>>> +
>>>>>>> +	if (get_valid_blocks(sbi, curseg->segno, false)) {
>>>>>>> +		write_sum_page(sbi, curseg->sum_blk,
>>>>>>> +				GET_SUM_BLOCK(sbi, curseg->segno));
>>>>>>> +	} else {
>>>>>>> +		mutex_lock(&DIRTY_I(sbi)->seglist_lock);
>>>>>>> +		__set_test_and_free(sbi, curseg->segno, true);
>>>>>>> +		mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
>>>>>>> +	}
>>>>>>> +out:
>>>>>>> +	mutex_unlock(&curseg->curseg_mutex);
>>>>>>> +}
>>>>>>> +
>>>>>>> +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
>>>>>>> +{
>>>>>>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>> +
>>>>>>> +	mutex_lock(&curseg->curseg_mutex);
>>>>>>> +	if (!curseg->inited)
>>>>>>> +		goto out;
>>>>>>> +	if (get_valid_blocks(sbi, curseg->segno, false))
>>>>>>> +		goto out;
>>>>>>> +
>>>>>>> +	mutex_lock(&DIRTY_I(sbi)->seglist_lock);
>>>>>>> +	__set_test_and_inuse(sbi, curseg->segno);
>>>>>>> +	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
>>>>>>> +out:
>>>>>>> +	mutex_unlock(&curseg->curseg_mutex);
>>>>>>> +}
>>>>>>> +
>>>>>>>   static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
>>>>>>>   {
>>>>>>>   	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>> @@ -2738,11 +2785,15 @@ void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
>>>>>>>   	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>>   	unsigned int old_segno;
>>>>>>>   
>>>>>>> +	if (!curseg->inited)
>>>>>>> +		goto alloc;
>>>>>>> +
>>>>>>>   	if (!curseg->next_blkoff &&
>>>>>>>   		!get_valid_blocks(sbi, curseg->segno, false) &&
>>>>>>>   		!get_ckpt_valid_blocks(sbi, curseg->segno))
>>>>>>>   		return;
>>>>>>>   
>>>>>>> +alloc:
>>>>>>>   	old_segno = curseg->segno;
>>>>>>>   	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
>>>>>>>   	locate_dirty_segment(sbi, old_segno);
>>>>>>> @@ -3126,19 +3177,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>>>>>>>   {
>>>>>>>   	struct sit_info *sit_i = SIT_I(sbi);
>>>>>>>   	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>> -	bool put_pin_sem = false;
>>>>>>> -
>>>>>>> -	if (type == CURSEG_COLD_DATA) {
>>>>>>> -		/* GC during CURSEG_COLD_DATA_PINNED allocation */
>>>>>>> -		if (down_read_trylock(&sbi->pin_sem)) {
>>>>>>> -			put_pin_sem = true;
>>>>>>> -		} else {
>>>>>>> -			type = CURSEG_WARM_DATA;
>>>>>>> -			curseg = CURSEG_I(sbi, type);
>>>>>>> -		}
>>>>>>> -	} else if (type == CURSEG_COLD_DATA_PINNED) {
>>>>>>> -		type = CURSEG_COLD_DATA;
>>>>>>> -	}
>>>>>>>   
>>>>>>>   	down_read(&SM_I(sbi)->curseg_lock);
>>>>>>>   
>>>>>>> @@ -3204,9 +3242,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>>>>>>>   	mutex_unlock(&curseg->curseg_mutex);
>>>>>>>   
>>>>>>>   	up_read(&SM_I(sbi)->curseg_lock);
>>>>>>> -
>>>>>>> -	if (put_pin_sem)
>>>>>>> -		up_read(&sbi->pin_sem);
>>>>>>>   }
>>>>>>>   
>>>>>>>   static void update_device_state(struct f2fs_io_info *fio)
>>>>>>> @@ -3574,7 +3609,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
>>>>>>>   		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
>>>>>>>   							CURSEG_HOT_DATA]);
>>>>>>>   		if (__exist_node_summaries(sbi))
>>>>>>> -			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
>>>>>>> +			blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
>>>>>>>   		else
>>>>>>>   			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
>>>>>>>   	} else {
>>>>>>> @@ -3652,8 +3687,9 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
>>>>>>>   	}
>>>>>>>   
>>>>>>>   	if (__exist_node_summaries(sbi))
>>>>>>> -		f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
>>>>>>> -					NR_CURSEG_TYPE - type, META_CP, true);
>>>>>>> +		f2fs_ra_meta_pages(sbi,
>>>>>>> +				sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
>>>>>>> +				NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
>>>>>>>   
>>>>>>>   	for (; type <= CURSEG_COLD_NODE; type++) {
>>>>>>>   		err = read_normal_summaries(sbi, type);
>>>>>>> @@ -4155,14 +4191,14 @@ static int build_curseg(struct f2fs_sb_info *sbi)
>>>>>>>   	struct curseg_info *array;
>>>>>>>   	int i;
>>>>>>>   
>>>>>>> -	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
>>>>>>> -			     GFP_KERNEL);
>>>>>>> +	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
>>>>>>> +					sizeof(*array)), GFP_KERNEL);
>>>>>>>   	if (!array)
>>>>>>>   		return -ENOMEM;
>>>>>>>   
>>>>>>>   	SM_I(sbi)->curseg_array = array;
>>>>>>>   
>>>>>>> -	for (i = 0; i < NR_CURSEG_TYPE; i++) {
>>>>>>> +	for (i = 0; i < NO_CHECK_TYPE; i++) {
>>>>>>>   		mutex_init(&array[i].curseg_mutex);
>>>>>>>   		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
>>>>>>>   		if (!array[i].sum_blk)
>>>>>>> @@ -4172,8 +4208,13 @@ static int build_curseg(struct f2fs_sb_info *sbi)
>>>>>>>   				sizeof(struct f2fs_journal), GFP_KERNEL);
>>>>>>>   		if (!array[i].journal)
>>>>>>>   			return -ENOMEM;
>>>>>>> +		if (i < NR_PERSISTENT_LOG)
>>>>>>> +			array[i].seg_type = CURSEG_HOT_DATA + i;
>>>>>>> +		else if (i == CURSEG_COLD_DATA_PINNED)
>>>>>>> +			array[i].seg_type = CURSEG_COLD_DATA;
>>>>>>>   		array[i].segno = NULL_SEGNO;
>>>>>>>   		array[i].next_blkoff = 0;
>>>>>>> +		array[i].inited = false;
>>>>>>>   	}
>>>>>>>   	return restore_curseg_summaries(sbi);
>>>>>>>   }
>>>>>>> @@ -4408,7 +4449,7 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
>>>>>>>   	 * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
>>>>>>>   	 * In LFS curseg, all blkaddr after .next_blkoff should be unused.
>>>>>>>   	 */
>>>>>>> -	for (i = 0; i < NO_CHECK_TYPE; i++) {
>>>>>>> +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
>>>>>>>   		struct curseg_info *curseg = CURSEG_I(sbi, i);
>>>>>>>   		struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
>>>>>>>   		unsigned int blkofs = curseg->next_blkoff;
>>>>>>> @@ -4637,7 +4678,7 @@ int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
>>>>>>>   {
>>>>>>>   	int i, ret;
>>>>>>>   
>>>>>>> -	for (i = 0; i < NO_CHECK_TYPE; i++) {
>>>>>>> +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
>>>>>>>   		ret = fix_curseg_write_pointer(sbi, i);
>>>>>>>   		if (ret)
>>>>>>>   			return ret;
>>>>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>>>>>>> index f261e3e6a69b..8ff261550cbb 100644
>>>>>>> --- a/fs/f2fs/segment.h
>>>>>>> +++ b/fs/f2fs/segment.h
>>>>>>> @@ -22,7 +22,7 @@
>>>>>>>   #define GET_R2L_SEGNO(free_i, segno)	((segno) + (free_i)->start_segno)
>>>>>>>   
>>>>>>>   #define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
>>>>>>> -#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE)
>>>>>>> +#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
>>>>>>>   
>>>>>>>   #define IS_HOT(t)	((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
>>>>>>>   #define IS_WARM(t)	((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
>>>>>>> @@ -34,7 +34,8 @@
>>>>>>>   	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) ||	\
>>>>>>>   	 ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) ||	\
>>>>>>>   	 ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) ||	\
>>>>>>> -	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
>>>>>>> +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) ||	\
>>>>>>> +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno))
>>>>>>>   
>>>>>>>   #define IS_CURSEC(sbi, secno)						\
>>>>>>>   	(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno /		\
>>>>>>> @@ -48,7 +49,9 @@
>>>>>>>   	 ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno /		\
>>>>>>>   	  (sbi)->segs_per_sec) ||	\
>>>>>>>   	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno /		\
>>>>>>> -	  (sbi)->segs_per_sec))	\
>>>>>>> +	  (sbi)->segs_per_sec) ||	\
>>>>>>> +	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno /	\
>>>>>>> +	  (sbi)->segs_per_sec))
>>>>>>>   
>>>>>>>   #define MAIN_BLKADDR(sbi)						\
>>>>>>>   	(SM_I(sbi) ? SM_I(sbi)->main_blkaddr : 				\
>>>>>>> @@ -288,10 +291,12 @@ struct curseg_info {
>>>>>>>   	struct rw_semaphore journal_rwsem;	/* protect journal area */
>>>>>>>   	struct f2fs_journal *journal;		/* cached journal info */
>>>>>>>   	unsigned char alloc_type;		/* current allocation type */
>>>>>>> +	unsigned short seg_type;		/* segment type like CURSEG_XXX_TYPE */
>>>>>>>   	unsigned int segno;			/* current segment number */
>>>>>>>   	unsigned short next_blkoff;		/* next block offset to write */
>>>>>>>   	unsigned int zone;			/* current zone number */
>>>>>>>   	unsigned int next_segno;		/* preallocated segment */
>>>>>>> +	bool inited;				/* indicate inmem log is inited */
>>>>>>>   };
>>>>>>>   
>>>>>>>   struct sit_entry_set {
>>>>>>> @@ -305,8 +310,6 @@ struct sit_entry_set {
>>>>>>>    */
>>>>>>>   static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
>>>>>>>   {
>>>>>>> -	if (type == CURSEG_COLD_DATA_PINNED)
>>>>>>> -		type = CURSEG_COLD_DATA;
>>>>>>>   	return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
>>>>>>>   }
>>>>>>>   
>>>>>>> @@ -438,7 +441,7 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi,
>>>>>>>   }
>>>>>>>   
>>>>>>>   static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
>>>>>>> -		unsigned int segno)
>>>>>>> +		unsigned int segno, bool inmem)
>>>>>>>   {
>>>>>>>   	struct free_segmap_info *free_i = FREE_I(sbi);
>>>>>>>   	unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
>>>>>>> @@ -449,7 +452,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
>>>>>>>   	if (test_and_clear_bit(segno, free_i->free_segmap)) {
>>>>>>>   		free_i->free_segments++;
>>>>>>>   
>>>>>>> -		if (IS_CURSEC(sbi, secno))
>>>>>>> +		if (!inmem && IS_CURSEC(sbi, secno))
>>>>>>>   			goto skip_free;
>>>>>>>   		next = find_next_bit(free_i->free_segmap,
>>>>>>>   				start_segno + sbi->segs_per_sec, start_segno);
>>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>>> index 80cb7cd358f8..0fefa130585f 100644
>>>>>>> --- a/fs/f2fs/super.c
>>>>>>> +++ b/fs/f2fs/super.c
>>>>>>> @@ -575,7 +575,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>>>>>>>   		case Opt_active_logs:
>>>>>>>   			if (args->from && match_int(args, &arg))
>>>>>>>   				return -EINVAL;
>>>>>>> -			if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
>>>>>>> +			if (arg != 2 && arg != 4 &&
>>>>>>> +				arg != NR_CURSEG_PERSIST_TYPE)
>>>>>>>   				return -EINVAL;
>>>>>>>   			F2FS_OPTION(sbi).active_logs = arg;
>>>>>>>   			break;
>>>>>>> @@ -981,7 +982,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>>>>>>>   	}
>>>>>>>   
>>>>>>>   	/* Not pass down write hints if the number of active logs is lesser
>>>>>>> -	 * than NR_CURSEG_TYPE.
>>>>>>> +	 * than NR_CURSEG_PERSIST_TYPE.
>>>>>>>   	 */
>>>>>>>   	if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE)
>>>>>>>   		F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
>>>>>>> @@ -1614,7 +1615,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>>>>>>>   static void default_options(struct f2fs_sb_info *sbi)
>>>>>>>   {
>>>>>>>   	/* init some FS parameters */
>>>>>>> -	F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE;
>>>>>>> +	F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE;
>>>>>>>   	F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
>>>>>>>   	F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
>>>>>>>   	F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
>>>>>>> @@ -2946,7 +2947,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
>>>>>>>   	cp_payload = __cp_payload(sbi);
>>>>>>>   	if (cp_pack_start_sum < cp_payload + 1 ||
>>>>>>>   		cp_pack_start_sum > blocks_per_seg - 1 -
>>>>>>> -			NR_CURSEG_TYPE) {
>>>>>>> +			NR_CURSEG_PERSIST_TYPE) {
>>>>>>>   		f2fs_err(sbi, "Wrong cp_pack_start_sum: %u",
>>>>>>>   			 cp_pack_start_sum);
>>>>>>>   		return 1;
>>>>>>> -- 
>>>>>>> 2.26.2
>>>>>> .
>>>>>>
>>>> .
>>>>
>> .
>>
> 
> 
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> .
> 


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg
  2020-07-25  8:42             ` Chao Yu
@ 2020-08-04  1:49               ` Chao Yu
  2020-08-04  2:44                 ` Jaegeuk Kim
  0 siblings, 1 reply; 19+ messages in thread
From: Chao Yu @ 2020-08-04  1:49 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2020/7/25 16:42, Chao Yu wrote:
> On 2020/7/16 9:24, Chao Yu wrote:
>> On 2020/7/16 3:07, Jaegeuk Kim wrote:
>>> On 07/15, Chao Yu wrote:
>>>> On 2020/7/7 11:51, Jaegeuk Kim wrote:
>>>>> On 07/07, Chao Yu wrote:
>>>>>> On 2020/7/7 11:21, Jaegeuk Kim wrote:
>>>>>>> Hi Chao,
>>>>>>>
>>>>>>> Do you have any brief design doc to present the idea?
>>>>>>
>>>>>> Hi Jaegeuk,
>>>>>>
>>>>>> You mean this whole patchset, right?
>>>>>>
>>>>>> I can add a brief design description in patch 0/5.
>>>>>
>>>>> Yeah, it's a bit hard to understand the whole flow.
>>>>
>>>> Jaegeuk,
>>>>
>>>> Do you have time to take a look at this idea summarized in
>>>> [PATCH 0/5]'s cover letter?
>>>
>>> Sorry, I couldn't afford to sit down to review the design.
>>> Let me give it a try soon.
>>
>> Alright, let me know if you have any question about the idea.
> 
> Ping,
> 
> Jaegeuk, still be too busy...? :P

Ping again...

Any thoughts about the remaining patches in the patchset?

> 
> Thanks,
> 
>>
>>>
>>>>
>>>>>
>>>>> Thanks,
>>>>>
>>>>>>
>>>>>>>
>>>>>>> Thanks,
>>>>>>>
>>>>>>> On 06/30, Chao Yu wrote:
>>>>>>>> Previous implementation of aligned pinfile allocation will:
>>>>>>>> - allocate new segment on cold data log no matter whether last used
>>>>>>>> segment is partially used or not, it makes IOs more random;
>>>>>>>> - force concurrent cold data/GCed IO going into warm data area, it
>>>>>>>> can make a bad effect on hot/cold data separation;
>>>>>>>>
>>>>>>>> In this patch, we introduce a new type of log named 'inmem curseg',
>>>>>>>> the differences from normal curseg are:
>>>>>>>> - it reuses an existing segment type (CURSEG_XXX_NODE/DATA);
>>>>>>>> - it only exists in memory, its segno, blkofs, summary will not be
>>>>>>>>    persisted into checkpoint area;
>>>>>>>>
>>>>>>>> With this new feature, we can enhance scalability of log, special
>>>>>>>> allocators can be created for purposes:
>>>>>>>> - pure lfs allocator for aligned pinfile allocation or file
>>>>>>>> defragmentation
>>>>>>>> - pure ssr allocator for later feature
>>>>>>>>
>>>>>>>> So that, let's update aligned pinfile allocation to use this new
>>>>>>>> inmem curseg fwk.
>>>>>>>>
>>>>>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>>>>>>> ---
>>>>>>>>    fs/f2fs/checkpoint.c |   7 ++-
>>>>>>>>    fs/f2fs/debug.c      |   6 ++-
>>>>>>>>    fs/f2fs/f2fs.h       |  12 +++--
>>>>>>>>    fs/f2fs/file.c       |   3 +-
>>>>>>>>    fs/f2fs/gc.c         |   2 +-
>>>>>>>>    fs/f2fs/segment.c    | 107 ++++++++++++++++++++++++++++++-------------
>>>>>>>>    fs/f2fs/segment.h    |  17 ++++---
>>>>>>>>    fs/f2fs/super.c      |   9 ++--
>>>>>>>>    8 files changed, 112 insertions(+), 51 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>>>>>>> index 1bb8278a1c4a..644a914af25a 100644
>>>>>>>> --- a/fs/f2fs/checkpoint.c
>>>>>>>> +++ b/fs/f2fs/checkpoint.c
>>>>>>>> @@ -1623,11 +1623,16 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>>>>>>    
>>>>>>>>    	f2fs_flush_sit_entries(sbi, cpc);
>>>>>>>>    
>>>>>>>> +	/* save inmem log status */
>>>>>>>> +	f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
>>>>>>>> +
>>>>>>>>    	err = do_checkpoint(sbi, cpc);
>>>>>>>>    	if (err)
>>>>>>>>    		f2fs_release_discard_addrs(sbi);
>>>>>>>>    	else
>>>>>>>>    		f2fs_clear_prefree_segments(sbi, cpc);
>>>>>>>> +
>>>>>>>> +	f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
>>>>>>>>    stop:
>>>>>>>>    	unblock_operations(sbi);
>>>>>>>>    	stat_inc_cp_count(sbi->stat_info);
>>>>>>>> @@ -1658,7 +1663,7 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
>>>>>>>>    	}
>>>>>>>>    
>>>>>>>>    	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
>>>>>>>> -			NR_CURSEG_TYPE - __cp_payload(sbi)) *
>>>>>>>> +			NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
>>>>>>>>    				F2FS_ORPHANS_PER_BLOCK;
>>>>>>>>    }
>>>>>>>>    
>>>>>>>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
>>>>>>>> index 4276c0f79beb..41a91aa8c262 100644
>>>>>>>> --- a/fs/f2fs/debug.c
>>>>>>>> +++ b/fs/f2fs/debug.c
>>>>>>>> @@ -164,7 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>>>>>>>>    		* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
>>>>>>>>    		/ 2;
>>>>>>>>    	si->util_invalid = 50 - si->util_free - si->util_valid;
>>>>>>>> -	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
>>>>>>>> +	for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
>>>>>>>>    		struct curseg_info *curseg = CURSEG_I(sbi, i);
>>>>>>>>    		si->curseg[i] = curseg->segno;
>>>>>>>>    		si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
>>>>>>>> @@ -393,6 +393,10 @@ static int stat_show(struct seq_file *s, void *v)
>>>>>>>>    			   si->dirty_seg[CURSEG_COLD_NODE],
>>>>>>>>    			   si->full_seg[CURSEG_COLD_NODE],
>>>>>>>>    			   si->valid_blks[CURSEG_COLD_NODE]);
>>>>>>>> +		seq_printf(s, "  - Pinned file: %8d %8d %8d\n",
>>>>>>>> +			   si->curseg[CURSEG_COLD_DATA_PINNED],
>>>>>>>> +			   si->cursec[CURSEG_COLD_DATA_PINNED],
>>>>>>>> +			   si->curzone[CURSEG_COLD_DATA_PINNED]);
>>>>>>>>    		seq_printf(s, "\n  - Valid: %d\n  - Dirty: %d\n",
>>>>>>>>    			   si->main_area_segs - si->dirty_count -
>>>>>>>>    			   si->prefree_count - si->free_segs,
>>>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>>>> index 7d6c5f8ce16b..f06c77066284 100644
>>>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>>>> @@ -996,7 +996,9 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
>>>>>>>>     */
>>>>>>>>    #define	NR_CURSEG_DATA_TYPE	(3)
>>>>>>>>    #define NR_CURSEG_NODE_TYPE	(3)
>>>>>>>> -#define NR_CURSEG_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
>>>>>>>> +#define NR_CURSEG_INMEM_TYPE	(1)
>>>>>>>> +#define NR_CURSEG_PERSIST_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
>>>>>>>> +#define NR_CURSEG_TYPE		(NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE)
>>>>>>>>    
>>>>>>>>    enum {
>>>>>>>>    	CURSEG_HOT_DATA	= 0,	/* directory entry blocks */
>>>>>>>> @@ -1005,8 +1007,10 @@ enum {
>>>>>>>>    	CURSEG_HOT_NODE,	/* direct node blocks of directory files */
>>>>>>>>    	CURSEG_WARM_NODE,	/* direct node blocks of normal files */
>>>>>>>>    	CURSEG_COLD_NODE,	/* indirect node blocks */
>>>>>>>> -	NO_CHECK_TYPE,
>>>>>>>> -	CURSEG_COLD_DATA_PINNED,/* cold data for pinned file */
>>>>>>>> +	NR_PERSISTENT_LOG,	/* number of persistent log */
>>>>>>>> +	CURSEG_COLD_DATA_PINNED = NR_PERSISTENT_LOG,
>>>>>>>> +				/* pinned file that needs consecutive block address */
>>>>>>>> +	NO_CHECK_TYPE,		/* number of persistent & inmem log */
>>>>>>>>    };
>>>>>>>>    
>>>>>>>>    struct flush_cmd {
>>>>>>>> @@ -3359,6 +3363,8 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
>>>>>>>>    int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
>>>>>>>>    void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
>>>>>>>>    int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
>>>>>>>> +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type);
>>>>>>>> +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type);
>>>>>>>>    void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
>>>>>>>>    					unsigned int start, unsigned int end);
>>>>>>>>    void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type);
>>>>>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>>>>>> index c10e82806c2a..8611ade06018 100644
>>>>>>>> --- a/fs/f2fs/file.c
>>>>>>>> +++ b/fs/f2fs/file.c
>>>>>>>> @@ -1656,13 +1656,14 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
>>>>>>>>    		}
>>>>>>>>    
>>>>>>>>    		down_write(&sbi->pin_sem);
>>>>>>>> -		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
>>>>>>>>    
>>>>>>>>    		f2fs_lock_op(sbi);
>>>>>>>>    		f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA);
>>>>>>>>    		f2fs_unlock_op(sbi);
>>>>>>>>    
>>>>>>>> +		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
>>>>>>>>    		err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
>>>>>>>> +
>>>>>>>>    		up_write(&sbi->pin_sem);
>>>>>>>>    
>>>>>>>>    		done += map.m_len;
>>>>>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>>>>>> index 3b718da69910..84807abe4e00 100644
>>>>>>>> --- a/fs/f2fs/gc.c
>>>>>>>> +++ b/fs/f2fs/gc.c
>>>>>>>> @@ -1448,7 +1448,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
>>>>>>>>    	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
>>>>>>>>    
>>>>>>>>    	/* Move out cursegs from the target range */
>>>>>>>> -	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
>>>>>>>> +	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++)
>>>>>>>>    		f2fs_allocate_segment_for_resize(sbi, type, start, end);
>>>>>>>>    
>>>>>>>>    	/* do GC to move out valid blocks in the range */
>>>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>>>>> index 5924b3965ae4..863ec6f1fb87 100644
>>>>>>>> --- a/fs/f2fs/segment.c
>>>>>>>> +++ b/fs/f2fs/segment.c
>>>>>>>> @@ -1958,7 +1958,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
>>>>>>>>    
>>>>>>>>    	mutex_lock(&dirty_i->seglist_lock);
>>>>>>>>    	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
>>>>>>>> -		__set_test_and_free(sbi, segno);
>>>>>>>> +		__set_test_and_free(sbi, segno, false);
>>>>>>>>    	mutex_unlock(&dirty_i->seglist_lock);
>>>>>>>>    }
>>>>>>>>    
>>>>>>>> @@ -2496,6 +2496,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
>>>>>>>>    	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>>>    	struct summary_footer *sum_footer;
>>>>>>>>    
>>>>>>>> +	curseg->inited = true;
>>>>>>>>    	curseg->segno = curseg->next_segno;
>>>>>>>>    	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
>>>>>>>>    	curseg->next_blkoff = 0;
>>>>>>>> @@ -2503,24 +2504,31 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
>>>>>>>>    
>>>>>>>>    	sum_footer = &(curseg->sum_blk->footer);
>>>>>>>>    	memset(sum_footer, 0, sizeof(struct summary_footer));
>>>>>>>> -	if (IS_DATASEG(type))
>>>>>>>> +	if (IS_DATASEG(curseg->seg_type))
>>>>>>>>    		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
>>>>>>>> -	if (IS_NODESEG(type))
>>>>>>>> +	if (IS_NODESEG(curseg->seg_type))
>>>>>>>>    		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
>>>>>>>> -	__set_sit_entry_type(sbi, type, curseg->segno, modified);
>>>>>>>> +	__set_sit_entry_type(sbi, curseg->seg_type, curseg->segno, modified);
>>>>>>>>    }
>>>>>>>>    
>>>>>>>>    static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>>>>>>>>    {
>>>>>>>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>>> +
>>>>>>>>    	/* if segs_per_sec is large than 1, we need to keep original policy. */
>>>>>>>>    	if (__is_large_section(sbi))
>>>>>>>> -		return CURSEG_I(sbi, type)->segno;
>>>>>>>> +		return curseg->segno;
>>>>>>>> +
>>>>>>>> +	/* inmem log may not locate on any segment after mount */
>>>>>>>> +	if (!curseg->inited)
>>>>>>>> +		return 0;
>>>>>>>>    
>>>>>>>>    	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
>>>>>>>>    		return 0;
>>>>>>>>    
>>>>>>>>    	if (test_opt(sbi, NOHEAP) &&
>>>>>>>> -		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
>>>>>>>> +		(curseg->seg_type == CURSEG_HOT_DATA ||
>>>>>>>> +		IS_NODESEG(curseg->seg_type)))
>>>>>>>>    		return 0;
>>>>>>>>    
>>>>>>>>    	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
>>>>>>>> @@ -2530,7 +2538,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>>>>>>>>    	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
>>>>>>>>    		return 0;
>>>>>>>>    
>>>>>>>> -	return CURSEG_I(sbi, type)->segno;
>>>>>>>> +	return curseg->segno;
>>>>>>>>    }
>>>>>>>>    
>>>>>>>>    /*
>>>>>>>> @@ -2540,12 +2548,14 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>>>>>>>>    static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
>>>>>>>>    {
>>>>>>>>    	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>>> +	unsigned short seg_type = curseg->seg_type;
>>>>>>>>    	unsigned int segno = curseg->segno;
>>>>>>>>    	int dir = ALLOC_LEFT;
>>>>>>>>    
>>>>>>>> -	write_sum_page(sbi, curseg->sum_blk,
>>>>>>>> +	if (curseg->inited)
>>>>>>>> +		write_sum_page(sbi, curseg->sum_blk,
>>>>>>>>    				GET_SUM_BLOCK(sbi, segno));
>>>>>>>> -	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
>>>>>>>> +	if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
>>>>>>>>    		dir = ALLOC_RIGHT;
>>>>>>>>    
>>>>>>>>    	if (test_opt(sbi, NOHEAP))
>>>>>>>> @@ -2622,6 +2632,43 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
>>>>>>>>    	f2fs_put_page(sum_page, 1);
>>>>>>>>    }
>>>>>>>>    
>>>>>>>> +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
>>>>>>>> +{
>>>>>>>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>>> +
>>>>>>>> +	mutex_lock(&curseg->curseg_mutex);
>>>>>>>> +	if (!curseg->inited)
>>>>>>>> +		goto out;
>>>>>>>> +
>>>>>>>> +	if (get_valid_blocks(sbi, curseg->segno, false)) {
>>>>>>>> +		write_sum_page(sbi, curseg->sum_blk,
>>>>>>>> +				GET_SUM_BLOCK(sbi, curseg->segno));
>>>>>>>> +	} else {
>>>>>>>> +		mutex_lock(&DIRTY_I(sbi)->seglist_lock);
>>>>>>>> +		__set_test_and_free(sbi, curseg->segno, true);
>>>>>>>> +		mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
>>>>>>>> +	}
>>>>>>>> +out:
>>>>>>>> +	mutex_unlock(&curseg->curseg_mutex);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
>>>>>>>> +{
>>>>>>>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>>> +
>>>>>>>> +	mutex_lock(&curseg->curseg_mutex);
>>>>>>>> +	if (!curseg->inited)
>>>>>>>> +		goto out;
>>>>>>>> +	if (get_valid_blocks(sbi, curseg->segno, false))
>>>>>>>> +		goto out;
>>>>>>>> +
>>>>>>>> +	mutex_lock(&DIRTY_I(sbi)->seglist_lock);
>>>>>>>> +	__set_test_and_inuse(sbi, curseg->segno);
>>>>>>>> +	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
>>>>>>>> +out:
>>>>>>>> +	mutex_unlock(&curseg->curseg_mutex);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>    static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
>>>>>>>>    {
>>>>>>>>    	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>>> @@ -2738,11 +2785,15 @@ void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
>>>>>>>>    	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>>>    	unsigned int old_segno;
>>>>>>>>    
>>>>>>>> +	if (!curseg->inited)
>>>>>>>> +		goto alloc;
>>>>>>>> +
>>>>>>>>    	if (!curseg->next_blkoff &&
>>>>>>>>    		!get_valid_blocks(sbi, curseg->segno, false) &&
>>>>>>>>    		!get_ckpt_valid_blocks(sbi, curseg->segno))
>>>>>>>>    		return;
>>>>>>>>    
>>>>>>>> +alloc:
>>>>>>>>    	old_segno = curseg->segno;
>>>>>>>>    	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
>>>>>>>>    	locate_dirty_segment(sbi, old_segno);
>>>>>>>> @@ -3126,19 +3177,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>>>>>>>>    {
>>>>>>>>    	struct sit_info *sit_i = SIT_I(sbi);
>>>>>>>>    	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>>> -	bool put_pin_sem = false;
>>>>>>>> -
>>>>>>>> -	if (type == CURSEG_COLD_DATA) {
>>>>>>>> -		/* GC during CURSEG_COLD_DATA_PINNED allocation */
>>>>>>>> -		if (down_read_trylock(&sbi->pin_sem)) {
>>>>>>>> -			put_pin_sem = true;
>>>>>>>> -		} else {
>>>>>>>> -			type = CURSEG_WARM_DATA;
>>>>>>>> -			curseg = CURSEG_I(sbi, type);
>>>>>>>> -		}
>>>>>>>> -	} else if (type == CURSEG_COLD_DATA_PINNED) {
>>>>>>>> -		type = CURSEG_COLD_DATA;
>>>>>>>> -	}
>>>>>>>>    
>>>>>>>>    	down_read(&SM_I(sbi)->curseg_lock);
>>>>>>>>    
>>>>>>>> @@ -3204,9 +3242,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>>>>>>>>    	mutex_unlock(&curseg->curseg_mutex);
>>>>>>>>    
>>>>>>>>    	up_read(&SM_I(sbi)->curseg_lock);
>>>>>>>> -
>>>>>>>> -	if (put_pin_sem)
>>>>>>>> -		up_read(&sbi->pin_sem);
>>>>>>>>    }
>>>>>>>>    
>>>>>>>>    static void update_device_state(struct f2fs_io_info *fio)
>>>>>>>> @@ -3574,7 +3609,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
>>>>>>>>    		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
>>>>>>>>    							CURSEG_HOT_DATA]);
>>>>>>>>    		if (__exist_node_summaries(sbi))
>>>>>>>> -			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
>>>>>>>> +			blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
>>>>>>>>    		else
>>>>>>>>    			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
>>>>>>>>    	} else {
>>>>>>>> @@ -3652,8 +3687,9 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
>>>>>>>>    	}
>>>>>>>>    
>>>>>>>>    	if (__exist_node_summaries(sbi))
>>>>>>>> -		f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
>>>>>>>> -					NR_CURSEG_TYPE - type, META_CP, true);
>>>>>>>> +		f2fs_ra_meta_pages(sbi,
>>>>>>>> +				sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
>>>>>>>> +				NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
>>>>>>>>    
>>>>>>>>    	for (; type <= CURSEG_COLD_NODE; type++) {
>>>>>>>>    		err = read_normal_summaries(sbi, type);
>>>>>>>> @@ -4155,14 +4191,14 @@ static int build_curseg(struct f2fs_sb_info *sbi)
>>>>>>>>    	struct curseg_info *array;
>>>>>>>>    	int i;
>>>>>>>>    
>>>>>>>> -	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
>>>>>>>> -			     GFP_KERNEL);
>>>>>>>> +	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
>>>>>>>> +					sizeof(*array)), GFP_KERNEL);
>>>>>>>>    	if (!array)
>>>>>>>>    		return -ENOMEM;
>>>>>>>>    
>>>>>>>>    	SM_I(sbi)->curseg_array = array;
>>>>>>>>    
>>>>>>>> -	for (i = 0; i < NR_CURSEG_TYPE; i++) {
>>>>>>>> +	for (i = 0; i < NO_CHECK_TYPE; i++) {
>>>>>>>>    		mutex_init(&array[i].curseg_mutex);
>>>>>>>>    		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
>>>>>>>>    		if (!array[i].sum_blk)
>>>>>>>> @@ -4172,8 +4208,13 @@ static int build_curseg(struct f2fs_sb_info *sbi)
>>>>>>>>    				sizeof(struct f2fs_journal), GFP_KERNEL);
>>>>>>>>    		if (!array[i].journal)
>>>>>>>>    			return -ENOMEM;
>>>>>>>> +		if (i < NR_PERSISTENT_LOG)
>>>>>>>> +			array[i].seg_type = CURSEG_HOT_DATA + i;
>>>>>>>> +		else if (i == CURSEG_COLD_DATA_PINNED)
>>>>>>>> +			array[i].seg_type = CURSEG_COLD_DATA;
>>>>>>>>    		array[i].segno = NULL_SEGNO;
>>>>>>>>    		array[i].next_blkoff = 0;
>>>>>>>> +		array[i].inited = false;
>>>>>>>>    	}
>>>>>>>>    	return restore_curseg_summaries(sbi);
>>>>>>>>    }
>>>>>>>> @@ -4408,7 +4449,7 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
>>>>>>>>    	 * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
>>>>>>>>    	 * In LFS curseg, all blkaddr after .next_blkoff should be unused.
>>>>>>>>    	 */
>>>>>>>> -	for (i = 0; i < NO_CHECK_TYPE; i++) {
>>>>>>>> +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
>>>>>>>>    		struct curseg_info *curseg = CURSEG_I(sbi, i);
>>>>>>>>    		struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
>>>>>>>>    		unsigned int blkofs = curseg->next_blkoff;
>>>>>>>> @@ -4637,7 +4678,7 @@ int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
>>>>>>>>    {
>>>>>>>>    	int i, ret;
>>>>>>>>    
>>>>>>>> -	for (i = 0; i < NO_CHECK_TYPE; i++) {
>>>>>>>> +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
>>>>>>>>    		ret = fix_curseg_write_pointer(sbi, i);
>>>>>>>>    		if (ret)
>>>>>>>>    			return ret;
>>>>>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>>>>>>>> index f261e3e6a69b..8ff261550cbb 100644
>>>>>>>> --- a/fs/f2fs/segment.h
>>>>>>>> +++ b/fs/f2fs/segment.h
>>>>>>>> @@ -22,7 +22,7 @@
>>>>>>>>    #define GET_R2L_SEGNO(free_i, segno)	((segno) + (free_i)->start_segno)
>>>>>>>>    
>>>>>>>>    #define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
>>>>>>>> -#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE)
>>>>>>>> +#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
>>>>>>>>    
>>>>>>>>    #define IS_HOT(t)	((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
>>>>>>>>    #define IS_WARM(t)	((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
>>>>>>>> @@ -34,7 +34,8 @@
>>>>>>>>    	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) ||	\
>>>>>>>>    	 ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) ||	\
>>>>>>>>    	 ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) ||	\
>>>>>>>> -	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
>>>>>>>> +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) ||	\
>>>>>>>> +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno))
>>>>>>>>    
>>>>>>>>    #define IS_CURSEC(sbi, secno)						\
>>>>>>>>    	(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno /		\
>>>>>>>> @@ -48,7 +49,9 @@
>>>>>>>>    	 ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno /		\
>>>>>>>>    	  (sbi)->segs_per_sec) ||	\
>>>>>>>>    	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno /		\
>>>>>>>> -	  (sbi)->segs_per_sec))	\
>>>>>>>> +	  (sbi)->segs_per_sec) ||	\
>>>>>>>> +	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno /	\
>>>>>>>> +	  (sbi)->segs_per_sec))
>>>>>>>>    
>>>>>>>>    #define MAIN_BLKADDR(sbi)						\
>>>>>>>>    	(SM_I(sbi) ? SM_I(sbi)->main_blkaddr : 				\
>>>>>>>> @@ -288,10 +291,12 @@ struct curseg_info {
>>>>>>>>    	struct rw_semaphore journal_rwsem;	/* protect journal area */
>>>>>>>>    	struct f2fs_journal *journal;		/* cached journal info */
>>>>>>>>    	unsigned char alloc_type;		/* current allocation type */
>>>>>>>> +	unsigned short seg_type;		/* segment type like CURSEG_XXX_TYPE */
>>>>>>>>    	unsigned int segno;			/* current segment number */
>>>>>>>>    	unsigned short next_blkoff;		/* next block offset to write */
>>>>>>>>    	unsigned int zone;			/* current zone number */
>>>>>>>>    	unsigned int next_segno;		/* preallocated segment */
>>>>>>>> +	bool inited;				/* indicate inmem log is inited */
>>>>>>>>    };
>>>>>>>>    
>>>>>>>>    struct sit_entry_set {
>>>>>>>> @@ -305,8 +310,6 @@ struct sit_entry_set {
>>>>>>>>     */
>>>>>>>>    static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
>>>>>>>>    {
>>>>>>>> -	if (type == CURSEG_COLD_DATA_PINNED)
>>>>>>>> -		type = CURSEG_COLD_DATA;
>>>>>>>>    	return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
>>>>>>>>    }
>>>>>>>>    
>>>>>>>> @@ -438,7 +441,7 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi,
>>>>>>>>    }
>>>>>>>>    
>>>>>>>>    static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
>>>>>>>> -		unsigned int segno)
>>>>>>>> +		unsigned int segno, bool inmem)
>>>>>>>>    {
>>>>>>>>    	struct free_segmap_info *free_i = FREE_I(sbi);
>>>>>>>>    	unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
>>>>>>>> @@ -449,7 +452,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
>>>>>>>>    	if (test_and_clear_bit(segno, free_i->free_segmap)) {
>>>>>>>>    		free_i->free_segments++;
>>>>>>>>    
>>>>>>>> -		if (IS_CURSEC(sbi, secno))
>>>>>>>> +		if (!inmem && IS_CURSEC(sbi, secno))
>>>>>>>>    			goto skip_free;
>>>>>>>>    		next = find_next_bit(free_i->free_segmap,
>>>>>>>>    				start_segno + sbi->segs_per_sec, start_segno);
>>>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>>>> index 80cb7cd358f8..0fefa130585f 100644
>>>>>>>> --- a/fs/f2fs/super.c
>>>>>>>> +++ b/fs/f2fs/super.c
>>>>>>>> @@ -575,7 +575,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>>>>>>>>    		case Opt_active_logs:
>>>>>>>>    			if (args->from && match_int(args, &arg))
>>>>>>>>    				return -EINVAL;
>>>>>>>> -			if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
>>>>>>>> +			if (arg != 2 && arg != 4 &&
>>>>>>>> +				arg != NR_CURSEG_PERSIST_TYPE)
>>>>>>>>    				return -EINVAL;
>>>>>>>>    			F2FS_OPTION(sbi).active_logs = arg;
>>>>>>>>    			break;
>>>>>>>> @@ -981,7 +982,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>>>>>>>>    	}
>>>>>>>>    
>>>>>>>>    	/* Not pass down write hints if the number of active logs is lesser
>>>>>>>> -	 * than NR_CURSEG_TYPE.
>>>>>>>> +	 * than NR_CURSEG_PERSIST_TYPE.
>>>>>>>>    	 */
>>>>>>>>    	if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE)
>>>>>>>>    		F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
>>>>>>>> @@ -1614,7 +1615,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>>>>>>>>    static void default_options(struct f2fs_sb_info *sbi)
>>>>>>>>    {
>>>>>>>>    	/* init some FS parameters */
>>>>>>>> -	F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE;
>>>>>>>> +	F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE;
>>>>>>>>    	F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
>>>>>>>>    	F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
>>>>>>>>    	F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
>>>>>>>> @@ -2946,7 +2947,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
>>>>>>>>    	cp_payload = __cp_payload(sbi);
>>>>>>>>    	if (cp_pack_start_sum < cp_payload + 1 ||
>>>>>>>>    		cp_pack_start_sum > blocks_per_seg - 1 -
>>>>>>>> -			NR_CURSEG_TYPE) {
>>>>>>>> +			NR_CURSEG_PERSIST_TYPE) {
>>>>>>>>    		f2fs_err(sbi, "Wrong cp_pack_start_sum: %u",
>>>>>>>>    			 cp_pack_start_sum);
>>>>>>>>    		return 1;
>>>>>>>> -- 
>>>>>>>> 2.26.2
>>>>>>> .
>>>>>>>
>>>>> .
>>>>>
>>> .
>>>
>>
>>
>> _______________________________________________
>> Linux-f2fs-devel mailing list
>> Linux-f2fs-devel@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>> .
>>
> 
> 
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> .
> 


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg
  2020-08-04  1:49               ` Chao Yu
@ 2020-08-04  2:44                 ` Jaegeuk Kim
  2020-08-04  2:53                   ` Chao Yu
  0 siblings, 1 reply; 19+ messages in thread
From: Jaegeuk Kim @ 2020-08-04  2:44 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 08/04, Chao Yu wrote:
> On 2020/7/25 16:42, Chao Yu wrote:
> > On 2020/7/16 9:24, Chao Yu wrote:
> > > On 2020/7/16 3:07, Jaegeuk Kim wrote:
> > > > On 07/15, Chao Yu wrote:
> > > > > On 2020/7/7 11:51, Jaegeuk Kim wrote:
> > > > > > On 07/07, Chao Yu wrote:
> > > > > > > On 2020/7/7 11:21, Jaegeuk Kim wrote:
> > > > > > > > Hi Chao,
> > > > > > > > 
> > > > > > > > Do you have any brief design doc to present the idea?
> > > > > > > 
> > > > > > > Hi Jaegeuk,
> > > > > > > 
> > > > > > > You mean this whole patchset, right?
> > > > > > > 
> > > > > > > I can add a brief design description in patch 0/5.
> > > > > > 
> > > > > > Yeah, it's a bit hard to understand the whole flow.
> > > > > 
> > > > > Jaegeuk,
> > > > > 
> > > > > Do you have time to take a look at this idea summarized in
> > > > > [PATCH 0/5]'s cover letter?
> > > > 
> > > > Sorry, I couldn't afford to sitting down to review the design.
> > > > Let me give it a try soon.
> > > 
> > > Alright, let me know if you have any question about the idea.
> > 
> > Ping,
> > 
> > Jaegeuk, still be too busy...? :P
> 
> Ping again...
> 
> Any thoughts about left patches in patchset?

Ah, I was waiting for another patch-set from you.

> 
> > 
> > Thanks,
> > 
> > > 
> > > > 
> > > > > 
> > > > > > 
> > > > > > Thanks,
> > > > > > 
> > > > > > > 
> > > > > > > > 
> > > > > > > > Thanks,
> > > > > > > > 
> > > > > > > > On 06/30, Chao Yu wrote:
> > > > > > > > > Previous implementation of aligned pinfile allocation will:
> > > > > > > > > - allocate new segment on cold data log no matter whether last used
> > > > > > > > > segment is partially used or not, it makes IOs more random;
> > > > > > > > > - force concurrent cold data/GCed IO going into warm data area, it
> > > > > > > > > can make a bad effect on hot/cold data separation;
> > > > > > > > > 
> > > > > > > > > In this patch, we introduce a new type of log named 'inmem curseg',
> > > > > > > > > the differents from normal curseg is:
> > > > > > > > > - it reuses existed segment type (CURSEG_XXX_NODE/DATA);
> > > > > > > > > > - it only exists in memory, its segno, blkofs, summary will not be
> > > > > > > > >    persisted into checkpoint area;
> > > > > > > > > 
> > > > > > > > > With this new feature, we can enhance scalability of log, special
> > > > > > > > > allocators can be created for purposes:
> > > > > > > > > - pure lfs allocator for aligned pinfile allocation or file
> > > > > > > > > defragmentation
> > > > > > > > > - pure ssr allocator for later feature
> > > > > > > > > 
> > > > > > > > > So that, let's update aligned pinfile allocation to use this new
> > > > > > > > > inmem curseg fwk.
> > > > > > > > > 
> > > > > > > > > Signed-off-by: Chao Yu <yuchao0@huawei.com>
> > > > > > > > > ---
> > > > > > > > >    fs/f2fs/checkpoint.c |   7 ++-
> > > > > > > > >    fs/f2fs/debug.c      |   6 ++-
> > > > > > > > >    fs/f2fs/f2fs.h       |  12 +++--
> > > > > > > > >    fs/f2fs/file.c       |   3 +-
> > > > > > > > >    fs/f2fs/gc.c         |   2 +-
> > > > > > > > >    fs/f2fs/segment.c    | 107 ++++++++++++++++++++++++++++++-------------
> > > > > > > > >    fs/f2fs/segment.h    |  17 ++++---
> > > > > > > > >    fs/f2fs/super.c      |   9 ++--
> > > > > > > > >    8 files changed, 112 insertions(+), 51 deletions(-)
> > > > > > > > > 
> > > > > > > > > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > > > > > > > > index 1bb8278a1c4a..644a914af25a 100644
> > > > > > > > > --- a/fs/f2fs/checkpoint.c
> > > > > > > > > +++ b/fs/f2fs/checkpoint.c
> > > > > > > > > @@ -1623,11 +1623,16 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> > > > > > > > >    	f2fs_flush_sit_entries(sbi, cpc);
> > > > > > > > > +	/* save inmem log status */
> > > > > > > > > +	f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
> > > > > > > > > +
> > > > > > > > >    	err = do_checkpoint(sbi, cpc);
> > > > > > > > >    	if (err)
> > > > > > > > >    		f2fs_release_discard_addrs(sbi);
> > > > > > > > >    	else
> > > > > > > > >    		f2fs_clear_prefree_segments(sbi, cpc);
> > > > > > > > > +
> > > > > > > > > +	f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
> > > > > > > > >    stop:
> > > > > > > > >    	unblock_operations(sbi);
> > > > > > > > >    	stat_inc_cp_count(sbi->stat_info);
> > > > > > > > > @@ -1658,7 +1663,7 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
> > > > > > > > >    	}
> > > > > > > > >    	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
> > > > > > > > > -			NR_CURSEG_TYPE - __cp_payload(sbi)) *
> > > > > > > > > +			NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
> > > > > > > > >    				F2FS_ORPHANS_PER_BLOCK;
> > > > > > > > >    }
> > > > > > > > > diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> > > > > > > > > index 4276c0f79beb..41a91aa8c262 100644
> > > > > > > > > --- a/fs/f2fs/debug.c
> > > > > > > > > +++ b/fs/f2fs/debug.c
> > > > > > > > > @@ -164,7 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
> > > > > > > > >    		* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
> > > > > > > > >    		/ 2;
> > > > > > > > >    	si->util_invalid = 50 - si->util_free - si->util_valid;
> > > > > > > > > -	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
> > > > > > > > > +	for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
> > > > > > > > >    		struct curseg_info *curseg = CURSEG_I(sbi, i);
> > > > > > > > >    		si->curseg[i] = curseg->segno;
> > > > > > > > >    		si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
> > > > > > > > > @@ -393,6 +393,10 @@ static int stat_show(struct seq_file *s, void *v)
> > > > > > > > >    			   si->dirty_seg[CURSEG_COLD_NODE],
> > > > > > > > >    			   si->full_seg[CURSEG_COLD_NODE],
> > > > > > > > >    			   si->valid_blks[CURSEG_COLD_NODE]);
> > > > > > > > > +		seq_printf(s, "  - Pinned file: %8d %8d %8d\n",
> > > > > > > > > +			   si->curseg[CURSEG_COLD_DATA_PINNED],
> > > > > > > > > +			   si->cursec[CURSEG_COLD_DATA_PINNED],
> > > > > > > > > +			   si->curzone[CURSEG_COLD_DATA_PINNED]);
> > > > > > > > >    		seq_printf(s, "\n  - Valid: %d\n  - Dirty: %d\n",
> > > > > > > > >    			   si->main_area_segs - si->dirty_count -
> > > > > > > > >    			   si->prefree_count - si->free_segs,
> > > > > > > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > > > > > > > index 7d6c5f8ce16b..f06c77066284 100644
> > > > > > > > > --- a/fs/f2fs/f2fs.h
> > > > > > > > > +++ b/fs/f2fs/f2fs.h
> > > > > > > > > @@ -996,7 +996,9 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
> > > > > > > > >     */
> > > > > > > > >    #define	NR_CURSEG_DATA_TYPE	(3)
> > > > > > > > >    #define NR_CURSEG_NODE_TYPE	(3)
> > > > > > > > > -#define NR_CURSEG_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
> > > > > > > > > +#define NR_CURSEG_INMEM_TYPE	(1)
> > > > > > > > > +#define NR_CURSEG_PERSIST_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
> > > > > > > > > +#define NR_CURSEG_TYPE		(NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE)
> > > > > > > > >    enum {
> > > > > > > > >    	CURSEG_HOT_DATA	= 0,	/* directory entry blocks */
> > > > > > > > > @@ -1005,8 +1007,10 @@ enum {
> > > > > > > > >    	CURSEG_HOT_NODE,	/* direct node blocks of directory files */
> > > > > > > > >    	CURSEG_WARM_NODE,	/* direct node blocks of normal files */
> > > > > > > > >    	CURSEG_COLD_NODE,	/* indirect node blocks */
> > > > > > > > > -	NO_CHECK_TYPE,
> > > > > > > > > -	CURSEG_COLD_DATA_PINNED,/* cold data for pinned file */
> > > > > > > > > +	NR_PERSISTENT_LOG,	/* number of persistent log */
> > > > > > > > > +	CURSEG_COLD_DATA_PINNED = NR_PERSISTENT_LOG,
> > > > > > > > > +				/* pinned file that needs consecutive block address */
> > > > > > > > > +	NO_CHECK_TYPE,		/* number of persistent & inmem log */
> > > > > > > > >    };
> > > > > > > > >    struct flush_cmd {
> > > > > > > > > @@ -3359,6 +3363,8 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
> > > > > > > > >    int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
> > > > > > > > >    void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
> > > > > > > > >    int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
> > > > > > > > > +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type);
> > > > > > > > > +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type);
> > > > > > > > >    void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
> > > > > > > > >    					unsigned int start, unsigned int end);
> > > > > > > > >    void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type);
> > > > > > > > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > > > > > > > index c10e82806c2a..8611ade06018 100644
> > > > > > > > > --- a/fs/f2fs/file.c
> > > > > > > > > +++ b/fs/f2fs/file.c
> > > > > > > > > @@ -1656,13 +1656,14 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
> > > > > > > > >    		}
> > > > > > > > >    		down_write(&sbi->pin_sem);
> > > > > > > > > -		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
> > > > > > > > >    		f2fs_lock_op(sbi);
> > > > > > > > >    		f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA);
> > > > > > > > >    		f2fs_unlock_op(sbi);
> > > > > > > > > +		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
> > > > > > > > >    		err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
> > > > > > > > > +
> > > > > > > > >    		up_write(&sbi->pin_sem);
> > > > > > > > >    		done += map.m_len;
> > > > > > > > > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > > > > > > > > index 3b718da69910..84807abe4e00 100644
> > > > > > > > > --- a/fs/f2fs/gc.c
> > > > > > > > > +++ b/fs/f2fs/gc.c
> > > > > > > > > @@ -1448,7 +1448,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
> > > > > > > > >    	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
> > > > > > > > >    	/* Move out cursegs from the target range */
> > > > > > > > > -	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
> > > > > > > > > +	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++)
> > > > > > > > >    		f2fs_allocate_segment_for_resize(sbi, type, start, end);
> > > > > > > > >    	/* do GC to move out valid blocks in the range */
> > > > > > > > > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > > > > > > > > index 5924b3965ae4..863ec6f1fb87 100644
> > > > > > > > > --- a/fs/f2fs/segment.c
> > > > > > > > > +++ b/fs/f2fs/segment.c
> > > > > > > > > @@ -1958,7 +1958,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
> > > > > > > > >    	mutex_lock(&dirty_i->seglist_lock);
> > > > > > > > >    	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
> > > > > > > > > -		__set_test_and_free(sbi, segno);
> > > > > > > > > +		__set_test_and_free(sbi, segno, false);
> > > > > > > > >    	mutex_unlock(&dirty_i->seglist_lock);
> > > > > > > > >    }
> > > > > > > > > @@ -2496,6 +2496,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
> > > > > > > > >    	struct curseg_info *curseg = CURSEG_I(sbi, type);
> > > > > > > > >    	struct summary_footer *sum_footer;
> > > > > > > > > +	curseg->inited = true;
> > > > > > > > >    	curseg->segno = curseg->next_segno;
> > > > > > > > >    	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
> > > > > > > > >    	curseg->next_blkoff = 0;
> > > > > > > > > @@ -2503,24 +2504,31 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
> > > > > > > > >    	sum_footer = &(curseg->sum_blk->footer);
> > > > > > > > >    	memset(sum_footer, 0, sizeof(struct summary_footer));
> > > > > > > > > -	if (IS_DATASEG(type))
> > > > > > > > > +	if (IS_DATASEG(curseg->seg_type))
> > > > > > > > >    		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
> > > > > > > > > -	if (IS_NODESEG(type))
> > > > > > > > > +	if (IS_NODESEG(curseg->seg_type))
> > > > > > > > >    		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
> > > > > > > > > -	__set_sit_entry_type(sbi, type, curseg->segno, modified);
> > > > > > > > > +	__set_sit_entry_type(sbi, curseg->seg_type, curseg->segno, modified);
> > > > > > > > >    }
> > > > > > > > >    static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
> > > > > > > > >    {
> > > > > > > > > +	struct curseg_info *curseg = CURSEG_I(sbi, type);
> > > > > > > > > +
> > > > > > > > >    	/* if segs_per_sec is large than 1, we need to keep original policy. */
> > > > > > > > >    	if (__is_large_section(sbi))
> > > > > > > > > -		return CURSEG_I(sbi, type)->segno;
> > > > > > > > > +		return curseg->segno;
> > > > > > > > > +
> > > > > > > > > +	/* inmem log may not locate on any segment after mount */
> > > > > > > > > +	if (!curseg->inited)
> > > > > > > > > +		return 0;
> > > > > > > > >    	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
> > > > > > > > >    		return 0;
> > > > > > > > >    	if (test_opt(sbi, NOHEAP) &&
> > > > > > > > > -		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
> > > > > > > > > +		(curseg->seg_type == CURSEG_HOT_DATA ||
> > > > > > > > > +		IS_NODESEG(curseg->seg_type)))
> > > > > > > > >    		return 0;
> > > > > > > > >    	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
> > > > > > > > > @@ -2530,7 +2538,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
> > > > > > > > >    	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
> > > > > > > > >    		return 0;
> > > > > > > > > -	return CURSEG_I(sbi, type)->segno;
> > > > > > > > > +	return curseg->segno;
> > > > > > > > >    }
> > > > > > > > >    /*
> > > > > > > > > @@ -2540,12 +2548,14 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
> > > > > > > > >    static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
> > > > > > > > >    {
> > > > > > > > >    	struct curseg_info *curseg = CURSEG_I(sbi, type);
> > > > > > > > > +	unsigned short seg_type = curseg->seg_type;
> > > > > > > > >    	unsigned int segno = curseg->segno;
> > > > > > > > >    	int dir = ALLOC_LEFT;
> > > > > > > > > -	write_sum_page(sbi, curseg->sum_blk,
> > > > > > > > > +	if (curseg->inited)
> > > > > > > > > +		write_sum_page(sbi, curseg->sum_blk,
> > > > > > > > >    				GET_SUM_BLOCK(sbi, segno));
> > > > > > > > > -	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
> > > > > > > > > +	if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
> > > > > > > > >    		dir = ALLOC_RIGHT;
> > > > > > > > >    	if (test_opt(sbi, NOHEAP))
> > > > > > > > > @@ -2622,6 +2632,43 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
> > > > > > > > >    	f2fs_put_page(sum_page, 1);
> > > > > > > > >    }
> > > > > > > > > +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
> > > > > > > > > +{
> > > > > > > > > +	struct curseg_info *curseg = CURSEG_I(sbi, type);
> > > > > > > > > +
> > > > > > > > > +	mutex_lock(&curseg->curseg_mutex);
> > > > > > > > > +	if (!curseg->inited)
> > > > > > > > > +		goto out;
> > > > > > > > > +
> > > > > > > > > +	if (get_valid_blocks(sbi, curseg->segno, false)) {
> > > > > > > > > +		write_sum_page(sbi, curseg->sum_blk,
> > > > > > > > > +				GET_SUM_BLOCK(sbi, curseg->segno));
> > > > > > > > > +	} else {
> > > > > > > > > +		mutex_lock(&DIRTY_I(sbi)->seglist_lock);
> > > > > > > > > +		__set_test_and_free(sbi, curseg->segno, true);
> > > > > > > > > +		mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
> > > > > > > > > +	}
> > > > > > > > > +out:
> > > > > > > > > +	mutex_unlock(&curseg->curseg_mutex);
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > > +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
> > > > > > > > > +{
> > > > > > > > > +	struct curseg_info *curseg = CURSEG_I(sbi, type);
> > > > > > > > > +
> > > > > > > > > +	mutex_lock(&curseg->curseg_mutex);
> > > > > > > > > +	if (!curseg->inited)
> > > > > > > > > +		goto out;
> > > > > > > > > +	if (get_valid_blocks(sbi, curseg->segno, false))
> > > > > > > > > +		goto out;
> > > > > > > > > +
> > > > > > > > > +	mutex_lock(&DIRTY_I(sbi)->seglist_lock);
> > > > > > > > > +	__set_test_and_inuse(sbi, curseg->segno);
> > > > > > > > > +	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
> > > > > > > > > +out:
> > > > > > > > > +	mutex_unlock(&curseg->curseg_mutex);
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > >    static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
> > > > > > > > >    {
> > > > > > > > >    	struct curseg_info *curseg = CURSEG_I(sbi, type);
> > > > > > > > > @@ -2738,11 +2785,15 @@ void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
> > > > > > > > >    	struct curseg_info *curseg = CURSEG_I(sbi, type);
> > > > > > > > >    	unsigned int old_segno;
> > > > > > > > > +	if (!curseg->inited)
> > > > > > > > > +		goto alloc;
> > > > > > > > > +
> > > > > > > > >    	if (!curseg->next_blkoff &&
> > > > > > > > >    		!get_valid_blocks(sbi, curseg->segno, false) &&
> > > > > > > > >    		!get_ckpt_valid_blocks(sbi, curseg->segno))
> > > > > > > > >    		return;
> > > > > > > > > +alloc:
> > > > > > > > >    	old_segno = curseg->segno;
> > > > > > > > >    	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
> > > > > > > > >    	locate_dirty_segment(sbi, old_segno);
> > > > > > > > > @@ -3126,19 +3177,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> > > > > > > > >    {
> > > > > > > > >    	struct sit_info *sit_i = SIT_I(sbi);
> > > > > > > > >    	struct curseg_info *curseg = CURSEG_I(sbi, type);
> > > > > > > > > -	bool put_pin_sem = false;
> > > > > > > > > -
> > > > > > > > > -	if (type == CURSEG_COLD_DATA) {
> > > > > > > > > -		/* GC during CURSEG_COLD_DATA_PINNED allocation */
> > > > > > > > > -		if (down_read_trylock(&sbi->pin_sem)) {
> > > > > > > > > -			put_pin_sem = true;
> > > > > > > > > -		} else {
> > > > > > > > > -			type = CURSEG_WARM_DATA;
> > > > > > > > > -			curseg = CURSEG_I(sbi, type);
> > > > > > > > > -		}
> > > > > > > > > -	} else if (type == CURSEG_COLD_DATA_PINNED) {
> > > > > > > > > -		type = CURSEG_COLD_DATA;
> > > > > > > > > -	}
> > > > > > > > >    	down_read(&SM_I(sbi)->curseg_lock);
> > > > > > > > > @@ -3204,9 +3242,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> > > > > > > > >    	mutex_unlock(&curseg->curseg_mutex);
> > > > > > > > >    	up_read(&SM_I(sbi)->curseg_lock);
> > > > > > > > > -
> > > > > > > > > -	if (put_pin_sem)
> > > > > > > > > -		up_read(&sbi->pin_sem);
> > > > > > > > >    }
> > > > > > > > >    static void update_device_state(struct f2fs_io_info *fio)
> > > > > > > > > @@ -3574,7 +3609,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
> > > > > > > > >    		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
> > > > > > > > >    							CURSEG_HOT_DATA]);
> > > > > > > > >    		if (__exist_node_summaries(sbi))
> > > > > > > > > -			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
> > > > > > > > > +			blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
> > > > > > > > >    		else
> > > > > > > > >    			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
> > > > > > > > >    	} else {
> > > > > > > > > @@ -3652,8 +3687,9 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
> > > > > > > > >    	}
> > > > > > > > >    	if (__exist_node_summaries(sbi))
> > > > > > > > > -		f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
> > > > > > > > > -					NR_CURSEG_TYPE - type, META_CP, true);
> > > > > > > > > +		f2fs_ra_meta_pages(sbi,
> > > > > > > > > +				sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
> > > > > > > > > +				NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
> > > > > > > > >    	for (; type <= CURSEG_COLD_NODE; type++) {
> > > > > > > > >    		err = read_normal_summaries(sbi, type);
> > > > > > > > > @@ -4155,14 +4191,14 @@ static int build_curseg(struct f2fs_sb_info *sbi)
> > > > > > > > >    	struct curseg_info *array;
> > > > > > > > >    	int i;
> > > > > > > > > -	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
> > > > > > > > > -			     GFP_KERNEL);
> > > > > > > > > +	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
> > > > > > > > > +					sizeof(*array)), GFP_KERNEL);
> > > > > > > > >    	if (!array)
> > > > > > > > >    		return -ENOMEM;
> > > > > > > > >    	SM_I(sbi)->curseg_array = array;
> > > > > > > > > -	for (i = 0; i < NR_CURSEG_TYPE; i++) {
> > > > > > > > > +	for (i = 0; i < NO_CHECK_TYPE; i++) {
> > > > > > > > >    		mutex_init(&array[i].curseg_mutex);
> > > > > > > > >    		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
> > > > > > > > >    		if (!array[i].sum_blk)
> > > > > > > > > @@ -4172,8 +4208,13 @@ static int build_curseg(struct f2fs_sb_info *sbi)
> > > > > > > > >    				sizeof(struct f2fs_journal), GFP_KERNEL);
> > > > > > > > >    		if (!array[i].journal)
> > > > > > > > >    			return -ENOMEM;
> > > > > > > > > +		if (i < NR_PERSISTENT_LOG)
> > > > > > > > > +			array[i].seg_type = CURSEG_HOT_DATA + i;
> > > > > > > > > +		else if (i == CURSEG_COLD_DATA_PINNED)
> > > > > > > > > +			array[i].seg_type = CURSEG_COLD_DATA;
> > > > > > > > >    		array[i].segno = NULL_SEGNO;
> > > > > > > > >    		array[i].next_blkoff = 0;
> > > > > > > > > +		array[i].inited = false;
> > > > > > > > >    	}
> > > > > > > > >    	return restore_curseg_summaries(sbi);
> > > > > > > > >    }
> > > > > > > > > @@ -4408,7 +4449,7 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
> > > > > > > > >    	 * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
> > > > > > > > >    	 * In LFS curseg, all blkaddr after .next_blkoff should be unused.
> > > > > > > > >    	 */
> > > > > > > > > -	for (i = 0; i < NO_CHECK_TYPE; i++) {
> > > > > > > > > +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
> > > > > > > > >    		struct curseg_info *curseg = CURSEG_I(sbi, i);
> > > > > > > > >    		struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
> > > > > > > > >    		unsigned int blkofs = curseg->next_blkoff;
> > > > > > > > > @@ -4637,7 +4678,7 @@ int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
> > > > > > > > >    {
> > > > > > > > >    	int i, ret;
> > > > > > > > > -	for (i = 0; i < NO_CHECK_TYPE; i++) {
> > > > > > > > > +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
> > > > > > > > >    		ret = fix_curseg_write_pointer(sbi, i);
> > > > > > > > >    		if (ret)
> > > > > > > > >    			return ret;
> > > > > > > > > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> > > > > > > > > index f261e3e6a69b..8ff261550cbb 100644
> > > > > > > > > --- a/fs/f2fs/segment.h
> > > > > > > > > +++ b/fs/f2fs/segment.h
> > > > > > > > > @@ -22,7 +22,7 @@
> > > > > > > > >    #define GET_R2L_SEGNO(free_i, segno)	((segno) + (free_i)->start_segno)
> > > > > > > > >    #define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
> > > > > > > > > -#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE)
> > > > > > > > > +#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
> > > > > > > > >    #define IS_HOT(t)	((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
> > > > > > > > >    #define IS_WARM(t)	((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
> > > > > > > > > @@ -34,7 +34,8 @@
> > > > > > > > >    	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) ||	\
> > > > > > > > >    	 ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) ||	\
> > > > > > > > >    	 ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) ||	\
> > > > > > > > > -	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
> > > > > > > > > +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) ||	\
> > > > > > > > > +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno))
> > > > > > > > >    #define IS_CURSEC(sbi, secno)						\
> > > > > > > > >    	(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno /		\
> > > > > > > > > @@ -48,7 +49,9 @@
> > > > > > > > >    	 ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno /		\
> > > > > > > > >    	  (sbi)->segs_per_sec) ||	\
> > > > > > > > >    	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno /		\
> > > > > > > > > -	  (sbi)->segs_per_sec))	\
> > > > > > > > > +	  (sbi)->segs_per_sec) ||	\
> > > > > > > > > +	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno /	\
> > > > > > > > > +	  (sbi)->segs_per_sec))
> > > > > > > > >    #define MAIN_BLKADDR(sbi)						\
> > > > > > > > >    	(SM_I(sbi) ? SM_I(sbi)->main_blkaddr : 				\
> > > > > > > > > @@ -288,10 +291,12 @@ struct curseg_info {
> > > > > > > > >    	struct rw_semaphore journal_rwsem;	/* protect journal area */
> > > > > > > > >    	struct f2fs_journal *journal;		/* cached journal info */
> > > > > > > > >    	unsigned char alloc_type;		/* current allocation type */
> > > > > > > > > +	unsigned short seg_type;		/* segment type like CURSEG_XXX_TYPE */
> > > > > > > > >    	unsigned int segno;			/* current segment number */
> > > > > > > > >    	unsigned short next_blkoff;		/* next block offset to write */
> > > > > > > > >    	unsigned int zone;			/* current zone number */
> > > > > > > > >    	unsigned int next_segno;		/* preallocated segment */
> > > > > > > > > +	bool inited;				/* indicate inmem log is inited */
> > > > > > > > >    };
> > > > > > > > >    struct sit_entry_set {
> > > > > > > > > @@ -305,8 +310,6 @@ struct sit_entry_set {
> > > > > > > > >     */
> > > > > > > > >    static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
> > > > > > > > >    {
> > > > > > > > > -	if (type == CURSEG_COLD_DATA_PINNED)
> > > > > > > > > -		type = CURSEG_COLD_DATA;
> > > > > > > > >    	return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
> > > > > > > > >    }
> > > > > > > > > @@ -438,7 +441,7 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi,
> > > > > > > > >    }
> > > > > > > > >    static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
> > > > > > > > > -		unsigned int segno)
> > > > > > > > > +		unsigned int segno, bool inmem)
> > > > > > > > >    {
> > > > > > > > >    	struct free_segmap_info *free_i = FREE_I(sbi);
> > > > > > > > >    	unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
> > > > > > > > > @@ -449,7 +452,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
> > > > > > > > >    	if (test_and_clear_bit(segno, free_i->free_segmap)) {
> > > > > > > > >    		free_i->free_segments++;
> > > > > > > > > -		if (IS_CURSEC(sbi, secno))
> > > > > > > > > +		if (!inmem && IS_CURSEC(sbi, secno))
> > > > > > > > >    			goto skip_free;
> > > > > > > > >    		next = find_next_bit(free_i->free_segmap,
> > > > > > > > >    				start_segno + sbi->segs_per_sec, start_segno);
> > > > > > > > > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > > > > > > > > index 80cb7cd358f8..0fefa130585f 100644
> > > > > > > > > --- a/fs/f2fs/super.c
> > > > > > > > > +++ b/fs/f2fs/super.c
> > > > > > > > > @@ -575,7 +575,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
> > > > > > > > >    		case Opt_active_logs:
> > > > > > > > >    			if (args->from && match_int(args, &arg))
> > > > > > > > >    				return -EINVAL;
> > > > > > > > > -			if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
> > > > > > > > > +			if (arg != 2 && arg != 4 &&
> > > > > > > > > +				arg != NR_CURSEG_PERSIST_TYPE)
> > > > > > > > >    				return -EINVAL;
> > > > > > > > >    			F2FS_OPTION(sbi).active_logs = arg;
> > > > > > > > >    			break;
> > > > > > > > > @@ -981,7 +982,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
> > > > > > > > >    	}
> > > > > > > > >    	/* Not pass down write hints if the number of active logs is lesser
> > > > > > > > > -	 * than NR_CURSEG_TYPE.
> > > > > > > > > +	 * than NR_CURSEG_PERSIST_TYPE.
> > > > > > > > >    	 */
> > > > > > > > >    	if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE)
> > > > > > > > >    		F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
> > > > > > > > > @@ -1614,7 +1615,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
> > > > > > > > >    static void default_options(struct f2fs_sb_info *sbi)
> > > > > > > > >    {
> > > > > > > > >    	/* init some FS parameters */
> > > > > > > > > -	F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE;
> > > > > > > > > +	F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE;
> > > > > > > > >    	F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
> > > > > > > > >    	F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
> > > > > > > > >    	F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
> > > > > > > > > @@ -2946,7 +2947,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
> > > > > > > > >    	cp_payload = __cp_payload(sbi);
> > > > > > > > >    	if (cp_pack_start_sum < cp_payload + 1 ||
> > > > > > > > >    		cp_pack_start_sum > blocks_per_seg - 1 -
> > > > > > > > > -			NR_CURSEG_TYPE) {
> > > > > > > > > +			NR_CURSEG_PERSIST_TYPE) {
> > > > > > > > >    		f2fs_err(sbi, "Wrong cp_pack_start_sum: %u",
> > > > > > > > >    			 cp_pack_start_sum);
> > > > > > > > >    		return 1;
> > > > > > > > > -- 
> > > > > > > > > 2.26.2
> > > > > > > > .
> > > > > > > > 
> > > > > > .
> > > > > > 
> > > > .
> > > > 
> > > 
> > > 
> > > _______________________________________________
> > > Linux-f2fs-devel mailing list
> > > Linux-f2fs-devel@lists.sourceforge.net
> > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> > > .
> > > 
> > 
> > 
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > Linux-f2fs-devel@lists.sourceforge.net
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> > .
> > 


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg
  2020-08-04  2:44                 ` Jaegeuk Kim
@ 2020-08-04  2:53                   ` Chao Yu
  2020-08-04  3:56                     ` Jaegeuk Kim
  0 siblings, 1 reply; 19+ messages in thread
From: Chao Yu @ 2020-08-04  2:53 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2020/8/4 10:44, Jaegeuk Kim wrote:
> On 08/04, Chao Yu wrote:
>> On 2020/7/25 16:42, Chao Yu wrote:
>>> On 2020/7/16 9:24, Chao Yu wrote:
>>>> On 2020/7/16 3:07, Jaegeuk Kim wrote:
>>>>> On 07/15, Chao Yu wrote:
>>>>>> On 2020/7/7 11:51, Jaegeuk Kim wrote:
>>>>>>> On 07/07, Chao Yu wrote:
>>>>>>>> On 2020/7/7 11:21, Jaegeuk Kim wrote:
>>>>>>>>> Hi Chao,
>>>>>>>>>
>>>>>>>>> Do you have any brief design doc to present the idea?
>>>>>>>>
>>>>>>>> Hi Jaegeuk,
>>>>>>>>
>>>>>>>> You mean this whole patchset, right?
>>>>>>>>
>>>>>>>> I can add a brief design description in patch 0/5.
>>>>>>>
>>>>>>> Yeah, it's a bit hard to understand the whole flow.
>>>>>>
>>>>>> Jaegeuk,
>>>>>>
>>>>>> Do you have time to take a look at this idea summarized in
>>>>>> [PATCH 0/5]'s cover letter?
>>>>>
>>>>> Sorry, I couldn't afford to sitting down to review the design.
>>>>> Let me give it a try soon.
>>>>
>>>> Alright, let me know if you have any question about the idea.
>>>
>>> Ping,
>>>
>>> Jaegeuk, still be too busy...? :P
>>
>> Ping again...
>>
>> Any thoughts about left patches in patchset?
> 
> Ah, I was waiting for your another patch-set.

Oops, I thought that you had not finished reviewing all the patches...

Anyway, let me send v2 w/ changes mentioned by you.

BTW, we can discuss how to enable such a feature; the options are:
- enable when sb feature F2FS_FEATURE_ATGC was set
- enable via mount option
- enable via sysfs

> 
>>
>>>
>>> Thanks,
>>>
>>>>
>>>>>
>>>>>>
>>>>>>>
>>>>>>> Thanks,
>>>>>>>
>>>>>>>>
>>>>>>>>>
>>>>>>>>> Thanks,
>>>>>>>>>
>>>>>>>>> On 06/30, Chao Yu wrote:
>>>>>>>>>> Previous implementation of aligned pinfile allocation will:
>>>>>>>>>> - allocate new segment on cold data log no matter whether last used
>>>>>>>>>> segment is partially used or not, it makes IOs more random;
>>>>>>>>>> - force concurrent cold data/GCed IO going into warm data area, it
>>>>>>>>>> can make a bad effect on hot/cold data separation;
>>>>>>>>>>
>>>>>>>>>> In this patch, we introduce a new type of log named 'inmem curseg',
>>>>>>>>>> the differents from normal curseg is:
>>>>>>>>>> - it reuses existed segment type (CURSEG_XXX_NODE/DATA);
>>>>>>>>>> - it only exists in memory, its segno, blkofs, summary will not b
>>>>>>>>>>     persisted into checkpoint area;
>>>>>>>>>>
>>>>>>>>>> With this new feature, we can enhance scalability of log, special
>>>>>>>>>> allocators can be created for purposes:
>>>>>>>>>> - pure lfs allocator for aligned pinfile allocation or file
>>>>>>>>>> defragmentation
>>>>>>>>>> - pure ssr allocator for later feature
>>>>>>>>>>
>>>>>>>>>> So that, let's update aligned pinfile allocation to use this new
>>>>>>>>>> inmem curseg fwk.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>>>>>>>>> ---
>>>>>>>>>>     fs/f2fs/checkpoint.c |   7 ++-
>>>>>>>>>>     fs/f2fs/debug.c      |   6 ++-
>>>>>>>>>>     fs/f2fs/f2fs.h       |  12 +++--
>>>>>>>>>>     fs/f2fs/file.c       |   3 +-
>>>>>>>>>>     fs/f2fs/gc.c         |   2 +-
>>>>>>>>>>     fs/f2fs/segment.c    | 107 ++++++++++++++++++++++++++++++-------------
>>>>>>>>>>     fs/f2fs/segment.h    |  17 ++++---
>>>>>>>>>>     fs/f2fs/super.c      |   9 ++--
>>>>>>>>>>     8 files changed, 112 insertions(+), 51 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>>>>>>>>> index 1bb8278a1c4a..644a914af25a 100644
>>>>>>>>>> --- a/fs/f2fs/checkpoint.c
>>>>>>>>>> +++ b/fs/f2fs/checkpoint.c
>>>>>>>>>> @@ -1623,11 +1623,16 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>>>>>>>>     	f2fs_flush_sit_entries(sbi, cpc);
>>>>>>>>>> +	/* save inmem log status */
>>>>>>>>>> +	f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
>>>>>>>>>> +
>>>>>>>>>>     	err = do_checkpoint(sbi, cpc);
>>>>>>>>>>     	if (err)
>>>>>>>>>>     		f2fs_release_discard_addrs(sbi);
>>>>>>>>>>     	else
>>>>>>>>>>     		f2fs_clear_prefree_segments(sbi, cpc);
>>>>>>>>>> +
>>>>>>>>>> +	f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
>>>>>>>>>>     stop:
>>>>>>>>>>     	unblock_operations(sbi);
>>>>>>>>>>     	stat_inc_cp_count(sbi->stat_info);
>>>>>>>>>> @@ -1658,7 +1663,7 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
>>>>>>>>>>     	}
>>>>>>>>>>     	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
>>>>>>>>>> -			NR_CURSEG_TYPE - __cp_payload(sbi)) *
>>>>>>>>>> +			NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
>>>>>>>>>>     				F2FS_ORPHANS_PER_BLOCK;
>>>>>>>>>>     }
>>>>>>>>>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
>>>>>>>>>> index 4276c0f79beb..41a91aa8c262 100644
>>>>>>>>>> --- a/fs/f2fs/debug.c
>>>>>>>>>> +++ b/fs/f2fs/debug.c
>>>>>>>>>> @@ -164,7 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>>>>>>>>>>     		* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
>>>>>>>>>>     		/ 2;
>>>>>>>>>>     	si->util_invalid = 50 - si->util_free - si->util_valid;
>>>>>>>>>> -	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
>>>>>>>>>> +	for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
>>>>>>>>>>     		struct curseg_info *curseg = CURSEG_I(sbi, i);
>>>>>>>>>>     		si->curseg[i] = curseg->segno;
>>>>>>>>>>     		si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
>>>>>>>>>> @@ -393,6 +393,10 @@ static int stat_show(struct seq_file *s, void *v)
>>>>>>>>>>     			   si->dirty_seg[CURSEG_COLD_NODE],
>>>>>>>>>>     			   si->full_seg[CURSEG_COLD_NODE],
>>>>>>>>>>     			   si->valid_blks[CURSEG_COLD_NODE]);
>>>>>>>>>> +		seq_printf(s, "  - Pinned file: %8d %8d %8d\n",
>>>>>>>>>> +			   si->curseg[CURSEG_COLD_DATA_PINNED],
>>>>>>>>>> +			   si->cursec[CURSEG_COLD_DATA_PINNED],
>>>>>>>>>> +			   si->curzone[CURSEG_COLD_DATA_PINNED]);
>>>>>>>>>>     		seq_printf(s, "\n  - Valid: %d\n  - Dirty: %d\n",
>>>>>>>>>>     			   si->main_area_segs - si->dirty_count -
>>>>>>>>>>     			   si->prefree_count - si->free_segs,
>>>>>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>>>>>> index 7d6c5f8ce16b..f06c77066284 100644
>>>>>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>>>>>> @@ -996,7 +996,9 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
>>>>>>>>>>      */
>>>>>>>>>>     #define	NR_CURSEG_DATA_TYPE	(3)
>>>>>>>>>>     #define NR_CURSEG_NODE_TYPE	(3)
>>>>>>>>>> -#define NR_CURSEG_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
>>>>>>>>>> +#define NR_CURSEG_INMEM_TYPE	(1)
>>>>>>>>>> +#define NR_CURSEG_PERSIST_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
>>>>>>>>>> +#define NR_CURSEG_TYPE		(NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE)
>>>>>>>>>>     enum {
>>>>>>>>>>     	CURSEG_HOT_DATA	= 0,	/* directory entry blocks */
>>>>>>>>>> @@ -1005,8 +1007,10 @@ enum {
>>>>>>>>>>     	CURSEG_HOT_NODE,	/* direct node blocks of directory files */
>>>>>>>>>>     	CURSEG_WARM_NODE,	/* direct node blocks of normal files */
>>>>>>>>>>     	CURSEG_COLD_NODE,	/* indirect node blocks */
>>>>>>>>>> -	NO_CHECK_TYPE,
>>>>>>>>>> -	CURSEG_COLD_DATA_PINNED,/* cold data for pinned file */
>>>>>>>>>> +	NR_PERSISTENT_LOG,	/* number of persistent log */
>>>>>>>>>> +	CURSEG_COLD_DATA_PINNED = NR_PERSISTENT_LOG,
>>>>>>>>>> +				/* pinned file that needs consecutive block address */
>>>>>>>>>> +	NO_CHECK_TYPE,		/* number of persistent & inmem log */
>>>>>>>>>>     };
>>>>>>>>>>     struct flush_cmd {
>>>>>>>>>> @@ -3359,6 +3363,8 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
>>>>>>>>>>     int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
>>>>>>>>>>     void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
>>>>>>>>>>     int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
>>>>>>>>>> +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type);
>>>>>>>>>> +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type);
>>>>>>>>>>     void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
>>>>>>>>>>     					unsigned int start, unsigned int end);
>>>>>>>>>>     void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type);
>>>>>>>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>>>>>>>> index c10e82806c2a..8611ade06018 100644
>>>>>>>>>> --- a/fs/f2fs/file.c
>>>>>>>>>> +++ b/fs/f2fs/file.c
>>>>>>>>>> @@ -1656,13 +1656,14 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
>>>>>>>>>>     		}
>>>>>>>>>>     		down_write(&sbi->pin_sem);
>>>>>>>>>> -		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
>>>>>>>>>>     		f2fs_lock_op(sbi);
>>>>>>>>>>     		f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA);
>>>>>>>>>>     		f2fs_unlock_op(sbi);
>>>>>>>>>> +		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
>>>>>>>>>>     		err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
>>>>>>>>>> +
>>>>>>>>>>     		up_write(&sbi->pin_sem);
>>>>>>>>>>     		done += map.m_len;
>>>>>>>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>>>>>>>> index 3b718da69910..84807abe4e00 100644
>>>>>>>>>> --- a/fs/f2fs/gc.c
>>>>>>>>>> +++ b/fs/f2fs/gc.c
>>>>>>>>>> @@ -1448,7 +1448,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
>>>>>>>>>>     	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
>>>>>>>>>>     	/* Move out cursegs from the target range */
>>>>>>>>>> -	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
>>>>>>>>>> +	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++)
>>>>>>>>>>     		f2fs_allocate_segment_for_resize(sbi, type, start, end);
>>>>>>>>>>     	/* do GC to move out valid blocks in the range */
>>>>>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>>>>>>> index 5924b3965ae4..863ec6f1fb87 100644
>>>>>>>>>> --- a/fs/f2fs/segment.c
>>>>>>>>>> +++ b/fs/f2fs/segment.c
>>>>>>>>>> @@ -1958,7 +1958,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
>>>>>>>>>>     	mutex_lock(&dirty_i->seglist_lock);
>>>>>>>>>>     	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
>>>>>>>>>> -		__set_test_and_free(sbi, segno);
>>>>>>>>>> +		__set_test_and_free(sbi, segno, false);
>>>>>>>>>>     	mutex_unlock(&dirty_i->seglist_lock);
>>>>>>>>>>     }
>>>>>>>>>> @@ -2496,6 +2496,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
>>>>>>>>>>     	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>>>>>     	struct summary_footer *sum_footer;
>>>>>>>>>> +	curseg->inited = true;
>>>>>>>>>>     	curseg->segno = curseg->next_segno;
>>>>>>>>>>     	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
>>>>>>>>>>     	curseg->next_blkoff = 0;
>>>>>>>>>> @@ -2503,24 +2504,31 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
>>>>>>>>>>     	sum_footer = &(curseg->sum_blk->footer);
>>>>>>>>>>     	memset(sum_footer, 0, sizeof(struct summary_footer));
>>>>>>>>>> -	if (IS_DATASEG(type))
>>>>>>>>>> +	if (IS_DATASEG(curseg->seg_type))
>>>>>>>>>>     		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
>>>>>>>>>> -	if (IS_NODESEG(type))
>>>>>>>>>> +	if (IS_NODESEG(curseg->seg_type))
>>>>>>>>>>     		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
>>>>>>>>>> -	__set_sit_entry_type(sbi, type, curseg->segno, modified);
>>>>>>>>>> +	__set_sit_entry_type(sbi, curseg->seg_type, curseg->segno, modified);
>>>>>>>>>>     }
>>>>>>>>>>     static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>>>>>>>>>>     {
>>>>>>>>>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>>>>> +
>>>>>>>>>>     	/* if segs_per_sec is large than 1, we need to keep original policy. */
>>>>>>>>>>     	if (__is_large_section(sbi))
>>>>>>>>>> -		return CURSEG_I(sbi, type)->segno;
>>>>>>>>>> +		return curseg->segno;
>>>>>>>>>> +
>>>>>>>>>> +	/* inmem log may not locate on any segment after mount */
>>>>>>>>>> +	if (!curseg->inited)
>>>>>>>>>> +		return 0;
>>>>>>>>>>     	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
>>>>>>>>>>     		return 0;
>>>>>>>>>>     	if (test_opt(sbi, NOHEAP) &&
>>>>>>>>>> -		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
>>>>>>>>>> +		(curseg->seg_type == CURSEG_HOT_DATA ||
>>>>>>>>>> +		IS_NODESEG(curseg->seg_type)))
>>>>>>>>>>     		return 0;
>>>>>>>>>>     	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
>>>>>>>>>> @@ -2530,7 +2538,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>>>>>>>>>>     	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
>>>>>>>>>>     		return 0;
>>>>>>>>>> -	return CURSEG_I(sbi, type)->segno;
>>>>>>>>>> +	return curseg->segno;
>>>>>>>>>>     }
>>>>>>>>>>     /*
>>>>>>>>>> @@ -2540,12 +2548,14 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>>>>>>>>>>     static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
>>>>>>>>>>     {
>>>>>>>>>>     	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>>>>> +	unsigned short seg_type = curseg->seg_type;
>>>>>>>>>>     	unsigned int segno = curseg->segno;
>>>>>>>>>>     	int dir = ALLOC_LEFT;
>>>>>>>>>> -	write_sum_page(sbi, curseg->sum_blk,
>>>>>>>>>> +	if (curseg->inited)
>>>>>>>>>> +		write_sum_page(sbi, curseg->sum_blk,
>>>>>>>>>>     				GET_SUM_BLOCK(sbi, segno));
>>>>>>>>>> -	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
>>>>>>>>>> +	if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
>>>>>>>>>>     		dir = ALLOC_RIGHT;
>>>>>>>>>>     	if (test_opt(sbi, NOHEAP))
>>>>>>>>>> @@ -2622,6 +2632,43 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
>>>>>>>>>>     	f2fs_put_page(sum_page, 1);
>>>>>>>>>>     }
>>>>>>>>>> +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
>>>>>>>>>> +{
>>>>>>>>>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>>>>> +
>>>>>>>>>> +	mutex_lock(&curseg->curseg_mutex);
>>>>>>>>>> +	if (!curseg->inited)
>>>>>>>>>> +		goto out;
>>>>>>>>>> +
>>>>>>>>>> +	if (get_valid_blocks(sbi, curseg->segno, false)) {
>>>>>>>>>> +		write_sum_page(sbi, curseg->sum_blk,
>>>>>>>>>> +				GET_SUM_BLOCK(sbi, curseg->segno));
>>>>>>>>>> +	} else {
>>>>>>>>>> +		mutex_lock(&DIRTY_I(sbi)->seglist_lock);
>>>>>>>>>> +		__set_test_and_free(sbi, curseg->segno, true);
>>>>>>>>>> +		mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
>>>>>>>>>> +	}
>>>>>>>>>> +out:
>>>>>>>>>> +	mutex_unlock(&curseg->curseg_mutex);
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
>>>>>>>>>> +{
>>>>>>>>>> +	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>>>>> +
>>>>>>>>>> +	mutex_lock(&curseg->curseg_mutex);
>>>>>>>>>> +	if (!curseg->inited)
>>>>>>>>>> +		goto out;
>>>>>>>>>> +	if (get_valid_blocks(sbi, curseg->segno, false))
>>>>>>>>>> +		goto out;
>>>>>>>>>> +
>>>>>>>>>> +	mutex_lock(&DIRTY_I(sbi)->seglist_lock);
>>>>>>>>>> +	__set_test_and_inuse(sbi, curseg->segno);
>>>>>>>>>> +	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
>>>>>>>>>> +out:
>>>>>>>>>> +	mutex_unlock(&curseg->curseg_mutex);
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>>     static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
>>>>>>>>>>     {
>>>>>>>>>>     	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>>>>> @@ -2738,11 +2785,15 @@ void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
>>>>>>>>>>     	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>>>>>     	unsigned int old_segno;
>>>>>>>>>> +	if (!curseg->inited)
>>>>>>>>>> +		goto alloc;
>>>>>>>>>> +
>>>>>>>>>>     	if (!curseg->next_blkoff &&
>>>>>>>>>>     		!get_valid_blocks(sbi, curseg->segno, false) &&
>>>>>>>>>>     		!get_ckpt_valid_blocks(sbi, curseg->segno))
>>>>>>>>>>     		return;
>>>>>>>>>> +alloc:
>>>>>>>>>>     	old_segno = curseg->segno;
>>>>>>>>>>     	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
>>>>>>>>>>     	locate_dirty_segment(sbi, old_segno);
>>>>>>>>>> @@ -3126,19 +3177,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>>>>>>>>>>     {
>>>>>>>>>>     	struct sit_info *sit_i = SIT_I(sbi);
>>>>>>>>>>     	struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>>>>> -	bool put_pin_sem = false;
>>>>>>>>>> -
>>>>>>>>>> -	if (type == CURSEG_COLD_DATA) {
>>>>>>>>>> -		/* GC during CURSEG_COLD_DATA_PINNED allocation */
>>>>>>>>>> -		if (down_read_trylock(&sbi->pin_sem)) {
>>>>>>>>>> -			put_pin_sem = true;
>>>>>>>>>> -		} else {
>>>>>>>>>> -			type = CURSEG_WARM_DATA;
>>>>>>>>>> -			curseg = CURSEG_I(sbi, type);
>>>>>>>>>> -		}
>>>>>>>>>> -	} else if (type == CURSEG_COLD_DATA_PINNED) {
>>>>>>>>>> -		type = CURSEG_COLD_DATA;
>>>>>>>>>> -	}
>>>>>>>>>>     	down_read(&SM_I(sbi)->curseg_lock);
>>>>>>>>>> @@ -3204,9 +3242,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>>>>>>>>>>     	mutex_unlock(&curseg->curseg_mutex);
>>>>>>>>>>     	up_read(&SM_I(sbi)->curseg_lock);
>>>>>>>>>> -
>>>>>>>>>> -	if (put_pin_sem)
>>>>>>>>>> -		up_read(&sbi->pin_sem);
>>>>>>>>>>     }
>>>>>>>>>>     static void update_device_state(struct f2fs_io_info *fio)
>>>>>>>>>> @@ -3574,7 +3609,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
>>>>>>>>>>     		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
>>>>>>>>>>     							CURSEG_HOT_DATA]);
>>>>>>>>>>     		if (__exist_node_summaries(sbi))
>>>>>>>>>> -			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
>>>>>>>>>> +			blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
>>>>>>>>>>     		else
>>>>>>>>>>     			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
>>>>>>>>>>     	} else {
>>>>>>>>>> @@ -3652,8 +3687,9 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
>>>>>>>>>>     	}
>>>>>>>>>>     	if (__exist_node_summaries(sbi))
>>>>>>>>>> -		f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
>>>>>>>>>> -					NR_CURSEG_TYPE - type, META_CP, true);
>>>>>>>>>> +		f2fs_ra_meta_pages(sbi,
>>>>>>>>>> +				sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
>>>>>>>>>> +				NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
>>>>>>>>>>     	for (; type <= CURSEG_COLD_NODE; type++) {
>>>>>>>>>>     		err = read_normal_summaries(sbi, type);
>>>>>>>>>> @@ -4155,14 +4191,14 @@ static int build_curseg(struct f2fs_sb_info *sbi)
>>>>>>>>>>     	struct curseg_info *array;
>>>>>>>>>>     	int i;
>>>>>>>>>> -	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
>>>>>>>>>> -			     GFP_KERNEL);
>>>>>>>>>> +	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
>>>>>>>>>> +					sizeof(*array)), GFP_KERNEL);
>>>>>>>>>>     	if (!array)
>>>>>>>>>>     		return -ENOMEM;
>>>>>>>>>>     	SM_I(sbi)->curseg_array = array;
>>>>>>>>>> -	for (i = 0; i < NR_CURSEG_TYPE; i++) {
>>>>>>>>>> +	for (i = 0; i < NO_CHECK_TYPE; i++) {
>>>>>>>>>>     		mutex_init(&array[i].curseg_mutex);
>>>>>>>>>>     		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
>>>>>>>>>>     		if (!array[i].sum_blk)
>>>>>>>>>> @@ -4172,8 +4208,13 @@ static int build_curseg(struct f2fs_sb_info *sbi)
>>>>>>>>>>     				sizeof(struct f2fs_journal), GFP_KERNEL);
>>>>>>>>>>     		if (!array[i].journal)
>>>>>>>>>>     			return -ENOMEM;
>>>>>>>>>> +		if (i < NR_PERSISTENT_LOG)
>>>>>>>>>> +			array[i].seg_type = CURSEG_HOT_DATA + i;
>>>>>>>>>> +		else if (i == CURSEG_COLD_DATA_PINNED)
>>>>>>>>>> +			array[i].seg_type = CURSEG_COLD_DATA;
>>>>>>>>>>     		array[i].segno = NULL_SEGNO;
>>>>>>>>>>     		array[i].next_blkoff = 0;
>>>>>>>>>> +		array[i].inited = false;
>>>>>>>>>>     	}
>>>>>>>>>>     	return restore_curseg_summaries(sbi);
>>>>>>>>>>     }
>>>>>>>>>> @@ -4408,7 +4449,7 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
>>>>>>>>>>     	 * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
>>>>>>>>>>     	 * In LFS curseg, all blkaddr after .next_blkoff should be unused.
>>>>>>>>>>     	 */
>>>>>>>>>> -	for (i = 0; i < NO_CHECK_TYPE; i++) {
>>>>>>>>>> +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
>>>>>>>>>>     		struct curseg_info *curseg = CURSEG_I(sbi, i);
>>>>>>>>>>     		struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
>>>>>>>>>>     		unsigned int blkofs = curseg->next_blkoff;
>>>>>>>>>> @@ -4637,7 +4678,7 @@ int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
>>>>>>>>>>     {
>>>>>>>>>>     	int i, ret;
>>>>>>>>>> -	for (i = 0; i < NO_CHECK_TYPE; i++) {
>>>>>>>>>> +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
>>>>>>>>>>     		ret = fix_curseg_write_pointer(sbi, i);
>>>>>>>>>>     		if (ret)
>>>>>>>>>>     			return ret;
>>>>>>>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>>>>>>>>>> index f261e3e6a69b..8ff261550cbb 100644
>>>>>>>>>> --- a/fs/f2fs/segment.h
>>>>>>>>>> +++ b/fs/f2fs/segment.h
>>>>>>>>>> @@ -22,7 +22,7 @@
>>>>>>>>>>     #define GET_R2L_SEGNO(free_i, segno)	((segno) + (free_i)->start_segno)
>>>>>>>>>>     #define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
>>>>>>>>>> -#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE)
>>>>>>>>>> +#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
>>>>>>>>>>     #define IS_HOT(t)	((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
>>>>>>>>>>     #define IS_WARM(t)	((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
>>>>>>>>>> @@ -34,7 +34,8 @@
>>>>>>>>>>     	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) ||	\
>>>>>>>>>>     	 ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) ||	\
>>>>>>>>>>     	 ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) ||	\
>>>>>>>>>> -	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
>>>>>>>>>> +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) ||	\
>>>>>>>>>> +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno))
>>>>>>>>>>     #define IS_CURSEC(sbi, secno)						\
>>>>>>>>>>     	(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno /		\
>>>>>>>>>> @@ -48,7 +49,9 @@
>>>>>>>>>>     	 ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno /		\
>>>>>>>>>>     	  (sbi)->segs_per_sec) ||	\
>>>>>>>>>>     	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno /		\
>>>>>>>>>> -	  (sbi)->segs_per_sec))	\
>>>>>>>>>> +	  (sbi)->segs_per_sec) ||	\
>>>>>>>>>> +	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno /	\
>>>>>>>>>> +	  (sbi)->segs_per_sec))
>>>>>>>>>>     #define MAIN_BLKADDR(sbi)						\
>>>>>>>>>>     	(SM_I(sbi) ? SM_I(sbi)->main_blkaddr : 				\
>>>>>>>>>> @@ -288,10 +291,12 @@ struct curseg_info {
>>>>>>>>>>     	struct rw_semaphore journal_rwsem;	/* protect journal area */
>>>>>>>>>>     	struct f2fs_journal *journal;		/* cached journal info */
>>>>>>>>>>     	unsigned char alloc_type;		/* current allocation type */
>>>>>>>>>> +	unsigned short seg_type;		/* segment type like CURSEG_XXX_TYPE */
>>>>>>>>>>     	unsigned int segno;			/* current segment number */
>>>>>>>>>>     	unsigned short next_blkoff;		/* next block offset to write */
>>>>>>>>>>     	unsigned int zone;			/* current zone number */
>>>>>>>>>>     	unsigned int next_segno;		/* preallocated segment */
>>>>>>>>>> +	bool inited;				/* indicate inmem log is inited */
>>>>>>>>>>     };
>>>>>>>>>>     struct sit_entry_set {
>>>>>>>>>> @@ -305,8 +310,6 @@ struct sit_entry_set {
>>>>>>>>>>      */
>>>>>>>>>>     static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
>>>>>>>>>>     {
>>>>>>>>>> -	if (type == CURSEG_COLD_DATA_PINNED)
>>>>>>>>>> -		type = CURSEG_COLD_DATA;
>>>>>>>>>>     	return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
>>>>>>>>>>     }
>>>>>>>>>> @@ -438,7 +441,7 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi,
>>>>>>>>>>     }
>>>>>>>>>>     static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
>>>>>>>>>> -		unsigned int segno)
>>>>>>>>>> +		unsigned int segno, bool inmem)
>>>>>>>>>>     {
>>>>>>>>>>     	struct free_segmap_info *free_i = FREE_I(sbi);
>>>>>>>>>>     	unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
>>>>>>>>>> @@ -449,7 +452,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
>>>>>>>>>>     	if (test_and_clear_bit(segno, free_i->free_segmap)) {
>>>>>>>>>>     		free_i->free_segments++;
>>>>>>>>>> -		if (IS_CURSEC(sbi, secno))
>>>>>>>>>> +		if (!inmem && IS_CURSEC(sbi, secno))
>>>>>>>>>>     			goto skip_free;
>>>>>>>>>>     		next = find_next_bit(free_i->free_segmap,
>>>>>>>>>>     				start_segno + sbi->segs_per_sec, start_segno);
>>>>>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>>>>>> index 80cb7cd358f8..0fefa130585f 100644
>>>>>>>>>> --- a/fs/f2fs/super.c
>>>>>>>>>> +++ b/fs/f2fs/super.c
>>>>>>>>>> @@ -575,7 +575,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>>>>>>>>>>     		case Opt_active_logs:
>>>>>>>>>>     			if (args->from && match_int(args, &arg))
>>>>>>>>>>     				return -EINVAL;
>>>>>>>>>> -			if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
>>>>>>>>>> +			if (arg != 2 && arg != 4 &&
>>>>>>>>>> +				arg != NR_CURSEG_PERSIST_TYPE)
>>>>>>>>>>     				return -EINVAL;
>>>>>>>>>>     			F2FS_OPTION(sbi).active_logs = arg;
>>>>>>>>>>     			break;
>>>>>>>>>> @@ -981,7 +982,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>>>>>>>>>>     	}
>>>>>>>>>>     	/* Not pass down write hints if the number of active logs is lesser
>>>>>>>>>> -	 * than NR_CURSEG_TYPE.
>>>>>>>>>> +	 * than NR_CURSEG_PERSIST_TYPE.
>>>>>>>>>>     	 */
>>>>>>>>>>     	if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE)
>>>>>>>>>>     		F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
>>>>>>>>>> @@ -1614,7 +1615,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>>>>>>>>>>     static void default_options(struct f2fs_sb_info *sbi)
>>>>>>>>>>     {
>>>>>>>>>>     	/* init some FS parameters */
>>>>>>>>>> -	F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE;
>>>>>>>>>> +	F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE;
>>>>>>>>>>     	F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
>>>>>>>>>>     	F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
>>>>>>>>>>     	F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
>>>>>>>>>> @@ -2946,7 +2947,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
>>>>>>>>>>     	cp_payload = __cp_payload(sbi);
>>>>>>>>>>     	if (cp_pack_start_sum < cp_payload + 1 ||
>>>>>>>>>>     		cp_pack_start_sum > blocks_per_seg - 1 -
>>>>>>>>>> -			NR_CURSEG_TYPE) {
>>>>>>>>>> +			NR_CURSEG_PERSIST_TYPE) {
>>>>>>>>>>     		f2fs_err(sbi, "Wrong cp_pack_start_sum: %u",
>>>>>>>>>>     			 cp_pack_start_sum);
>>>>>>>>>>     		return 1;
>>>>>>>>>> -- 
>>>>>>>>>> 2.26.2
>>>>>>>>> .
>>>>>>>>>
>>>>>>> .
>>>>>>>
>>>>> .
>>>>>
>>>>
>>>>
>>>> _______________________________________________
>>>> Linux-f2fs-devel mailing list
>>>> Linux-f2fs-devel@lists.sourceforge.net
>>>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>>>> .
>>>>
>>>
>>>
>>> _______________________________________________
>>> Linux-f2fs-devel mailing list
>>> Linux-f2fs-devel@lists.sourceforge.net
>>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>>> .
>>>
> .
> 


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg
  2020-08-04  2:53                   ` Chao Yu
@ 2020-08-04  3:56                     ` Jaegeuk Kim
  0 siblings, 0 replies; 19+ messages in thread
From: Jaegeuk Kim @ 2020-08-04  3:56 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 08/04, Chao Yu wrote:
> On 2020/8/4 10:44, Jaegeuk Kim wrote:
> > On 08/04, Chao Yu wrote:
> > > On 2020/7/25 16:42, Chao Yu wrote:
> > > > On 2020/7/16 9:24, Chao Yu wrote:
> > > > > On 2020/7/16 3:07, Jaegeuk Kim wrote:
> > > > > > On 07/15, Chao Yu wrote:
> > > > > > > On 2020/7/7 11:51, Jaegeuk Kim wrote:
> > > > > > > > On 07/07, Chao Yu wrote:
> > > > > > > > > On 2020/7/7 11:21, Jaegeuk Kim wrote:
> > > > > > > > > > Hi Chao,
> > > > > > > > > > 
> > > > > > > > > > Do you have any brief design doc to present the idea?
> > > > > > > > > 
> > > > > > > > > Hi Jaegeuk,
> > > > > > > > > 
> > > > > > > > > You mean this whole patchset, right?
> > > > > > > > > 
> > > > > > > > > I can add a brief design description in patch 0/5.
> > > > > > > > 
> > > > > > > > Yeah, it's a bit hard to understand the whole flow.
> > > > > > > 
> > > > > > > Jaegeuk,
> > > > > > > 
> > > > > > > Do you have time to take a look at this idea summarized in
> > > > > > > [PATCH 0/5]'s cover letter?
> > > > > > 
> > > > > > > Sorry, I couldn't afford to sit down to review the design.
> > > > > > Let me give it a try soon.
> > > > > 
> > > > > Alright, let me know if you have any question about the idea.
> > > > 
> > > > Ping,
> > > > 
> > > > Jaegeuk, are you still too busy...? :P
> > > 
> > > Ping again...
> > > 
> > > Any thoughts about the remaining patches in the patchset?
> > 
> > Ah, I was waiting for another patch-set from you.
> 
> Oops, I thought that you had not finished reviewing all the patches...
> 
> Anyway, let me send v2 w/ changes mentioned by you.
> 
> BTW, we can discuss how to enable such a feature; the options can be:
> - enable when sb feature F2FS_FEATURE_ATGC was set
> - enable via mount option

Looks like mount option would be better.

> - enable via sysfs
> 
> > 
> > > 
> > > > 
> > > > Thanks,
> > > > 
> > > > > 
> > > > > > 
> > > > > > > 
> > > > > > > > 
> > > > > > > > Thanks,
> > > > > > > > 
> > > > > > > > > 
> > > > > > > > > > 
> > > > > > > > > > Thanks,
> > > > > > > > > > 
> > > > > > > > > > On 06/30, Chao Yu wrote:
> > > > > > > > > > > Previous implementation of aligned pinfile allocation will:
> > > > > > > > > > > - allocate new segment on cold data log no matter whether last used
> > > > > > > > > > > segment is partially used or not, it makes IOs more random;
> > > > > > > > > > > - force concurrent cold data/GCed IO going into warm data area, it
> > > > > > > > > > > can make a bad effect on hot/cold data separation;
> > > > > > > > > > > 
> > > > > > > > > > > In this patch, we introduce a new type of log named 'inmem curseg',
> > > > > > > > > > > > the differences from a normal curseg are:
> > > > > > > > > > > > - it reuses an existing segment type (CURSEG_XXX_NODE/DATA);
> > > > > > > > > > > > - it only exists in memory, its segno, blkofs, summary will not be
> > > > > > > > > > >     persisted into checkpoint area;
> > > > > > > > > > > 
> > > > > > > > > > > With this new feature, we can enhance scalability of log, special
> > > > > > > > > > > allocators can be created for purposes:
> > > > > > > > > > > - pure lfs allocator for aligned pinfile allocation or file
> > > > > > > > > > > defragmentation
> > > > > > > > > > > - pure ssr allocator for later feature
> > > > > > > > > > > 
> > > > > > > > > > > So that, let's update aligned pinfile allocation to use this new
> > > > > > > > > > > inmem curseg fwk.
> > > > > > > > > > > 
> > > > > > > > > > > Signed-off-by: Chao Yu <yuchao0@huawei.com>
> > > > > > > > > > > ---
> > > > > > > > > > >     fs/f2fs/checkpoint.c |   7 ++-
> > > > > > > > > > >     fs/f2fs/debug.c      |   6 ++-
> > > > > > > > > > >     fs/f2fs/f2fs.h       |  12 +++--
> > > > > > > > > > >     fs/f2fs/file.c       |   3 +-
> > > > > > > > > > >     fs/f2fs/gc.c         |   2 +-
> > > > > > > > > > >     fs/f2fs/segment.c    | 107 ++++++++++++++++++++++++++++++-------------
> > > > > > > > > > >     fs/f2fs/segment.h    |  17 ++++---
> > > > > > > > > > >     fs/f2fs/super.c      |   9 ++--
> > > > > > > > > > >     8 files changed, 112 insertions(+), 51 deletions(-)
> > > > > > > > > > > 
> > > > > > > > > > > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > > > > > > > > > > index 1bb8278a1c4a..644a914af25a 100644
> > > > > > > > > > > --- a/fs/f2fs/checkpoint.c
> > > > > > > > > > > +++ b/fs/f2fs/checkpoint.c
> > > > > > > > > > > @@ -1623,11 +1623,16 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> > > > > > > > > > >     	f2fs_flush_sit_entries(sbi, cpc);
> > > > > > > > > > > +	/* save inmem log status */
> > > > > > > > > > > +	f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
> > > > > > > > > > > +
> > > > > > > > > > >     	err = do_checkpoint(sbi, cpc);
> > > > > > > > > > >     	if (err)
> > > > > > > > > > >     		f2fs_release_discard_addrs(sbi);
> > > > > > > > > > >     	else
> > > > > > > > > > >     		f2fs_clear_prefree_segments(sbi, cpc);
> > > > > > > > > > > +
> > > > > > > > > > > +	f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
> > > > > > > > > > >     stop:
> > > > > > > > > > >     	unblock_operations(sbi);
> > > > > > > > > > >     	stat_inc_cp_count(sbi->stat_info);
> > > > > > > > > > > @@ -1658,7 +1663,7 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
> > > > > > > > > > >     	}
> > > > > > > > > > >     	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
> > > > > > > > > > > -			NR_CURSEG_TYPE - __cp_payload(sbi)) *
> > > > > > > > > > > +			NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
> > > > > > > > > > >     				F2FS_ORPHANS_PER_BLOCK;
> > > > > > > > > > >     }
> > > > > > > > > > > diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> > > > > > > > > > > index 4276c0f79beb..41a91aa8c262 100644
> > > > > > > > > > > --- a/fs/f2fs/debug.c
> > > > > > > > > > > +++ b/fs/f2fs/debug.c
> > > > > > > > > > > @@ -164,7 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
> > > > > > > > > > >     		* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
> > > > > > > > > > >     		/ 2;
> > > > > > > > > > >     	si->util_invalid = 50 - si->util_free - si->util_valid;
> > > > > > > > > > > -	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
> > > > > > > > > > > +	for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
> > > > > > > > > > >     		struct curseg_info *curseg = CURSEG_I(sbi, i);
> > > > > > > > > > >     		si->curseg[i] = curseg->segno;
> > > > > > > > > > >     		si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
> > > > > > > > > > > @@ -393,6 +393,10 @@ static int stat_show(struct seq_file *s, void *v)
> > > > > > > > > > >     			   si->dirty_seg[CURSEG_COLD_NODE],
> > > > > > > > > > >     			   si->full_seg[CURSEG_COLD_NODE],
> > > > > > > > > > >     			   si->valid_blks[CURSEG_COLD_NODE]);
> > > > > > > > > > > +		seq_printf(s, "  - Pinned file: %8d %8d %8d\n",
> > > > > > > > > > > +			   si->curseg[CURSEG_COLD_DATA_PINNED],
> > > > > > > > > > > +			   si->cursec[CURSEG_COLD_DATA_PINNED],
> > > > > > > > > > > +			   si->curzone[CURSEG_COLD_DATA_PINNED]);
> > > > > > > > > > >     		seq_printf(s, "\n  - Valid: %d\n  - Dirty: %d\n",
> > > > > > > > > > >     			   si->main_area_segs - si->dirty_count -
> > > > > > > > > > >     			   si->prefree_count - si->free_segs,
> > > > > > > > > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > > > > > > > > > index 7d6c5f8ce16b..f06c77066284 100644
> > > > > > > > > > > --- a/fs/f2fs/f2fs.h
> > > > > > > > > > > +++ b/fs/f2fs/f2fs.h
> > > > > > > > > > > @@ -996,7 +996,9 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
> > > > > > > > > > >      */
> > > > > > > > > > >     #define	NR_CURSEG_DATA_TYPE	(3)
> > > > > > > > > > >     #define NR_CURSEG_NODE_TYPE	(3)
> > > > > > > > > > > -#define NR_CURSEG_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
> > > > > > > > > > > +#define NR_CURSEG_INMEM_TYPE	(1)
> > > > > > > > > > > +#define NR_CURSEG_PERSIST_TYPE	(NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
> > > > > > > > > > > +#define NR_CURSEG_TYPE		(NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE)
> > > > > > > > > > >     enum {
> > > > > > > > > > >     	CURSEG_HOT_DATA	= 0,	/* directory entry blocks */
> > > > > > > > > > > @@ -1005,8 +1007,10 @@ enum {
> > > > > > > > > > >     	CURSEG_HOT_NODE,	/* direct node blocks of directory files */
> > > > > > > > > > >     	CURSEG_WARM_NODE,	/* direct node blocks of normal files */
> > > > > > > > > > >     	CURSEG_COLD_NODE,	/* indirect node blocks */
> > > > > > > > > > > -	NO_CHECK_TYPE,
> > > > > > > > > > > -	CURSEG_COLD_DATA_PINNED,/* cold data for pinned file */
> > > > > > > > > > > +	NR_PERSISTENT_LOG,	/* number of persistent log */
> > > > > > > > > > > +	CURSEG_COLD_DATA_PINNED = NR_PERSISTENT_LOG,
> > > > > > > > > > > +				/* pinned file that needs consecutive block address */
> > > > > > > > > > > +	NO_CHECK_TYPE,		/* number of persistent & inmem log */
> > > > > > > > > > >     };
> > > > > > > > > > >     struct flush_cmd {
> > > > > > > > > > > @@ -3359,6 +3363,8 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
> > > > > > > > > > >     int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
> > > > > > > > > > >     void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
> > > > > > > > > > >     int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
> > > > > > > > > > > +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type);
> > > > > > > > > > > +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type);
> > > > > > > > > > >     void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
> > > > > > > > > > >     					unsigned int start, unsigned int end);
> > > > > > > > > > >     void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type);
> > > > > > > > > > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > > > > > > > > > index c10e82806c2a..8611ade06018 100644
> > > > > > > > > > > --- a/fs/f2fs/file.c
> > > > > > > > > > > +++ b/fs/f2fs/file.c
> > > > > > > > > > > @@ -1656,13 +1656,14 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
> > > > > > > > > > >     		}
> > > > > > > > > > >     		down_write(&sbi->pin_sem);
> > > > > > > > > > > -		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
> > > > > > > > > > >     		f2fs_lock_op(sbi);
> > > > > > > > > > >     		f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA);
> > > > > > > > > > >     		f2fs_unlock_op(sbi);
> > > > > > > > > > > +		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
> > > > > > > > > > >     		err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
> > > > > > > > > > > +
> > > > > > > > > > >     		up_write(&sbi->pin_sem);
> > > > > > > > > > >     		done += map.m_len;
> > > > > > > > > > > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > > > > > > > > > > index 3b718da69910..84807abe4e00 100644
> > > > > > > > > > > --- a/fs/f2fs/gc.c
> > > > > > > > > > > +++ b/fs/f2fs/gc.c
> > > > > > > > > > > @@ -1448,7 +1448,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
> > > > > > > > > > >     	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
> > > > > > > > > > >     	/* Move out cursegs from the target range */
> > > > > > > > > > > -	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
> > > > > > > > > > > +	for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++)
> > > > > > > > > > >     		f2fs_allocate_segment_for_resize(sbi, type, start, end);
> > > > > > > > > > >     	/* do GC to move out valid blocks in the range */
> > > > > > > > > > > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > > > > > > > > > > index 5924b3965ae4..863ec6f1fb87 100644
> > > > > > > > > > > --- a/fs/f2fs/segment.c
> > > > > > > > > > > +++ b/fs/f2fs/segment.c
> > > > > > > > > > > @@ -1958,7 +1958,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
> > > > > > > > > > >     	mutex_lock(&dirty_i->seglist_lock);
> > > > > > > > > > >     	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
> > > > > > > > > > > -		__set_test_and_free(sbi, segno);
> > > > > > > > > > > +		__set_test_and_free(sbi, segno, false);
> > > > > > > > > > >     	mutex_unlock(&dirty_i->seglist_lock);
> > > > > > > > > > >     }
> > > > > > > > > > > @@ -2496,6 +2496,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
> > > > > > > > > > >     	struct curseg_info *curseg = CURSEG_I(sbi, type);
> > > > > > > > > > >     	struct summary_footer *sum_footer;
> > > > > > > > > > > +	curseg->inited = true;
> > > > > > > > > > >     	curseg->segno = curseg->next_segno;
> > > > > > > > > > >     	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
> > > > > > > > > > >     	curseg->next_blkoff = 0;
> > > > > > > > > > > @@ -2503,24 +2504,31 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
> > > > > > > > > > >     	sum_footer = &(curseg->sum_blk->footer);
> > > > > > > > > > >     	memset(sum_footer, 0, sizeof(struct summary_footer));
> > > > > > > > > > > -	if (IS_DATASEG(type))
> > > > > > > > > > > +	if (IS_DATASEG(curseg->seg_type))
> > > > > > > > > > >     		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
> > > > > > > > > > > -	if (IS_NODESEG(type))
> > > > > > > > > > > +	if (IS_NODESEG(curseg->seg_type))
> > > > > > > > > > >     		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
> > > > > > > > > > > -	__set_sit_entry_type(sbi, type, curseg->segno, modified);
> > > > > > > > > > > +	__set_sit_entry_type(sbi, curseg->seg_type, curseg->segno, modified);
> > > > > > > > > > >     }
> > > > > > > > > > >     static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
> > > > > > > > > > >     {
> > > > > > > > > > > +	struct curseg_info *curseg = CURSEG_I(sbi, type);
> > > > > > > > > > > +
> > > > > > > > > > >     	/* if segs_per_sec is large than 1, we need to keep original policy. */
> > > > > > > > > > >     	if (__is_large_section(sbi))
> > > > > > > > > > > -		return CURSEG_I(sbi, type)->segno;
> > > > > > > > > > > +		return curseg->segno;
> > > > > > > > > > > +
> > > > > > > > > > > +	/* inmem log may not locate on any segment after mount */
> > > > > > > > > > > +	if (!curseg->inited)
> > > > > > > > > > > +		return 0;
> > > > > > > > > > >     	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
> > > > > > > > > > >     		return 0;
> > > > > > > > > > >     	if (test_opt(sbi, NOHEAP) &&
> > > > > > > > > > > -		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
> > > > > > > > > > > +		(curseg->seg_type == CURSEG_HOT_DATA ||
> > > > > > > > > > > +		IS_NODESEG(curseg->seg_type)))
> > > > > > > > > > >     		return 0;
> > > > > > > > > > >     	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
> > > > > > > > > > > @@ -2530,7 +2538,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
> > > > > > > > > > >     	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
> > > > > > > > > > >     		return 0;
> > > > > > > > > > > -	return CURSEG_I(sbi, type)->segno;
> > > > > > > > > > > +	return curseg->segno;
> > > > > > > > > > >     }
> > > > > > > > > > >     /*
> > > > > > > > > > > @@ -2540,12 +2548,14 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
> > > > > > > > > > >     static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
> > > > > > > > > > >     {
> > > > > > > > > > >     	struct curseg_info *curseg = CURSEG_I(sbi, type);
> > > > > > > > > > > +	unsigned short seg_type = curseg->seg_type;
> > > > > > > > > > >     	unsigned int segno = curseg->segno;
> > > > > > > > > > >     	int dir = ALLOC_LEFT;
> > > > > > > > > > > -	write_sum_page(sbi, curseg->sum_blk,
> > > > > > > > > > > +	if (curseg->inited)
> > > > > > > > > > > +		write_sum_page(sbi, curseg->sum_blk,
> > > > > > > > > > >     				GET_SUM_BLOCK(sbi, segno));
> > > > > > > > > > > -	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
> > > > > > > > > > > +	if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
> > > > > > > > > > >     		dir = ALLOC_RIGHT;
> > > > > > > > > > >     	if (test_opt(sbi, NOHEAP))
> > > > > > > > > > > @@ -2622,6 +2632,43 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
> > > > > > > > > > >     	f2fs_put_page(sum_page, 1);
> > > > > > > > > > >     }
> > > > > > > > > > > +void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
> > > > > > > > > > > +{
> > > > > > > > > > > +	struct curseg_info *curseg = CURSEG_I(sbi, type);
> > > > > > > > > > > +
> > > > > > > > > > > +	mutex_lock(&curseg->curseg_mutex);
> > > > > > > > > > > +	if (!curseg->inited)
> > > > > > > > > > > +		goto out;
> > > > > > > > > > > +
> > > > > > > > > > > +	if (get_valid_blocks(sbi, curseg->segno, false)) {
> > > > > > > > > > > +		write_sum_page(sbi, curseg->sum_blk,
> > > > > > > > > > > +				GET_SUM_BLOCK(sbi, curseg->segno));
> > > > > > > > > > > +	} else {
> > > > > > > > > > > +		mutex_lock(&DIRTY_I(sbi)->seglist_lock);
> > > > > > > > > > > +		__set_test_and_free(sbi, curseg->segno, true);
> > > > > > > > > > > +		mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
> > > > > > > > > > > +	}
> > > > > > > > > > > +out:
> > > > > > > > > > > +	mutex_unlock(&curseg->curseg_mutex);
> > > > > > > > > > > +}
> > > > > > > > > > > +
> > > > > > > > > > > +void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
> > > > > > > > > > > +{
> > > > > > > > > > > +	struct curseg_info *curseg = CURSEG_I(sbi, type);
> > > > > > > > > > > +
> > > > > > > > > > > +	mutex_lock(&curseg->curseg_mutex);
> > > > > > > > > > > +	if (!curseg->inited)
> > > > > > > > > > > +		goto out;
> > > > > > > > > > > +	if (get_valid_blocks(sbi, curseg->segno, false))
> > > > > > > > > > > +		goto out;
> > > > > > > > > > > +
> > > > > > > > > > > +	mutex_lock(&DIRTY_I(sbi)->seglist_lock);
> > > > > > > > > > > +	__set_test_and_inuse(sbi, curseg->segno);
> > > > > > > > > > > +	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
> > > > > > > > > > > +out:
> > > > > > > > > > > +	mutex_unlock(&curseg->curseg_mutex);
> > > > > > > > > > > +}
> > > > > > > > > > > +
> > > > > > > > > > >     static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
> > > > > > > > > > >     {
> > > > > > > > > > >     	struct curseg_info *curseg = CURSEG_I(sbi, type);
> > > > > > > > > > > @@ -2738,11 +2785,15 @@ void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
> > > > > > > > > > >     	struct curseg_info *curseg = CURSEG_I(sbi, type);
> > > > > > > > > > >     	unsigned int old_segno;
> > > > > > > > > > > +	if (!curseg->inited)
> > > > > > > > > > > +		goto alloc;
> > > > > > > > > > > +
> > > > > > > > > > >     	if (!curseg->next_blkoff &&
> > > > > > > > > > >     		!get_valid_blocks(sbi, curseg->segno, false) &&
> > > > > > > > > > >     		!get_ckpt_valid_blocks(sbi, curseg->segno))
> > > > > > > > > > >     		return;
> > > > > > > > > > > +alloc:
> > > > > > > > > > >     	old_segno = curseg->segno;
> > > > > > > > > > >     	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
> > > > > > > > > > >     	locate_dirty_segment(sbi, old_segno);
> > > > > > > > > > > @@ -3126,19 +3177,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> > > > > > > > > > >     {
> > > > > > > > > > >     	struct sit_info *sit_i = SIT_I(sbi);
> > > > > > > > > > >     	struct curseg_info *curseg = CURSEG_I(sbi, type);
> > > > > > > > > > > -	bool put_pin_sem = false;
> > > > > > > > > > > -
> > > > > > > > > > > -	if (type == CURSEG_COLD_DATA) {
> > > > > > > > > > > -		/* GC during CURSEG_COLD_DATA_PINNED allocation */
> > > > > > > > > > > -		if (down_read_trylock(&sbi->pin_sem)) {
> > > > > > > > > > > -			put_pin_sem = true;
> > > > > > > > > > > -		} else {
> > > > > > > > > > > -			type = CURSEG_WARM_DATA;
> > > > > > > > > > > -			curseg = CURSEG_I(sbi, type);
> > > > > > > > > > > -		}
> > > > > > > > > > > -	} else if (type == CURSEG_COLD_DATA_PINNED) {
> > > > > > > > > > > -		type = CURSEG_COLD_DATA;
> > > > > > > > > > > -	}
> > > > > > > > > > >     	down_read(&SM_I(sbi)->curseg_lock);
> > > > > > > > > > > @@ -3204,9 +3242,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> > > > > > > > > > >     	mutex_unlock(&curseg->curseg_mutex);
> > > > > > > > > > >     	up_read(&SM_I(sbi)->curseg_lock);
> > > > > > > > > > > -
> > > > > > > > > > > -	if (put_pin_sem)
> > > > > > > > > > > -		up_read(&sbi->pin_sem);
> > > > > > > > > > >     }
> > > > > > > > > > >     static void update_device_state(struct f2fs_io_info *fio)
> > > > > > > > > > > @@ -3574,7 +3609,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
> > > > > > > > > > >     		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
> > > > > > > > > > >     							CURSEG_HOT_DATA]);
> > > > > > > > > > >     		if (__exist_node_summaries(sbi))
> > > > > > > > > > > -			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
> > > > > > > > > > > +			blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
> > > > > > > > > > >     		else
> > > > > > > > > > >     			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
> > > > > > > > > > >     	} else {
> > > > > > > > > > > @@ -3652,8 +3687,9 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
> > > > > > > > > > >     	}
> > > > > > > > > > >     	if (__exist_node_summaries(sbi))
> > > > > > > > > > > -		f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
> > > > > > > > > > > -					NR_CURSEG_TYPE - type, META_CP, true);
> > > > > > > > > > > +		f2fs_ra_meta_pages(sbi,
> > > > > > > > > > > +				sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
> > > > > > > > > > > +				NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
> > > > > > > > > > >     	for (; type <= CURSEG_COLD_NODE; type++) {
> > > > > > > > > > >     		err = read_normal_summaries(sbi, type);
> > > > > > > > > > > @@ -4155,14 +4191,14 @@ static int build_curseg(struct f2fs_sb_info *sbi)
> > > > > > > > > > >     	struct curseg_info *array;
> > > > > > > > > > >     	int i;
> > > > > > > > > > > -	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
> > > > > > > > > > > -			     GFP_KERNEL);
> > > > > > > > > > > +	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
> > > > > > > > > > > +					sizeof(*array)), GFP_KERNEL);
> > > > > > > > > > >     	if (!array)
> > > > > > > > > > >     		return -ENOMEM;
> > > > > > > > > > >     	SM_I(sbi)->curseg_array = array;
> > > > > > > > > > > -	for (i = 0; i < NR_CURSEG_TYPE; i++) {
> > > > > > > > > > > +	for (i = 0; i < NO_CHECK_TYPE; i++) {
> > > > > > > > > > >     		mutex_init(&array[i].curseg_mutex);
> > > > > > > > > > >     		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
> > > > > > > > > > >     		if (!array[i].sum_blk)
> > > > > > > > > > > @@ -4172,8 +4208,13 @@ static int build_curseg(struct f2fs_sb_info *sbi)
> > > > > > > > > > >     				sizeof(struct f2fs_journal), GFP_KERNEL);
> > > > > > > > > > >     		if (!array[i].journal)
> > > > > > > > > > >     			return -ENOMEM;
> > > > > > > > > > > +		if (i < NR_PERSISTENT_LOG)
> > > > > > > > > > > +			array[i].seg_type = CURSEG_HOT_DATA + i;
> > > > > > > > > > > +		else if (i == CURSEG_COLD_DATA_PINNED)
> > > > > > > > > > > +			array[i].seg_type = CURSEG_COLD_DATA;
> > > > > > > > > > >     		array[i].segno = NULL_SEGNO;
> > > > > > > > > > >     		array[i].next_blkoff = 0;
> > > > > > > > > > > +		array[i].inited = false;
> > > > > > > > > > >     	}
> > > > > > > > > > >     	return restore_curseg_summaries(sbi);
> > > > > > > > > > >     }
> > > > > > > > > > > @@ -4408,7 +4449,7 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
> > > > > > > > > > >     	 * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
> > > > > > > > > > >     	 * In LFS curseg, all blkaddr after .next_blkoff should be unused.
> > > > > > > > > > >     	 */
> > > > > > > > > > > -	for (i = 0; i < NO_CHECK_TYPE; i++) {
> > > > > > > > > > > +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
> > > > > > > > > > >     		struct curseg_info *curseg = CURSEG_I(sbi, i);
> > > > > > > > > > >     		struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
> > > > > > > > > > >     		unsigned int blkofs = curseg->next_blkoff;
> > > > > > > > > > > @@ -4637,7 +4678,7 @@ int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
> > > > > > > > > > >     {
> > > > > > > > > > >     	int i, ret;
> > > > > > > > > > > -	for (i = 0; i < NO_CHECK_TYPE; i++) {
> > > > > > > > > > > +	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
> > > > > > > > > > >     		ret = fix_curseg_write_pointer(sbi, i);
> > > > > > > > > > >     		if (ret)
> > > > > > > > > > >     			return ret;
> > > > > > > > > > > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> > > > > > > > > > > index f261e3e6a69b..8ff261550cbb 100644
> > > > > > > > > > > --- a/fs/f2fs/segment.h
> > > > > > > > > > > +++ b/fs/f2fs/segment.h
> > > > > > > > > > > @@ -22,7 +22,7 @@
> > > > > > > > > > >     #define GET_R2L_SEGNO(free_i, segno)	((segno) + (free_i)->start_segno)
> > > > > > > > > > >     #define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
> > > > > > > > > > > -#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE)
> > > > > > > > > > > +#define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
> > > > > > > > > > >     #define IS_HOT(t)	((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
> > > > > > > > > > >     #define IS_WARM(t)	((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
> > > > > > > > > > > @@ -34,7 +34,8 @@
> > > > > > > > > > >     	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) ||	\
> > > > > > > > > > >     	 ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) ||	\
> > > > > > > > > > >     	 ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) ||	\
> > > > > > > > > > > -	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
> > > > > > > > > > > +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) ||	\
> > > > > > > > > > > +	 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno))
> > > > > > > > > > >     #define IS_CURSEC(sbi, secno)						\
> > > > > > > > > > >     	(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno /		\
> > > > > > > > > > > @@ -48,7 +49,9 @@
> > > > > > > > > > >     	 ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno /		\
> > > > > > > > > > >     	  (sbi)->segs_per_sec) ||	\
> > > > > > > > > > >     	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno /		\
> > > > > > > > > > > -	  (sbi)->segs_per_sec))	\
> > > > > > > > > > > +	  (sbi)->segs_per_sec) ||	\
> > > > > > > > > > > +	 ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno /	\
> > > > > > > > > > > +	  (sbi)->segs_per_sec))
> > > > > > > > > > >     #define MAIN_BLKADDR(sbi)						\
> > > > > > > > > > >     	(SM_I(sbi) ? SM_I(sbi)->main_blkaddr : 				\
> > > > > > > > > > > @@ -288,10 +291,12 @@ struct curseg_info {
> > > > > > > > > > >     	struct rw_semaphore journal_rwsem;	/* protect journal area */
> > > > > > > > > > >     	struct f2fs_journal *journal;		/* cached journal info */
> > > > > > > > > > >     	unsigned char alloc_type;		/* current allocation type */
> > > > > > > > > > > +	unsigned short seg_type;		/* segment type like CURSEG_XXX_TYPE */
> > > > > > > > > > >     	unsigned int segno;			/* current segment number */
> > > > > > > > > > >     	unsigned short next_blkoff;		/* next block offset to write */
> > > > > > > > > > >     	unsigned int zone;			/* current zone number */
> > > > > > > > > > >     	unsigned int next_segno;		/* preallocated segment */
> > > > > > > > > > > +	bool inited;				/* indicate inmem log is inited */
> > > > > > > > > > >     };
> > > > > > > > > > >     struct sit_entry_set {
> > > > > > > > > > > @@ -305,8 +310,6 @@ struct sit_entry_set {
> > > > > > > > > > >      */
> > > > > > > > > > >     static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
> > > > > > > > > > >     {
> > > > > > > > > > > -	if (type == CURSEG_COLD_DATA_PINNED)
> > > > > > > > > > > -		type = CURSEG_COLD_DATA;
> > > > > > > > > > >     	return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
> > > > > > > > > > >     }
> > > > > > > > > > > @@ -438,7 +441,7 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi,
> > > > > > > > > > >     }
> > > > > > > > > > >     static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
> > > > > > > > > > > -		unsigned int segno)
> > > > > > > > > > > +		unsigned int segno, bool inmem)
> > > > > > > > > > >     {
> > > > > > > > > > >     	struct free_segmap_info *free_i = FREE_I(sbi);
> > > > > > > > > > >     	unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
> > > > > > > > > > > @@ -449,7 +452,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
> > > > > > > > > > >     	if (test_and_clear_bit(segno, free_i->free_segmap)) {
> > > > > > > > > > >     		free_i->free_segments++;
> > > > > > > > > > > -		if (IS_CURSEC(sbi, secno))
> > > > > > > > > > > +		if (!inmem && IS_CURSEC(sbi, secno))
> > > > > > > > > > >     			goto skip_free;
> > > > > > > > > > >     		next = find_next_bit(free_i->free_segmap,
> > > > > > > > > > >     				start_segno + sbi->segs_per_sec, start_segno);
> > > > > > > > > > > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > > > > > > > > > > index 80cb7cd358f8..0fefa130585f 100644
> > > > > > > > > > > --- a/fs/f2fs/super.c
> > > > > > > > > > > +++ b/fs/f2fs/super.c
> > > > > > > > > > > @@ -575,7 +575,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
> > > > > > > > > > >     		case Opt_active_logs:
> > > > > > > > > > >     			if (args->from && match_int(args, &arg))
> > > > > > > > > > >     				return -EINVAL;
> > > > > > > > > > > -			if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
> > > > > > > > > > > +			if (arg != 2 && arg != 4 &&
> > > > > > > > > > > +				arg != NR_CURSEG_PERSIST_TYPE)
> > > > > > > > > > >     				return -EINVAL;
> > > > > > > > > > >     			F2FS_OPTION(sbi).active_logs = arg;
> > > > > > > > > > >     			break;
> > > > > > > > > > > @@ -981,7 +982,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
> > > > > > > > > > >     	}
> > > > > > > > > > >     	/* Not pass down write hints if the number of active logs is lesser
> > > > > > > > > > > -	 * than NR_CURSEG_TYPE.
> > > > > > > > > > > +	 * than NR_CURSEG_PERSIST_TYPE.
> > > > > > > > > > >     	 */
> > > > > > > > > > >     	if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE)
> > > > > > > > > > >     		F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
> > > > > > > > > > > @@ -1614,7 +1615,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
> > > > > > > > > > >     static void default_options(struct f2fs_sb_info *sbi)
> > > > > > > > > > >     {
> > > > > > > > > > >     	/* init some FS parameters */
> > > > > > > > > > > -	F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE;
> > > > > > > > > > > +	F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE;
> > > > > > > > > > >     	F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
> > > > > > > > > > >     	F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
> > > > > > > > > > >     	F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
> > > > > > > > > > > @@ -2946,7 +2947,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
> > > > > > > > > > >     	cp_payload = __cp_payload(sbi);
> > > > > > > > > > >     	if (cp_pack_start_sum < cp_payload + 1 ||
> > > > > > > > > > >     		cp_pack_start_sum > blocks_per_seg - 1 -
> > > > > > > > > > > -			NR_CURSEG_TYPE) {
> > > > > > > > > > > +			NR_CURSEG_PERSIST_TYPE) {
> > > > > > > > > > >     		f2fs_err(sbi, "Wrong cp_pack_start_sum: %u",
> > > > > > > > > > >     			 cp_pack_start_sum);
> > > > > > > > > > >     		return 1;
> > > > > > > > > > > -- 
> > > > > > > > > > > 2.26.2
> > > > > > > > > > .
> > > > > > > > > > 
> > > > > > > > .
> > > > > > > > 
> > > > > > .
> > > > > > 
> > > > > 
> > > > > 
> > > > > _______________________________________________
> > > > > Linux-f2fs-devel mailing list
> > > > > Linux-f2fs-devel@lists.sourceforge.net
> > > > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> > > > > .
> > > > > 
> > > > 
> > > > 
> > > > _______________________________________________
> > > > Linux-f2fs-devel mailing list
> > > > Linux-f2fs-devel@lists.sourceforge.net
> > > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> > > > .
> > > > 
> > .
> > 


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, back to index

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-30 10:04 [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg Chao Yu
2020-06-30 10:04 ` [f2fs-dev] [PATCH RFC 2/5] f2fs: record average update time of segment Chao Yu
2020-07-01 16:19   ` Jaegeuk Kim
2020-07-03  2:13     ` Chao Yu
2020-06-30 10:04 ` [f2fs-dev] [PATCH RFC 3/5] f2fs: inherit mtime of original block during GC Chao Yu
2020-06-30 10:04 ` [f2fs-dev] [PATCH RFC 4/5] f2fs: support 64-bits key in f2fs rb-tree node entry Chao Yu
2020-06-30 10:04 ` [f2fs-dev] [PATCH RFC 5/5] f2fs: support age threshold based garbage collection Chao Yu
2020-07-06  8:25   ` Chao Yu
2020-07-07  3:21 ` [f2fs-dev] [PATCH RFC 1/5] f2fs: introduce inmem curseg Jaegeuk Kim
2020-07-07  3:37   ` Chao Yu
2020-07-07  3:51     ` Jaegeuk Kim
2020-07-15  3:39       ` Chao Yu
2020-07-15 19:07         ` Jaegeuk Kim
2020-07-16  1:24           ` Chao Yu
2020-07-25  8:42             ` Chao Yu
2020-08-04  1:49               ` Chao Yu
2020-08-04  2:44                 ` Jaegeuk Kim
2020-08-04  2:53                   ` Chao Yu
2020-08-04  3:56                     ` Jaegeuk Kim

Linux-f2fs-devel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-f2fs-devel/0 linux-f2fs-devel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-f2fs-devel linux-f2fs-devel/ https://lore.kernel.org/linux-f2fs-devel \
		linux-f2fs-devel@lists.sourceforge.net
	public-inbox-index linux-f2fs-devel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/net.sourceforge.lists.linux-f2fs-devel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git