linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/6] f2fs: clean up flush/discard command namings
@ 2017-01-12 22:44 Jaegeuk Kim
  2017-01-12 22:44 ` [PATCH 2/6] f2fs: reorganize stat information Jaegeuk Kim
                   ` (5 more replies)
  0 siblings, 6 replies; 25+ messages in thread
From: Jaegeuk Kim @ 2017-01-12 22:44 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This patch simply cleans up the names for flush/discard commands.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/debug.c   |  2 +-
 fs/f2fs/f2fs.h    | 20 +++++-------
 fs/f2fs/segment.c | 98 +++++++++++++++++++++++++++----------------------------
 3 files changed, 59 insertions(+), 61 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 29cdf0c1da1d..883f1ea9e0b6 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -194,7 +194,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
 		si->cache_mem += sizeof(struct f2fs_gc_kthread);
 
 	/* build merge flush thread */
-	if (SM_I(sbi)->cmd_control_info)
+	if (SM_I(sbi)->fcc_info)
 		si->cache_mem += sizeof(struct flush_cmd_control);
 
 	/* free nids */
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4a84b3fbbfd1..548e75d18ec1 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -181,13 +181,12 @@ struct discard_entry {
 	int len;		/* # of consecutive blocks of the discard */
 };
 
-struct bio_entry {
-	struct list_head list;
-	block_t lstart;
-	block_t len;
-	struct bio *bio;
-	struct completion event;
-	int error;
+struct discard_cmd {
+	struct list_head list;		/* command list */
+	struct completion wait;		/* compleation */
+	block_t lstart;			/* logical start address */
+	block_t len;			/* length */
+	struct bio *bio;		/* bio */
 };
 
 /* for the list of fsync inodes, used only during recovery */
@@ -634,8 +633,8 @@ struct f2fs_sm_info {
 	unsigned int rec_prefree_segments;
 
 	/* for small discard management */
-	struct list_head discard_list;		/* 4KB discard list */
-	struct list_head wait_list;		/* linked with issued discard bio */
+	struct list_head discard_entry_list;	/* 4KB discard entry list */
+	struct list_head discard_cmd_list;	/* discard cmd list */
 	int nr_discards;			/* # of discards in the list */
 	int max_discards;			/* max. discards to be issued */
 
@@ -649,8 +648,7 @@ struct f2fs_sm_info {
 	unsigned int min_fsync_blocks;	/* threshold for fsync */
 
 	/* for flush command control */
-	struct flush_cmd_control *cmd_control_info;
-
+	struct flush_cmd_control *fcc_info;
 };
 
 /*
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 40fd04e1e23d..73282108fa33 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -26,7 +26,7 @@
 #define __reverse_ffz(x) __reverse_ffs(~(x))
 
 static struct kmem_cache *discard_entry_slab;
-static struct kmem_cache *bio_entry_slab;
+static struct kmem_cache *discard_cmd_slab;
 static struct kmem_cache *sit_entry_set_slab;
 static struct kmem_cache *inmem_entry_slab;
 
@@ -439,7 +439,7 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi)
 static int issue_flush_thread(void *data)
 {
 	struct f2fs_sb_info *sbi = data;
-	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
 	wait_queue_head_t *q = &fcc->flush_wait_queue;
 repeat:
 	if (kthread_should_stop())
@@ -468,7 +468,7 @@ static int issue_flush_thread(void *data)
 
 int f2fs_issue_flush(struct f2fs_sb_info *sbi)
 {
-	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
 	struct flush_cmd cmd;
 
 	trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
@@ -511,8 +511,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
 	struct flush_cmd_control *fcc;
 	int err = 0;
 
-	if (SM_I(sbi)->cmd_control_info) {
-		fcc = SM_I(sbi)->cmd_control_info;
+	if (SM_I(sbi)->fcc_info) {
+		fcc = SM_I(sbi)->fcc_info;
 		goto init_thread;
 	}
 
@@ -522,14 +522,14 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
 	atomic_set(&fcc->submit_flush, 0);
 	init_waitqueue_head(&fcc->flush_wait_queue);
 	init_llist_head(&fcc->issue_list);
-	SM_I(sbi)->cmd_control_info = fcc;
+	SM_I(sbi)->fcc_info = fcc;
 init_thread:
 	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
 				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
 	if (IS_ERR(fcc->f2fs_issue_flush)) {
 		err = PTR_ERR(fcc->f2fs_issue_flush);
 		kfree(fcc);
-		SM_I(sbi)->cmd_control_info = NULL;
+		SM_I(sbi)->fcc_info = NULL;
 		return err;
 	}
 
@@ -538,7 +538,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
 
 void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
 {
-	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
 
 	if (fcc && fcc->f2fs_issue_flush) {
 		struct task_struct *flush_thread = fcc->f2fs_issue_flush;
@@ -548,7 +548,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
 	}
 	if (free) {
 		kfree(fcc);
-		SM_I(sbi)->cmd_control_info = NULL;
+		SM_I(sbi)->fcc_info = NULL;
 	}
 }
 
@@ -628,42 +628,43 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 	mutex_unlock(&dirty_i->seglist_lock);
 }
 
-static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi,
+static struct discard_cmd *__add_discard_cmd(struct f2fs_sb_info *sbi,
 			struct bio *bio, block_t lstart, block_t len)
 {
-	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
-	struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
+	struct list_head *wait_list = &(SM_I(sbi)->discard_cmd_list);
+	struct discard_cmd *dc;
 
-	INIT_LIST_HEAD(&be->list);
-	be->bio = bio;
-	be->lstart = lstart;
-	be->len = len;
-	init_completion(&be->event);
-	list_add_tail(&be->list, wait_list);
+	dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
+	INIT_LIST_HEAD(&dc->list);
+	dc->bio = bio;
+	dc->lstart = lstart;
+	dc->len = len;
+	init_completion(&dc->wait);
+	list_add_tail(&dc->list, wait_list);
 
-	return be;
+	return dc;
 }
 
 /* This should be covered by global mutex, &sit_i->sentry_lock */
 void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
 {
-	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
-	struct bio_entry *be, *tmp;
+	struct list_head *wait_list = &(SM_I(sbi)->discard_cmd_list);
+	struct discard_cmd *dc, *tmp;
 
-	list_for_each_entry_safe(be, tmp, wait_list, list) {
-		struct bio *bio = be->bio;
+	list_for_each_entry_safe(dc, tmp, wait_list, list) {
+		struct bio *bio = dc->bio;
 		int err;
 
-		if (!completion_done(&be->event)) {
-			if ((be->lstart <= blkaddr &&
-					blkaddr < be->lstart + be->len) ||
+		if (!completion_done(&dc->wait)) {
+			if ((dc->lstart <= blkaddr &&
+					blkaddr < dc->lstart + dc->len) ||
 					blkaddr == NULL_ADDR)
-				wait_for_completion_io(&be->event);
+				wait_for_completion_io(&dc->wait);
 			else
 				continue;
 		}
 
-		err = be->error;
+		err = bio->bi_error;
 		if (err == -EOPNOTSUPP)
 			err = 0;
 
@@ -672,17 +673,16 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
 				"Issue discard failed, ret: %d", err);
 
 		bio_put(bio);
-		list_del(&be->list);
-		kmem_cache_free(bio_entry_slab, be);
+		list_del(&dc->list);
+		kmem_cache_free(discard_cmd_slab, dc);
 	}
 }
 
-static void f2fs_submit_bio_wait_endio(struct bio *bio)
+static void f2fs_submit_discard_endio(struct bio *bio)
 {
-	struct bio_entry *be = (struct bio_entry *)bio->bi_private;
+	struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
 
-	be->error = bio->bi_error;
-	complete(&be->event);
+	complete(&dc->wait);
 }
 
 /* this function is copied from blkdev_issue_discard from block/blk-lib.c */
@@ -705,11 +705,11 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
 				SECTOR_FROM_BLOCK(blklen),
 				GFP_NOFS, 0, &bio);
 	if (!err && bio) {
-		struct bio_entry *be = __add_bio_entry(sbi, bio,
+		struct discard_cmd *dc = __add_discard_cmd(sbi, bio,
 						lblkstart, blklen);
 
-		bio->bi_private = be;
-		bio->bi_end_io = f2fs_submit_bio_wait_endio;
+		bio->bi_private = dc;
+		bio->bi_end_io = f2fs_submit_discard_endio;
 		bio->bi_opf |= REQ_SYNC;
 		submit_bio(bio);
 	}
@@ -817,7 +817,7 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
 		struct cp_control *cpc, struct seg_entry *se,
 		unsigned int start, unsigned int end)
 {
-	struct list_head *head = &SM_I(sbi)->discard_list;
+	struct list_head *head = &SM_I(sbi)->discard_entry_list;
 	struct discard_entry *new, *last;
 
 	if (!list_empty(head)) {
@@ -886,7 +886,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
 
 void release_discard_addrs(struct f2fs_sb_info *sbi)
 {
-	struct list_head *head = &(SM_I(sbi)->discard_list);
+	struct list_head *head = &(SM_I(sbi)->discard_entry_list);
 	struct discard_entry *entry, *this;
 
 	/* drop caches */
@@ -912,7 +912,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
 
 void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 {
-	struct list_head *head = &(SM_I(sbi)->discard_list);
+	struct list_head *head = &(SM_I(sbi)->discard_entry_list);
 	struct discard_entry *entry, *this;
 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
 	struct blk_plug plug;
@@ -2703,8 +2703,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
 	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
 	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
 
-	INIT_LIST_HEAD(&sm_info->discard_list);
-	INIT_LIST_HEAD(&sm_info->wait_list);
+	INIT_LIST_HEAD(&sm_info->discard_entry_list);
+	INIT_LIST_HEAD(&sm_info->discard_cmd_list);
 	sm_info->nr_discards = 0;
 	sm_info->max_discards = 0;
 
@@ -2854,15 +2854,15 @@ int __init create_segment_manager_caches(void)
 	if (!discard_entry_slab)
 		goto fail;
 
-	bio_entry_slab = f2fs_kmem_cache_create("bio_entry",
-			sizeof(struct bio_entry));
-	if (!bio_entry_slab)
+	discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd",
+			sizeof(struct discard_cmd));
+	if (!discard_cmd_slab)
 		goto destroy_discard_entry;
 
 	sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
 			sizeof(struct sit_entry_set));
 	if (!sit_entry_set_slab)
-		goto destroy_bio_entry;
+		goto destroy_discard_cmd;
 
 	inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
 			sizeof(struct inmem_pages));
@@ -2872,8 +2872,8 @@ int __init create_segment_manager_caches(void)
 
 destroy_sit_entry_set:
 	kmem_cache_destroy(sit_entry_set_slab);
-destroy_bio_entry:
-	kmem_cache_destroy(bio_entry_slab);
+destroy_discard_cmd:
+	kmem_cache_destroy(discard_cmd_slab);
 destroy_discard_entry:
 	kmem_cache_destroy(discard_entry_slab);
 fail:
@@ -2883,7 +2883,7 @@ int __init create_segment_manager_caches(void)
 void destroy_segment_manager_caches(void)
 {
 	kmem_cache_destroy(sit_entry_set_slab);
-	kmem_cache_destroy(bio_entry_slab);
+	kmem_cache_destroy(discard_cmd_slab);
 	kmem_cache_destroy(discard_entry_slab);
 	kmem_cache_destroy(inmem_entry_slab);
 }
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 2/6] f2fs: reorganize stat information
  2017-01-12 22:44 [PATCH 1/6] f2fs: clean up flush/discard command namings Jaegeuk Kim
@ 2017-01-12 22:44 ` Jaegeuk Kim
  2017-02-22  9:40   ` [f2fs-dev] " Chao Yu
  2017-01-12 22:44 ` [PATCH 3/6] f2fs: remove batched discard in f2fs_trim_fs Jaegeuk Kim
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 25+ messages in thread
From: Jaegeuk Kim @ 2017-01-12 22:44 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This patch modifies stat information more clearly.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/debug.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 883f1ea9e0b6..cd338ca24941 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -258,8 +258,6 @@ static int stat_show(struct seq_file *s, void *v)
 			   si->inline_dir);
 		seq_printf(s, "  - Orphan Inode: %u\n",
 			   si->orphans);
-		seq_printf(s, "  - Atomic write count: %4d (Max. %4d)\n",
-			   si->aw_cnt, si->max_aw_cnt);
 		seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
 			   si->main_area_segs, si->main_area_sections,
 			   si->main_area_zones);
@@ -318,8 +316,10 @@ static int stat_show(struct seq_file *s, void *v)
 		seq_printf(s, "  - Inner Struct Count: tree: %d(%d), node: %d\n",
 				si->ext_tree, si->zombie_tree, si->ext_node);
 		seq_puts(s, "\nBalancing F2FS Async:\n");
-		seq_printf(s, "  - inmem: %4d, wb_cp_data: %4d, wb_data: %4d\n",
-			   si->inmem_pages, si->nr_wb_cp_data, si->nr_wb_data);
+		seq_printf(s, "  - IO (CP: %4d, Data: %4d)\n",
+			   si->nr_wb_cp_data, si->nr_wb_data);
+		seq_printf(s, "  - inmem: %4d, atomic IO: %4d (Max. %4d)\n",
+			   si->inmem_pages, si->aw_cnt, si->max_aw_cnt);
 		seq_printf(s, "  - nodes: %4d in %4d\n",
 			   si->ndirty_node, si->node_pages);
 		seq_printf(s, "  - dents: %4d in dirs:%4d (%4d)\n",
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 3/6] f2fs: remove batched discard in f2fs_trim_fs
  2017-01-12 22:44 [PATCH 1/6] f2fs: clean up flush/discard command namings Jaegeuk Kim
  2017-01-12 22:44 ` [PATCH 2/6] f2fs: reorganize stat information Jaegeuk Kim
@ 2017-01-12 22:44 ` Jaegeuk Kim
  2017-02-22  9:40   ` Chao Yu
  2017-01-12 22:44 ` [PATCH 4/6] f2fs: factor out discard command info into discard_cmd_control Jaegeuk Kim
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 25+ messages in thread
From: Jaegeuk Kim @ 2017-01-12 22:44 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

We don't need to do multiple checkpoints, since we don't actually wait for
completion of discard commands during checkpoint.
Instead, we still need to avoid very big discard commands, since that large
discard can interfere block allocation.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs |  6 ------
 fs/f2fs/f2fs.h                          |  9 +-------
 fs/f2fs/segment.c                       | 38 +++++++++++----------------------
 fs/f2fs/super.c                         |  2 --
 4 files changed, 14 insertions(+), 41 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index a809f6005f14..df6b3f6e164a 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -75,12 +75,6 @@ Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
 Description:
 		 Controls the memory footprint used by f2fs.
 
-What:		/sys/fs/f2fs/<disk>/trim_sections
-Date:		February 2015
-Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:
-		 Controls the trimming rate in batch mode.
-
 What:		/sys/fs/f2fs/<disk>/cp_interval
 Date:		October 2015
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 548e75d18ec1..a2850bf2a487 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -128,11 +128,7 @@ enum {
 	CP_DISCARD,
 };
 
-#define DEF_BATCHED_TRIM_SECTIONS	2
-#define BATCHED_TRIM_SEGMENTS(sbi)	\
-		(SM_I(sbi)->trim_sections * (sbi)->segs_per_sec)
-#define BATCHED_TRIM_BLOCKS(sbi)	\
-		(BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
+#define MAX_DISCARD_BLOCKS(sbi) (1 << (sbi)->log_blocks_per_seg)
 #define DEF_CP_INTERVAL			60	/* 60 secs */
 #define DEF_IDLE_INTERVAL		5	/* 5 secs */
 
@@ -638,9 +634,6 @@ struct f2fs_sm_info {
 	int nr_discards;			/* # of discards in the list */
 	int max_discards;			/* max. discards to be issued */
 
-	/* for batched trimming */
-	unsigned int trim_sections;		/* # of sections to trim */
-
 	struct list_head sit_entry_set;	/* sit entry set list */
 
 	unsigned int ipu_policy;	/* in-place-update policy */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 73282108fa33..e6f3c6db7616 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -823,7 +823,8 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
 	if (!list_empty(head)) {
 		last = list_last_entry(head, struct discard_entry, list);
 		if (START_BLOCK(sbi, cpc->trim_start) + start ==
-						last->blkaddr + last->len) {
+				last->blkaddr + last->len &&
+				last->len <= MAX_DISCARD_BLOCKS(sbi)) {
 			last->len += end - start;
 			goto done;
 		}
@@ -1513,36 +1514,25 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
 			"Found FS corruption, run fsck to fix.");
 		goto out;
 	}
+	if (sbi->discard_blks == 0)
+		goto out;
 
 	/* start/end segment number in main_area */
 	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
 	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
 						GET_SEGNO(sbi, end);
+	/*
+	 * do checkpoint to issue discard commands safely since we now can
+	 * use async discard.
+	 */
 	cpc.reason = CP_DISCARD;
 	cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
+	cpc.trim_start = start_segno;
+	cpc.trim_end = end_segno;
 
-	/* do checkpoint to issue discard commands safely */
-	for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
-		cpc.trim_start = start_segno;
-
-		if (sbi->discard_blks == 0)
-			break;
-		else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
-			cpc.trim_end = end_segno;
-		else
-			cpc.trim_end = min_t(unsigned int,
-				rounddown(start_segno +
-				BATCHED_TRIM_SEGMENTS(sbi),
-				sbi->segs_per_sec) - 1, end_segno);
-
-		mutex_lock(&sbi->gc_mutex);
-		err = write_checkpoint(sbi, &cpc);
-		mutex_unlock(&sbi->gc_mutex);
-		if (err)
-			break;
-
-		schedule();
-	}
+	mutex_lock(&sbi->gc_mutex);
+	err = write_checkpoint(sbi, &cpc);
+	mutex_unlock(&sbi->gc_mutex);
 out:
 	range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
 	return err;
@@ -2708,8 +2698,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
 	sm_info->nr_discards = 0;
 	sm_info->max_discards = 0;
 
-	sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
-
 	INIT_LIST_HEAD(&sm_info->sit_entry_set);
 
 	if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 461c29043aec..37d40d8aa9c4 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -284,7 +284,6 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
@@ -309,7 +308,6 @@ static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(gc_idle),
 	ATTR_LIST(reclaim_segments),
 	ATTR_LIST(max_small_discards),
-	ATTR_LIST(batched_trim_sections),
 	ATTR_LIST(ipu_policy),
 	ATTR_LIST(min_ipu_util),
 	ATTR_LIST(min_fsync_blocks),
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 4/6] f2fs: factor out discard command info into discard_cmd_control
  2017-01-12 22:44 [PATCH 1/6] f2fs: clean up flush/discard command namings Jaegeuk Kim
  2017-01-12 22:44 ` [PATCH 2/6] f2fs: reorganize stat information Jaegeuk Kim
  2017-01-12 22:44 ` [PATCH 3/6] f2fs: remove batched discard in f2fs_trim_fs Jaegeuk Kim
@ 2017-01-12 22:44 ` Jaegeuk Kim
  2017-02-22  9:40   ` Chao Yu
  2017-01-12 22:44 ` [PATCH 5/6] f2fs: add a kernel thread to issue discard commands asynchronously Jaegeuk Kim
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 25+ messages in thread
From: Jaegeuk Kim @ 2017-01-12 22:44 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This patch adds discard_cmd_control with the existing discarding controls.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/debug.c   |  2 ++
 fs/f2fs/f2fs.h    | 16 ++++++++-----
 fs/f2fs/segment.c | 68 +++++++++++++++++++++++++++++++++++++++++++------------
 fs/f2fs/super.c   |  5 +++-
 4 files changed, 69 insertions(+), 22 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index cd338ca24941..f9f6b0aeba02 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -196,6 +196,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
 	/* build merge flush thread */
 	if (SM_I(sbi)->fcc_info)
 		si->cache_mem += sizeof(struct flush_cmd_control);
+	if (SM_I(sbi)->dcc_info)
+		si->cache_mem += sizeof(struct discard_cmd_control);
 
 	/* free nids */
 	si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] +
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index a2850bf2a487..e7b403cbd70f 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -185,6 +185,13 @@ struct discard_cmd {
 	struct bio *bio;		/* bio */
 };
 
+struct discard_cmd_control {
+	struct list_head discard_entry_list;	/* 4KB discard entry list */
+	int nr_discards;			/* # of discards in the list */
+	struct list_head discard_cmd_list;	/* discard cmd list */
+	int max_discards;			/* max. discards to be issued */
+};
+
 /* for the list of fsync inodes, used only during recovery */
 struct fsync_inode_entry {
 	struct list_head list;	/* list head */
@@ -628,12 +635,6 @@ struct f2fs_sm_info {
 	/* a threshold to reclaim prefree segments */
 	unsigned int rec_prefree_segments;
 
-	/* for small discard management */
-	struct list_head discard_entry_list;	/* 4KB discard entry list */
-	struct list_head discard_cmd_list;	/* discard cmd list */
-	int nr_discards;			/* # of discards in the list */
-	int max_discards;			/* max. discards to be issued */
-
 	struct list_head sit_entry_set;	/* sit entry set list */
 
 	unsigned int ipu_policy;	/* in-place-update policy */
@@ -642,6 +643,9 @@ struct f2fs_sm_info {
 
 	/* for flush command control */
 	struct flush_cmd_control *fcc_info;
+
+	/* for discard command control */
+	struct discard_cmd_control *dcc_info;
 };
 
 /*
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index e6f3c6db7616..e3bec31de961 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -631,7 +631,8 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 static struct discard_cmd *__add_discard_cmd(struct f2fs_sb_info *sbi,
 			struct bio *bio, block_t lstart, block_t len)
 {
-	struct list_head *wait_list = &(SM_I(sbi)->discard_cmd_list);
+	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+	struct list_head *cmd_list = &(dcc->discard_cmd_list);
 	struct discard_cmd *dc;
 
 	dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
@@ -640,7 +641,7 @@ static struct discard_cmd *__add_discard_cmd(struct f2fs_sb_info *sbi,
 	dc->lstart = lstart;
 	dc->len = len;
 	init_completion(&dc->wait);
-	list_add_tail(&dc->list, wait_list);
+	list_add_tail(&dc->list, cmd_list);
 
 	return dc;
 }
@@ -648,7 +649,8 @@ static struct discard_cmd *__add_discard_cmd(struct f2fs_sb_info *sbi,
 /* This should be covered by global mutex, &sit_i->sentry_lock */
 void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
 {
-	struct list_head *wait_list = &(SM_I(sbi)->discard_cmd_list);
+	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+	struct list_head *wait_list = &(dcc->discard_cmd_list);
 	struct discard_cmd *dc, *tmp;
 
 	list_for_each_entry_safe(dc, tmp, wait_list, list) {
@@ -817,7 +819,7 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
 		struct cp_control *cpc, struct seg_entry *se,
 		unsigned int start, unsigned int end)
 {
-	struct list_head *head = &SM_I(sbi)->discard_entry_list;
+	struct list_head *head = &SM_I(sbi)->dcc_info->discard_entry_list;
 	struct discard_entry *new, *last;
 
 	if (!list_empty(head)) {
@@ -836,7 +838,7 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
 	new->len = end - start;
 	list_add_tail(&new->list, head);
 done:
-	SM_I(sbi)->nr_discards += end - start;
+	SM_I(sbi)->dcc_info->nr_discards += end - start;
 }
 
 static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
@@ -858,7 +860,8 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
 
 	if (!force) {
 		if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
-		    SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards)
+			SM_I(sbi)->dcc_info->nr_discards >=
+				SM_I(sbi)->dcc_info->max_discards)
 			return false;
 	}
 
@@ -867,7 +870,8 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
 		dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
 				(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
 
-	while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
+	while (force || SM_I(sbi)->dcc_info->nr_discards <=
+				SM_I(sbi)->dcc_info->max_discards) {
 		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
 		if (start >= max_blocks)
 			break;
@@ -887,7 +891,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
 
 void release_discard_addrs(struct f2fs_sb_info *sbi)
 {
-	struct list_head *head = &(SM_I(sbi)->discard_entry_list);
+	struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list);
 	struct discard_entry *entry, *this;
 
 	/* drop caches */
@@ -913,7 +917,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
 
 void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 {
-	struct list_head *head = &(SM_I(sbi)->discard_entry_list);
+	struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list);
 	struct discard_entry *entry, *this;
 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
 	struct blk_plug plug;
@@ -973,13 +977,47 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 		cpc->trimmed += entry->len;
 skip:
 		list_del(&entry->list);
-		SM_I(sbi)->nr_discards -= entry->len;
+		SM_I(sbi)->dcc_info->nr_discards -= entry->len;
 		kmem_cache_free(discard_entry_slab, entry);
 	}
 
 	blk_finish_plug(&plug);
 }
 
+int create_discard_cmd_control(struct f2fs_sb_info *sbi)
+{
+	struct discard_cmd_control *dcc;
+	int err = 0;
+
+	if (SM_I(sbi)->dcc_info) {
+		dcc = SM_I(sbi)->dcc_info;
+		goto init_thread;
+	}
+
+	dcc = kzalloc(sizeof(struct discard_cmd_control), GFP_KERNEL);
+	if (!dcc)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&dcc->discard_entry_list);
+	INIT_LIST_HEAD(&dcc->discard_cmd_list);
+	dcc->nr_discards = 0;
+	dcc->max_discards = 0;
+
+	SM_I(sbi)->dcc_info = dcc;
+init_thread:
+	return err;
+}
+
+void destroy_discard_cmd_control(struct f2fs_sb_info *sbi, bool free)
+{
+	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+
+	if (free) {
+		kfree(dcc);
+		SM_I(sbi)->dcc_info = NULL;
+	}
+}
+
 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
 {
 	struct sit_info *sit_i = SIT_I(sbi);
@@ -2693,11 +2731,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
 	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
 	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
 
-	INIT_LIST_HEAD(&sm_info->discard_entry_list);
-	INIT_LIST_HEAD(&sm_info->discard_cmd_list);
-	sm_info->nr_discards = 0;
-	sm_info->max_discards = 0;
-
 	INIT_LIST_HEAD(&sm_info->sit_entry_set);
 
 	if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
@@ -2706,6 +2739,10 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
 			return err;
 	}
 
+	err = create_discard_cmd_control(sbi);
+	if (err)
+		return err;
+
 	err = build_sit_info(sbi);
 	if (err)
 		return err;
@@ -2827,6 +2864,7 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
 	if (!sm_info)
 		return;
 	destroy_flush_cmd_control(sbi, true);
+	destroy_discard_cmd_control(sbi, true);
 	destroy_dirty_segmap(sbi);
 	destroy_curseg(sbi);
 	destroy_free_segmap(sbi);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 37d40d8aa9c4..2ddd2dc50b08 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -145,6 +145,7 @@ static match_table_t f2fs_tokens = {
 enum {
 	GC_THREAD,	/* struct f2fs_gc_thread */
 	SM_INFO,	/* struct f2fs_sm_info */
+	DCC_INFO,	/* struct discard_cmd_control */
 	NM_INFO,	/* struct f2fs_nm_info */
 	F2FS_SBI,	/* struct f2fs_sb_info */
 #ifdef CONFIG_F2FS_FAULT_INJECTION
@@ -168,6 +169,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
 		return (unsigned char *)sbi->gc_thread;
 	else if (struct_type == SM_INFO)
 		return (unsigned char *)SM_I(sbi);
+	else if (struct_type == DCC_INFO)
+		return (unsigned char *)SM_I(sbi)->dcc_info;
 	else if (struct_type == NM_INFO)
 		return (unsigned char *)NM_I(sbi);
 	else if (struct_type == F2FS_SBI)
@@ -283,7 +286,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 5/6] f2fs: add a kernel thread to issue discard commands asynchronously
  2017-01-12 22:44 [PATCH 1/6] f2fs: clean up flush/discard command namings Jaegeuk Kim
                   ` (2 preceding siblings ...)
  2017-01-12 22:44 ` [PATCH 4/6] f2fs: factor out discard command info into discard_cmd_control Jaegeuk Kim
@ 2017-01-12 22:44 ` Jaegeuk Kim
  2017-01-13  8:01   ` Christoph Hellwig
  2017-02-22  9:40   ` Chao Yu
  2017-01-12 22:44 ` [PATCH 6/6] f2fs: show # of on-going flush and discard bios Jaegeuk Kim
  2017-02-22  9:40 ` [f2fs-dev] [PATCH 1/6] f2fs: clean up flush/discard command namings Chao Yu
  5 siblings, 2 replies; 25+ messages in thread
From: Jaegeuk Kim @ 2017-01-12 22:44 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This patch adds a kernel thread to issue discard commands.
It proposes three states, D_PREP, D_SUBMIT, and D_DONE to identify current
bio status.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/f2fs.h    |  11 +++++
 fs/f2fs/segment.c | 128 +++++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 108 insertions(+), 31 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e7b403cbd70f..ef5e3709c161 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -129,6 +129,7 @@ enum {
 };
 
 #define MAX_DISCARD_BLOCKS(sbi) (1 << (sbi)->log_blocks_per_seg)
+#define DISCARD_ISSUE_RATE	8
 #define DEF_CP_INTERVAL			60	/* 60 secs */
 #define DEF_IDLE_INTERVAL		5	/* 5 secs */
 
@@ -177,18 +178,28 @@ struct discard_entry {
 	int len;		/* # of consecutive blocks of the discard */
 };
 
+enum {
+	D_PREP,
+	D_SUBMIT,
+	D_DONE,
+};
+
 struct discard_cmd {
 	struct list_head list;		/* command list */
 	struct completion wait;		/* compleation */
 	block_t lstart;			/* logical start address */
 	block_t len;			/* length */
 	struct bio *bio;		/* bio */
+	int state;			/* state */
 };
 
 struct discard_cmd_control {
+	struct task_struct *f2fs_issue_discard;	/* discard thread */
 	struct list_head discard_entry_list;	/* 4KB discard entry list */
 	int nr_discards;			/* # of discards in the list */
 	struct list_head discard_cmd_list;	/* discard cmd list */
+	wait_queue_head_t discard_wait_queue;	/* waiting queue for wake-up */
+	struct mutex cmd_lock;
 	int max_discards;			/* max. discards to be issued */
 };
 
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index e3bec31de961..74364006bfa6 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -628,7 +628,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 	mutex_unlock(&dirty_i->seglist_lock);
 }
 
-static struct discard_cmd *__add_discard_cmd(struct f2fs_sb_info *sbi,
+static void __add_discard_cmd(struct f2fs_sb_info *sbi,
 			struct bio *bio, block_t lstart, block_t len)
 {
 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
@@ -638,12 +638,30 @@ static struct discard_cmd *__add_discard_cmd(struct f2fs_sb_info *sbi,
 	dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
 	INIT_LIST_HEAD(&dc->list);
 	dc->bio = bio;
+	bio->bi_private = dc;
 	dc->lstart = lstart;
 	dc->len = len;
+	dc->state = D_PREP;
 	init_completion(&dc->wait);
+
+	mutex_lock(&dcc->cmd_lock);
 	list_add_tail(&dc->list, cmd_list);
+	mutex_unlock(&dcc->cmd_lock);
+}
+
+static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc)
+{
+	int err = dc->bio->bi_error;
 
-	return dc;
+	if (err == -EOPNOTSUPP)
+		err = 0;
+
+	if (err)
+		f2fs_msg(sbi->sb, KERN_INFO,
+				"Issue discard failed, ret: %d", err);
+	bio_put(dc->bio);
+	list_del(&dc->list);
+	kmem_cache_free(discard_cmd_slab, dc);
 }
 
 /* This should be covered by global mutex, &sit_i->sentry_lock */
@@ -653,31 +671,28 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
 	struct list_head *wait_list = &(dcc->discard_cmd_list);
 	struct discard_cmd *dc, *tmp;
 
+	mutex_lock(&dcc->cmd_lock);
 	list_for_each_entry_safe(dc, tmp, wait_list, list) {
-		struct bio *bio = dc->bio;
-		int err;
 
-		if (!completion_done(&dc->wait)) {
-			if ((dc->lstart <= blkaddr &&
-					blkaddr < dc->lstart + dc->len) ||
-					blkaddr == NULL_ADDR)
+		if (blkaddr == NULL_ADDR) {
+			if (dc->state == D_PREP) {
+				dc->state = D_SUBMIT;
+				submit_bio(dc->bio);
+			}
+			wait_for_completion_io(&dc->wait);
+
+			__remove_discard_cmd(sbi, dc);
+			continue;
+		}
+
+		if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) {
+			if (dc->state == D_SUBMIT)
 				wait_for_completion_io(&dc->wait);
 			else
-				continue;
+				__remove_discard_cmd(sbi, dc);
 		}
-
-		err = bio->bi_error;
-		if (err == -EOPNOTSUPP)
-			err = 0;
-
-		if (err)
-			f2fs_msg(sbi->sb, KERN_INFO,
-				"Issue discard failed, ret: %d", err);
-
-		bio_put(bio);
-		list_del(&dc->list);
-		kmem_cache_free(discard_cmd_slab, dc);
 	}
+	mutex_unlock(&dcc->cmd_lock);
 }
 
 static void f2fs_submit_discard_endio(struct bio *bio)
@@ -685,8 +700,48 @@ static void f2fs_submit_discard_endio(struct bio *bio)
 	struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
 
 	complete(&dc->wait);
+	dc->state = D_DONE;
 }
 
+static int issue_discard_thread(void *data)
+{
+	struct f2fs_sb_info *sbi = data;
+	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+	wait_queue_head_t *q = &dcc->discard_wait_queue;
+	struct list_head *cmd_list = &dcc->discard_cmd_list;
+	struct discard_cmd *dc, *tmp;
+	struct blk_plug plug;
+	int iter = 0;
+repeat:
+	if (kthread_should_stop())
+		return 0;
+
+	blk_start_plug(&plug);
+
+	mutex_lock(&dcc->cmd_lock);
+	list_for_each_entry_safe(dc, tmp, cmd_list, list) {
+		if (dc->state == D_PREP) {
+			dc->state = D_SUBMIT;
+			submit_bio(dc->bio);
+			if (iter++ > DISCARD_ISSUE_RATE)
+				break;
+		} else if (dc->state == D_DONE) {
+			__remove_discard_cmd(sbi, dc);
+		}
+	}
+	mutex_unlock(&dcc->cmd_lock);
+
+	blk_finish_plug(&plug);
+
+	iter = 0;
+	congestion_wait(BLK_RW_SYNC, HZ/50);
+
+	wait_event_interruptible(*q,
+		kthread_should_stop() || !list_empty(&dcc->discard_cmd_list));
+	goto repeat;
+}
+
+
 /* this function is copied from blkdev_issue_discard from block/blk-lib.c */
 static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
 		struct block_device *bdev, block_t blkstart, block_t blklen)
@@ -707,13 +762,11 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
 				SECTOR_FROM_BLOCK(blklen),
 				GFP_NOFS, 0, &bio);
 	if (!err && bio) {
-		struct discard_cmd *dc = __add_discard_cmd(sbi, bio,
-						lblkstart, blklen);
-
-		bio->bi_private = dc;
 		bio->bi_end_io = f2fs_submit_discard_endio;
 		bio->bi_opf |= REQ_SYNC;
-		submit_bio(bio);
+
+		__add_discard_cmd(sbi, bio, lblkstart, blklen);
+		wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue);
 	}
 	return err;
 }
@@ -920,14 +973,11 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list);
 	struct discard_entry *entry, *this;
 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
-	struct blk_plug plug;
 	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
 	unsigned int start = 0, end = -1;
 	unsigned int secno, start_segno;
 	bool force = (cpc->reason == CP_DISCARD);
 
-	blk_start_plug(&plug);
-
 	mutex_lock(&dirty_i->seglist_lock);
 
 	while (1) {
@@ -980,12 +1030,11 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 		SM_I(sbi)->dcc_info->nr_discards -= entry->len;
 		kmem_cache_free(discard_entry_slab, entry);
 	}
-
-	blk_finish_plug(&plug);
 }
 
 int create_discard_cmd_control(struct f2fs_sb_info *sbi)
 {
+	dev_t dev = sbi->sb->s_bdev->bd_dev;
 	struct discard_cmd_control *dcc;
 	int err = 0;
 
@@ -1000,11 +1049,22 @@ int create_discard_cmd_control(struct f2fs_sb_info *sbi)
 
 	INIT_LIST_HEAD(&dcc->discard_entry_list);
 	INIT_LIST_HEAD(&dcc->discard_cmd_list);
+	mutex_init(&dcc->cmd_lock);
 	dcc->nr_discards = 0;
 	dcc->max_discards = 0;
 
+	init_waitqueue_head(&dcc->discard_wait_queue);
 	SM_I(sbi)->dcc_info = dcc;
 init_thread:
+	dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
+				"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
+	if (IS_ERR(dcc->f2fs_issue_discard)) {
+		err = PTR_ERR(dcc->f2fs_issue_discard);
+		kfree(dcc);
+		SM_I(sbi)->dcc_info = NULL;
+		return err;
+	}
+
 	return err;
 }
 
@@ -1012,6 +1072,12 @@ void destroy_discard_cmd_control(struct f2fs_sb_info *sbi, bool free)
 {
 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
 
+	if (dcc && dcc->f2fs_issue_discard) {
+		struct task_struct *discard_thread = dcc->f2fs_issue_discard;
+
+		dcc->f2fs_issue_discard = NULL;
+		kthread_stop(discard_thread);
+	}
 	if (free) {
 		kfree(dcc);
 		SM_I(sbi)->dcc_info = NULL;
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 6/6] f2fs: show # of on-going flush and discard bios
  2017-01-12 22:44 [PATCH 1/6] f2fs: clean up flush/discard command namings Jaegeuk Kim
                   ` (3 preceding siblings ...)
  2017-01-12 22:44 ` [PATCH 5/6] f2fs: add a kernel thread to issue discard commands asynchronously Jaegeuk Kim
@ 2017-01-12 22:44 ` Jaegeuk Kim
  2017-01-14  2:26   ` [f2fs-dev] " heyunlei
                     ` (3 more replies)
  2017-02-22  9:40 ` [f2fs-dev] [PATCH 1/6] f2fs: clean up flush/discard command namings Chao Yu
  5 siblings, 4 replies; 25+ messages in thread
From: Jaegeuk Kim @ 2017-01-12 22:44 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This patch adds stat information for flush and discard commands.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/debug.c   | 7 +++++--
 fs/f2fs/f2fs.h    | 3 ++-
 fs/f2fs/segment.c | 4 ++++
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index f9f6b0aeba02..e67cab2fafac 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -54,6 +54,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
 	si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
 	si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
 	si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
+	si->nr_flush = atomic_read(&SM_I(sbi)->fcc_info->submit_flush);
+	si->nr_discard = atomic_read(&SM_I(sbi)->dcc_info->submit_discard);
 	si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
 	si->rsvd_segs = reserved_segments(sbi);
 	si->overp_segs = overprovision_segments(sbi);
@@ -318,8 +320,9 @@ static int stat_show(struct seq_file *s, void *v)
 		seq_printf(s, "  - Inner Struct Count: tree: %d(%d), node: %d\n",
 				si->ext_tree, si->zombie_tree, si->ext_node);
 		seq_puts(s, "\nBalancing F2FS Async:\n");
-		seq_printf(s, "  - IO (CP: %4d, Data: %4d)\n",
-			   si->nr_wb_cp_data, si->nr_wb_data);
+		seq_printf(s, "  - IO (CP: %4d, Data: %4d, Flush: %4d, Discard: %4d)\n",
+			   si->nr_wb_cp_data, si->nr_wb_data,
+			   si->nr_flush, si->nr_discard);
 		seq_printf(s, "  - inmem: %4d, atomic IO: %4d (Max. %4d)\n",
 			   si->inmem_pages, si->aw_cnt, si->max_aw_cnt);
 		seq_printf(s, "  - nodes: %4d in %4d\n",
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ef5e3709c161..d51bc18e7292 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -201,6 +201,7 @@ struct discard_cmd_control {
 	wait_queue_head_t discard_wait_queue;	/* waiting queue for wake-up */
 	struct mutex cmd_lock;
 	int max_discards;			/* max. discards to be issued */
+	atomic_t submit_discard;		/* # of issued discard */
 };
 
 /* for the list of fsync inodes, used only during recovery */
@@ -2254,7 +2255,7 @@ struct f2fs_stat_info {
 	unsigned int ndirty_dirs, ndirty_files, ndirty_all;
 	int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids;
 	int total_count, utilization;
-	int bg_gc, nr_wb_cp_data, nr_wb_data;
+	int bg_gc, nr_wb_cp_data, nr_wb_data, nr_flush, nr_discard;
 	int inline_xattr, inline_inode, inline_dir, orphans;
 	int aw_cnt, max_aw_cnt;
 	unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 74364006bfa6..5a2d380182d9 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -653,6 +653,8 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *d
 {
 	int err = dc->bio->bi_error;
 
+	atomic_dec(&(SM_I(sbi)->dcc_info->submit_discard));
+
 	if (err == -EOPNOTSUPP)
 		err = 0;
 
@@ -678,6 +680,7 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
 			if (dc->state == D_PREP) {
 				dc->state = D_SUBMIT;
 				submit_bio(dc->bio);
+				atomic_inc(&dcc->submit_discard);
 			}
 			wait_for_completion_io(&dc->wait);
 
@@ -723,6 +726,7 @@ static int issue_discard_thread(void *data)
 		if (dc->state == D_PREP) {
 			dc->state = D_SUBMIT;
 			submit_bio(dc->bio);
+			atomic_inc(&dcc->submit_discard);
 			if (iter++ > DISCARD_ISSUE_RATE)
 				break;
 		} else if (dc->state == D_DONE) {
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/6] f2fs: add a kernel thread to issue discard commands asynchronously
  2017-01-12 22:44 ` [PATCH 5/6] f2fs: add a kernel thread to issue discard commands asynchronously Jaegeuk Kim
@ 2017-01-13  8:01   ` Christoph Hellwig
  2017-01-13 19:12     ` Jaegeuk Kim
  2017-02-22  9:40   ` Chao Yu
  1 sibling, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2017-01-13  8:01 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

On Thu, Jan 12, 2017 at 02:44:06PM -0800, Jaegeuk Kim wrote:
> This patch adds a kernel thread to issue discard commands.
> It proposes three states, D_PREP, D_SUBMIT, and D_DONE to identify current
> bio status.

Why?  Instead of creating the complexity of a thread you should look
into issuing the discard bios asynchronously, which should help to
simplify a lot of the discard logic in f2fs.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/6] f2fs: add a kernel thread to issue discard commands asynchronously
  2017-01-13  8:01   ` Christoph Hellwig
@ 2017-01-13 19:12     ` Jaegeuk Kim
  2017-01-16 17:32       ` Christoph Hellwig
  0 siblings, 1 reply; 25+ messages in thread
From: Jaegeuk Kim @ 2017-01-13 19:12 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

On 01/13, Christoph Hellwig wrote:
> On Thu, Jan 12, 2017 at 02:44:06PM -0800, Jaegeuk Kim wrote:
> > This patch adds a kernel thread to issue discard commands.
> > It proposes three states, D_PREP, D_SUBMIT, and D_DONE to identify current
> > bio status.
> 
> Why?  Instead of creating the complexity of a thread you should look
> into issuing the discard bios asynchronously, which should help to
> simplify a lot of the discard logic in f2fs.

Previously, I've done to issue discard bios asynchronously. But the problem that
I've got is that was not enough. When testing nvme SSD with noop IO scheduler,
submit_bio() was blocked at every 8 async discard bios, resulting in very slow
checkpoint process which blocks most of other FS operations.

Thanks,

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [f2fs-dev] [PATCH 6/6] f2fs: show # of on-going flush and discard bios
  2017-01-12 22:44 ` [PATCH 6/6] f2fs: show # of on-going flush and discard bios Jaegeuk Kim
@ 2017-01-14  2:26   ` heyunlei
  2017-01-14  7:27   ` heyunlei
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 25+ messages in thread
From: heyunlei @ 2017-01-14  2:26 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Jaegeuk Kim,

On 2017/1/13 6:44, Jaegeuk Kim wrote:
> This patch adds stat information for flush and discard commands.
>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>  fs/f2fs/debug.c   | 7 +++++--
>  fs/f2fs/f2fs.h    | 3 ++-
>  fs/f2fs/segment.c | 4 ++++
>  3 files changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index f9f6b0aeba02..e67cab2fafac 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -54,6 +54,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>  	si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
>  	si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
>  	si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
> +	si->nr_flush = atomic_read(&SM_I(sbi)->fcc_info->submit_flush);
> +	si->nr_discard = atomic_read(&SM_I(sbi)->dcc_info->submit_discard);
>  	si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
>  	si->rsvd_segs = reserved_segments(sbi);
>  	si->overp_segs = overprovision_segments(sbi);
> @@ -318,8 +320,9 @@ static int stat_show(struct seq_file *s, void *v)
>  		seq_printf(s, "  - Inner Struct Count: tree: %d(%d), node: %d\n",
>  				si->ext_tree, si->zombie_tree, si->ext_node);
>  		seq_puts(s, "\nBalancing F2FS Async:\n");
> -		seq_printf(s, "  - IO (CP: %4d, Data: %4d)\n",
> -			   si->nr_wb_cp_data, si->nr_wb_data);
> +		seq_printf(s, "  - IO (CP: %4d, Data: %4d, Flush: %4d, Discard: %4d)\n",
> +			   si->nr_wb_cp_data, si->nr_wb_data,
> +			   si->nr_flush, si->nr_discard);
>  		seq_printf(s, "  - inmem: %4d, atomic IO: %4d (Max. %4d)\n",
>  			   si->inmem_pages, si->aw_cnt, si->max_aw_cnt);
>  		seq_printf(s, "  - nodes: %4d in %4d\n",
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index ef5e3709c161..d51bc18e7292 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -201,6 +201,7 @@ struct discard_cmd_control {
>  	wait_queue_head_t discard_wait_queue;	/* waiting queue for wake-up */
>  	struct mutex cmd_lock;
>  	int max_discards;			/* max. discards to be issued */
> +	atomic_t submit_discard;		/* # of issued discard */
>  };

Here, we need to initialize submit_discard.

Thanks.
>
>  /* for the list of fsync inodes, used only during recovery */
> @@ -2254,7 +2255,7 @@ struct f2fs_stat_info {
>  	unsigned int ndirty_dirs, ndirty_files, ndirty_all;
>  	int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids;
>  	int total_count, utilization;
> -	int bg_gc, nr_wb_cp_data, nr_wb_data;
> +	int bg_gc, nr_wb_cp_data, nr_wb_data, nr_flush, nr_discard;
>  	int inline_xattr, inline_inode, inline_dir, orphans;
>  	int aw_cnt, max_aw_cnt;
>  	unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 74364006bfa6..5a2d380182d9 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -653,6 +653,8 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *d
>  {
>  	int err = dc->bio->bi_error;
>
> +	atomic_dec(&(SM_I(sbi)->dcc_info->submit_discard));
> +
>  	if (err == -EOPNOTSUPP)
>  		err = 0;
>
> @@ -678,6 +680,7 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
>  			if (dc->state == D_PREP) {
>  				dc->state = D_SUBMIT;
>  				submit_bio(dc->bio);
> +				atomic_inc(&dcc->submit_discard);
>  			}
>  			wait_for_completion_io(&dc->wait);
>
> @@ -723,6 +726,7 @@ static int issue_discard_thread(void *data)
>  		if (dc->state == D_PREP) {
>  			dc->state = D_SUBMIT;
>  			submit_bio(dc->bio);
> +			atomic_inc(&dcc->submit_discard);
>  			if (iter++ > DISCARD_ISSUE_RATE)
>  				break;
>  		} else if (dc->state == D_DONE) {
>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [f2fs-dev] [PATCH 6/6] f2fs: show # of on-going flush and discard bios
  2017-01-12 22:44 ` [PATCH 6/6] f2fs: show # of on-going flush and discard bios Jaegeuk Kim
  2017-01-14  2:26   ` [f2fs-dev] " heyunlei
@ 2017-01-14  7:27   ` heyunlei
  2017-01-14 23:32   ` [PATCH 6/6 v2] " Jaegeuk Kim
  2017-02-22  9:40   ` [f2fs-dev] [PATCH 6/6] " Chao Yu
  3 siblings, 0 replies; 25+ messages in thread
From: heyunlei @ 2017-01-14  7:27 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Jaegeuk,

On 2017/1/13 6:44, Jaegeuk Kim wrote:
> This patch adds stat information for flush and discard commands.
>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>  fs/f2fs/debug.c   | 7 +++++--
>  fs/f2fs/f2fs.h    | 3 ++-
>  fs/f2fs/segment.c | 4 ++++
>  3 files changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index f9f6b0aeba02..e67cab2fafac 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -54,6 +54,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>  	si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
>  	si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
>  	si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
> +	si->nr_flush = atomic_read(&SM_I(sbi)->fcc_info->submit_flush);
> +	si->nr_discard = atomic_read(&SM_I(sbi)->dcc_info->submit_discard);
>  	si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
>  	si->rsvd_segs = reserved_segments(sbi);
>  	si->overp_segs = overprovision_segments(sbi);
> @@ -318,8 +320,9 @@ static int stat_show(struct seq_file *s, void *v)
>  		seq_printf(s, "  - Inner Struct Count: tree: %d(%d), node: %d\n",
>  				si->ext_tree, si->zombie_tree, si->ext_node);
>  		seq_puts(s, "\nBalancing F2FS Async:\n");
> -		seq_printf(s, "  - IO (CP: %4d, Data: %4d)\n",
> -			   si->nr_wb_cp_data, si->nr_wb_data);
> +		seq_printf(s, "  - IO (CP: %4d, Data: %4d, Flush: %4d, Discard: %4d)\n",
> +			   si->nr_wb_cp_data, si->nr_wb_data,
> +			   si->nr_flush, si->nr_discard);
>  		seq_printf(s, "  - inmem: %4d, atomic IO: %4d (Max. %4d)\n",
>  			   si->inmem_pages, si->aw_cnt, si->max_aw_cnt);
>  		seq_printf(s, "  - nodes: %4d in %4d\n",
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index ef5e3709c161..d51bc18e7292 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -201,6 +201,7 @@ struct discard_cmd_control {
>  	wait_queue_head_t discard_wait_queue;	/* waiting queue for wake-up */
>  	struct mutex cmd_lock;
>  	int max_discards;			/* max. discards to be issued */
> +	atomic_t submit_discard;		/* # of issued discard */
>  };
>
>  /* for the list of fsync inodes, used only during recovery */
> @@ -2254,7 +2255,7 @@ struct f2fs_stat_info {
>  	unsigned int ndirty_dirs, ndirty_files, ndirty_all;
>  	int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids;
>  	int total_count, utilization;
> -	int bg_gc, nr_wb_cp_data, nr_wb_data;
> +	int bg_gc, nr_wb_cp_data, nr_wb_data, nr_flush, nr_discard;
>  	int inline_xattr, inline_inode, inline_dir, orphans;
>  	int aw_cnt, max_aw_cnt;
>  	unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 74364006bfa6..5a2d380182d9 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -653,6 +653,8 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *d
>  {
>  	int err = dc->bio->bi_error;
>
> +	atomic_dec(&(SM_I(sbi)->dcc_info->submit_discard));
Here submit_discard decrease will include discard command without bio submit.

Thanks.
> +
>  	if (err == -EOPNOTSUPP)
>  		err = 0;
>
> @@ -678,6 +680,7 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
>  			if (dc->state == D_PREP) {
>  				dc->state = D_SUBMIT;
>  				submit_bio(dc->bio);
> +				atomic_inc(&dcc->submit_discard);
>  			}
>  			wait_for_completion_io(&dc->wait);
>
> @@ -723,6 +726,7 @@ static int issue_discard_thread(void *data)
>  		if (dc->state == D_PREP) {
>  			dc->state = D_SUBMIT;
>  			submit_bio(dc->bio);
> +			atomic_inc(&dcc->submit_discard);
>  			if (iter++ > DISCARD_ISSUE_RATE)
>  				break;
>  		} else if (dc->state == D_DONE) {
>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 6/6 v2] f2fs: show # of on-going flush and discard bios
  2017-01-12 22:44 ` [PATCH 6/6] f2fs: show # of on-going flush and discard bios Jaegeuk Kim
  2017-01-14  2:26   ` [f2fs-dev] " heyunlei
  2017-01-14  7:27   ` heyunlei
@ 2017-01-14 23:32   ` Jaegeuk Kim
  2017-02-22  9:40   ` [f2fs-dev] [PATCH 6/6] " Chao Yu
  3 siblings, 0 replies; 25+ messages in thread
From: Jaegeuk Kim @ 2017-01-14 23:32 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel

Change log from v1:
 - Fix missing path pointed by Yunlei

>From 5d86527e4c70aeb8482dac24059624da720a0807 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 11 Jan 2017 10:20:04 -0800
Subject: [PATCH] f2fs: show # of on-going flush and discard bios

This patch adds stat information for flush and discard commands.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/debug.c   | 11 +++++++++--
 fs/f2fs/f2fs.h    |  3 ++-
 fs/f2fs/segment.c |  6 ++++++
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index f9f6b0aeba02..0ca977a94c13 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -54,6 +54,12 @@ static void update_general_status(struct f2fs_sb_info *sbi)
 	si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
 	si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
 	si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
+	if (SM_I(sbi) && SM_I(sbi)->fcc_info)
+		si->nr_flush =
+			atomic_read(&SM_I(sbi)->fcc_info->submit_flush);
+	if (SM_I(sbi) && SM_I(sbi)->dcc_info)
+		si->nr_discard =
+			atomic_read(&SM_I(sbi)->dcc_info->submit_discard);
 	si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
 	si->rsvd_segs = reserved_segments(sbi);
 	si->overp_segs = overprovision_segments(sbi);
@@ -318,8 +324,9 @@ static int stat_show(struct seq_file *s, void *v)
 		seq_printf(s, "  - Inner Struct Count: tree: %d(%d), node: %d\n",
 				si->ext_tree, si->zombie_tree, si->ext_node);
 		seq_puts(s, "\nBalancing F2FS Async:\n");
-		seq_printf(s, "  - IO (CP: %4d, Data: %4d)\n",
-			   si->nr_wb_cp_data, si->nr_wb_data);
+		seq_printf(s, "  - IO (CP: %4d, Data: %4d, Flush: %4d, Discard: %4d)\n",
+			   si->nr_wb_cp_data, si->nr_wb_data,
+			   si->nr_flush, si->nr_discard);
 		seq_printf(s, "  - inmem: %4d, atomic IO: %4d (Max. %4d)\n",
 			   si->inmem_pages, si->aw_cnt, si->max_aw_cnt);
 		seq_printf(s, "  - nodes: %4d in %4d\n",
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ef5e3709c161..d51bc18e7292 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -201,6 +201,7 @@ struct discard_cmd_control {
 	wait_queue_head_t discard_wait_queue;	/* waiting queue for wake-up */
 	struct mutex cmd_lock;
 	int max_discards;			/* max. discards to be issued */
+	atomic_t submit_discard;		/* # of issued discard */
 };
 
 /* for the list of fsync inodes, used only during recovery */
@@ -2254,7 +2255,7 @@ struct f2fs_stat_info {
 	unsigned int ndirty_dirs, ndirty_files, ndirty_all;
 	int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids;
 	int total_count, utilization;
-	int bg_gc, nr_wb_cp_data, nr_wb_data;
+	int bg_gc, nr_wb_cp_data, nr_wb_data, nr_flush, nr_discard;
 	int inline_xattr, inline_inode, inline_dir, orphans;
 	int aw_cnt, max_aw_cnt;
 	unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 74364006bfa6..6b3d847bcb96 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -653,6 +653,9 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *d
 {
 	int err = dc->bio->bi_error;
 
+	if (dc->state == D_DONE)
+		atomic_dec(&(SM_I(sbi)->dcc_info->submit_discard));
+
 	if (err == -EOPNOTSUPP)
 		err = 0;
 
@@ -678,6 +681,7 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
 			if (dc->state == D_PREP) {
 				dc->state = D_SUBMIT;
 				submit_bio(dc->bio);
+				atomic_inc(&dcc->submit_discard);
 			}
 			wait_for_completion_io(&dc->wait);
 
@@ -723,6 +727,7 @@ static int issue_discard_thread(void *data)
 		if (dc->state == D_PREP) {
 			dc->state = D_SUBMIT;
 			submit_bio(dc->bio);
+			atomic_inc(&dcc->submit_discard);
 			if (iter++ > DISCARD_ISSUE_RATE)
 				break;
 		} else if (dc->state == D_DONE) {
@@ -1050,6 +1055,7 @@ int create_discard_cmd_control(struct f2fs_sb_info *sbi)
 	INIT_LIST_HEAD(&dcc->discard_entry_list);
 	INIT_LIST_HEAD(&dcc->discard_cmd_list);
 	mutex_init(&dcc->cmd_lock);
+	atomic_set(&dcc->submit_discard, 0);
 	dcc->nr_discards = 0;
 	dcc->max_discards = 0;
 
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/6] f2fs: add a kernel thread to issue discard commands asynchronously
  2017-01-13 19:12     ` Jaegeuk Kim
@ 2017-01-16 17:32       ` Christoph Hellwig
  2017-02-05  8:59         ` Christoph Hellwig
  0 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2017-01-16 17:32 UTC (permalink / raw)
  To: Jaegeuk Kim
  Cc: Christoph Hellwig, linux-kernel, linux-fsdevel, linux-f2fs-devel, axboe

On Fri, Jan 13, 2017 at 11:12:11AM -0800, Jaegeuk Kim wrote:
> Previously, I've done to issue discard bios asynchronously. But the problem that
> I've got is that was not enough. When testing nvme SSD with noop IO scheduler,
> submit_bio() was blocked at every 8 async discard bios, resulting in very slow
> checkpoint process which blocks most of other FS operations.

Where does it block?  Are you running out of request?  What driver is
this on top of?

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/6] f2fs: add a kernel thread to issue discard commands asynchronously
  2017-01-16 17:32       ` Christoph Hellwig
@ 2017-02-05  8:59         ` Christoph Hellwig
  2017-02-07  3:44           ` Jaegeuk Kim
  0 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2017-02-05  8:59 UTC (permalink / raw)
  To: Jaegeuk Kim
  Cc: Christoph Hellwig, linux-kernel, linux-fsdevel, linux-f2fs-devel, axboe

On Mon, Jan 16, 2017 at 09:32:20AM -0800, Christoph Hellwig wrote:
> On Fri, Jan 13, 2017 at 11:12:11AM -0800, Jaegeuk Kim wrote:
> > Previously, I've done to issue discard bios asynchronously. But the problem that
> > I've got is that was not enough. When testing nvme SSD with noop IO scheduler,
> > submit_bio() was blocked at every 8 async discard bios, resulting in very slow
> > checkpoint process which blocks most of other FS operations.
> 
> Where does it block?  Are you running out of request?  What driver is
> this on top of?

Ping?  I'm currently spending a lot of effort on fs and block dіscard
code, and I'd like to make sure we get common infrastructure instead
of local hacks.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/6] f2fs: add a kernel thread to issue discard commands asynchronously
  2017-02-05  8:59         ` Christoph Hellwig
@ 2017-02-07  3:44           ` Jaegeuk Kim
  2017-02-08 16:02             ` Christoph Hellwig
  0 siblings, 1 reply; 25+ messages in thread
From: Jaegeuk Kim @ 2017-02-07  3:44 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel, axboe

On 02/05, Christoph Hellwig wrote:
> On Mon, Jan 16, 2017 at 09:32:20AM -0800, Christoph Hellwig wrote:
> > On Fri, Jan 13, 2017 at 11:12:11AM -0800, Jaegeuk Kim wrote:
> > > Previously, I've done to issue discard bios asynchronously. But the problem that
> > > I've got is that was not enough. When testing nvme SSD with noop IO scheduler,
> > > submit_bio() was blocked at every 8 async discard bios, resulting in very slow
> > > checkpoint process which blocks most of other FS operations.
> > 
> > Where does it block?  Are you running out of request?  What driver is
> > this on top of?
> 
> Ping?  I'm currently spending a lot of effort on fs and block dіscard
> code, and I'd like to make sure we get common infrastructure instead
> of local hacks.

Sorry for the late response due to the travel.

When doing fstrim with a fresh f2fs image fomatted on Intel NVMe SSD whose
model name is SSDPE2MW012T4, I've got the following trace.

...
fstrim-12620 [000] .... 334572.907534: f2fs_issue_discard: dev = (259,1), blkstart = 0x902900, blklen = 0x400
fstrim-12620 [000] .... 334572.907535: block_bio_remap: 259,0 D 75583488 + 8192 <- (259,1) 75581440
fstrim-12620 [000] .... 334572.907535: block_bio_queue: 259,0 D 75583488 + 8192 [fstrim]
fstrim-12620 [000] .... 334572.907535: block_getrq: 259,0 D 75583488 + 8192 [fstrim]
fstrim-12620 [000] .... 334572.907536: block_unplug: [fstrim] 1
fstrim-12620 [000] .... 334572.907536: block_rq_insert: 259,0 D 0 () 75583488 + 8192 [fstrim]
fstrim-12620 [000] .... 334572.907536: block_rq_issue: 259,0 D 0 () 75583488 + 8192 [fstrim]
 < repeat 6 times >
fstrim-12620 [000] .... 334572.907620: f2fs_issue_discard: dev = (259,1), blkstart = 0x904500, blklen = 0x400
fstrim-12620 [000] .... 334572.907620: block_bio_remap: 259,0 D 75640832 + 8192 <- (259,1) 75638784
fstrim-12620 [000] .... 334572.907620: block_bio_queue: 259,0 D 75640832 + 8192 [fstrim]
fstrim-12620 [000] .... 334572.907621: block_getrq: 259,0 D 75640832 + 8192 [fstrim]
<idle>-0     [000] d.h. 334572.907723: block_rq_complete: 259,0 D () 67260416 + 8192 [0]
<idle>-0     [000] d.h. 334572.907942: block_rq_complete: 259,0 D () 67268608 + 8192 [0]
<idle>-0     [000] d.h. 334572.908155: block_rq_complete: 259,0 D () 67276800 + 8192 [0]
<idle>-0     [000] d.h. 334572.908374: block_rq_complete: 259,0 D () 67284992 + 8192 [0]
<idle>-0     [000] d.h. 334572.908597: block_rq_complete: 259,0 D () 67293184 + 8192 [0]
<idle>-0     [000] d.h. 334572.908823: block_rq_complete: 259,0 D () 67301376 + 8192 [0]
<idle>-0     [000] d.h. 334572.909033: block_rq_complete: 259,0 D () 67309568 + 8192 [0]
<idle>-0     [000] d.h. 334572.909216: block_rq_complete: 259,0 D () 67317760 + 8192 [0]
fstrim-12620 [000] .... 334572.909222: block_unplug: [fstrim] 1
fstrim-12620 [000] .... 334572.909223: block_rq_insert: 259,0 D 0 () 75640832 + 8192 [fstrim]
fstrim-12620 [000] .... 334572.909224: block_rq_issue: 259,0 D 0 () 75640832 + 8192 [fstrim]
fstrim-12620 [000] .... 334572.909240: f2fs_issue_discard: dev = (259,1), blkstart = 0x904900, blklen = 0x400
fstrim-12620 [000] .... 334572.909241: block_bio_remap: 259,0 D 75649024 + 8192 <- (259,1) 75646976
fstrim-12620 [000] .... 334572.909241: block_bio_queue: 259,0 D 75649024 + 8192 [fstrim]
fstrim-12620 [000] .... 334572.909241: block_getrq: 259,0 D 75649024 + 8192 [fstrim]
fstrim-12620 [000] .... 334572.909242: block_unplug: [fstrim] 1
fstrim-12620 [000] .... 334572.909242: block_rq_insert: 259,0 D 0 () 75649024 + 8192 [fstrim]
fstrim-12620 [000] .... 334572.909242: block_rq_issue: 259,0 D 0 () 75649024 + 8192 [fstrim]
 < repeat >

So, I investigated why block_rq_complete() happened in more detail.

The root-caused call path looks like:
 - submit_bio
  - generic_make_request
   - q->make_request_fn
    - blk_mq_make_request
     - blk_mq_map_request
      - blk_mq_alloc_request
       - blk_mq_get_tag
        - __blk_mq_get_tag
         - bt_get
          - blk_mq_run_hw_queue
          - finish_wait
          --> this waits for pending 8 discard bios!

It seems the problem comes from the storage processing discard commands too
slowly comparing to normal read/write IOs.

Any thoughts?

Thanks,

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/6] f2fs: add a kernel thread to issue discard commands asynchronously
  2017-02-07  3:44           ` Jaegeuk Kim
@ 2017-02-08 16:02             ` Christoph Hellwig
  2017-02-08 22:05               ` Jaegeuk Kim
  0 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2017-02-08 16:02 UTC (permalink / raw)
  To: Jaegeuk Kim
  Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel, axboe, linux-nvme

On Mon, Feb 06, 2017 at 07:44:03PM -0800, Jaegeuk Kim wrote:
> Sorry for the late response due to the travel.
> 
> When doing fstrim with a fresh f2fs image fomatted on Intel NVMe SSD whose
> model name is SSDPE2MW012T4, I've got the following trace.

<snip>

> So, I investigated why block_rq_complete() happened in more detail.
> 
> The root-caused call path looks like:
>  - submit_bio
>   - generic_make_request
>    - q->make_request_fn
>     - blk_mq_make_request
>      - blk_mq_map_request
>       - blk_mq_alloc_request
>        - blk_mq_get_tag
>         - __blk_mq_get_tag
>          - bt_get
>           - blk_mq_run_hw_queue
>           - finish_wait
>           --> this waits for pending 8 discard bios!

You're blocking on tag allocation.  How many tags per queue does
your device have?, e.g. do a

cat /sys/block/nvme0n1/mq/0/nr_tags

> It seems the problem comes from the storage processing discard commands too
> slowly comparing to normal read/write IOs.
> 
> Any thoughts?

Deallocate is always going to be an exception path compared to normal
read/write… but just how much slower is going to be device
dependent.

One option would be to reuse the number of discards, for that can you
try the series here to support vectored discards:

http://git.infradead.org/users/hch/block.git/shortlog/refs/heads/vectored-discard-for-axboe

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/6] f2fs: add a kernel thread to issue discard commands asynchronously
  2017-02-08 16:02             ` Christoph Hellwig
@ 2017-02-08 22:05               ` Jaegeuk Kim
  0 siblings, 0 replies; 25+ messages in thread
From: Jaegeuk Kim @ 2017-02-08 22:05 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel, axboe, linux-nvme

On 02/08, Christoph Hellwig wrote:
> On Mon, Feb 06, 2017 at 07:44:03PM -0800, Jaegeuk Kim wrote:
> > Sorry for the late response due to the travel.
> > 
> > When doing fstrim with a fresh f2fs image fomatted on Intel NVMe SSD whose
> > model name is SSDPE2MW012T4, I've got the following trace.
> 
> <snip>
> 
> > So, I investigated why block_rq_complete() happened in more detail.
> > 
> > The root-caused call path looks like:
> >  - submit_bio
> >   - generic_make_request
> >    - q->make_request_fn
> >     - blk_mq_make_request
> >      - blk_mq_map_request
> >       - blk_mq_alloc_request
> >        - blk_mq_get_tag
> >         - __blk_mq_get_tag
> >          - bt_get
> >           - blk_mq_run_hw_queue
> >           - finish_wait
> >           --> this waits for pending 8 discard bios!
> 
> You're blocking on tag allocation.  How many tags per queue does
> your device have?, e.g. do a
> 
> cat /sys/block/nvme0n1/mq/0/nr_tags

It shows 1023.

> > It seems the problem comes from the storage processing discard commands too
> > slowly comparing to normal read/write IOs.
> > 
> > Any thoughts?
> 
> Deallocate is always going to be an exception path compared to normal
> read/write… but just how much slower is going to be device
> dependent.
> 
> One option would be to reuse the number of discards, for that can you
> try the series here to support vectored discards:
> 
> http://git.infradead.org/users/hch/block.git/shortlog/refs/heads/vectored-discard-for-axboe

I tried this, but couldn't see any difference.

Thanks,

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [f2fs-dev] [PATCH 1/6] f2fs: clean up flush/discard command namings
  2017-01-12 22:44 [PATCH 1/6] f2fs: clean up flush/discard command namings Jaegeuk Kim
                   ` (4 preceding siblings ...)
  2017-01-12 22:44 ` [PATCH 6/6] f2fs: show # of on-going flush and discard bios Jaegeuk Kim
@ 2017-02-22  9:40 ` Chao Yu
  5 siblings, 0 replies; 25+ messages in thread
From: Chao Yu @ 2017-02-22  9:40 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2017/1/13 6:44, Jaegeuk Kim wrote:
> This patch simply cleans up the names for flush/discard commands.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

Reviewed-by: Chao Yu <yuchao0@huawei.com>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [f2fs-dev] [PATCH 2/6] f2fs: reorganize stat information
  2017-01-12 22:44 ` [PATCH 2/6] f2fs: reorganize stat information Jaegeuk Kim
@ 2017-02-22  9:40   ` Chao Yu
  0 siblings, 0 replies; 25+ messages in thread
From: Chao Yu @ 2017-02-22  9:40 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2017/1/13 6:44, Jaegeuk Kim wrote:
> This patch modifies stat information more clearly.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

Reviewed-by: Chao Yu <yuchao0@huawei.com>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 3/6] f2fs: remove batched discard in f2fs_trim_fs
  2017-01-12 22:44 ` [PATCH 3/6] f2fs: remove batched discard in f2fs_trim_fs Jaegeuk Kim
@ 2017-02-22  9:40   ` Chao Yu
  2017-02-22 21:55     ` [f2fs-dev] " Jaegeuk Kim
  0 siblings, 1 reply; 25+ messages in thread
From: Chao Yu @ 2017-02-22  9:40 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2017/1/13 6:44, Jaegeuk Kim wrote:
> We don't need to do multiple checkpoints, since we don't actually wait for
> completion of discard commands during checkpoint.
> Instead, we still need to avoid very big discard commands, since that large
> discard can interfere block allocation.

I hope we can keep this functionality and related sysfs interface, the main
concern is that: for 16T size fragmented partition, at most there will be
~sizeof(u32)/2 4K blocks which could be invalid and needed to be discarded.
After removal of in-batch discard, we have to handle billions of discard message
with gc_mutex & cp_rwsem held, which will freeze f2fs for very long time.

So I suggest we just set trim_sections to 512 segments (covers one GB) by
default. How do you think?

Thanks,

> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>  Documentation/ABI/testing/sysfs-fs-f2fs |  6 ------
>  fs/f2fs/f2fs.h                          |  9 +-------
>  fs/f2fs/segment.c                       | 38 +++++++++++----------------------
>  fs/f2fs/super.c                         |  2 --
>  4 files changed, 14 insertions(+), 41 deletions(-)
> 
> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> index a809f6005f14..df6b3f6e164a 100644
> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> @@ -75,12 +75,6 @@ Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
>  Description:
>  		 Controls the memory footprint used by f2fs.
>  
> -What:		/sys/fs/f2fs/<disk>/trim_sections
> -Date:		February 2015
> -Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> -Description:
> -		 Controls the trimming rate in batch mode.
> -
>  What:		/sys/fs/f2fs/<disk>/cp_interval
>  Date:		October 2015
>  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 548e75d18ec1..a2850bf2a487 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -128,11 +128,7 @@ enum {
>  	CP_DISCARD,
>  };
>  
> -#define DEF_BATCHED_TRIM_SECTIONS	2
> -#define BATCHED_TRIM_SEGMENTS(sbi)	\
> -		(SM_I(sbi)->trim_sections * (sbi)->segs_per_sec)
> -#define BATCHED_TRIM_BLOCKS(sbi)	\
> -		(BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
> +#define MAX_DISCARD_BLOCKS(sbi) (1 << (sbi)->log_blocks_per_seg)
>  #define DEF_CP_INTERVAL			60	/* 60 secs */
>  #define DEF_IDLE_INTERVAL		5	/* 5 secs */
>  
> @@ -638,9 +634,6 @@ struct f2fs_sm_info {
>  	int nr_discards;			/* # of discards in the list */
>  	int max_discards;			/* max. discards to be issued */
>  
> -	/* for batched trimming */
> -	unsigned int trim_sections;		/* # of sections to trim */
> -
>  	struct list_head sit_entry_set;	/* sit entry set list */
>  
>  	unsigned int ipu_policy;	/* in-place-update policy */
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 73282108fa33..e6f3c6db7616 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -823,7 +823,8 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
>  	if (!list_empty(head)) {
>  		last = list_last_entry(head, struct discard_entry, list);
>  		if (START_BLOCK(sbi, cpc->trim_start) + start ==
> -						last->blkaddr + last->len) {
> +				last->blkaddr + last->len &&
> +				last->len <= MAX_DISCARD_BLOCKS(sbi)) {
>  			last->len += end - start;
>  			goto done;
>  		}
> @@ -1513,36 +1514,25 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
>  			"Found FS corruption, run fsck to fix.");
>  		goto out;
>  	}
> +	if (sbi->discard_blks == 0)
> +		goto out;
>  
>  	/* start/end segment number in main_area */
>  	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
>  	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
>  						GET_SEGNO(sbi, end);
> +	/*
> +	 * do checkpoint to issue discard commands safely since we now can
> +	 * use async discard.
> +	 */
>  	cpc.reason = CP_DISCARD;
>  	cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
> +	cpc.trim_start = start_segno;
> +	cpc.trim_end = end_segno;
>  
> -	/* do checkpoint to issue discard commands safely */
> -	for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
> -		cpc.trim_start = start_segno;
> -
> -		if (sbi->discard_blks == 0)
> -			break;
> -		else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
> -			cpc.trim_end = end_segno;
> -		else
> -			cpc.trim_end = min_t(unsigned int,
> -				rounddown(start_segno +
> -				BATCHED_TRIM_SEGMENTS(sbi),
> -				sbi->segs_per_sec) - 1, end_segno);
> -
> -		mutex_lock(&sbi->gc_mutex);
> -		err = write_checkpoint(sbi, &cpc);
> -		mutex_unlock(&sbi->gc_mutex);
> -		if (err)
> -			break;
> -
> -		schedule();
> -	}
> +	mutex_lock(&sbi->gc_mutex);
> +	err = write_checkpoint(sbi, &cpc);
> +	mutex_unlock(&sbi->gc_mutex);
>  out:
>  	range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
>  	return err;
> @@ -2708,8 +2698,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
>  	sm_info->nr_discards = 0;
>  	sm_info->max_discards = 0;
>  
> -	sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
> -
>  	INIT_LIST_HEAD(&sm_info->sit_entry_set);
>  
>  	if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 461c29043aec..37d40d8aa9c4 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -284,7 +284,6 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
>  F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
> -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
> @@ -309,7 +308,6 @@ static struct attribute *f2fs_attrs[] = {
>  	ATTR_LIST(gc_idle),
>  	ATTR_LIST(reclaim_segments),
>  	ATTR_LIST(max_small_discards),
> -	ATTR_LIST(batched_trim_sections),
>  	ATTR_LIST(ipu_policy),
>  	ATTR_LIST(min_ipu_util),
>  	ATTR_LIST(min_fsync_blocks),
> 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/6] f2fs: factor out discard command info into discard_cmd_control
  2017-01-12 22:44 ` [PATCH 4/6] f2fs: factor out discard command info into discard_cmd_control Jaegeuk Kim
@ 2017-02-22  9:40   ` Chao Yu
  0 siblings, 0 replies; 25+ messages in thread
From: Chao Yu @ 2017-02-22  9:40 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2017/1/13 6:44, Jaegeuk Kim wrote:
> This patch adds discard_cmd_control with the existing discarding controls.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

Reviewed-by: Chao Yu <yuchao0@huawei.com>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/6] f2fs: add a kernel thread to issue discard commands asynchronously
  2017-01-12 22:44 ` [PATCH 5/6] f2fs: add a kernel thread to issue discard commands asynchronously Jaegeuk Kim
  2017-01-13  8:01   ` Christoph Hellwig
@ 2017-02-22  9:40   ` Chao Yu
  1 sibling, 0 replies; 25+ messages in thread
From: Chao Yu @ 2017-02-22  9:40 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2017/1/13 6:44, Jaegeuk Kim wrote:
> This patch adds a kernel thread to issue discard commands.
> It proposes three states, D_PREP, D_SUBMIT, and D_DONE to identify current
> bio status.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

Reviewed-by: Chao Yu <yuchao0@huawei.com>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [f2fs-dev] [PATCH 6/6] f2fs: show # of on-going flush and discard bios
  2017-01-12 22:44 ` [PATCH 6/6] f2fs: show # of on-going flush and discard bios Jaegeuk Kim
                     ` (2 preceding siblings ...)
  2017-01-14 23:32   ` [PATCH 6/6 v2] " Jaegeuk Kim
@ 2017-02-22  9:40   ` Chao Yu
  3 siblings, 0 replies; 25+ messages in thread
From: Chao Yu @ 2017-02-22  9:40 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2017/1/13 6:44, Jaegeuk Kim wrote:
> This patch adds stat information for flush and discard commands.

Need initialization of atomical variable?

> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

Reviewed-by: Chao Yu <yuchao0@huawei.com>

Thanks,

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [f2fs-dev] [PATCH 3/6] f2fs: remove batched discard in f2fs_trim_fs
  2017-02-22  9:40   ` Chao Yu
@ 2017-02-22 21:55     ` Jaegeuk Kim
  2017-02-23  2:19       ` Chao Yu
  0 siblings, 1 reply; 25+ messages in thread
From: Jaegeuk Kim @ 2017-02-22 21:55 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

On 02/22, Chao Yu wrote:
> On 2017/1/13 6:44, Jaegeuk Kim wrote:
> > We don't need to do multiple checkpoints, since we don't actually wait for
> > completion of discard commands during checkpoint.
> > Instead, we still need to avoid very big discard commands, since that large
> > discard can interfere block allocation.
> 
> I hope we can keep this functionality and related sysfs interface, the main
> concern is that: for 16T size fragmented partition, at most there will be
> ~sizeof(u32)/2 4K blocks which could be invalid and needed to be discarded.
> After removal of in-batch discard, we have to handle billions of discard message
> with gc_mutex & cp_rwsem held, which will freeze f2fs for very long time.

The baseline of this patch is that we have a kernel thread to issue discard
commands asynchronously. So, I don't expect such the huge latency anymore.

Thanks,

> 
> So I suggest we just set trim_sections to 512 segments (covers one GB) by
> default. How do you think?
> 
> Thanks,
> 
> > 
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > ---
> >  Documentation/ABI/testing/sysfs-fs-f2fs |  6 ------
> >  fs/f2fs/f2fs.h                          |  9 +-------
> >  fs/f2fs/segment.c                       | 38 +++++++++++----------------------
> >  fs/f2fs/super.c                         |  2 --
> >  4 files changed, 14 insertions(+), 41 deletions(-)
> > 
> > diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> > index a809f6005f14..df6b3f6e164a 100644
> > --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> > +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> > @@ -75,12 +75,6 @@ Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
> >  Description:
> >  		 Controls the memory footprint used by f2fs.
> >  
> > -What:		/sys/fs/f2fs/<disk>/trim_sections
> > -Date:		February 2015
> > -Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> > -Description:
> > -		 Controls the trimming rate in batch mode.
> > -
> >  What:		/sys/fs/f2fs/<disk>/cp_interval
> >  Date:		October 2015
> >  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 548e75d18ec1..a2850bf2a487 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -128,11 +128,7 @@ enum {
> >  	CP_DISCARD,
> >  };
> >  
> > -#define DEF_BATCHED_TRIM_SECTIONS	2
> > -#define BATCHED_TRIM_SEGMENTS(sbi)	\
> > -		(SM_I(sbi)->trim_sections * (sbi)->segs_per_sec)
> > -#define BATCHED_TRIM_BLOCKS(sbi)	\
> > -		(BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
> > +#define MAX_DISCARD_BLOCKS(sbi) (1 << (sbi)->log_blocks_per_seg)
> >  #define DEF_CP_INTERVAL			60	/* 60 secs */
> >  #define DEF_IDLE_INTERVAL		5	/* 5 secs */
> >  
> > @@ -638,9 +634,6 @@ struct f2fs_sm_info {
> >  	int nr_discards;			/* # of discards in the list */
> >  	int max_discards;			/* max. discards to be issued */
> >  
> > -	/* for batched trimming */
> > -	unsigned int trim_sections;		/* # of sections to trim */
> > -
> >  	struct list_head sit_entry_set;	/* sit entry set list */
> >  
> >  	unsigned int ipu_policy;	/* in-place-update policy */
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index 73282108fa33..e6f3c6db7616 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -823,7 +823,8 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
> >  	if (!list_empty(head)) {
> >  		last = list_last_entry(head, struct discard_entry, list);
> >  		if (START_BLOCK(sbi, cpc->trim_start) + start ==
> > -						last->blkaddr + last->len) {
> > +				last->blkaddr + last->len &&
> > +				last->len <= MAX_DISCARD_BLOCKS(sbi)) {
> >  			last->len += end - start;
> >  			goto done;
> >  		}
> > @@ -1513,36 +1514,25 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
> >  			"Found FS corruption, run fsck to fix.");
> >  		goto out;
> >  	}
> > +	if (sbi->discard_blks == 0)
> > +		goto out;
> >  
> >  	/* start/end segment number in main_area */
> >  	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
> >  	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
> >  						GET_SEGNO(sbi, end);
> > +	/*
> > +	 * do checkpoint to issue discard commands safely since we now can
> > +	 * use async discard.
> > +	 */
> >  	cpc.reason = CP_DISCARD;
> >  	cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
> > +	cpc.trim_start = start_segno;
> > +	cpc.trim_end = end_segno;
> >  
> > -	/* do checkpoint to issue discard commands safely */
> > -	for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
> > -		cpc.trim_start = start_segno;
> > -
> > -		if (sbi->discard_blks == 0)
> > -			break;
> > -		else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
> > -			cpc.trim_end = end_segno;
> > -		else
> > -			cpc.trim_end = min_t(unsigned int,
> > -				rounddown(start_segno +
> > -				BATCHED_TRIM_SEGMENTS(sbi),
> > -				sbi->segs_per_sec) - 1, end_segno);
> > -
> > -		mutex_lock(&sbi->gc_mutex);
> > -		err = write_checkpoint(sbi, &cpc);
> > -		mutex_unlock(&sbi->gc_mutex);
> > -		if (err)
> > -			break;
> > -
> > -		schedule();
> > -	}
> > +	mutex_lock(&sbi->gc_mutex);
> > +	err = write_checkpoint(sbi, &cpc);
> > +	mutex_unlock(&sbi->gc_mutex);
> >  out:
> >  	range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
> >  	return err;
> > @@ -2708,8 +2698,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
> >  	sm_info->nr_discards = 0;
> >  	sm_info->max_discards = 0;
> >  
> > -	sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
> > -
> >  	INIT_LIST_HEAD(&sm_info->sit_entry_set);
> >  
> >  	if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index 461c29043aec..37d40d8aa9c4 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -284,7 +284,6 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
> >  F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
> >  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
> >  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
> > -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
> >  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
> >  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
> >  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
> > @@ -309,7 +308,6 @@ static struct attribute *f2fs_attrs[] = {
> >  	ATTR_LIST(gc_idle),
> >  	ATTR_LIST(reclaim_segments),
> >  	ATTR_LIST(max_small_discards),
> > -	ATTR_LIST(batched_trim_sections),
> >  	ATTR_LIST(ipu_policy),
> >  	ATTR_LIST(min_ipu_util),
> >  	ATTR_LIST(min_fsync_blocks),
> > 
> 
> 
> ------------------------------------------------------------------------------
> Check out the vibrant tech community on one of the world's most
> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [f2fs-dev] [PATCH 3/6] f2fs: remove batched discard in f2fs_trim_fs
  2017-02-22 21:55     ` [f2fs-dev] " Jaegeuk Kim
@ 2017-02-23  2:19       ` Chao Yu
  2017-02-23  4:25         ` Jaegeuk Kim
  0 siblings, 1 reply; 25+ messages in thread
From: Chao Yu @ 2017-02-23  2:19 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2017/2/23 5:55, Jaegeuk Kim wrote:
> On 02/22, Chao Yu wrote:
>> On 2017/1/13 6:44, Jaegeuk Kim wrote:
>>> We don't need to do multiple checkpoints, since we don't actually wait for
>>> completion of discard commands during checkpoint.
>>> Instead, we still need to avoid very big discard commands, since that large
>>> discard can interfere block allocation.
>>
>> I hope we can keep this functionality and related sysfs interface, the main
>> concern is that: for 16T size fragmented partition, at most there will be
>> ~sizeof(u32)/2 4K blocks which could be invalid and needed to be discarded.
>> After removal of in-batch discard, we have to handle billions of discard message
>> with gc_mutex & cp_rwsem held, which will freeze f2fs for very long time.
> 
> The baseline of this patch is that we have a kernel thread to issue discard
> commands asynchronously. So, I don't expect such the huge latency anymore.

Yes, I know async kernel thread was already introduced, and we can expect such
implementation can help to reduce huge handling latency. But still in checkpoint
we need to check region of invalid block and send the pending discard entries to
kernel thread, so what I concern is that in scenario of there is huge number of
discard messages, we will still face long latency with cp lock held.

Thanks,

> 
> Thanks,
> 
>>
>> So I suggest we just set trim_sections to 512 segments (covers one GB) by
>> default. How do you think?
>>
>> Thanks,
>>
>>>
>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>> ---
>>>  Documentation/ABI/testing/sysfs-fs-f2fs |  6 ------
>>>  fs/f2fs/f2fs.h                          |  9 +-------
>>>  fs/f2fs/segment.c                       | 38 +++++++++++----------------------
>>>  fs/f2fs/super.c                         |  2 --
>>>  4 files changed, 14 insertions(+), 41 deletions(-)
>>>
>>> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
>>> index a809f6005f14..df6b3f6e164a 100644
>>> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
>>> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
>>> @@ -75,12 +75,6 @@ Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
>>>  Description:
>>>  		 Controls the memory footprint used by f2fs.
>>>  
>>> -What:		/sys/fs/f2fs/<disk>/trim_sections
>>> -Date:		February 2015
>>> -Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
>>> -Description:
>>> -		 Controls the trimming rate in batch mode.
>>> -
>>>  What:		/sys/fs/f2fs/<disk>/cp_interval
>>>  Date:		October 2015
>>>  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index 548e75d18ec1..a2850bf2a487 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -128,11 +128,7 @@ enum {
>>>  	CP_DISCARD,
>>>  };
>>>  
>>> -#define DEF_BATCHED_TRIM_SECTIONS	2
>>> -#define BATCHED_TRIM_SEGMENTS(sbi)	\
>>> -		(SM_I(sbi)->trim_sections * (sbi)->segs_per_sec)
>>> -#define BATCHED_TRIM_BLOCKS(sbi)	\
>>> -		(BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
>>> +#define MAX_DISCARD_BLOCKS(sbi) (1 << (sbi)->log_blocks_per_seg)
>>>  #define DEF_CP_INTERVAL			60	/* 60 secs */
>>>  #define DEF_IDLE_INTERVAL		5	/* 5 secs */
>>>  
>>> @@ -638,9 +634,6 @@ struct f2fs_sm_info {
>>>  	int nr_discards;			/* # of discards in the list */
>>>  	int max_discards;			/* max. discards to be issued */
>>>  
>>> -	/* for batched trimming */
>>> -	unsigned int trim_sections;		/* # of sections to trim */
>>> -
>>>  	struct list_head sit_entry_set;	/* sit entry set list */
>>>  
>>>  	unsigned int ipu_policy;	/* in-place-update policy */
>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>> index 73282108fa33..e6f3c6db7616 100644
>>> --- a/fs/f2fs/segment.c
>>> +++ b/fs/f2fs/segment.c
>>> @@ -823,7 +823,8 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
>>>  	if (!list_empty(head)) {
>>>  		last = list_last_entry(head, struct discard_entry, list);
>>>  		if (START_BLOCK(sbi, cpc->trim_start) + start ==
>>> -						last->blkaddr + last->len) {
>>> +				last->blkaddr + last->len &&
>>> +				last->len <= MAX_DISCARD_BLOCKS(sbi)) {
>>>  			last->len += end - start;
>>>  			goto done;
>>>  		}
>>> @@ -1513,36 +1514,25 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
>>>  			"Found FS corruption, run fsck to fix.");
>>>  		goto out;
>>>  	}
>>> +	if (sbi->discard_blks == 0)
>>> +		goto out;
>>>  
>>>  	/* start/end segment number in main_area */
>>>  	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
>>>  	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
>>>  						GET_SEGNO(sbi, end);
>>> +	/*
>>> +	 * do checkpoint to issue discard commands safely since we now can
>>> +	 * use async discard.
>>> +	 */
>>>  	cpc.reason = CP_DISCARD;
>>>  	cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
>>> +	cpc.trim_start = start_segno;
>>> +	cpc.trim_end = end_segno;
>>>  
>>> -	/* do checkpoint to issue discard commands safely */
>>> -	for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
>>> -		cpc.trim_start = start_segno;
>>> -
>>> -		if (sbi->discard_blks == 0)
>>> -			break;
>>> -		else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
>>> -			cpc.trim_end = end_segno;
>>> -		else
>>> -			cpc.trim_end = min_t(unsigned int,
>>> -				rounddown(start_segno +
>>> -				BATCHED_TRIM_SEGMENTS(sbi),
>>> -				sbi->segs_per_sec) - 1, end_segno);
>>> -
>>> -		mutex_lock(&sbi->gc_mutex);
>>> -		err = write_checkpoint(sbi, &cpc);
>>> -		mutex_unlock(&sbi->gc_mutex);
>>> -		if (err)
>>> -			break;
>>> -
>>> -		schedule();
>>> -	}
>>> +	mutex_lock(&sbi->gc_mutex);
>>> +	err = write_checkpoint(sbi, &cpc);
>>> +	mutex_unlock(&sbi->gc_mutex);
>>>  out:
>>>  	range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
>>>  	return err;
>>> @@ -2708,8 +2698,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
>>>  	sm_info->nr_discards = 0;
>>>  	sm_info->max_discards = 0;
>>>  
>>> -	sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
>>> -
>>>  	INIT_LIST_HEAD(&sm_info->sit_entry_set);
>>>  
>>>  	if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>> index 461c29043aec..37d40d8aa9c4 100644
>>> --- a/fs/f2fs/super.c
>>> +++ b/fs/f2fs/super.c
>>> @@ -284,7 +284,6 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
>>>  F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
>>> -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
>>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
>>> @@ -309,7 +308,6 @@ static struct attribute *f2fs_attrs[] = {
>>>  	ATTR_LIST(gc_idle),
>>>  	ATTR_LIST(reclaim_segments),
>>>  	ATTR_LIST(max_small_discards),
>>> -	ATTR_LIST(batched_trim_sections),
>>>  	ATTR_LIST(ipu_policy),
>>>  	ATTR_LIST(min_ipu_util),
>>>  	ATTR_LIST(min_fsync_blocks),
>>>
>>
>>
>> ------------------------------------------------------------------------------
>> Check out the vibrant tech community on one of the world's most
>> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
>> _______________________________________________
>> Linux-f2fs-devel mailing list
>> Linux-f2fs-devel@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> 
> .
> 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [f2fs-dev] [PATCH 3/6] f2fs: remove batched discard in f2fs_trim_fs
  2017-02-23  2:19       ` Chao Yu
@ 2017-02-23  4:25         ` Jaegeuk Kim
  0 siblings, 0 replies; 25+ messages in thread
From: Jaegeuk Kim @ 2017-02-23  4:25 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-fsdevel, linux-kernel, linux-f2fs-devel

On 02/23, Chao Yu wrote:
> On 2017/2/23 5:55, Jaegeuk Kim wrote:
> > On 02/22, Chao Yu wrote:
> >> On 2017/1/13 6:44, Jaegeuk Kim wrote:
> >>> We don't need to do multiple checkpoints, since we don't actually wait for
> >>> completion of discard commands during checkpoint.
> >>> Instead, we still need to avoid very big discard commands, since that large
> >>> discard can interfere block allocation.
> >>
> >> I hope we can keep this functionality and related sysfs interface, the main
> >> concern is that: for 16T size fragmented partition, at most there will be
> >> ~sizeof(u32)/2 4K blocks which could be invalid and needed to be discarded.
> >> After removal of in-batch discard, we have to handle billions of discard message
> >> with gc_mutex & cp_rwsem held, which will freeze f2fs for very long time.
> > 
> > The baseline of this patch is that we have a kernel thread to issue discard
> > commands asynchronously. So, I don't expect such the huge latency anymore.
> 
> Yes, I know async kernel thread was already introduced, and we can expect such
> implementation can help to reduce huge handling latency. But still in checkpoint
> we need to check region of invalid block and send the pending discard entries to
> kernel thread, so what I concern is that in scenario of there is huge number of
> discard messages, we will still face long latency with cp lock held.

Okay, I removed the below patch now, and changed 4GB range by default. ;)

Thanks,

> 
> Thanks,
> 
> > 
> > Thanks,
> > 
> >>
> >> So I suggest we just set trim_sections to 512 segments (covers one GB) by
> >> default. How do you think?
> >>
> >> Thanks,
> >>
> >>>
> >>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> >>> ---
> >>>  Documentation/ABI/testing/sysfs-fs-f2fs |  6 ------
> >>>  fs/f2fs/f2fs.h                          |  9 +-------
> >>>  fs/f2fs/segment.c                       | 38 +++++++++++----------------------
> >>>  fs/f2fs/super.c                         |  2 --
> >>>  4 files changed, 14 insertions(+), 41 deletions(-)
> >>>
> >>> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> >>> index a809f6005f14..df6b3f6e164a 100644
> >>> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> >>> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> >>> @@ -75,12 +75,6 @@ Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
> >>>  Description:
> >>>  		 Controls the memory footprint used by f2fs.
> >>>  
> >>> -What:		/sys/fs/f2fs/<disk>/trim_sections
> >>> -Date:		February 2015
> >>> -Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> >>> -Description:
> >>> -		 Controls the trimming rate in batch mode.
> >>> -
> >>>  What:		/sys/fs/f2fs/<disk>/cp_interval
> >>>  Date:		October 2015
> >>>  Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
> >>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>> index 548e75d18ec1..a2850bf2a487 100644
> >>> --- a/fs/f2fs/f2fs.h
> >>> +++ b/fs/f2fs/f2fs.h
> >>> @@ -128,11 +128,7 @@ enum {
> >>>  	CP_DISCARD,
> >>>  };
> >>>  
> >>> -#define DEF_BATCHED_TRIM_SECTIONS	2
> >>> -#define BATCHED_TRIM_SEGMENTS(sbi)	\
> >>> -		(SM_I(sbi)->trim_sections * (sbi)->segs_per_sec)
> >>> -#define BATCHED_TRIM_BLOCKS(sbi)	\
> >>> -		(BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
> >>> +#define MAX_DISCARD_BLOCKS(sbi) (1 << (sbi)->log_blocks_per_seg)
> >>>  #define DEF_CP_INTERVAL			60	/* 60 secs */
> >>>  #define DEF_IDLE_INTERVAL		5	/* 5 secs */
> >>>  
> >>> @@ -638,9 +634,6 @@ struct f2fs_sm_info {
> >>>  	int nr_discards;			/* # of discards in the list */
> >>>  	int max_discards;			/* max. discards to be issued */
> >>>  
> >>> -	/* for batched trimming */
> >>> -	unsigned int trim_sections;		/* # of sections to trim */
> >>> -
> >>>  	struct list_head sit_entry_set;	/* sit entry set list */
> >>>  
> >>>  	unsigned int ipu_policy;	/* in-place-update policy */
> >>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>> index 73282108fa33..e6f3c6db7616 100644
> >>> --- a/fs/f2fs/segment.c
> >>> +++ b/fs/f2fs/segment.c
> >>> @@ -823,7 +823,8 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
> >>>  	if (!list_empty(head)) {
> >>>  		last = list_last_entry(head, struct discard_entry, list);
> >>>  		if (START_BLOCK(sbi, cpc->trim_start) + start ==
> >>> -						last->blkaddr + last->len) {
> >>> +				last->blkaddr + last->len &&
> >>> +				last->len <= MAX_DISCARD_BLOCKS(sbi)) {
> >>>  			last->len += end - start;
> >>>  			goto done;
> >>>  		}
> >>> @@ -1513,36 +1514,25 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
> >>>  			"Found FS corruption, run fsck to fix.");
> >>>  		goto out;
> >>>  	}
> >>> +	if (sbi->discard_blks == 0)
> >>> +		goto out;
> >>>  
> >>>  	/* start/end segment number in main_area */
> >>>  	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
> >>>  	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
> >>>  						GET_SEGNO(sbi, end);
> >>> +	/*
> >>> +	 * do checkpoint to issue discard commands safely since we now can
> >>> +	 * use async discard.
> >>> +	 */
> >>>  	cpc.reason = CP_DISCARD;
> >>>  	cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
> >>> +	cpc.trim_start = start_segno;
> >>> +	cpc.trim_end = end_segno;
> >>>  
> >>> -	/* do checkpoint to issue discard commands safely */
> >>> -	for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
> >>> -		cpc.trim_start = start_segno;
> >>> -
> >>> -		if (sbi->discard_blks == 0)
> >>> -			break;
> >>> -		else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
> >>> -			cpc.trim_end = end_segno;
> >>> -		else
> >>> -			cpc.trim_end = min_t(unsigned int,
> >>> -				rounddown(start_segno +
> >>> -				BATCHED_TRIM_SEGMENTS(sbi),
> >>> -				sbi->segs_per_sec) - 1, end_segno);
> >>> -
> >>> -		mutex_lock(&sbi->gc_mutex);
> >>> -		err = write_checkpoint(sbi, &cpc);
> >>> -		mutex_unlock(&sbi->gc_mutex);
> >>> -		if (err)
> >>> -			break;
> >>> -
> >>> -		schedule();
> >>> -	}
> >>> +	mutex_lock(&sbi->gc_mutex);
> >>> +	err = write_checkpoint(sbi, &cpc);
> >>> +	mutex_unlock(&sbi->gc_mutex);
> >>>  out:
> >>>  	range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
> >>>  	return err;
> >>> @@ -2708,8 +2698,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
> >>>  	sm_info->nr_discards = 0;
> >>>  	sm_info->max_discards = 0;
> >>>  
> >>> -	sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
> >>> -
> >>>  	INIT_LIST_HEAD(&sm_info->sit_entry_set);
> >>>  
> >>>  	if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
> >>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >>> index 461c29043aec..37d40d8aa9c4 100644
> >>> --- a/fs/f2fs/super.c
> >>> +++ b/fs/f2fs/super.c
> >>> @@ -284,7 +284,6 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
> >>>  F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
> >>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
> >>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
> >>> -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
> >>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
> >>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
> >>>  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
> >>> @@ -309,7 +308,6 @@ static struct attribute *f2fs_attrs[] = {
> >>>  	ATTR_LIST(gc_idle),
> >>>  	ATTR_LIST(reclaim_segments),
> >>>  	ATTR_LIST(max_small_discards),
> >>> -	ATTR_LIST(batched_trim_sections),
> >>>  	ATTR_LIST(ipu_policy),
> >>>  	ATTR_LIST(min_ipu_util),
> >>>  	ATTR_LIST(min_fsync_blocks),
> >>>
> >>
> >>
> >> ------------------------------------------------------------------------------
> >> Check out the vibrant tech community on one of the world's most
> >> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> >> _______________________________________________
> >> Linux-f2fs-devel mailing list
> >> Linux-f2fs-devel@lists.sourceforge.net
> >> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> > 
> > .
> > 
> 
> 
> ------------------------------------------------------------------------------
> Check out the vibrant tech community on one of the world's most
> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2017-02-23  4:27 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-12 22:44 [PATCH 1/6] f2fs: clean up flush/discard command namings Jaegeuk Kim
2017-01-12 22:44 ` [PATCH 2/6] f2fs: reorganize stat information Jaegeuk Kim
2017-02-22  9:40   ` [f2fs-dev] " Chao Yu
2017-01-12 22:44 ` [PATCH 3/6] f2fs: remove batched discard in f2fs_trim_fs Jaegeuk Kim
2017-02-22  9:40   ` Chao Yu
2017-02-22 21:55     ` [f2fs-dev] " Jaegeuk Kim
2017-02-23  2:19       ` Chao Yu
2017-02-23  4:25         ` Jaegeuk Kim
2017-01-12 22:44 ` [PATCH 4/6] f2fs: factor out discard command info into discard_cmd_control Jaegeuk Kim
2017-02-22  9:40   ` Chao Yu
2017-01-12 22:44 ` [PATCH 5/6] f2fs: add a kernel thread to issue discard commands asynchronously Jaegeuk Kim
2017-01-13  8:01   ` Christoph Hellwig
2017-01-13 19:12     ` Jaegeuk Kim
2017-01-16 17:32       ` Christoph Hellwig
2017-02-05  8:59         ` Christoph Hellwig
2017-02-07  3:44           ` Jaegeuk Kim
2017-02-08 16:02             ` Christoph Hellwig
2017-02-08 22:05               ` Jaegeuk Kim
2017-02-22  9:40   ` Chao Yu
2017-01-12 22:44 ` [PATCH 6/6] f2fs: show # of on-going flush and discard bios Jaegeuk Kim
2017-01-14  2:26   ` [f2fs-dev] " heyunlei
2017-01-14  7:27   ` heyunlei
2017-01-14 23:32   ` [PATCH 6/6 v2] " Jaegeuk Kim
2017-02-22  9:40   ` [f2fs-dev] [PATCH 6/6] " Chao Yu
2017-02-22  9:40 ` [f2fs-dev] [PATCH 1/6] f2fs: clean up flush/discard command namings Chao Yu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).