linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 01/10] f2fs: reassign new segment for mode=lfs
@ 2016-12-30 18:51 Jaegeuk Kim
  2016-12-30 18:51 ` [PATCH 02/10] f2fs: fix wrong tracepoints for op and op_flags Jaegeuk Kim
                   ` (9 more replies)
  0 siblings, 10 replies; 41+ messages in thread
From: Jaegeuk Kim @ 2016-12-30 18:51 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

Otherwise we can remain wrong curseg->next_blkoff, resulting in fsck failure.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index be9e4d244d75..4e5ffe1d97e4 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1428,9 +1428,6 @@ void allocate_new_segments(struct f2fs_sb_info *sbi)
 	unsigned int old_segno;
 	int i;
 
-	if (test_opt(sbi, LFS))
-		return;
-
 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
 		curseg = CURSEG_I(sbi, i);
 		old_segno = curseg->segno;
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 02/10] f2fs: fix wrong tracepoints for op and op_flags
  2016-12-30 18:51 [PATCH 01/10] f2fs: reassign new segment for mode=lfs Jaegeuk Kim
@ 2016-12-30 18:51 ` Jaegeuk Kim
  2017-01-04  3:25   ` [f2fs-dev] " Chao Yu
  2016-12-30 18:51 ` [PATCH 03/10] f2fs: add submit_bio tracepoint Jaegeuk Kim
                   ` (8 subsequent siblings)
  9 siblings, 1 reply; 41+ messages in thread
From: Jaegeuk Kim @ 2016-12-30 18:51 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This patch fixes wrong tracepoints in terms of op and op_flags.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/trace/events/f2fs.h | 41 +++++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 01b3c9869a0d..4c942599581b 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -55,25 +55,34 @@ TRACE_DEFINE_ENUM(CP_DISCARD);
 		{ IPU,		"IN-PLACE" },				\
 		{ OPU,		"OUT-OF-PLACE" })
 
-#define F2FS_BIO_FLAG_MASK(t)	(t & (REQ_RAHEAD | REQ_PREFLUSH | REQ_FUA))
-#define F2FS_BIO_EXTRA_MASK(t)	(t & (REQ_META | REQ_PRIO))
-
-#define show_bio_type(op_flags)	show_bio_op_flags(op_flags), 		\
-						show_bio_extra(op_flags)
+#define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_PREFLUSH | REQ_META |\
+			REQ_PRIO)
+#define F2FS_BIO_FLAG_MASK(t)	(t & F2FS_OP_FLAGS)
+
+#define show_bio_type(op,op_flags)	show_bio_op(op),		\
+						show_bio_op_flags(op_flags)
+
+#define show_bio_op(op)							\
+	__print_symbolic(op,						\
+		{ REQ_OP_READ,			"READ" },		\
+		{ REQ_OP_WRITE,			"WRITE" },		\
+		{ REQ_OP_FLUSH,			"FLUSH" },		\
+		{ REQ_OP_DISCARD,		"DISCARD" },		\
+		{ REQ_OP_ZONE_REPORT,		"ZONE_REPORT" },	\
+		{ REQ_OP_SECURE_ERASE,		"SECURE_ERASE" },	\
+		{ REQ_OP_ZONE_RESET,		"ZONE_RESET" },		\
+		{ REQ_OP_WRITE_SAME,		"WRITE_SAME" },		\
+		{ REQ_OP_WRITE_ZEROES,		"WRITE_ZEROES" })
 
 #define show_bio_op_flags(flags)					\
 	__print_symbolic(F2FS_BIO_FLAG_MASK(flags),			\
-		{ 0,			"WRITE" },			\
-		{ REQ_RAHEAD, 		"READAHEAD" },			\
-		{ REQ_SYNC, 		"REQ_SYNC" },			\
-		{ REQ_PREFLUSH,		"REQ_PREFLUSH" },		\
-		{ REQ_FUA,		"REQ_FUA" })
-
-#define show_bio_extra(type)						\
-	__print_symbolic(F2FS_BIO_EXTRA_MASK(type),			\
+		{ REQ_RAHEAD, 		"(RA)" },			\
+		{ REQ_SYNC, 		"(S)" },			\
+		{ REQ_SYNC | REQ_PRIO,	"(SP)" },			\
 		{ REQ_META, 		"(M)" },			\
-		{ REQ_PRIO, 		"(P)" },			\
 		{ REQ_META | REQ_PRIO,	"(MP)" },			\
+		{ REQ_SYNC | REQ_META | REQ_PRIO, "(SMP)" },		\
+		{ REQ_PREFLUSH | REQ_META | REQ_PRIO, "(FMP)" },	\
 		{ 0, " \b" })
 
 #define show_data_type(type)						\
@@ -753,7 +762,7 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio,
 		(unsigned long)__entry->index,
 		(unsigned long long)__entry->old_blkaddr,
 		(unsigned long long)__entry->new_blkaddr,
-		show_bio_type(__entry->op_flags),
+		show_bio_type(__entry->op, __entry->op_flags),
 		show_block_type(__entry->type))
 );
 
@@ -802,7 +811,7 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio,
 
 	TP_printk("dev = (%d,%d), rw = %s%s, %s, sector = %lld, size = %u",
 		show_dev(__entry),
-		show_bio_type(__entry->op_flags),
+		show_bio_type(__entry->op, __entry->op_flags),
 		show_block_type(__entry->type),
 		(unsigned long long)__entry->sector,
 		__entry->size)
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 03/10] f2fs: add submit_bio tracepoint
  2016-12-30 18:51 [PATCH 01/10] f2fs: reassign new segment for mode=lfs Jaegeuk Kim
  2016-12-30 18:51 ` [PATCH 02/10] f2fs: fix wrong tracepoints for op and op_flags Jaegeuk Kim
@ 2016-12-30 18:51 ` Jaegeuk Kim
  2017-01-04  3:32   ` [f2fs-dev] " Chao Yu
  2017-01-04 23:35   ` [PATCH 03/10 v2] " Jaegeuk Kim
  2016-12-30 18:51 ` [PATCH 04/10] f2fs: support IO alignment for DATA and NODE writes Jaegeuk Kim
                   ` (7 subsequent siblings)
  9 siblings, 2 replies; 41+ messages in thread
From: Jaegeuk Kim @ 2016-12-30 18:51 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This patch adds final submit_bio() tracepoint.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c              |  1 +
 include/trace/events/f2fs.h | 31 +++++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 2c5df1dc1479..54da4e2f1318 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -175,6 +175,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
 			current->plug && (type == DATA || type == NODE))
 			blk_finish_plug(current->plug);
 	}
+	trace_f2fs_submit_bio(sbi->sb, bio);
 	submit_bio(bio);
 }
 
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 4c942599581b..094d92b52450 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -55,6 +55,8 @@ TRACE_DEFINE_ENUM(CP_DISCARD);
 		{ IPU,		"IN-PLACE" },				\
 		{ OPU,		"OUT-OF-PLACE" })
 
+#define F2FS_OP	(REQ_OP_READ | REQ_OP_RAHEAD | REQ_SYNC | REQ_PREFLUSH | REQ_META |\
+			REQ_PRIO)
 #define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_PREFLUSH | REQ_META |\
 			REQ_PRIO)
 #define F2FS_BIO_FLAG_MASK(t)	(t & F2FS_OP_FLAGS)
@@ -837,6 +839,35 @@ DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio,
 	TP_CONDITION(bio)
 );
 
+TRACE_EVENT(f2fs_submit_bio,
+
+	TP_PROTO(struct super_block *sb, struct bio *bio),
+
+	TP_ARGS(sb, bio),
+
+	TP_STRUCT__entry(
+		__field(dev_t,	dev)
+		__field(int,	op)
+		__field(int,	op_flags)
+		__field(sector_t,	sector)
+		__field(unsigned int,	size)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= sb->s_dev;
+		__entry->op		= bio->bi_opf & REQ_OP_MASK;
+		__entry->op_flags	= bio->bi_opf;
+		__entry->sector		= bio->bi_iter.bi_sector;
+		__entry->size		= bio->bi_iter.bi_size;
+	),
+
+	TP_printk("dev = (%d,%d), rw = %s%s, sector = %lld, size = %u",
+		show_dev(__entry),
+		show_bio_type(__entry->op, __entry->op_flags),
+		(unsigned long long)__entry->sector,
+		__entry->size)
+);
+
 TRACE_EVENT(f2fs_write_begin,
 
 	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 04/10] f2fs: support IO alignment for DATA and NODE writes
  2016-12-30 18:51 [PATCH 01/10] f2fs: reassign new segment for mode=lfs Jaegeuk Kim
  2016-12-30 18:51 ` [PATCH 02/10] f2fs: fix wrong tracepoints for op and op_flags Jaegeuk Kim
  2016-12-30 18:51 ` [PATCH 03/10] f2fs: add submit_bio tracepoint Jaegeuk Kim
@ 2016-12-30 18:51 ` Jaegeuk Kim
  2017-01-04  8:23   ` [f2fs-dev] " Chao Yu
  2016-12-30 18:51 ` [PATCH 05/10] f2fs: get io size bit from mount option Jaegeuk Kim
                   ` (6 subsequent siblings)
  9 siblings, 1 reply; 41+ messages in thread
From: Jaegeuk Kim @ 2016-12-30 18:51 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This patch implements IO alignment by filling dummy blocks in DATA and NODE
write bios. If we can guarantee, for example, 32KB or 64KB for such the IOs,
we can eliminate underlying dummy page problem which FTL conducts in order to
close MLC or TLC partial written pages.

Note that,
 - it requires "-o mode=lfs".
 - IO size should be power of 2, not exceed BIO_MAX_PAGES, 256.
 - read IO is still 4KB.
 - do checkpoint at fsync, if dummy NODE page was written.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c          | 55 +++++++++++++++++++++++++++++++++++++++++++++++--
 fs/f2fs/f2fs.h          |  4 +++-
 fs/f2fs/segment.c       |  9 ++++++--
 fs/f2fs/segment.h       |  3 +++
 fs/f2fs/super.c         | 13 +++++++++++-
 include/linux/f2fs_fs.h |  6 ++++++
 6 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 54da4e2f1318..f17df9f76d55 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -93,6 +93,17 @@ static void f2fs_write_end_io(struct bio *bio)
 		struct page *page = bvec->bv_page;
 		enum count_type type = WB_DATA_TYPE(page);
 
+		if (IS_DUMMY_WRITTEN_PAGE(page)) {
+			set_page_private(page, (unsigned long)NULL);
+			ClearPagePrivate(page);
+			unlock_page(page);
+			mempool_free(page, sbi->write_io_dummy);
+
+			if (unlikely(bio->bi_error))
+				f2fs_stop_checkpoint(sbi, true);
+			continue;
+		}
+
 		fscrypt_pullback_bio_page(&page, true);
 
 		if (unlikely(bio->bi_error)) {
@@ -171,10 +182,42 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
 				struct bio *bio, enum page_type type)
 {
 	if (!is_read_io(bio_op(bio))) {
+		unsigned int start;
+
 		if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
 			current->plug && (type == DATA || type == NODE))
 			blk_finish_plug(current->plug);
+
+		if (type != DATA && type != NODE)
+			goto submit_io;
+
+		start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
+		start %= F2FS_IO_SIZE(sbi);
+
+		if (start == 0)
+			goto submit_io;
+
+		/* fill dummy pages */
+		for (; start < F2FS_IO_SIZE(sbi); start++) {
+			struct page *page =
+				mempool_alloc(sbi->write_io_dummy,
+					GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL);
+			f2fs_bug_on(sbi, !page);
+
+			SetPagePrivate(page);
+			set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
+			lock_page(page);
+			if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
+				f2fs_bug_on(sbi, 1);
+		}
+		/*
+		 * In the NODE case, we lose next block address chain. So, we
+		 * need to do checkpoint in f2fs_sync_file.
+		 */
+		if (type == NODE)
+			set_sbi_flag(sbi, SBI_NEED_CP);
 	}
+submit_io:
 	trace_f2fs_submit_bio(sbi->sb, bio);
 	submit_bio(bio);
 }
@@ -316,13 +359,14 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
 	return 0;
 }
 
-void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
+int f2fs_submit_page_mbio(struct f2fs_io_info *fio)
 {
 	struct f2fs_sb_info *sbi = fio->sbi;
 	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
 	struct f2fs_bio_info *io;
 	bool is_read = is_read_io(fio->op);
 	struct page *bio_page;
+	int err = 0;
 
 	io = is_read ? &sbi->read_io : &sbi->write_io[btype];
 
@@ -343,6 +387,12 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
 		__submit_merged_bio(io);
 alloc_new:
 	if (io->bio == NULL) {
+		if ((fio->type == DATA || fio->type == NODE) &&
+				fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
+			err = -EAGAIN;
+			dec_page_count(sbi, WB_DATA_TYPE(bio_page));
+			goto out_fail;
+		}
 		io->bio = __bio_alloc(sbi, fio->new_blkaddr,
 						BIO_MAX_PAGES, is_read);
 		io->fio = *fio;
@@ -356,9 +406,10 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
 
 	io->last_block_in_bio = fio->new_blkaddr;
 	f2fs_trace_ios(fio, 0);
-
+out_fail:
 	up_write(&io->io_rwsem);
 	trace_f2fs_submit_page_mbio(fio->page, fio);
+	return err;
 }
 
 static void __set_data_blkaddr(struct dnode_of_data *dn)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 2da8c3aa0ce5..0d7eaddef4a0 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -792,6 +792,8 @@ struct f2fs_sb_info {
 	struct f2fs_bio_info read_io;			/* for read bios */
 	struct f2fs_bio_info write_io[NR_PAGE_TYPE];	/* for write bios */
 	struct mutex wio_mutex[NODE + 1];	/* bio ordering for NODE/DATA */
+	int write_io_size_bits;			/* Write IO size bits */
+	mempool_t *write_io_dummy;		/* Dummy pages */
 
 	/* for checkpoint */
 	struct f2fs_checkpoint *ckpt;		/* raw checkpoint pointer */
@@ -2174,7 +2176,7 @@ void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *, struct inode *,
 				struct page *, nid_t, enum page_type, int);
 void f2fs_flush_merged_bios(struct f2fs_sb_info *);
 int f2fs_submit_page_bio(struct f2fs_io_info *);
-void f2fs_submit_page_mbio(struct f2fs_io_info *);
+int f2fs_submit_page_mbio(struct f2fs_io_info *);
 struct block_device *f2fs_target_device(struct f2fs_sb_info *,
 				block_t, struct bio *);
 int f2fs_target_device_index(struct f2fs_sb_info *, block_t);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4e5ffe1d97e4..6d197f0c8151 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1604,15 +1604,20 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
 {
 	int type = __get_segment_type(fio->page, fio->type);
+	int err;
 
 	if (fio->type == NODE || fio->type == DATA)
 		mutex_lock(&fio->sbi->wio_mutex[fio->type]);
-
+reallocate:
 	allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
 					&fio->new_blkaddr, sum, type);
 
 	/* writeout dirty page into bdev */
-	f2fs_submit_page_mbio(fio);
+	err = f2fs_submit_page_mbio(fio);
+	if (err == -EAGAIN) {
+		fio->old_blkaddr = fio->new_blkaddr;
+		goto reallocate;
+	}
 
 	if (fio->type == NODE || fio->type == DATA)
 		mutex_unlock(&fio->sbi->wio_mutex[fio->type]);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 9d44ce83acb2..08f1455c812c 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -186,9 +186,12 @@ struct segment_allocation {
  * the page is atomically written, and it is in inmem_pages list.
  */
 #define ATOMIC_WRITTEN_PAGE		((unsigned long)-1)
+#define DUMMY_WRITTEN_PAGE		((unsigned long)-2)
 
 #define IS_ATOMIC_WRITTEN_PAGE(page)			\
 		(page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE)
+#define IS_DUMMY_WRITTEN_PAGE(page)			\
+		(page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE)
 
 struct inmem_pages {
 	struct list_head list;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 702638e21c76..692ff1b5adf5 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1763,6 +1763,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
 				FDEV(i).total_segments,
 				FDEV(i).start_blk, FDEV(i).end_blk);
 	}
+	f2fs_msg(sbi->sb, KERN_INFO,
+			"IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
 	return 0;
 }
 
@@ -1880,12 +1882,19 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	if (err)
 		goto free_options;
 
+	if (F2FS_IO_SIZE(sbi) > 1) {
+		sbi->write_io_dummy =
+			mempool_create_page_pool(F2FS_IO_SIZE(sbi) - 1, 0);
+		if (!sbi->write_io_dummy)
+			goto free_options;
+	}
+
 	/* get an inode for meta space */
 	sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
 	if (IS_ERR(sbi->meta_inode)) {
 		f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
 		err = PTR_ERR(sbi->meta_inode);
-		goto free_options;
+		goto free_io_dummy;
 	}
 
 	err = get_valid_checkpoint(sbi);
@@ -2103,6 +2112,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 free_meta_inode:
 	make_bad_inode(sbi->meta_inode);
 	iput(sbi->meta_inode);
+free_io_dummy:
+	mempool_destroy(sbi->write_io_dummy);
 free_options:
 	destroy_percpu_info(sbi);
 	kfree(options);
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index cea41a124a80..f0748524ca8c 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -36,6 +36,12 @@
 #define F2FS_NODE_INO(sbi)	(sbi->node_ino_num)
 #define F2FS_META_INO(sbi)	(sbi->meta_ino_num)
 
+#define F2FS_IO_SIZE(sbi)	(1 << (sbi)->write_io_size_bits) /* Blocks */
+#define F2FS_IO_SIZE_KB(sbi)	(1 << ((sbi)->write_io_size_bits + 2)) /* KB */
+#define F2FS_IO_SIZE_BYTES(sbi)	(1 << ((sbi)->write_io_size_bits + 12)) /* B */
+#define F2FS_IO_SIZE_BITS(sbi)	((sbi)->write_io_size_bits) /* power of 2 */
+#define F2FS_IO_SIZE_MASK(sbi)	(F2FS_IO_SIZE(sbi) - 1)
+
 /* This flag is used by node and meta inodes, and by recovery */
 #define GFP_F2FS_ZERO		(GFP_NOFS | __GFP_ZERO)
 #define GFP_F2FS_HIGH_ZERO	(GFP_NOFS | __GFP_ZERO | __GFP_HIGHMEM)
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 05/10] f2fs: get io size bit from mount option
  2016-12-30 18:51 [PATCH 01/10] f2fs: reassign new segment for mode=lfs Jaegeuk Kim
                   ` (2 preceding siblings ...)
  2016-12-30 18:51 ` [PATCH 04/10] f2fs: support IO alignment for DATA and NODE writes Jaegeuk Kim
@ 2016-12-30 18:51 ` Jaegeuk Kim
  2016-12-30 18:51 ` [PATCH 06/10] f2fs: show the max number of atomic operations Jaegeuk Kim
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 41+ messages in thread
From: Jaegeuk Kim @ 2016-12-30 18:51 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This patch adds to set io_size_bits from mount option.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/filesystems/f2fs.txt |  2 ++
 fs/f2fs/super.c                    | 22 ++++++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 753dd4f96afe..d99faced79cb 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -157,6 +157,8 @@ data_flush             Enable data flushing before checkpoint in order to
 mode=%s                Control block allocation mode which supports "adaptive"
                        and "lfs". In "lfs" mode, there should be no random
                        writes towards main area.
+io_bits=%u             Set the bit size of write IO requests. It should be set
+                       with "mode=lfs".
 
 ================================================================================
 DEBUGFS ENTRIES
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 692ff1b5adf5..f3697f97e527 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -101,6 +101,7 @@ enum {
 	Opt_noinline_data,
 	Opt_data_flush,
 	Opt_mode,
+	Opt_io_size_bits,
 	Opt_fault_injection,
 	Opt_lazytime,
 	Opt_nolazytime,
@@ -133,6 +134,7 @@ static match_table_t f2fs_tokens = {
 	{Opt_noinline_data, "noinline_data"},
 	{Opt_data_flush, "data_flush"},
 	{Opt_mode, "mode=%s"},
+	{Opt_io_size_bits, "io_bits=%u"},
 	{Opt_fault_injection, "fault_injection=%u"},
 	{Opt_lazytime, "lazytime"},
 	{Opt_nolazytime, "nolazytime"},
@@ -535,6 +537,17 @@ static int parse_options(struct super_block *sb, char *options)
 			}
 			kfree(name);
 			break;
+		case Opt_io_size_bits:
+			if (args->from && match_int(args, &arg))
+				return -EINVAL;
+			if (arg > __ilog2_u32(BIO_MAX_PAGES)) {
+				f2fs_msg(sb, KERN_WARNING,
+					"Not support %d, larger than %d",
+					1 << arg, BIO_MAX_PAGES);
+				return -EINVAL;
+			}
+			sbi->write_io_size_bits = arg;
+			break;
 		case Opt_fault_injection:
 			if (args->from && match_int(args, &arg))
 				return -EINVAL;
@@ -558,6 +571,13 @@ static int parse_options(struct super_block *sb, char *options)
 			return -EINVAL;
 		}
 	}
+
+	if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
+		f2fs_msg(sb, KERN_ERR,
+				"Should set mode=lfs with %uKB-sized IO",
+				F2FS_IO_SIZE_KB(sbi));
+		return -EINVAL;
+	}
 	return 0;
 }
 
@@ -918,6 +938,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 	else if (test_opt(sbi, LFS))
 		seq_puts(seq, "lfs");
 	seq_printf(seq, ",active_logs=%u", sbi->active_logs);
+	if (F2FS_IO_SIZE_BITS(sbi))
+		seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
 
 	return 0;
 }
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 06/10] f2fs: show the max number of atomic operations
  2016-12-30 18:51 [PATCH 01/10] f2fs: reassign new segment for mode=lfs Jaegeuk Kim
                   ` (3 preceding siblings ...)
  2016-12-30 18:51 ` [PATCH 05/10] f2fs: get io size bit from mount option Jaegeuk Kim
@ 2016-12-30 18:51 ` Jaegeuk Kim
  2017-01-04  8:45   ` Chao Yu
  2017-01-04 23:50   ` [PATCH 06/10 v2] " Jaegeuk Kim
  2016-12-30 18:51 ` [PATCH 07/10] f2fs: don't allow encrypted operations without keys Jaegeuk Kim
                   ` (4 subsequent siblings)
  9 siblings, 2 replies; 41+ messages in thread
From: Jaegeuk Kim @ 2016-12-30 18:51 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This patch adds to show the max number of atomic operations which are
conducting concurrently.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/debug.c |  7 +++++++
 fs/f2fs/f2fs.h  | 17 +++++++++++++++++
 fs/f2fs/file.c  | 12 +++++++++---
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index fbd5184140d0..29cdf0c1da1d 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -50,6 +50,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
 	si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
 	si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
 	si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
+	si->aw_cnt = atomic_read(&sbi->aw_cnt);
+	si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
 	si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
 	si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
 	si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
@@ -256,6 +258,8 @@ static int stat_show(struct seq_file *s, void *v)
 			   si->inline_dir);
 		seq_printf(s, "  - Orphan Inode: %u\n",
 			   si->orphans);
+		seq_printf(s, "  - Atomic write count: %4d (Max. %4d)\n",
+			   si->aw_cnt, si->max_aw_cnt);
 		seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
 			   si->main_area_segs, si->main_area_sections,
 			   si->main_area_zones);
@@ -414,6 +418,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
 	atomic_set(&sbi->inline_dir, 0);
 	atomic_set(&sbi->inplace_count, 0);
 
+	atomic_set(&sbi->aw_cnt, 0);
+	atomic_set(&sbi->max_aw_cnt, 0);
+
 	mutex_lock(&f2fs_stat_mutex);
 	list_add_tail(&si->stat_list, &f2fs_stat_list);
 	mutex_unlock(&f2fs_stat_mutex);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 0d7eaddef4a0..bdcfe2a9b532 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -884,6 +884,8 @@ struct f2fs_sb_info {
 	atomic_t inline_xattr;			/* # of inline_xattr inodes */
 	atomic_t inline_inode;			/* # of inline_data inodes */
 	atomic_t inline_dir;			/* # of inline_dentry inodes */
+	atomic_t aw_cnt;			/* # of atomic writes */
+	atomic_t max_aw_cnt;			/* max # of atomic writes */
 	int bg_gc;				/* background gc calls */
 	unsigned int ndirty_inode[NR_INODE_TYPE];	/* # of dirty inodes */
 #endif
@@ -2236,6 +2238,7 @@ struct f2fs_stat_info {
 	int total_count, utilization;
 	int bg_gc, nr_wb_cp_data, nr_wb_data;
 	int inline_xattr, inline_inode, inline_dir, orphans;
+	int aw_cnt, max_aw_cnt;
 	unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
 	unsigned int bimodal, avg_vblocks;
 	int util_free, util_valid, util_invalid;
@@ -2307,6 +2310,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
 		((sbi)->block_count[(curseg)->alloc_type]++)
 #define stat_inc_inplace_blocks(sbi)					\
 		(atomic_inc(&(sbi)->inplace_count))
+#define stat_inc_atomic_write(inode)					\
+		(atomic_inc(&F2FS_I_SB(inode)->aw_cnt));
+#define stat_dec_atomic_write(inode)					\
+		(atomic_dec(&F2FS_I_SB(inode)->aw_cnt));
+#define stat_update_max_atomic_write(inode)				\
+	do {								\
+		int cur = atomic_read(&F2FS_I_SB(inode)->aw_cnt);	\
+		int max = atomic_read(&F2FS_I_SB(inode)->max_aw_cnt);	\
+		if (cur > max)						\
+			atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur);	\
+	} while (0)
 #define stat_inc_seg_count(sbi, type, gc_type)				\
 	do {								\
 		struct f2fs_stat_info *si = F2FS_STAT(sbi);		\
@@ -2360,6 +2374,9 @@ void f2fs_destroy_root_stats(void);
 #define stat_dec_inline_inode(inode)
 #define stat_inc_inline_dir(inode)
 #define stat_dec_inline_dir(inode)
+#define stat_inc_atomic_write(inode)
+#define stat_dec_atomic_write(inode)
+#define stat_update_max_atomic_write(inode)
 #define stat_inc_seg_type(sbi, curseg)
 #define stat_inc_block_count(sbi, curseg)
 #define stat_inc_inplace_blocks(sbi)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 49f10dce817d..826fefc05fb1 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1542,6 +1542,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
 	if (ret)
 		clear_inode_flag(inode, FI_ATOMIC_FILE);
 out:
+	stat_inc_atomic_write(inode);
+	stat_update_max_atomic_write(inode);
 	inode_unlock(inode);
 	mnt_drop_write_file(filp);
 	return ret;
@@ -1571,9 +1573,11 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
 			set_inode_flag(inode, FI_ATOMIC_FILE);
 			goto err_out;
 		}
+		ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
+		stat_dec_atomic_write(inode);
+	} else {
+		ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
 	}
-
-	ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
 err_out:
 	inode_unlock(inode);
 	mnt_drop_write_file(filp);
@@ -1652,8 +1656,10 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
 
 	inode_lock(inode);
 
-	if (f2fs_is_atomic_file(inode))
+	if (f2fs_is_atomic_file(inode)) {
 		drop_inmem_pages(inode);
+		stat_dec_atomic_write(inode);
+	}
 	if (f2fs_is_volatile_file(inode)) {
 		clear_inode_flag(inode, FI_VOLATILE_FILE);
 		ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 07/10] f2fs: don't allow encrypted operations without keys
  2016-12-30 18:51 [PATCH 01/10] f2fs: reassign new segment for mode=lfs Jaegeuk Kim
                   ` (4 preceding siblings ...)
  2016-12-30 18:51 ` [PATCH 06/10] f2fs: show the max number of atomic operations Jaegeuk Kim
@ 2016-12-30 18:51 ` Jaegeuk Kim
  2017-01-04  8:56   ` [f2fs-dev] " Chao Yu
  2016-12-30 18:51 ` [PATCH 08/10] f2fs: relax async discard commands more Jaegeuk Kim
                   ` (3 subsequent siblings)
  9 siblings, 1 reply; 41+ messages in thread
From: Jaegeuk Kim @ 2016-12-30 18:51 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel
  Cc: Jaegeuk Kim, Theodore Ts'o

This patch fixes the renaming bug on encrypted filenames, which was pointed by

 (ext4: don't allow encrypted operations without keys)

Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/namei.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 51c4d6d78058..6d511d349198 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -663,6 +663,12 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	bool is_old_inline = f2fs_has_inline_dentry(old_dir);
 	int err = -ENOENT;
 
+	if ((f2fs_encrypted_inode(old_dir) &&
+			!fscrypt_has_encryption_key(old_dir)) ||
+			(f2fs_encrypted_inode(new_dir) &&
+			!fscrypt_has_encryption_key(new_dir)))
+		return -ENOKEY;
+
 	if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) &&
 			!fscrypt_has_permitted_context(new_dir, old_inode)) {
 		err = -EPERM;
@@ -843,6 +849,12 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 	int old_nlink = 0, new_nlink = 0;
 	int err = -ENOENT;
 
+	if ((f2fs_encrypted_inode(old_dir) &&
+			!fscrypt_has_encryption_key(old_dir)) ||
+			(f2fs_encrypted_inode(new_dir) &&
+			!fscrypt_has_encryption_key(new_dir)))
+		return -ENOKEY;
+
 	if ((f2fs_encrypted_inode(old_dir) || f2fs_encrypted_inode(new_dir)) &&
 			(old_dir != new_dir) &&
 			(!fscrypt_has_permitted_context(new_dir, old_inode) ||
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 08/10] f2fs: relax async discard commands more
  2016-12-30 18:51 [PATCH 01/10] f2fs: reassign new segment for mode=lfs Jaegeuk Kim
                   ` (5 preceding siblings ...)
  2016-12-30 18:51 ` [PATCH 07/10] f2fs: don't allow encrypted operations without keys Jaegeuk Kim
@ 2016-12-30 18:51 ` Jaegeuk Kim
  2017-01-04  9:29   ` [f2fs-dev] " Chao Yu
  2016-12-30 18:51 ` [PATCH 09/10] f2fs: avoid needless checkpoint in f2fs_trim_fs Jaegeuk Kim
                   ` (2 subsequent siblings)
  9 siblings, 1 reply; 41+ messages in thread
From: Jaegeuk Kim @ 2016-12-30 18:51 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This patch relaxes async discard commands to avoid waiting its end_io during
checkpoint.
Instead of waiting them during checkpoint, it will be done when actually reusing
them.

Test on initial partition of nvme drive.

 # time fstrim /mnt/test

Before : 6.158s
After : 4.822s

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c |  3 ---
 fs/f2fs/f2fs.h       |  4 +++-
 fs/f2fs/segment.c    | 25 +++++++++++++++++++++----
 fs/f2fs/super.c      |  3 +++
 4 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 34bfe2b494ae..1a9ba69a22ba 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1254,7 +1254,6 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 		f2fs_bug_on(sbi, prefree_segments(sbi));
 		flush_sit_entries(sbi, cpc);
 		clear_prefree_segments(sbi, cpc);
-		f2fs_wait_all_discard_bio(sbi);
 		unblock_operations(sbi);
 		goto out;
 	}
@@ -1278,8 +1277,6 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	else
 		clear_prefree_segments(sbi, cpc);
 
-	f2fs_wait_all_discard_bio(sbi);
-
 	unblock_operations(sbi);
 	stat_inc_cp_count(sbi->stat_info);
 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index bdcfe2a9b532..f2f40fce9d31 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -183,6 +183,8 @@ struct discard_entry {
 
 struct bio_entry {
 	struct list_head list;
+	unsigned int start_segno;
+	unsigned int end_segno;
 	struct bio *bio;
 	struct completion event;
 	int error;
@@ -2111,7 +2113,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *, bool);
 void invalidate_blocks(struct f2fs_sb_info *, block_t);
 bool is_checkpointed_data(struct f2fs_sb_info *, block_t);
 void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
-void f2fs_wait_all_discard_bio(struct f2fs_sb_info *);
+void f2fs_wait_discard_bio(struct f2fs_sb_info *, unsigned int);
 void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *);
 void release_discard_addrs(struct f2fs_sb_info *);
 int npages_for_summary_flush(struct f2fs_sb_info *, bool);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 6d197f0c8151..9a38424c3c1f 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -624,20 +624,24 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 }
 
 static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi,
-							struct bio *bio)
+			struct bio *bio, unsigned int start_segno,
+					unsigned int end_segno)
 {
 	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
 	struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
 
 	INIT_LIST_HEAD(&be->list);
 	be->bio = bio;
+	be->start_segno = start_segno;
+	be->end_segno = end_segno;
 	init_completion(&be->event);
 	list_add_tail(&be->list, wait_list);
 
 	return be;
 }
 
-void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
+/* This should be covered by global mutex, &sit_i->sentry_lock */
+void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, unsigned int segno)
 {
 	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
 	struct bio_entry *be, *tmp;
@@ -646,7 +650,15 @@ void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
 		struct bio *bio = be->bio;
 		int err;
 
-		wait_for_completion_io(&be->event);
+		if (!completion_done(&be->event)) {
+			if ((be->start_segno >= segno &&
+					be->end_segno <= segno) ||
+					segno == NULL_SEGNO)
+				wait_for_completion_io(&be->event);
+			else
+				continue;
+		}
+
 		err = be->error;
 		if (err == -EOPNOTSUPP)
 			err = 0;
@@ -674,6 +686,8 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
 		struct block_device *bdev, block_t blkstart, block_t blklen)
 {
 	struct bio *bio = NULL;
+	unsigned int start_segno = GET_SEGNO(sbi, blkstart);
+	unsigned int end_segno = GET_SEGNO(sbi, blkstart + blklen);
 	int err;
 
 	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
@@ -688,7 +702,8 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
 				SECTOR_FROM_BLOCK(blklen),
 				GFP_NOFS, 0, &bio);
 	if (!err && bio) {
-		struct bio_entry *be = __add_bio_entry(sbi, bio);
+		struct bio_entry *be = __add_bio_entry(sbi, bio,
+					start_segno, end_segno);
 
 		bio->bi_private = be;
 		bio->bi_end_io = f2fs_submit_bio_wait_endio;
@@ -1574,6 +1589,8 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 
 	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
 
+	f2fs_wait_discard_bio(sbi, GET_SEGNO(sbi, *new_blkaddr));
+
 	/*
 	 * __add_sum_entry should be resided under the curseg_mutex
 	 * because, this function updates a summary entry in the
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f3697f97e527..16e2bc5209bb 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -770,6 +770,9 @@ static void f2fs_put_super(struct super_block *sb)
 		write_checkpoint(sbi, &cpc);
 	}
 
+	/* be sure to wait for any on-going discard commands */
+	f2fs_wait_discard_bio(sbi, NULL_SEGNO);
+
 	/* write_checkpoint can update stat informaion */
 	f2fs_destroy_stats(sbi);
 
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 09/10] f2fs: avoid needless checkpoint in f2fs_trim_fs
  2016-12-30 18:51 [PATCH 01/10] f2fs: reassign new segment for mode=lfs Jaegeuk Kim
                   ` (6 preceding siblings ...)
  2016-12-30 18:51 ` [PATCH 08/10] f2fs: relax async discard commands more Jaegeuk Kim
@ 2016-12-30 18:51 ` Jaegeuk Kim
  2017-02-22  7:23   ` [f2fs-dev] " Chao Yu
  2016-12-30 18:51 ` [PATCH 10/10] f2fs: return fs_trim if there is no candidate Jaegeuk Kim
  2017-01-04  3:24 ` [f2fs-dev] [PATCH 01/10] f2fs: reassign new segment for mode=lfs Chao Yu
  9 siblings, 1 reply; 41+ messages in thread
From: Jaegeuk Kim @ 2016-12-30 18:51 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

The f2fs_trim_fs() doesn't need to do checkpoint if there are newly allocated
data blocks only which didn't change the critical checkpoint data such as nat
and sit entries.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 1a9ba69a22ba..917b5c5053ae 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1248,14 +1248,15 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	f2fs_flush_merged_bios(sbi);
 
 	/* this is the case of multiple fstrims without any changes */
-	if (cpc->reason == CP_DISCARD && !is_sbi_flag_set(sbi, SBI_IS_DIRTY)) {
-		f2fs_bug_on(sbi, NM_I(sbi)->dirty_nat_cnt);
-		f2fs_bug_on(sbi, SIT_I(sbi)->dirty_sentries);
-		f2fs_bug_on(sbi, prefree_segments(sbi));
-		flush_sit_entries(sbi, cpc);
-		clear_prefree_segments(sbi, cpc);
-		unblock_operations(sbi);
-		goto out;
+	if (cpc->reason == CP_DISCARD) {
+		if (NM_I(sbi)->dirty_nat_cnt == 0 &&
+				SIT_I(sbi)->dirty_sentries == 0 &&
+				prefree_segments(sbi) == 0) {
+			flush_sit_entries(sbi, cpc);
+			clear_prefree_segments(sbi, cpc);
+			unblock_operations(sbi);
+			goto out;
+		}
 	}
 
 	/*
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 10/10] f2fs: return fs_trim if there is no candidate
  2016-12-30 18:51 [PATCH 01/10] f2fs: reassign new segment for mode=lfs Jaegeuk Kim
                   ` (7 preceding siblings ...)
  2016-12-30 18:51 ` [PATCH 09/10] f2fs: avoid needless checkpoint in f2fs_trim_fs Jaegeuk Kim
@ 2016-12-30 18:51 ` Jaegeuk Kim
  2017-02-22  7:23   ` [f2fs-dev] " Chao Yu
  2017-01-04  3:24 ` [f2fs-dev] [PATCH 01/10] f2fs: reassign new segment for mode=lfs Chao Yu
  9 siblings, 1 reply; 41+ messages in thread
From: Jaegeuk Kim @ 2016-12-30 18:51 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

If there is no candidate to submit discard command during f2sf_trim_fs, let's
return without checkpoint.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c |  5 +++++
 fs/f2fs/f2fs.h       |  1 +
 fs/f2fs/segment.c    | 28 +++++++++++++++++++++++-----
 3 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 917b5c5053ae..ccea40763d9d 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1249,6 +1249,11 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	/* this is the case of multiple fstrims without any changes */
 	if (cpc->reason == CP_DISCARD) {
+		if (!exist_trim_candidates(sbi, cpc)) {
+			unblock_operations(sbi);
+			goto out;
+		}
+
 		if (NM_I(sbi)->dirty_nat_cnt == 0 &&
 				SIT_I(sbi)->dirty_sentries == 0 &&
 				prefree_segments(sbi) == 0) {
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index f2f40fce9d31..04d113f0d6bf 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2119,6 +2119,7 @@ void release_discard_addrs(struct f2fs_sb_info *);
 int npages_for_summary_flush(struct f2fs_sb_info *, bool);
 void allocate_new_segments(struct f2fs_sb_info *);
 int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
+bool exist_trim_candidates(struct f2fs_sb_info *, struct cp_control *);
 struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
 void update_meta_page(struct f2fs_sb_info *, void *, block_t);
 void write_meta_page(struct f2fs_sb_info *, struct page *);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 9a38424c3c1f..82078734f379 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -836,7 +836,8 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
 	SM_I(sbi)->nr_discards += end - start;
 }
 
-static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
+							bool check_only)
 {
 	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
 	int max_blocks = sbi->blocks_per_seg;
@@ -850,12 +851,12 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	int i;
 
 	if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi))
-		return;
+		return false;
 
 	if (!force) {
 		if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
 		    SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards)
-			return;
+			return false;
 	}
 
 	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
@@ -873,8 +874,12 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 					&& (end - start) < cpc->trim_minlen)
 			continue;
 
+		if (check_only)
+			return true;
+
 		__add_discard_entry(sbi, cpc, se, start, end);
 	}
+	return false;
 }
 
 void release_discard_addrs(struct f2fs_sb_info *sbi)
@@ -1455,6 +1460,19 @@ static const struct segment_allocation default_salloc_ops = {
 	.allocate_segment = allocate_segment_by_default,
 };
 
+bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+{
+	__u64 trim_start = cpc->trim_start;
+
+	mutex_lock(&SIT_I(sbi)->sentry_lock);
+	for (; trim_start <= cpc->trim_end; trim_start++)
+		if (add_discard_addrs(sbi, cpc, true))
+			break;
+	mutex_unlock(&SIT_I(sbi)->sentry_lock);
+
+	return trim_start <= cpc->trim_end;
+}
+
 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
 {
 	__u64 start = F2FS_BYTES_TO_BLK(range->start);
@@ -2251,7 +2269,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 			/* add discard candidates */
 			if (cpc->reason != CP_DISCARD) {
 				cpc->trim_start = segno;
-				add_discard_addrs(sbi, cpc);
+				add_discard_addrs(sbi, cpc, false);
 			}
 
 			if (to_journal) {
@@ -2289,7 +2307,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 		__u64 trim_start = cpc->trim_start;
 
 		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
-			add_discard_addrs(sbi, cpc);
+			add_discard_addrs(sbi, cpc, false);
 
 		cpc->trim_start = trim_start;
 	}
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 01/10] f2fs: reassign new segment for mode=lfs
  2016-12-30 18:51 [PATCH 01/10] f2fs: reassign new segment for mode=lfs Jaegeuk Kim
                   ` (8 preceding siblings ...)
  2016-12-30 18:51 ` [PATCH 10/10] f2fs: return fs_trim if there is no candidate Jaegeuk Kim
@ 2017-01-04  3:24 ` Chao Yu
  2017-01-04 22:48   ` Jaegeuk Kim
  9 siblings, 1 reply; 41+ messages in thread
From: Chao Yu @ 2017-01-04  3:24 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Jaegeuk,

On 2016/12/31 2:51, Jaegeuk Kim wrote:
> Otherwise we can remain wrong curseg->next_blkoff, resulting in fsck failure.

Could you explain more about this case?

Thanks,

> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>  fs/f2fs/segment.c | 3 ---
>  1 file changed, 3 deletions(-)
> 
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index be9e4d244d75..4e5ffe1d97e4 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -1428,9 +1428,6 @@ void allocate_new_segments(struct f2fs_sb_info *sbi)
>  	unsigned int old_segno;
>  	int i;
>  
> -	if (test_opt(sbi, LFS))
> -		return;
> -
>  	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
>  		curseg = CURSEG_I(sbi, i);
>  		old_segno = curseg->segno;
> 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 02/10] f2fs: fix wrong tracepoints for op and op_flags
  2016-12-30 18:51 ` [PATCH 02/10] f2fs: fix wrong tracepoints for op and op_flags Jaegeuk Kim
@ 2017-01-04  3:25   ` Chao Yu
  0 siblings, 0 replies; 41+ messages in thread
From: Chao Yu @ 2017-01-04  3:25 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2016/12/31 2:51, Jaegeuk Kim wrote:
> This patch fixes wrong tracepoints in terms of op and op_flags.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

Reviewed-by: Chao Yu <yuchao0@huawei.com>

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 03/10] f2fs: add submit_bio tracepoint
  2016-12-30 18:51 ` [PATCH 03/10] f2fs: add submit_bio tracepoint Jaegeuk Kim
@ 2017-01-04  3:32   ` Chao Yu
  2017-01-04 23:35   ` [PATCH 03/10 v2] " Jaegeuk Kim
  1 sibling, 0 replies; 41+ messages in thread
From: Chao Yu @ 2017-01-04  3:32 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Jaegeuk,

On 2016/12/31 2:51, Jaegeuk Kim wrote:
> This patch adds final submit_bio() tracepoint.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>  fs/f2fs/data.c              |  1 +
>  include/trace/events/f2fs.h | 31 +++++++++++++++++++++++++++++++
>  2 files changed, 32 insertions(+)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 2c5df1dc1479..54da4e2f1318 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -175,6 +175,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
>  			current->plug && (type == DATA || type == NODE))
>  			blk_finish_plug(current->plug);
>  	}
> +	trace_f2fs_submit_bio(sbi->sb, bio);
>  	submit_bio(bio);
>  }
>  
> diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
> index 4c942599581b..094d92b52450 100644
> --- a/include/trace/events/f2fs.h
> +++ b/include/trace/events/f2fs.h
> @@ -55,6 +55,8 @@ TRACE_DEFINE_ENUM(CP_DISCARD);
>  		{ IPU,		"IN-PLACE" },				\
>  		{ OPU,		"OUT-OF-PLACE" })
>  
> +#define F2FS_OP	(REQ_OP_READ | REQ_OP_RAHEAD | REQ_SYNC | REQ_PREFLUSH | REQ_META |\
> +			REQ_PRIO)

Needed?

>  #define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_PREFLUSH | REQ_META |\
>  			REQ_PRIO)
>  #define F2FS_BIO_FLAG_MASK(t)	(t & F2FS_OP_FLAGS)
> @@ -837,6 +839,35 @@ DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio,
>  	TP_CONDITION(bio)
>  );
>  
> +TRACE_EVENT(f2fs_submit_bio,

Can we reuse f2fs__submit_bio class like f2fs_submit_{read,write}_bio?

DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_last_bio,

Thanks,

> +
> +	TP_PROTO(struct super_block *sb, struct bio *bio),
> +
> +	TP_ARGS(sb, bio),
> +
> +	TP_STRUCT__entry(
> +		__field(dev_t,	dev)
> +		__field(int,	op)
> +		__field(int,	op_flags)
> +		__field(sector_t,	sector)
> +		__field(unsigned int,	size)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->dev		= sb->s_dev;
> +		__entry->op		= bio->bi_opf & REQ_OP_MASK;
> +		__entry->op_flags	= bio->bi_opf;
> +		__entry->sector		= bio->bi_iter.bi_sector;
> +		__entry->size		= bio->bi_iter.bi_size;
> +	),
> +
> +	TP_printk("dev = (%d,%d), rw = %s%s, sector = %lld, size = %u",
> +		show_dev(__entry),
> +		show_bio_type(__entry->op, __entry->op_flags),
> +		(unsigned long long)__entry->sector,
> +		__entry->size)
> +);
> +
>  TRACE_EVENT(f2fs_write_begin,
>  
>  	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
> 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 04/10] f2fs: support IO alignment for DATA and NODE writes
  2016-12-30 18:51 ` [PATCH 04/10] f2fs: support IO alignment for DATA and NODE writes Jaegeuk Kim
@ 2017-01-04  8:23   ` Chao Yu
  2017-01-04 23:44     ` Jaegeuk Kim
  0 siblings, 1 reply; 41+ messages in thread
From: Chao Yu @ 2017-01-04  8:23 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Jaegeuk,

On 2016/12/31 2:51, Jaegeuk Kim wrote:
> This patch implements IO alignment by filling dummy blocks in DATA and NODE
> write bios. If we can guarantee, for example, 32KB or 64KB for such the IOs,
> we can eliminate underlying dummy page problem which FTL conducts in order to
> close MLC or TLC partial written pages.
> 
> Note that,
>  - it requires "-o mode=lfs".
>  - IO size should be power of 2, not exceed BIO_MAX_PAGES, 256.
>  - read IO is still 4KB.
>  - do checkpoint at fsync, if dummy NODE page was written.

Which scenario we can benefit from? Any numbers?

I doubt that there are some potential side-effect points:
 - write amplification will be more serious than before
 - free space will be more fragmented since dummy blocks is separated in whole
address space
 - there is less chance to merge small(unaligned) IOs in block layer

Thoughts?

Thanks,

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 06/10] f2fs: show the max number of atomic operations
  2016-12-30 18:51 ` [PATCH 06/10] f2fs: show the max number of atomic operations Jaegeuk Kim
@ 2017-01-04  8:45   ` Chao Yu
  2017-01-04 23:50   ` [PATCH 06/10 v2] " Jaegeuk Kim
  1 sibling, 0 replies; 41+ messages in thread
From: Chao Yu @ 2017-01-04  8:45 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2016/12/31 2:51, Jaegeuk Kim wrote:
> This patch adds to show the max number of atomic operations which are
> conducting concurrently.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>  fs/f2fs/debug.c |  7 +++++++
>  fs/f2fs/f2fs.h  | 17 +++++++++++++++++
>  fs/f2fs/file.c  | 12 +++++++++---
>  3 files changed, 33 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index fbd5184140d0..29cdf0c1da1d 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -50,6 +50,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>  	si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
>  	si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
>  	si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
> +	si->aw_cnt = atomic_read(&sbi->aw_cnt);
> +	si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
>  	si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
>  	si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
>  	si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
> @@ -256,6 +258,8 @@ static int stat_show(struct seq_file *s, void *v)
>  			   si->inline_dir);
>  		seq_printf(s, "  - Orphan Inode: %u\n",
>  			   si->orphans);
> +		seq_printf(s, "  - Atomic write count: %4d (Max. %4d)\n",
> +			   si->aw_cnt, si->max_aw_cnt);
>  		seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
>  			   si->main_area_segs, si->main_area_sections,
>  			   si->main_area_zones);
> @@ -414,6 +418,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
>  	atomic_set(&sbi->inline_dir, 0);
>  	atomic_set(&sbi->inplace_count, 0);
>  
> +	atomic_set(&sbi->aw_cnt, 0);
> +	atomic_set(&sbi->max_aw_cnt, 0);
> +
>  	mutex_lock(&f2fs_stat_mutex);
>  	list_add_tail(&si->stat_list, &f2fs_stat_list);
>  	mutex_unlock(&f2fs_stat_mutex);
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 0d7eaddef4a0..bdcfe2a9b532 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -884,6 +884,8 @@ struct f2fs_sb_info {
>  	atomic_t inline_xattr;			/* # of inline_xattr inodes */
>  	atomic_t inline_inode;			/* # of inline_data inodes */
>  	atomic_t inline_dir;			/* # of inline_dentry inodes */
> +	atomic_t aw_cnt;			/* # of atomic writes */
> +	atomic_t max_aw_cnt;			/* max # of atomic writes */
>  	int bg_gc;				/* background gc calls */
>  	unsigned int ndirty_inode[NR_INODE_TYPE];	/* # of dirty inodes */
>  #endif
> @@ -2236,6 +2238,7 @@ struct f2fs_stat_info {
>  	int total_count, utilization;
>  	int bg_gc, nr_wb_cp_data, nr_wb_data;
>  	int inline_xattr, inline_inode, inline_dir, orphans;
> +	int aw_cnt, max_aw_cnt;
>  	unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
>  	unsigned int bimodal, avg_vblocks;
>  	int util_free, util_valid, util_invalid;
> @@ -2307,6 +2310,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
>  		((sbi)->block_count[(curseg)->alloc_type]++)
>  #define stat_inc_inplace_blocks(sbi)					\
>  		(atomic_inc(&(sbi)->inplace_count))
> +#define stat_inc_atomic_write(inode)					\
> +		(atomic_inc(&F2FS_I_SB(inode)->aw_cnt));
> +#define stat_dec_atomic_write(inode)					\
> +		(atomic_dec(&F2FS_I_SB(inode)->aw_cnt));
> +#define stat_update_max_atomic_write(inode)				\
> +	do {								\
> +		int cur = atomic_read(&F2FS_I_SB(inode)->aw_cnt);	\
> +		int max = atomic_read(&F2FS_I_SB(inode)->max_aw_cnt);	\
> +		if (cur > max)						\
> +			atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur);	\
> +	} while (0)
>  #define stat_inc_seg_count(sbi, type, gc_type)				\
>  	do {								\
>  		struct f2fs_stat_info *si = F2FS_STAT(sbi);		\
> @@ -2360,6 +2374,9 @@ void f2fs_destroy_root_stats(void);
>  #define stat_dec_inline_inode(inode)
>  #define stat_inc_inline_dir(inode)
>  #define stat_dec_inline_dir(inode)
> +#define stat_inc_atomic_write(inode)
> +#define stat_dec_atomic_write(inode)
> +#define stat_update_max_atomic_write(inode)
>  #define stat_inc_seg_type(sbi, curseg)
>  #define stat_inc_block_count(sbi, curseg)
>  #define stat_inc_inplace_blocks(sbi)
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 49f10dce817d..826fefc05fb1 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -1542,6 +1542,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
>  	if (ret)
>  		clear_inode_flag(inode, FI_ATOMIC_FILE);
>  out:
> +	stat_inc_atomic_write(inode);
> +	stat_update_max_atomic_write(inode);
>  	inode_unlock(inode);
>  	mnt_drop_write_file(filp);
>  	return ret;
> @@ -1571,9 +1573,11 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
>  			set_inode_flag(inode, FI_ATOMIC_FILE);
>  			goto err_out;
>  		}
> +		ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
> +		stat_dec_atomic_write(inode);
> +	} else {
> +		ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
>  	}
> -
> -	ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
>  err_out:
>  	inode_unlock(inode);
>  	mnt_drop_write_file(filp);
> @@ -1652,8 +1656,10 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
>  
>  	inode_lock(inode);
>  
> -	if (f2fs_is_atomic_file(inode))
> +	if (f2fs_is_atomic_file(inode)) {
>  		drop_inmem_pages(inode);
> +		stat_dec_atomic_write(inode);

Better to decline this count in drop_inmem_pages()?

Thanks,

> +	}
>  	if (f2fs_is_volatile_file(inode)) {
>  		clear_inode_flag(inode, FI_VOLATILE_FILE);
>  		ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
> 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 07/10] f2fs: don't allow encrypted operations without keys
  2016-12-30 18:51 ` [PATCH 07/10] f2fs: don't allow encrypted operations without keys Jaegeuk Kim
@ 2017-01-04  8:56   ` Chao Yu
  0 siblings, 0 replies; 41+ messages in thread
From: Chao Yu @ 2017-01-04  8:56 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel
  Cc: Theodore Ts'o

On 2016/12/31 2:51, Jaegeuk Kim wrote:
> This patch fixes the renaming bug on encrypted filenames, which was pointed by
> 
>  (ext4: don't allow encrypted operations without keys)
> 
> Cc: Theodore Ts'o <tytso@mit.edu>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

Reviewed-by: Chao Yu <yuchao0@huawei.com>

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 08/10] f2fs: relax async discard commands more
  2016-12-30 18:51 ` [PATCH 08/10] f2fs: relax async discard commands more Jaegeuk Kim
@ 2017-01-04  9:29   ` Chao Yu
  2017-01-05  3:19     ` Chao Yu
  0 siblings, 1 reply; 41+ messages in thread
From: Chao Yu @ 2017-01-04  9:29 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2016/12/31 2:51, Jaegeuk Kim wrote:
> This patch relaxes async discard commands to avoid waiting its end_io during
> checkpoint.
> Instead of waiting them during checkpoint, it will be done when actually reusing
> them.
> 
> Test on initial partition of nvme drive.
> 
>  # time fstrim /mnt/test
> 
> Before : 6.158s
> After : 4.822s
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

Reviewed-by: Chao Yu <yuchao0@huawei.com>

One comment below,

> ---
>  fs/f2fs/checkpoint.c |  3 ---
>  fs/f2fs/f2fs.h       |  4 +++-
>  fs/f2fs/segment.c    | 25 +++++++++++++++++++++----
>  fs/f2fs/super.c      |  3 +++
>  4 files changed, 27 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 34bfe2b494ae..1a9ba69a22ba 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1254,7 +1254,6 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  		f2fs_bug_on(sbi, prefree_segments(sbi));
>  		flush_sit_entries(sbi, cpc);
>  		clear_prefree_segments(sbi, cpc);
> -		f2fs_wait_all_discard_bio(sbi);
>  		unblock_operations(sbi);
>  		goto out;
>  	}
> @@ -1278,8 +1277,6 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  	else
>  		clear_prefree_segments(sbi, cpc);
>  
> -	f2fs_wait_all_discard_bio(sbi);
> -
>  	unblock_operations(sbi);
>  	stat_inc_cp_count(sbi->stat_info);
>  
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index bdcfe2a9b532..f2f40fce9d31 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -183,6 +183,8 @@ struct discard_entry {
>  
>  struct bio_entry {
>  	struct list_head list;
> +	unsigned int start_segno;
> +	unsigned int end_segno;
>  	struct bio *bio;
>  	struct completion event;
>  	int error;
> @@ -2111,7 +2113,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *, bool);
>  void invalidate_blocks(struct f2fs_sb_info *, block_t);
>  bool is_checkpointed_data(struct f2fs_sb_info *, block_t);
>  void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
> -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *);
> +void f2fs_wait_discard_bio(struct f2fs_sb_info *, unsigned int);
>  void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *);
>  void release_discard_addrs(struct f2fs_sb_info *);
>  int npages_for_summary_flush(struct f2fs_sb_info *, bool);
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 6d197f0c8151..9a38424c3c1f 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -624,20 +624,24 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
>  }
>  
>  static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi,
> -							struct bio *bio)
> +			struct bio *bio, unsigned int start_segno,
> +					unsigned int end_segno)
>  {
>  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
>  	struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
>  
>  	INIT_LIST_HEAD(&be->list);
>  	be->bio = bio;
> +	be->start_segno = start_segno;
> +	be->end_segno = end_segno;
>  	init_completion(&be->event);
>  	list_add_tail(&be->list, wait_list);
>  
>  	return be;
>  }
>  
> -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
> +/* This should be covered by global mutex, &sit_i->sentry_lock */
> +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, unsigned int segno)
>  {
>  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
>  	struct bio_entry *be, *tmp;
> @@ -646,7 +650,15 @@ void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
>  		struct bio *bio = be->bio;
>  		int err;
>  
> -		wait_for_completion_io(&be->event);
> +		if (!completion_done(&be->event)) {
> +			if ((be->start_segno >= segno &&
> +					be->end_segno <= segno) ||

segno >= be->start_segno && segno < be->end_segno ?

Thanks,

> +					segno == NULL_SEGNO)
> +				wait_for_completion_io(&be->event);
> +			else
> +				continue;
> +		}
> +
>  		err = be->error;
>  		if (err == -EOPNOTSUPP)
>  			err = 0;
> @@ -674,6 +686,8 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
>  		struct block_device *bdev, block_t blkstart, block_t blklen)
>  {
>  	struct bio *bio = NULL;
> +	unsigned int start_segno = GET_SEGNO(sbi, blkstart);
> +	unsigned int end_segno = GET_SEGNO(sbi, blkstart + blklen);
>  	int err;
>  
>  	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
> @@ -688,7 +702,8 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
>  				SECTOR_FROM_BLOCK(blklen),
>  				GFP_NOFS, 0, &bio);
>  	if (!err && bio) {
> -		struct bio_entry *be = __add_bio_entry(sbi, bio);
> +		struct bio_entry *be = __add_bio_entry(sbi, bio,
> +					start_segno, end_segno);
>  
>  		bio->bi_private = be;
>  		bio->bi_end_io = f2fs_submit_bio_wait_endio;
> @@ -1574,6 +1589,8 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>  
>  	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
>  
> +	f2fs_wait_discard_bio(sbi, GET_SEGNO(sbi, *new_blkaddr));
> +
>  	/*
>  	 * __add_sum_entry should be resided under the curseg_mutex
>  	 * because, this function updates a summary entry in the
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index f3697f97e527..16e2bc5209bb 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -770,6 +770,9 @@ static void f2fs_put_super(struct super_block *sb)
>  		write_checkpoint(sbi, &cpc);
>  	}
>  
> +	/* be sure to wait for any on-going discard commands */
> +	f2fs_wait_discard_bio(sbi, NULL_SEGNO);
> +
>  	/* write_checkpoint can update stat informaion */
>  	f2fs_destroy_stats(sbi);
>  
> 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 01/10] f2fs: reassign new segment for mode=lfs
  2017-01-04  3:24 ` [f2fs-dev] [PATCH 01/10] f2fs: reassign new segment for mode=lfs Chao Yu
@ 2017-01-04 22:48   ` Jaegeuk Kim
  2017-01-12 11:03     ` Chao Yu
  0 siblings, 1 reply; 41+ messages in thread
From: Jaegeuk Kim @ 2017-01-04 22:48 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

On 01/04, Chao Yu wrote:
> Hi Jaegeuk,
> 
> On 2016/12/31 2:51, Jaegeuk Kim wrote:
> > Otherwise we can remain wrong curseg->next_blkoff, resulting in fsck failure.
> 
> Could you explain more about this case?

I remember that I hit an fsck failure when I was testing f2fs with an smr drive.
I didn't dig into the error, but the fact is that our roll-forward recovery
doesn't update current segment information at every time, but allocate a new
section at the end of the work like below.
I just enabled it for the LFS mode in order to avoid that failure.

Thanks,

> 
> Thanks,
> 
> > 
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > ---
> >  fs/f2fs/segment.c | 3 ---
> >  1 file changed, 3 deletions(-)
> > 
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index be9e4d244d75..4e5ffe1d97e4 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -1428,9 +1428,6 @@ void allocate_new_segments(struct f2fs_sb_info *sbi)
> >  	unsigned int old_segno;
> >  	int i;
> >  
> > -	if (test_opt(sbi, LFS))
> > -		return;
> > -
> >  	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
> >  		curseg = CURSEG_I(sbi, i);
> >  		old_segno = curseg->segno;
> > 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 03/10 v2] f2fs: add submit_bio tracepoint
  2016-12-30 18:51 ` [PATCH 03/10] f2fs: add submit_bio tracepoint Jaegeuk Kim
  2017-01-04  3:32   ` [f2fs-dev] " Chao Yu
@ 2017-01-04 23:35   ` Jaegeuk Kim
  2017-01-12 11:07     ` Chao Yu
  1 sibling, 1 reply; 41+ messages in thread
From: Jaegeuk Kim @ 2017-01-04 23:35 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel

Change log from v1:
 - remove F2FS_OP
 - share single tracing structure, f2fs__bio
 - split f2fs_prepare_read/write_io and f2fs_submit_read/write_io

>From f5c6ab0f19fb627ebf28ab6a18c0a32b4c8f914f Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 21 Dec 2016 12:13:03 -0800
Subject: [PATCH] f2fs: add submit_bio tracepoint

This patch adds final submit_bio() tracepoint.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c              | 12 ++++++++----
 include/trace/events/f2fs.h | 45 ++++++++++++++++++++++++++++++---------------
 2 files changed, 38 insertions(+), 19 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 2c5df1dc1479..a06b2d187aec 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -175,6 +175,10 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
 			current->plug && (type == DATA || type == NODE))
 			blk_finish_plug(current->plug);
 	}
+	if (is_read_io(bio_op(bio)))
+		trace_f2fs_submit_read_bio(sbi->sb, type, bio);
+	else
+		trace_f2fs_submit_write_bio(sbi->sb, type, bio);
 	submit_bio(bio);
 }
 
@@ -185,12 +189,12 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
 	if (!io->bio)
 		return;
 
+	bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
+
 	if (is_read_io(fio->op))
-		trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio);
+		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
 	else
-		trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio);
-
-	bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
+		trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
 
 	__submit_bio(io->sbi, io->bio, fio->type);
 	io->bio = NULL;
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 4c942599581b..04c527410ecc 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -784,12 +784,11 @@ DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio,
 	TP_CONDITION(page->mapping)
 );
 
-DECLARE_EVENT_CLASS(f2fs__submit_bio,
+DECLARE_EVENT_CLASS(f2fs__bio,
 
-	TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio,
-						struct bio *bio),
+	TP_PROTO(struct super_block *sb, int type, struct bio *bio),
 
-	TP_ARGS(sb, fio, bio),
+	TP_ARGS(sb, type, bio),
 
 	TP_STRUCT__entry(
 		__field(dev_t,	dev)
@@ -802,9 +801,9 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio,
 
 	TP_fast_assign(
 		__entry->dev		= sb->s_dev;
-		__entry->op		= fio->op;
-		__entry->op_flags	= fio->op_flags;
-		__entry->type		= fio->type;
+		__entry->op		= bio_op(bio);
+		__entry->op_flags	= bio->bi_opf;
+		__entry->type		= type;
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->size		= bio->bi_iter.bi_size;
 	),
@@ -817,22 +816,38 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio,
 		__entry->size)
 );
 
-DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_write_bio,
+DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_prepare_write_bio,
+
+	TP_PROTO(struct super_block *sb, int type, struct bio *bio),
+
+	TP_ARGS(sb, type, bio),
+
+	TP_CONDITION(bio)
+);
+
+DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_prepare_read_bio,
+
+	TP_PROTO(struct super_block *sb, int type, struct bio *bio),
+
+	TP_ARGS(sb, type, bio),
+
+	TP_CONDITION(bio)
+);
+
+DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_submit_read_bio,
 
-	TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio,
-							struct bio *bio),
+	TP_PROTO(struct super_block *sb, int type, struct bio *bio),
 
-	TP_ARGS(sb, fio, bio),
+	TP_ARGS(sb, type, bio),
 
 	TP_CONDITION(bio)
 );
 
-DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio,
+DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_submit_write_bio,
 
-	TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio,
-							struct bio *bio),
+	TP_PROTO(struct super_block *sb, int type, struct bio *bio),
 
-	TP_ARGS(sb, fio, bio),
+	TP_ARGS(sb, type, bio),
 
 	TP_CONDITION(bio)
 );
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 04/10] f2fs: support IO alignment for DATA and NODE writes
  2017-01-04  8:23   ` [f2fs-dev] " Chao Yu
@ 2017-01-04 23:44     ` Jaegeuk Kim
  2017-01-12 11:15       ` Chao Yu
  0 siblings, 1 reply; 41+ messages in thread
From: Jaegeuk Kim @ 2017-01-04 23:44 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

On 01/04, Chao Yu wrote:
> Hi Jaegeuk,
> 
> On 2016/12/31 2:51, Jaegeuk Kim wrote:
> > This patch implements IO alignment by filling dummy blocks in DATA and NODE
> > write bios. If we can guarantee, for example, 32KB or 64KB for such the IOs,
> > we can eliminate underlying dummy page problem which FTL conducts in order to
> > close MLC or TLC partial written pages.
> > 
> > Note that,
> >  - it requires "-o mode=lfs".
> >  - IO size should be power of 2, not exceed BIO_MAX_PAGES, 256.
> >  - read IO is still 4KB.
> >  - do checkpoint at fsync, if dummy NODE page was written.
> 
> Which scenario we can benefit from? Any numbers?

I described it in the patch. This is not targetting for performance improvement
for now, but to address the dummy page write problem in FTL so that we can later
implement very simple host-level FTL on top of open-channel SSD.

> I doubt that there are some potential side-effect points:
>  - write amplification will be more serious than before
>  - free space will be more fragmented since dummy blocks is separated in whole
> address space
>  - there is less chance to merge small(unaligned) IOs in block layer

I agree, so I just added this as a mount option experimentally.
One point would be that, if f2fs doesn't do this, FTL should do it. So I think,
from the system point of view, f2fs is a better layer to do it.

Thanks,

> 
> Thoughts?
> 
> Thanks,

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 06/10 v2] f2fs: show the max number of atomic operations
  2016-12-30 18:51 ` [PATCH 06/10] f2fs: show the max number of atomic operations Jaegeuk Kim
  2017-01-04  8:45   ` Chao Yu
@ 2017-01-04 23:50   ` Jaegeuk Kim
  2017-01-12 11:15     ` Chao Yu
  1 sibling, 1 reply; 41+ messages in thread
From: Jaegeuk Kim @ 2017-01-04 23:50 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel

Chang log from v1:
 - move stat_dec_atomic_write(inode); into drop_inmem_pages()

>From 5464f744fe28187b3d71ba6c3cf983659a569e9a Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 28 Dec 2016 13:55:09 -0800
Subject: [PATCH] f2fs: show the max number of atomic operations

This patch adds to show the max number of atomic operations which are
conducting concurrently.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/debug.c   |  7 +++++++
 fs/f2fs/f2fs.h    | 17 +++++++++++++++++
 fs/f2fs/file.c    |  8 ++++++--
 fs/f2fs/segment.c |  1 +
 4 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index fbd5184140d0..29cdf0c1da1d 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -50,6 +50,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
 	si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
 	si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
 	si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
+	si->aw_cnt = atomic_read(&sbi->aw_cnt);
+	si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
 	si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
 	si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
 	si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
@@ -256,6 +258,8 @@ static int stat_show(struct seq_file *s, void *v)
 			   si->inline_dir);
 		seq_printf(s, "  - Orphan Inode: %u\n",
 			   si->orphans);
+		seq_printf(s, "  - Atomic write count: %4d (Max. %4d)\n",
+			   si->aw_cnt, si->max_aw_cnt);
 		seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
 			   si->main_area_segs, si->main_area_sections,
 			   si->main_area_zones);
@@ -414,6 +418,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
 	atomic_set(&sbi->inline_dir, 0);
 	atomic_set(&sbi->inplace_count, 0);
 
+	atomic_set(&sbi->aw_cnt, 0);
+	atomic_set(&sbi->max_aw_cnt, 0);
+
 	mutex_lock(&f2fs_stat_mutex);
 	list_add_tail(&si->stat_list, &f2fs_stat_list);
 	mutex_unlock(&f2fs_stat_mutex);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 0d7eaddef4a0..bdcfe2a9b532 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -884,6 +884,8 @@ struct f2fs_sb_info {
 	atomic_t inline_xattr;			/* # of inline_xattr inodes */
 	atomic_t inline_inode;			/* # of inline_data inodes */
 	atomic_t inline_dir;			/* # of inline_dentry inodes */
+	atomic_t aw_cnt;			/* # of atomic writes */
+	atomic_t max_aw_cnt;			/* max # of atomic writes */
 	int bg_gc;				/* background gc calls */
 	unsigned int ndirty_inode[NR_INODE_TYPE];	/* # of dirty inodes */
 #endif
@@ -2236,6 +2238,7 @@ struct f2fs_stat_info {
 	int total_count, utilization;
 	int bg_gc, nr_wb_cp_data, nr_wb_data;
 	int inline_xattr, inline_inode, inline_dir, orphans;
+	int aw_cnt, max_aw_cnt;
 	unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
 	unsigned int bimodal, avg_vblocks;
 	int util_free, util_valid, util_invalid;
@@ -2307,6 +2310,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
 		((sbi)->block_count[(curseg)->alloc_type]++)
 #define stat_inc_inplace_blocks(sbi)					\
 		(atomic_inc(&(sbi)->inplace_count))
+#define stat_inc_atomic_write(inode)					\
+		(atomic_inc(&F2FS_I_SB(inode)->aw_cnt));
+#define stat_dec_atomic_write(inode)					\
+		(atomic_dec(&F2FS_I_SB(inode)->aw_cnt));
+#define stat_update_max_atomic_write(inode)				\
+	do {								\
+		int cur = atomic_read(&F2FS_I_SB(inode)->aw_cnt);	\
+		int max = atomic_read(&F2FS_I_SB(inode)->max_aw_cnt);	\
+		if (cur > max)						\
+			atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur);	\
+	} while (0)
 #define stat_inc_seg_count(sbi, type, gc_type)				\
 	do {								\
 		struct f2fs_stat_info *si = F2FS_STAT(sbi);		\
@@ -2360,6 +2374,9 @@ void f2fs_destroy_root_stats(void);
 #define stat_dec_inline_inode(inode)
 #define stat_inc_inline_dir(inode)
 #define stat_dec_inline_dir(inode)
+#define stat_inc_atomic_write(inode)
+#define stat_dec_atomic_write(inode)
+#define stat_update_max_atomic_write(inode)
 #define stat_inc_seg_type(sbi, curseg)
 #define stat_inc_block_count(sbi, curseg)
 #define stat_inc_inplace_blocks(sbi)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 49f10dce817d..291d2ca53a4c 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1542,6 +1542,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
 	if (ret)
 		clear_inode_flag(inode, FI_ATOMIC_FILE);
 out:
+	stat_inc_atomic_write(inode);
+	stat_update_max_atomic_write(inode);
 	inode_unlock(inode);
 	mnt_drop_write_file(filp);
 	return ret;
@@ -1571,9 +1573,11 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
 			set_inode_flag(inode, FI_ATOMIC_FILE);
 			goto err_out;
 		}
+		ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
+		stat_dec_atomic_write(inode);
+	} else {
+		ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
 	}
-
-	ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
 err_out:
 	inode_unlock(inode);
 	mnt_drop_write_file(filp);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 6d197f0c8151..485b9118b418 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -243,6 +243,7 @@ void drop_inmem_pages(struct inode *inode)
 	struct f2fs_inode_info *fi = F2FS_I(inode);
 
 	clear_inode_flag(inode, FI_ATOMIC_FILE);
+	stat_dec_atomic_write(inode);
 
 	mutex_lock(&fi->inmem_lock);
 	__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 08/10] f2fs: relax async discard commands more
  2017-01-04  9:29   ` [f2fs-dev] " Chao Yu
@ 2017-01-05  3:19     ` Chao Yu
  2017-01-05  8:17       ` Chao Yu
  2017-01-05 19:46       ` Jaegeuk Kim
  0 siblings, 2 replies; 41+ messages in thread
From: Chao Yu @ 2017-01-05  3:19 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2017/1/4 17:29, Chao Yu wrote:
> On 2016/12/31 2:51, Jaegeuk Kim wrote:
>> This patch relaxes async discard commands to avoid waiting its end_io during
>> checkpoint.
>> Instead of waiting them during checkpoint, it will be done when actually reusing
>> them.
>>
>> Test on initial partition of nvme drive.
>>
>>  # time fstrim /mnt/test
>>
>> Before : 6.158s
>> After : 4.822s
>>
>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> 
> Reviewed-by: Chao Yu <yuchao0@huawei.com>
> 
> One comment below,

I still have a comment on this patch.

>> -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
>> +/* This should be covered by global mutex, &sit_i->sentry_lock */
>> +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, unsigned int segno)
>>  {
>>  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
>>  	struct bio_entry *be, *tmp;
>> @@ -646,7 +650,15 @@ void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
>>  		struct bio *bio = be->bio;
>>  		int err;
>>  
>> -		wait_for_completion_io(&be->event);
>> +		if (!completion_done(&be->event)) {
>> +			if ((be->start_segno >= segno &&
>> +					be->end_segno <= segno) ||
> 
> segno >= be->start_segno && segno < be->end_segno ?

Can you check this?

Thanks,

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 08/10] f2fs: relax async discard commands more
  2017-01-05  3:19     ` Chao Yu
@ 2017-01-05  8:17       ` Chao Yu
  2017-01-05 19:59         ` Jaegeuk Kim
  2017-01-05 19:46       ` Jaegeuk Kim
  1 sibling, 1 reply; 41+ messages in thread
From: Chao Yu @ 2017-01-05  8:17 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Jaegeuk,

I can see patch named ("f2fs: call f2fs_wait_all_discard_bio for an error case")
was merged in dev-test, but I think it's no needed to change error case handling
like this since f2fs_wait_all_discard_bio should always be called after
clear_prefree_segments.

Thanks,

On 2017/1/5 11:19, Chao Yu wrote:
> On 2017/1/4 17:29, Chao Yu wrote:
>> On 2016/12/31 2:51, Jaegeuk Kim wrote:
>>> This patch relaxes async discard commands to avoid waiting its end_io during
>>> checkpoint.
>>> Instead of waiting them during checkpoint, it will be done when actually reusing
>>> them.
>>>
>>> Test on initial partition of nvme drive.
>>>
>>>  # time fstrim /mnt/test
>>>
>>> Before : 6.158s
>>> After : 4.822s
>>>
>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>
>> Reviewed-by: Chao Yu <yuchao0@huawei.com>
>>
>> One comment below,
> 
> I still have a comment on this patch.
> 
>>> -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
>>> +/* This should be covered by global mutex, &sit_i->sentry_lock */
>>> +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, unsigned int segno)
>>>  {
>>>  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
>>>  	struct bio_entry *be, *tmp;
>>> @@ -646,7 +650,15 @@ void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
>>>  		struct bio *bio = be->bio;
>>>  		int err;
>>>  
>>> -		wait_for_completion_io(&be->event);
>>> +		if (!completion_done(&be->event)) {
>>> +			if ((be->start_segno >= segno &&
>>> +					be->end_segno <= segno) ||
>>
>> segno >= be->start_segno && segno < be->end_segno ?
> 
> Can you check this?
> 
> Thanks,
> 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 08/10] f2fs: relax async discard commands more
  2017-01-05  3:19     ` Chao Yu
  2017-01-05  8:17       ` Chao Yu
@ 2017-01-05 19:46       ` Jaegeuk Kim
  2017-01-06  2:06         ` Chao Yu
  1 sibling, 1 reply; 41+ messages in thread
From: Jaegeuk Kim @ 2017-01-05 19:46 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

On 01/05, Chao Yu wrote:
> On 2017/1/4 17:29, Chao Yu wrote:
> > On 2016/12/31 2:51, Jaegeuk Kim wrote:
> >> This patch relaxes async discard commands to avoid waiting its end_io during
> >> checkpoint.
> >> Instead of waiting them during checkpoint, it will be done when actually reusing
> >> them.
> >>
> >> Test on initial partition of nvme drive.
> >>
> >>  # time fstrim /mnt/test
> >>
> >> Before : 6.158s
> >> After : 4.822s
> >>
> >> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > 
> > Reviewed-by: Chao Yu <yuchao0@huawei.com>
> > 
> > One comment below,
> 
> I still have a comment on this patch.
> 
> >> -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
> >> +/* This should be covered by global mutex, &sit_i->sentry_lock */
> >> +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, unsigned int segno)
> >>  {
> >>  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
> >>  	struct bio_entry *be, *tmp;
> >> @@ -646,7 +650,15 @@ void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
> >>  		struct bio *bio = be->bio;
> >>  		int err;
> >>  
> >> -		wait_for_completion_io(&be->event);
> >> +		if (!completion_done(&be->event)) {
> >> +			if ((be->start_segno >= segno &&
> >> +					be->end_segno <= segno) ||
> > 
> > segno >= be->start_segno && segno < be->end_segno ?
> 
> Can you check this?

The be->start_segno and be->end_segno are assigned by:

unsigned int start_segno = GET_SEGNO(sbi, blkstart);
unsigned int end_segno = GET_SEGNO(sbi, blkstart + blklen);

in __f2fs_issue_discard_async().

So, the problem comes when, for example, blkstart = 0 and blklen = 512.

I should have got the end_segno by:

unsigned int end_segno = GET_SEGNO(sbi, blkstart + blklen - 1);

Thanks,

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 08/10] f2fs: relax async discard commands more
  2017-01-05  8:17       ` Chao Yu
@ 2017-01-05 19:59         ` Jaegeuk Kim
  0 siblings, 0 replies; 41+ messages in thread
From: Jaegeuk Kim @ 2017-01-05 19:59 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Chao,

On 01/05, Chao Yu wrote:
> Hi Jaegeuk,
> 
> I can see patch named ("f2fs: call f2fs_wait_all_discard_bio for an error case")
> was merged in dev-test, but I think it's no needed to change error case handling
> like this since f2fs_wait_all_discard_bio should always be called after
> clear_prefree_segments.

Indeed, it's right. ;)

Thanks,

> 
> Thanks,
> 
> On 2017/1/5 11:19, Chao Yu wrote:
> > On 2017/1/4 17:29, Chao Yu wrote:
> >> On 2016/12/31 2:51, Jaegeuk Kim wrote:
> >>> This patch relaxes async discard commands to avoid waiting its end_io during
> >>> checkpoint.
> >>> Instead of waiting them during checkpoint, it will be done when actually reusing
> >>> them.
> >>>
> >>> Test on initial partition of nvme drive.
> >>>
> >>>  # time fstrim /mnt/test
> >>>
> >>> Before : 6.158s
> >>> After : 4.822s
> >>>
> >>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> >>
> >> Reviewed-by: Chao Yu <yuchao0@huawei.com>
> >>
> >> One comment below,
> > 
> > I still have a comment on this patch.
> > 
> >>> -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
> >>> +/* This should be covered by global mutex, &sit_i->sentry_lock */
> >>> +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, unsigned int segno)
> >>>  {
> >>>  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
> >>>  	struct bio_entry *be, *tmp;
> >>> @@ -646,7 +650,15 @@ void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
> >>>  		struct bio *bio = be->bio;
> >>>  		int err;
> >>>  
> >>> -		wait_for_completion_io(&be->event);
> >>> +		if (!completion_done(&be->event)) {
> >>> +			if ((be->start_segno >= segno &&
> >>> +					be->end_segno <= segno) ||
> >>
> >> segno >= be->start_segno && segno < be->end_segno ?
> > 
> > Can you check this?
> > 
> > Thanks,
> > 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 08/10] f2fs: relax async discard commands more
  2017-01-05 19:46       ` Jaegeuk Kim
@ 2017-01-06  2:06         ` Chao Yu
  2017-01-06  2:42           ` Jaegeuk Kim
  0 siblings, 1 reply; 41+ messages in thread
From: Chao Yu @ 2017-01-06  2:06 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2017/1/6 3:46, Jaegeuk Kim wrote:
> On 01/05, Chao Yu wrote:
>> On 2017/1/4 17:29, Chao Yu wrote:
>>> On 2016/12/31 2:51, Jaegeuk Kim wrote:
>>>> This patch relaxes async discard commands to avoid waiting its end_io during
>>>> checkpoint.
>>>> Instead of waiting them during checkpoint, it will be done when actually reusing
>>>> them.
>>>>
>>>> Test on initial partition of nvme drive.
>>>>
>>>>  # time fstrim /mnt/test
>>>>
>>>> Before : 6.158s
>>>> After : 4.822s
>>>>
>>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>>
>>> Reviewed-by: Chao Yu <yuchao0@huawei.com>
>>>
>>> One comment below,
>>
>> I still have a comment on this patch.
>>
>>>> -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
>>>> +/* This should be covered by global mutex, &sit_i->sentry_lock */
>>>> +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, unsigned int segno)
>>>>  {
>>>>  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
>>>>  	struct bio_entry *be, *tmp;
>>>> @@ -646,7 +650,15 @@ void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
>>>>  		struct bio *bio = be->bio;
>>>>  		int err;
>>>>  
>>>> -		wait_for_completion_io(&be->event);
>>>> +		if (!completion_done(&be->event)) {
>>>> +			if ((be->start_segno >= segno &&
>>>> +					be->end_segno <= segno) ||
>>>
>>> segno >= be->start_segno && segno < be->end_segno ?

Still can not understand this judgment condition, we should wait completion of
discard command only when segno is locate in range of [start_segno, end_segno]?

But now, this condition can be true only when segno, start_segno, end_segno have
equal value.

Please correct me if I'm wrong.

Thanks,

>>
>> Can you check this?
> 
> The be->start_segno and be->end_segno are assigned by:
> 
> unsigned int start_segno = GET_SEGNO(sbi, blkstart);
> unsigned int end_segno = GET_SEGNO(sbi, blkstart + blklen);
> 
> in __f2fs_issue_discard_async().
> 
> So, the problem comes when, for example, blkstart = 0 and blklen = 512.
> 
> I should have got the end_segno by:
> 
> unsigned int end_segno = GET_SEGNO(sbi, blkstart + blklen - 1);
> 
> Thanks,
> 
> .
> 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 08/10] f2fs: relax async discard commands more
  2017-01-06  2:06         ` Chao Yu
@ 2017-01-06  2:42           ` Jaegeuk Kim
  2017-01-06  3:32             ` Chao Yu
  2017-02-22  7:23             ` Chao Yu
  0 siblings, 2 replies; 41+ messages in thread
From: Jaegeuk Kim @ 2017-01-06  2:42 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Chao,

On 01/06, Chao Yu wrote:
> On 2017/1/6 3:46, Jaegeuk Kim wrote:
> > On 01/05, Chao Yu wrote:
> >> On 2017/1/4 17:29, Chao Yu wrote:
> >>> On 2016/12/31 2:51, Jaegeuk Kim wrote:
> >>>> This patch relaxes async discard commands to avoid waiting its end_io during
> >>>> checkpoint.
> >>>> Instead of waiting them during checkpoint, it will be done when actually reusing
> >>>> them.
> >>>>
> >>>> Test on initial partition of nvme drive.
> >>>>
> >>>>  # time fstrim /mnt/test
> >>>>
> >>>> Before : 6.158s
> >>>> After : 4.822s
> >>>>
> >>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> >>>
> >>> Reviewed-by: Chao Yu <yuchao0@huawei.com>
> >>>
> >>> One comment below,
> >>
> >> I still have a comment on this patch.
> >>
> >>>> -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
> >>>> +/* This should be covered by global mutex, &sit_i->sentry_lock */
> >>>> +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, unsigned int segno)
> >>>>  {
> >>>>  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
> >>>>  	struct bio_entry *be, *tmp;
> >>>> @@ -646,7 +650,15 @@ void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
> >>>>  		struct bio *bio = be->bio;
> >>>>  		int err;
> >>>>  
> >>>> -		wait_for_completion_io(&be->event);
> >>>> +		if (!completion_done(&be->event)) {
> >>>> +			if ((be->start_segno >= segno &&
> >>>> +					be->end_segno <= segno) ||
> >>>
> >>> segno >= be->start_segno && segno < be->end_segno ?
> 
> Still can not understand this judgment condition, we should wait completion of
> discard command only when segno is locate in range of [start_segno, end_segno]?
> 
> But now, this condition can be true only when segno, start_segno, end_segno have
> equal value.
> 
> Please correct me if I'm wrong.

Urg. I rewrote it to use block addresses.

How about this?

>From fe24461eedd62815e0c56317f010a3a6e3004434 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Thu, 29 Dec 2016 14:07:53 -0800
Subject: [PATCH] f2fs: relax async discard commands more

This patch relaxes async discard commands to avoid waiting its end_io during
checkpoint.
Instead of waiting them during checkpoint, it will be done when actually reusing
them.

Test on initial partition of nvme drive.

 # time fstrim /mnt/test

Before : 6.158s
After : 4.822s

Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c |  7 ++-----
 fs/f2fs/f2fs.h       |  4 +++-
 fs/f2fs/segment.c    | 23 +++++++++++++++++++----
 fs/f2fs/super.c      |  3 +++
 4 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f73ee9534d83..1a9ba69a22ba 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1254,7 +1254,6 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 		f2fs_bug_on(sbi, prefree_segments(sbi));
 		flush_sit_entries(sbi, cpc);
 		clear_prefree_segments(sbi, cpc);
-		f2fs_wait_all_discard_bio(sbi);
 		unblock_operations(sbi);
 		goto out;
 	}
@@ -1273,12 +1272,10 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	/* unlock all the fs_lock[] in do_checkpoint() */
 	err = do_checkpoint(sbi, cpc);
-	if (err) {
+	if (err)
 		release_discard_addrs(sbi);
-	} else {
+	else
 		clear_prefree_segments(sbi, cpc);
-		f2fs_wait_all_discard_bio(sbi);
-	}
 
 	unblock_operations(sbi);
 	stat_inc_cp_count(sbi->stat_info);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index bdcfe2a9b532..e0db895fd84c 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -183,6 +183,8 @@ struct discard_entry {
 
 struct bio_entry {
 	struct list_head list;
+	block_t lstart;
+	block_t len;
 	struct bio *bio;
 	struct completion event;
 	int error;
@@ -2111,7 +2113,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *, bool);
 void invalidate_blocks(struct f2fs_sb_info *, block_t);
 bool is_checkpointed_data(struct f2fs_sb_info *, block_t);
 void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
-void f2fs_wait_all_discard_bio(struct f2fs_sb_info *);
+void f2fs_wait_discard_bio(struct f2fs_sb_info *, block_t);
 void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *);
 void release_discard_addrs(struct f2fs_sb_info *);
 int npages_for_summary_flush(struct f2fs_sb_info *, bool);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 485b9118b418..672bcdefe836 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -625,20 +625,23 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 }
 
 static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi,
-							struct bio *bio)
+			struct bio *bio, block_t lstart, block_t len)
 {
 	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
 	struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
 
 	INIT_LIST_HEAD(&be->list);
 	be->bio = bio;
+	be->lstart = lstart;
+	be->len = len;
 	init_completion(&be->event);
 	list_add_tail(&be->list, wait_list);
 
 	return be;
 }
 
-void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
+/* This should be covered by global mutex, &sit_i->sentry_lock */
+void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
 {
 	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
 	struct bio_entry *be, *tmp;
@@ -647,7 +650,15 @@ void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
 		struct bio *bio = be->bio;
 		int err;
 
-		wait_for_completion_io(&be->event);
+		if (!completion_done(&be->event)) {
+			if ((be->lstart <= blkaddr &&
+					blkaddr < be->lstart + be->len) ||
+					blkaddr == NULL_ADDR)
+				wait_for_completion_io(&be->event);
+			else
+				continue;
+		}
+
 		err = be->error;
 		if (err == -EOPNOTSUPP)
 			err = 0;
@@ -675,6 +686,7 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
 		struct block_device *bdev, block_t blkstart, block_t blklen)
 {
 	struct bio *bio = NULL;
+	block_t lblkstart = blkstart;
 	int err;
 
 	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
@@ -689,7 +701,8 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
 				SECTOR_FROM_BLOCK(blklen),
 				GFP_NOFS, 0, &bio);
 	if (!err && bio) {
-		struct bio_entry *be = __add_bio_entry(sbi, bio);
+		struct bio_entry *be = __add_bio_entry(sbi, bio,
+						lblkstart, blklen);
 
 		bio->bi_private = be;
 		bio->bi_end_io = f2fs_submit_bio_wait_endio;
@@ -1575,6 +1588,8 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 
 	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
 
+	f2fs_wait_discard_bio(sbi, *new_blkaddr);
+
 	/*
 	 * __add_sum_entry should be resided under the curseg_mutex
 	 * because, this function updates a summary entry in the
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f3697f97e527..461c29043aec 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -770,6 +770,9 @@ static void f2fs_put_super(struct super_block *sb)
 		write_checkpoint(sbi, &cpc);
 	}
 
+	/* be sure to wait for any on-going discard commands */
+	f2fs_wait_discard_bio(sbi, NULL_ADDR);
+
 	/* write_checkpoint can update stat informaion */
 	f2fs_destroy_stats(sbi);
 
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 08/10] f2fs: relax async discard commands more
  2017-01-06  2:42           ` Jaegeuk Kim
@ 2017-01-06  3:32             ` Chao Yu
  2017-02-22  7:23             ` Chao Yu
  1 sibling, 0 replies; 41+ messages in thread
From: Chao Yu @ 2017-01-06  3:32 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Jaegeuk,

On 2017/1/6 10:42, Jaegeuk Kim wrote:
> Hi Chao,
> 
> On 01/06, Chao Yu wrote:
>> On 2017/1/6 3:46, Jaegeuk Kim wrote:
>>> On 01/05, Chao Yu wrote:
>>>> On 2017/1/4 17:29, Chao Yu wrote:
>>>>> On 2016/12/31 2:51, Jaegeuk Kim wrote:
>>>>>> This patch relaxes async discard commands to avoid waiting its end_io during
>>>>>> checkpoint.
>>>>>> Instead of waiting them during checkpoint, it will be done when actually reusing
>>>>>> them.
>>>>>>
>>>>>> Test on initial partition of nvme drive.
>>>>>>
>>>>>>  # time fstrim /mnt/test
>>>>>>
>>>>>> Before : 6.158s
>>>>>> After : 4.822s
>>>>>>
>>>>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>>>>
>>>>> Reviewed-by: Chao Yu <yuchao0@huawei.com>
>>>>>
>>>>> One comment below,
>>>>
>>>> I still have a comment on this patch.
>>>>
>>>>>> -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
>>>>>> +/* This should be covered by global mutex, &sit_i->sentry_lock */
>>>>>> +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, unsigned int segno)
>>>>>>  {
>>>>>>  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
>>>>>>  	struct bio_entry *be, *tmp;
>>>>>> @@ -646,7 +650,15 @@ void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
>>>>>>  		struct bio *bio = be->bio;
>>>>>>  		int err;
>>>>>>  
>>>>>> -		wait_for_completion_io(&be->event);
>>>>>> +		if (!completion_done(&be->event)) {
>>>>>> +			if ((be->start_segno >= segno &&
>>>>>> +					be->end_segno <= segno) ||
>>>>>
>>>>> segno >= be->start_segno && segno < be->end_segno ?
>>
>> Still can not understand this judgment condition, we should wait completion of
>> discard command only when segno is locate in range of [start_segno, end_segno]?
>>
>> But now, this condition can be true only when segno, start_segno, end_segno have
>> equal value.
>>
>> Please correct me if I'm wrong.
> 
> Urg. I rewrote it to use block addresses.
> 
> How about this?

Looks good to me. Nice work!

Thanks,

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 01/10] f2fs: reassign new segment for mode=lfs
  2017-01-04 22:48   ` Jaegeuk Kim
@ 2017-01-12 11:03     ` Chao Yu
  0 siblings, 0 replies; 41+ messages in thread
From: Chao Yu @ 2017-01-12 11:03 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2017/1/5 6:48, Jaegeuk Kim wrote:
> On 01/04, Chao Yu wrote:
>> Hi Jaegeuk,
>>
>> On 2016/12/31 2:51, Jaegeuk Kim wrote:
>>> Otherwise we can remain wrong curseg->next_blkoff, resulting in fsck failure.
>>
>> Could you explain more about this case?
> 
> I remember that I hit an fsck failure when I was testing f2fs with an smr drive.
> I didn't dig into the error, but the fact is that our roll-forward recovery
> doesn't update current segment information at every time, but allocate a new
> section at the end of the work like below.
> I just enabled it for the LFS mode in order to avoid that failure.

Alright, thanks for the explanation.

Thanks,

> 
> Thanks,
> 
>>
>> Thanks,
>>
>>>
>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>> ---
>>>  fs/f2fs/segment.c | 3 ---
>>>  1 file changed, 3 deletions(-)
>>>
>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>> index be9e4d244d75..4e5ffe1d97e4 100644
>>> --- a/fs/f2fs/segment.c
>>> +++ b/fs/f2fs/segment.c
>>> @@ -1428,9 +1428,6 @@ void allocate_new_segments(struct f2fs_sb_info *sbi)
>>>  	unsigned int old_segno;
>>>  	int i;
>>>  
>>> -	if (test_opt(sbi, LFS))
>>> -		return;
>>> -
>>>  	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
>>>  		curseg = CURSEG_I(sbi, i);
>>>  		old_segno = curseg->segno;
>>>
> 
> .
> 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 03/10 v2] f2fs: add submit_bio tracepoint
  2017-01-04 23:35   ` [PATCH 03/10 v2] " Jaegeuk Kim
@ 2017-01-12 11:07     ` Chao Yu
  0 siblings, 0 replies; 41+ messages in thread
From: Chao Yu @ 2017-01-12 11:07 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2017/1/5 7:35, Jaegeuk Kim wrote:
> Change log from v1:
>  - remove F2FS_OP
>  - share single tracing structure, f2fs__bio
>  - split f2fs_prepare_read/write_io and f2fs_submit_read/write_io
> 
>>From f5c6ab0f19fb627ebf28ab6a18c0a32b4c8f914f Mon Sep 17 00:00:00 2001
> From: Jaegeuk Kim <jaegeuk@kernel.org>
> Date: Wed, 21 Dec 2016 12:13:03 -0800
> Subject: [PATCH] f2fs: add submit_bio tracepoint
> 
> This patch adds final submit_bio() tracepoint.

More neat, looks good to me!

> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

Reviewed-by: Chao Yu <yuchao0@huawei.com>

Thanks,

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 04/10] f2fs: support IO alignment for DATA and NODE writes
  2017-01-04 23:44     ` Jaegeuk Kim
@ 2017-01-12 11:15       ` Chao Yu
  0 siblings, 0 replies; 41+ messages in thread
From: Chao Yu @ 2017-01-12 11:15 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2017/1/5 7:44, Jaegeuk Kim wrote:
> On 01/04, Chao Yu wrote:
>> Hi Jaegeuk,
>>
>> On 2016/12/31 2:51, Jaegeuk Kim wrote:
>>> This patch implements IO alignment by filling dummy blocks in DATA and NODE
>>> write bios. If we can guarantee, for example, 32KB or 64KB for such the IOs,
>>> we can eliminate underlying dummy page problem which FTL conducts in order to
>>> close MLC or TLC partial written pages.
>>>
>>> Note that,
>>>  - it requires "-o mode=lfs".
>>>  - IO size should be power of 2, not exceed BIO_MAX_PAGES, 256.
>>>  - read IO is still 4KB.
>>>  - do checkpoint at fsync, if dummy NODE page was written.
>>
>> Which scenario we can benefit from? Any numbers?
> 
> I described it in the patch. This is not targetting for performance improvement
> for now, but to address the dummy page write problem in FTL so that we can later
> implement very simple host-level FTL on top of open-channel SSD.

Alright, if we are doing this since FTL implementation is moved up, so I can
understand that.

Thanks,

> 
>> I doubt that there are some potential side-effect points:
>>  - write amplification will be more serious than before
>>  - free space will be more fragmented since dummy blocks is separated in whole
>> address space
>>  - there is less chance to merge small(unaligned) IOs in block layer
> 
> I agree, so I just added this as a mount option experimentally.
> One point would be that, if f2fs doesn't do this, FTL should do it. So I think,
> from the system point of view, f2fs is a better layer to do it.
> 
> Thanks,
> 
>>
>> Thoughts?
>>
>> Thanks,
> 
> .
> 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 06/10 v2] f2fs: show the max number of atomic operations
  2017-01-04 23:50   ` [PATCH 06/10 v2] " Jaegeuk Kim
@ 2017-01-12 11:15     ` Chao Yu
  0 siblings, 0 replies; 41+ messages in thread
From: Chao Yu @ 2017-01-12 11:15 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2017/1/5 7:50, Jaegeuk Kim wrote:
> Chang log from v1:
>  - move stat_dec_atomic_write(inode); into drop_inmem_pages()
> 
>>From 5464f744fe28187b3d71ba6c3cf983659a569e9a Mon Sep 17 00:00:00 2001
> From: Jaegeuk Kim <jaegeuk@kernel.org>
> Date: Wed, 28 Dec 2016 13:55:09 -0800
> Subject: [PATCH] f2fs: show the max number of atomic operations
> 
> This patch adds to show the max number of atomic operations which are
> conducting concurrently.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

Reviewed-by: Chao Yu <yuchao0@huawei.com>

Thanks,

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 08/10] f2fs: relax async discard commands more
  2017-01-06  2:42           ` Jaegeuk Kim
  2017-01-06  3:32             ` Chao Yu
@ 2017-02-22  7:23             ` Chao Yu
  2017-02-22 21:15               ` Jaegeuk Kim
  1 sibling, 1 reply; 41+ messages in thread
From: Chao Yu @ 2017-02-22  7:23 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2017/1/6 10:42, Jaegeuk Kim wrote:
> Hi Chao,
> 
> On 01/06, Chao Yu wrote:
>> On 2017/1/6 3:46, Jaegeuk Kim wrote:
>>> On 01/05, Chao Yu wrote:
>>>> On 2017/1/4 17:29, Chao Yu wrote:
>>>>> On 2016/12/31 2:51, Jaegeuk Kim wrote:
>>>>>> This patch relaxes async discard commands to avoid waiting its end_io during
>>>>>> checkpoint.
>>>>>> Instead of waiting them during checkpoint, it will be done when actually reusing
>>>>>> them.
>>>>>>
>>>>>> Test on initial partition of nvme drive.
>>>>>>
>>>>>>  # time fstrim /mnt/test
>>>>>>
>>>>>> Before : 6.158s
>>>>>> After : 4.822s
>>>>>>
>>>>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>>>>
>>>>> Reviewed-by: Chao Yu <yuchao0@huawei.com>
>>>>>
>>>>> One comment below,
>>>>
>>>> I still have a comment on this patch.
>>>>
>>>>>> -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
>>>>>> +/* This should be covered by global mutex, &sit_i->sentry_lock */
>>>>>> +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, unsigned int segno)
>>>>>>  {
>>>>>>  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
>>>>>>  	struct bio_entry *be, *tmp;
>>>>>> @@ -646,7 +650,15 @@ void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
>>>>>>  		struct bio *bio = be->bio;
>>>>>>  		int err;
>>>>>>  
>>>>>> -		wait_for_completion_io(&be->event);
>>>>>> +		if (!completion_done(&be->event)) {
>>>>>> +			if ((be->start_segno >= segno &&
>>>>>> +					be->end_segno <= segno) ||
>>>>>
>>>>> segno >= be->start_segno && segno < be->end_segno ?
>>
>> Still can not understand this judgment condition, we should wait completion of
>> discard command only when segno is locate in range of [start_segno, end_segno]?
>>
>> But now, this condition can be true only when segno, start_segno, end_segno have
>> equal value.
>>
>> Please correct me if I'm wrong.
> 
> Urg. I rewrote it to use block addresses.
> 
> How about this?
> 
>>From fe24461eedd62815e0c56317f010a3a6e3004434 Mon Sep 17 00:00:00 2001
> From: Jaegeuk Kim <jaegeuk@kernel.org>
> Date: Thu, 29 Dec 2016 14:07:53 -0800
> Subject: [PATCH] f2fs: relax async discard commands more
> 
> This patch relaxes async discard commands to avoid waiting its end_io during
> checkpoint.
> Instead of waiting them during checkpoint, it will be done when actually reusing
> them.
> 
> Test on initial partition of nvme drive.
> 
>  # time fstrim /mnt/test
> 
> Before : 6.158s
> After : 4.822s

We should wait for completion of all discard IOs in fstrim, since we can't
guarantee discard command can be handled successfully by device after we return
success to caller. Right?

Thanks,

> 
> Reviewed-by: Chao Yu <yuchao0@huawei.com>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>  fs/f2fs/checkpoint.c |  7 ++-----
>  fs/f2fs/f2fs.h       |  4 +++-
>  fs/f2fs/segment.c    | 23 +++++++++++++++++++----
>  fs/f2fs/super.c      |  3 +++
>  4 files changed, 27 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index f73ee9534d83..1a9ba69a22ba 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1254,7 +1254,6 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  		f2fs_bug_on(sbi, prefree_segments(sbi));
>  		flush_sit_entries(sbi, cpc);
>  		clear_prefree_segments(sbi, cpc);
> -		f2fs_wait_all_discard_bio(sbi);
>  		unblock_operations(sbi);
>  		goto out;
>  	}
> @@ -1273,12 +1272,10 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  
>  	/* unlock all the fs_lock[] in do_checkpoint() */
>  	err = do_checkpoint(sbi, cpc);
> -	if (err) {
> +	if (err)
>  		release_discard_addrs(sbi);
> -	} else {
> +	else
>  		clear_prefree_segments(sbi, cpc);
> -		f2fs_wait_all_discard_bio(sbi);
> -	}
>  
>  	unblock_operations(sbi);
>  	stat_inc_cp_count(sbi->stat_info);
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index bdcfe2a9b532..e0db895fd84c 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -183,6 +183,8 @@ struct discard_entry {
>  
>  struct bio_entry {
>  	struct list_head list;
> +	block_t lstart;
> +	block_t len;
>  	struct bio *bio;
>  	struct completion event;
>  	int error;
> @@ -2111,7 +2113,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *, bool);
>  void invalidate_blocks(struct f2fs_sb_info *, block_t);
>  bool is_checkpointed_data(struct f2fs_sb_info *, block_t);
>  void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
> -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *);
> +void f2fs_wait_discard_bio(struct f2fs_sb_info *, block_t);
>  void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *);
>  void release_discard_addrs(struct f2fs_sb_info *);
>  int npages_for_summary_flush(struct f2fs_sb_info *, bool);
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 485b9118b418..672bcdefe836 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -625,20 +625,23 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
>  }
>  
>  static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi,
> -							struct bio *bio)
> +			struct bio *bio, block_t lstart, block_t len)
>  {
>  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
>  	struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
>  
>  	INIT_LIST_HEAD(&be->list);
>  	be->bio = bio;
> +	be->lstart = lstart;
> +	be->len = len;
>  	init_completion(&be->event);
>  	list_add_tail(&be->list, wait_list);
>  
>  	return be;
>  }
>  
> -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
> +/* This should be covered by global mutex, &sit_i->sentry_lock */
> +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
>  {
>  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
>  	struct bio_entry *be, *tmp;
> @@ -647,7 +650,15 @@ void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
>  		struct bio *bio = be->bio;
>  		int err;
>  
> -		wait_for_completion_io(&be->event);
> +		if (!completion_done(&be->event)) {
> +			if ((be->lstart <= blkaddr &&
> +					blkaddr < be->lstart + be->len) ||
> +					blkaddr == NULL_ADDR)
> +				wait_for_completion_io(&be->event);
> +			else
> +				continue;
> +		}
> +
>  		err = be->error;
>  		if (err == -EOPNOTSUPP)
>  			err = 0;
> @@ -675,6 +686,7 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
>  		struct block_device *bdev, block_t blkstart, block_t blklen)
>  {
>  	struct bio *bio = NULL;
> +	block_t lblkstart = blkstart;
>  	int err;
>  
>  	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
> @@ -689,7 +701,8 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
>  				SECTOR_FROM_BLOCK(blklen),
>  				GFP_NOFS, 0, &bio);
>  	if (!err && bio) {
> -		struct bio_entry *be = __add_bio_entry(sbi, bio);
> +		struct bio_entry *be = __add_bio_entry(sbi, bio,
> +						lblkstart, blklen);
>  
>  		bio->bi_private = be;
>  		bio->bi_end_io = f2fs_submit_bio_wait_endio;
> @@ -1575,6 +1588,8 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>  
>  	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
>  
> +	f2fs_wait_discard_bio(sbi, *new_blkaddr);
> +
>  	/*
>  	 * __add_sum_entry should be resided under the curseg_mutex
>  	 * because, this function updates a summary entry in the
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index f3697f97e527..461c29043aec 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -770,6 +770,9 @@ static void f2fs_put_super(struct super_block *sb)
>  		write_checkpoint(sbi, &cpc);
>  	}
>  
> +	/* be sure to wait for any on-going discard commands */
> +	f2fs_wait_discard_bio(sbi, NULL_ADDR);
> +
>  	/* write_checkpoint can update stat informaion */
>  	f2fs_destroy_stats(sbi);
>  
> 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 09/10] f2fs: avoid needless checkpoint in f2fs_trim_fs
  2016-12-30 18:51 ` [PATCH 09/10] f2fs: avoid needless checkpoint in f2fs_trim_fs Jaegeuk Kim
@ 2017-02-22  7:23   ` Chao Yu
  0 siblings, 0 replies; 41+ messages in thread
From: Chao Yu @ 2017-02-22  7:23 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2016/12/31 2:51, Jaegeuk Kim wrote:
> The f2fs_trim_fs() doesn't need to do checkpoint if there are newly allocated
> data blocks only which didn't change the critical checkpoint data such as nat
> and sit entries.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

Reviewed-by: Chao Yu <yuchao0@huawei.com>

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 10/10] f2fs: return fs_trim if there is no candidate
  2016-12-30 18:51 ` [PATCH 10/10] f2fs: return fs_trim if there is no candidate Jaegeuk Kim
@ 2017-02-22  7:23   ` Chao Yu
  2017-02-22 21:26     ` Jaegeuk Kim
  0 siblings, 1 reply; 41+ messages in thread
From: Chao Yu @ 2017-02-22  7:23 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

On 2016/12/31 2:51, Jaegeuk Kim wrote:
> If there is no candidate to submit discard command during f2sf_trim_fs, let's
> return without checkpoint.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

Reviewed-by: Chao Yu <yuchao0@huawei.com>

Please see comments below.

> ---
>  fs/f2fs/checkpoint.c |  5 +++++
>  fs/f2fs/f2fs.h       |  1 +
>  fs/f2fs/segment.c    | 28 +++++++++++++++++++++++-----
>  3 files changed, 29 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 917b5c5053ae..ccea40763d9d 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1249,6 +1249,11 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  
>  	/* this is the case of multiple fstrims without any changes */
>  	if (cpc->reason == CP_DISCARD) {
> +		if (!exist_trim_candidates(sbi, cpc)) {
> +			unblock_operations(sbi);
> +			goto out;
> +		}
> +
>  		if (NM_I(sbi)->dirty_nat_cnt == 0 &&
>  				SIT_I(sbi)->dirty_sentries == 0 &&
>  				prefree_segments(sbi) == 0) {
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index f2f40fce9d31..04d113f0d6bf 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -2119,6 +2119,7 @@ void release_discard_addrs(struct f2fs_sb_info *);
>  int npages_for_summary_flush(struct f2fs_sb_info *, bool);
>  void allocate_new_segments(struct f2fs_sb_info *);
>  int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
> +bool exist_trim_candidates(struct f2fs_sb_info *, struct cp_control *);
>  struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
>  void update_meta_page(struct f2fs_sb_info *, void *, block_t);
>  void write_meta_page(struct f2fs_sb_info *, struct page *);
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 9a38424c3c1f..82078734f379 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -836,7 +836,8 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
>  	SM_I(sbi)->nr_discards += end - start;
>  }
>  
> -static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> +static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
> +							bool check_only)
>  {
>  	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
>  	int max_blocks = sbi->blocks_per_seg;
> @@ -850,12 +851,12 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  	int i;
>  
>  	if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi))
> -		return;
> +		return false;
>  
>  	if (!force) {
>  		if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
>  		    SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards)
> -			return;
> +			return false;
>  	}
>  
>  	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
> @@ -873,8 +874,12 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  					&& (end - start) < cpc->trim_minlen)
>  			continue;
>  
> +		if (check_only)
> +			return true;
> +
>  		__add_discard_entry(sbi, cpc, se, start, end);
>  	}
> +	return false;
>  }
>  
>  void release_discard_addrs(struct f2fs_sb_info *sbi)
> @@ -1455,6 +1460,19 @@ static const struct segment_allocation default_salloc_ops = {
>  	.allocate_segment = allocate_segment_by_default,
>  };
>  
> +bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> +{
> +	__u64 trim_start = cpc->trim_start;

bool has_candidate = false;

> +
> +	mutex_lock(&SIT_I(sbi)->sentry_lock);
> +	for (; trim_start <= cpc->trim_end; trim_start++)
> +		if (add_discard_addrs(sbi, cpc, true))
> +			break;

for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
	if (add_discard_addrs(sbi, cpc, true)) {
		has_candidate = true;
		break;
	}

cpc->trim_start = trim_start;

> +	mutex_unlock(&SIT_I(sbi)->sentry_lock);
> +
> +	return trim_start <= cpc->trim_end;

return has_candidate;

Thanks,

> +}
> +
>  int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
>  {
>  	__u64 start = F2FS_BYTES_TO_BLK(range->start);
> @@ -2251,7 +2269,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  			/* add discard candidates */
>  			if (cpc->reason != CP_DISCARD) {
>  				cpc->trim_start = segno;
> -				add_discard_addrs(sbi, cpc);
> +				add_discard_addrs(sbi, cpc, false);
>  			}
>  
>  			if (to_journal) {
> @@ -2289,7 +2307,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  		__u64 trim_start = cpc->trim_start;
>  
>  		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
> -			add_discard_addrs(sbi, cpc);
> +			add_discard_addrs(sbi, cpc, false);
>  
>  		cpc->trim_start = trim_start;
>  	}
> 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 08/10] f2fs: relax async discard commands more
  2017-02-22  7:23             ` Chao Yu
@ 2017-02-22 21:15               ` Jaegeuk Kim
  2017-02-23  2:08                 ` Chao Yu
  0 siblings, 1 reply; 41+ messages in thread
From: Jaegeuk Kim @ 2017-02-22 21:15 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-fsdevel, linux-kernel, linux-f2fs-devel

On 02/22, Chao Yu wrote:
> On 2017/1/6 10:42, Jaegeuk Kim wrote:
> > Hi Chao,
> > 
> > On 01/06, Chao Yu wrote:
> >> On 2017/1/6 3:46, Jaegeuk Kim wrote:
> >>> On 01/05, Chao Yu wrote:
> >>>> On 2017/1/4 17:29, Chao Yu wrote:
> >>>>> On 2016/12/31 2:51, Jaegeuk Kim wrote:
> >>>>>> This patch relaxes async discard commands to avoid waiting its end_io during
> >>>>>> checkpoint.
> >>>>>> Instead of waiting them during checkpoint, it will be done when actually reusing
> >>>>>> them.
> >>>>>>
> >>>>>> Test on initial partition of nvme drive.
> >>>>>>
> >>>>>>  # time fstrim /mnt/test
> >>>>>>
> >>>>>> Before : 6.158s
> >>>>>> After : 4.822s
> >>>>>>
> >>>>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> >>>>>
> >>>>> Reviewed-by: Chao Yu <yuchao0@huawei.com>
> >>>>>
> >>>>> One comment below,
> >>>>
> >>>> I still have a comment on this patch.
> >>>>
> >>>>>> -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
> >>>>>> +/* This should be covered by global mutex, &sit_i->sentry_lock */
> >>>>>> +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, unsigned int segno)
> >>>>>>  {
> >>>>>>  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
> >>>>>>  	struct bio_entry *be, *tmp;
> >>>>>> @@ -646,7 +650,15 @@ void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
> >>>>>>  		struct bio *bio = be->bio;
> >>>>>>  		int err;
> >>>>>>  
> >>>>>> -		wait_for_completion_io(&be->event);
> >>>>>> +		if (!completion_done(&be->event)) {
> >>>>>> +			if ((be->start_segno >= segno &&
> >>>>>> +					be->end_segno <= segno) ||
> >>>>>
> >>>>> segno >= be->start_segno && segno < be->end_segno ?
> >>
> >> Still can not understand this judgment condition, we should wait completion of
> >> discard command only when segno is locate in range of [start_segno, end_segno]?
> >>
> >> But now, this condition can be true only when segno, start_segno, end_segno have
> >> equal value.
> >>
> >> Please correct me if I'm wrong.
> > 
> > Urg. I rewrote it to use block addresses.
> > 
> > How about this?
> > 
> >>From fe24461eedd62815e0c56317f010a3a6e3004434 Mon Sep 17 00:00:00 2001
> > From: Jaegeuk Kim <jaegeuk@kernel.org>
> > Date: Thu, 29 Dec 2016 14:07:53 -0800
> > Subject: [PATCH] f2fs: relax async discard commands more
> > 
> > This patch relaxes async discard commands to avoid waiting its end_io during
> > checkpoint.
> > Instead of waiting them during checkpoint, it will be done when actually reusing
> > them.
> > 
> > Test on initial partition of nvme drive.
> > 
> >  # time fstrim /mnt/test
> > 
> > Before : 6.158s
> > After : 4.822s
> 
> We should wait for completion of all discard IOs in fstrim, since we can't
> guarantee discard command can be handled successfully by device after we return
> success to caller. Right?

Do we have to guarantee? I think trim commands just try to give a hint to flash
storage for its healthier status, since it doesn't hurt filesystem consistency
and device/filesystem can freely ignore the commands as well.
If we want to guarantee discarding region, we may need to issue BLKSECDISCARD.

Thanks,

> 
> Thanks,
> 
> > 
> > Reviewed-by: Chao Yu <yuchao0@huawei.com>
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > ---
> >  fs/f2fs/checkpoint.c |  7 ++-----
> >  fs/f2fs/f2fs.h       |  4 +++-
> >  fs/f2fs/segment.c    | 23 +++++++++++++++++++----
> >  fs/f2fs/super.c      |  3 +++
> >  4 files changed, 27 insertions(+), 10 deletions(-)
> > 
> > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > index f73ee9534d83..1a9ba69a22ba 100644
> > --- a/fs/f2fs/checkpoint.c
> > +++ b/fs/f2fs/checkpoint.c
> > @@ -1254,7 +1254,6 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >  		f2fs_bug_on(sbi, prefree_segments(sbi));
> >  		flush_sit_entries(sbi, cpc);
> >  		clear_prefree_segments(sbi, cpc);
> > -		f2fs_wait_all_discard_bio(sbi);
> >  		unblock_operations(sbi);
> >  		goto out;
> >  	}
> > @@ -1273,12 +1272,10 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >  
> >  	/* unlock all the fs_lock[] in do_checkpoint() */
> >  	err = do_checkpoint(sbi, cpc);
> > -	if (err) {
> > +	if (err)
> >  		release_discard_addrs(sbi);
> > -	} else {
> > +	else
> >  		clear_prefree_segments(sbi, cpc);
> > -		f2fs_wait_all_discard_bio(sbi);
> > -	}
> >  
> >  	unblock_operations(sbi);
> >  	stat_inc_cp_count(sbi->stat_info);
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index bdcfe2a9b532..e0db895fd84c 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -183,6 +183,8 @@ struct discard_entry {
> >  
> >  struct bio_entry {
> >  	struct list_head list;
> > +	block_t lstart;
> > +	block_t len;
> >  	struct bio *bio;
> >  	struct completion event;
> >  	int error;
> > @@ -2111,7 +2113,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *, bool);
> >  void invalidate_blocks(struct f2fs_sb_info *, block_t);
> >  bool is_checkpointed_data(struct f2fs_sb_info *, block_t);
> >  void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
> > -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *);
> > +void f2fs_wait_discard_bio(struct f2fs_sb_info *, block_t);
> >  void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *);
> >  void release_discard_addrs(struct f2fs_sb_info *);
> >  int npages_for_summary_flush(struct f2fs_sb_info *, bool);
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index 485b9118b418..672bcdefe836 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -625,20 +625,23 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
> >  }
> >  
> >  static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi,
> > -							struct bio *bio)
> > +			struct bio *bio, block_t lstart, block_t len)
> >  {
> >  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
> >  	struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
> >  
> >  	INIT_LIST_HEAD(&be->list);
> >  	be->bio = bio;
> > +	be->lstart = lstart;
> > +	be->len = len;
> >  	init_completion(&be->event);
> >  	list_add_tail(&be->list, wait_list);
> >  
> >  	return be;
> >  }
> >  
> > -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
> > +/* This should be covered by global mutex, &sit_i->sentry_lock */
> > +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
> >  {
> >  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
> >  	struct bio_entry *be, *tmp;
> > @@ -647,7 +650,15 @@ void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
> >  		struct bio *bio = be->bio;
> >  		int err;
> >  
> > -		wait_for_completion_io(&be->event);
> > +		if (!completion_done(&be->event)) {
> > +			if ((be->lstart <= blkaddr &&
> > +					blkaddr < be->lstart + be->len) ||
> > +					blkaddr == NULL_ADDR)
> > +				wait_for_completion_io(&be->event);
> > +			else
> > +				continue;
> > +		}
> > +
> >  		err = be->error;
> >  		if (err == -EOPNOTSUPP)
> >  			err = 0;
> > @@ -675,6 +686,7 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
> >  		struct block_device *bdev, block_t blkstart, block_t blklen)
> >  {
> >  	struct bio *bio = NULL;
> > +	block_t lblkstart = blkstart;
> >  	int err;
> >  
> >  	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
> > @@ -689,7 +701,8 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
> >  				SECTOR_FROM_BLOCK(blklen),
> >  				GFP_NOFS, 0, &bio);
> >  	if (!err && bio) {
> > -		struct bio_entry *be = __add_bio_entry(sbi, bio);
> > +		struct bio_entry *be = __add_bio_entry(sbi, bio,
> > +						lblkstart, blklen);
> >  
> >  		bio->bi_private = be;
> >  		bio->bi_end_io = f2fs_submit_bio_wait_endio;
> > @@ -1575,6 +1588,8 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> >  
> >  	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
> >  
> > +	f2fs_wait_discard_bio(sbi, *new_blkaddr);
> > +
> >  	/*
> >  	 * __add_sum_entry should be resided under the curseg_mutex
> >  	 * because, this function updates a summary entry in the
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index f3697f97e527..461c29043aec 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -770,6 +770,9 @@ static void f2fs_put_super(struct super_block *sb)
> >  		write_checkpoint(sbi, &cpc);
> >  	}
> >  
> > +	/* be sure to wait for any on-going discard commands */
> > +	f2fs_wait_discard_bio(sbi, NULL_ADDR);
> > +
> >  	/* write_checkpoint can update stat informaion */
> >  	f2fs_destroy_stats(sbi);
> >  
> > 
> 
> 
> ------------------------------------------------------------------------------
> Check out the vibrant tech community on one of the world's most
> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 10/10] f2fs: return fs_trim if there is no candidate
  2017-02-22  7:23   ` [f2fs-dev] " Chao Yu
@ 2017-02-22 21:26     ` Jaegeuk Kim
  2017-02-23  2:12       ` Chao Yu
  0 siblings, 1 reply; 41+ messages in thread
From: Jaegeuk Kim @ 2017-02-22 21:26 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Chao,

On 02/22, Chao Yu wrote:
> On 2016/12/31 2:51, Jaegeuk Kim wrote:
> > If there is no candidate to submit discard command during f2sf_trim_fs, let's
> > return without checkpoint.
> > 
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> 
> Reviewed-by: Chao Yu <yuchao0@huawei.com>
> 
> Please see comments below.
> 
> > ---
> >  fs/f2fs/checkpoint.c |  5 +++++
> >  fs/f2fs/f2fs.h       |  1 +
> >  fs/f2fs/segment.c    | 28 +++++++++++++++++++++++-----
> >  3 files changed, 29 insertions(+), 5 deletions(-)
> > 
> > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > index 917b5c5053ae..ccea40763d9d 100644
> > --- a/fs/f2fs/checkpoint.c
> > +++ b/fs/f2fs/checkpoint.c
> > @@ -1249,6 +1249,11 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >  
> >  	/* this is the case of multiple fstrims without any changes */
> >  	if (cpc->reason == CP_DISCARD) {
> > +		if (!exist_trim_candidates(sbi, cpc)) {
> > +			unblock_operations(sbi);
> > +			goto out;
> > +		}
> > +
> >  		if (NM_I(sbi)->dirty_nat_cnt == 0 &&
> >  				SIT_I(sbi)->dirty_sentries == 0 &&
> >  				prefree_segments(sbi) == 0) {
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index f2f40fce9d31..04d113f0d6bf 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -2119,6 +2119,7 @@ void release_discard_addrs(struct f2fs_sb_info *);
> >  int npages_for_summary_flush(struct f2fs_sb_info *, bool);
> >  void allocate_new_segments(struct f2fs_sb_info *);
> >  int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
> > +bool exist_trim_candidates(struct f2fs_sb_info *, struct cp_control *);
> >  struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
> >  void update_meta_page(struct f2fs_sb_info *, void *, block_t);
> >  void write_meta_page(struct f2fs_sb_info *, struct page *);
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index 9a38424c3c1f..82078734f379 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -836,7 +836,8 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
> >  	SM_I(sbi)->nr_discards += end - start;
> >  }
> >  
> > -static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> > +static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
> > +							bool check_only)
> >  {
> >  	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
> >  	int max_blocks = sbi->blocks_per_seg;
> > @@ -850,12 +851,12 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >  	int i;
> >  
> >  	if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi))
> > -		return;
> > +		return false;
> >  
> >  	if (!force) {
> >  		if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
> >  		    SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards)
> > -			return;
> > +			return false;
> >  	}
> >  
> >  	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
> > @@ -873,8 +874,12 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >  					&& (end - start) < cpc->trim_minlen)
> >  			continue;
> >  
> > +		if (check_only)
> > +			return true;
> > +
> >  		__add_discard_entry(sbi, cpc, se, start, end);
> >  	}
> > +	return false;
> >  }
> >  
> >  void release_discard_addrs(struct f2fs_sb_info *sbi)
> > @@ -1455,6 +1460,19 @@ static const struct segment_allocation default_salloc_ops = {
> >  	.allocate_segment = allocate_segment_by_default,
> >  };
> >  
> > +bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> > +{
> > +	__u64 trim_start = cpc->trim_start;
> 
> bool has_candidate = false;
> 
> > +
> > +	mutex_lock(&SIT_I(sbi)->sentry_lock);
> > +	for (; trim_start <= cpc->trim_end; trim_start++)
> > +		if (add_discard_addrs(sbi, cpc, true))
> > +			break;
> 
> for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
> 	if (add_discard_addrs(sbi, cpc, true)) {
> 		has_candidate = true;
> 		break;
> 	}

Why do we need to do like this in which needs additional variable with multiple
assignment?

Thanks,

> 
> cpc->trim_start = trim_start;
> 
> > +	mutex_unlock(&SIT_I(sbi)->sentry_lock);
> > +
> > +	return trim_start <= cpc->trim_end;
> 
> return has_candidate;
> 
> Thanks,
> 
> > +}
> > +
> >  int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
> >  {
> >  	__u64 start = F2FS_BYTES_TO_BLK(range->start);
> > @@ -2251,7 +2269,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >  			/* add discard candidates */
> >  			if (cpc->reason != CP_DISCARD) {
> >  				cpc->trim_start = segno;
> > -				add_discard_addrs(sbi, cpc);
> > +				add_discard_addrs(sbi, cpc, false);
> >  			}
> >  
> >  			if (to_journal) {
> > @@ -2289,7 +2307,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >  		__u64 trim_start = cpc->trim_start;
> >  
> >  		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
> > -			add_discard_addrs(sbi, cpc);
> > +			add_discard_addrs(sbi, cpc, false);
> >  
> >  		cpc->trim_start = trim_start;
> >  	}
> > 
> 
> 
> ------------------------------------------------------------------------------
> Check out the vibrant tech community on one of the world's most
> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 08/10] f2fs: relax async discard commands more
  2017-02-22 21:15               ` Jaegeuk Kim
@ 2017-02-23  2:08                 ` Chao Yu
  2017-02-23  2:42                   ` Jaegeuk Kim
  0 siblings, 1 reply; 41+ messages in thread
From: Chao Yu @ 2017-02-23  2:08 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-fsdevel, linux-kernel, linux-f2fs-devel

On 2017/2/23 5:15, Jaegeuk Kim wrote:
> On 02/22, Chao Yu wrote:
>> On 2017/1/6 10:42, Jaegeuk Kim wrote:
>>> Hi Chao,
>>>
>>> On 01/06, Chao Yu wrote:
>>>> On 2017/1/6 3:46, Jaegeuk Kim wrote:
>>>>> On 01/05, Chao Yu wrote:
>>>>>> On 2017/1/4 17:29, Chao Yu wrote:
>>>>>>> On 2016/12/31 2:51, Jaegeuk Kim wrote:
>>>>>>>> This patch relaxes async discard commands to avoid waiting its end_io during
>>>>>>>> checkpoint.
>>>>>>>> Instead of waiting them during checkpoint, it will be done when actually reusing
>>>>>>>> them.
>>>>>>>>
>>>>>>>> Test on initial partition of nvme drive.
>>>>>>>>
>>>>>>>>  # time fstrim /mnt/test
>>>>>>>>
>>>>>>>> Before : 6.158s
>>>>>>>> After : 4.822s
>>>>>>>>
>>>>>>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>>>>>>
>>>>>>> Reviewed-by: Chao Yu <yuchao0@huawei.com>
>>>>>>>
>>>>>>> One comment below,
>>>>>>
>>>>>> I still have a comment on this patch.
>>>>>>
>>>>>>>> -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
>>>>>>>> +/* This should be covered by global mutex, &sit_i->sentry_lock */
>>>>>>>> +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, unsigned int segno)
>>>>>>>>  {
>>>>>>>>  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
>>>>>>>>  	struct bio_entry *be, *tmp;
>>>>>>>> @@ -646,7 +650,15 @@ void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
>>>>>>>>  		struct bio *bio = be->bio;
>>>>>>>>  		int err;
>>>>>>>>  
>>>>>>>> -		wait_for_completion_io(&be->event);
>>>>>>>> +		if (!completion_done(&be->event)) {
>>>>>>>> +			if ((be->start_segno >= segno &&
>>>>>>>> +					be->end_segno <= segno) ||
>>>>>>>
>>>>>>> segno >= be->start_segno && segno < be->end_segno ?
>>>>
>>>> Still can not understand this judgment condition, we should wait completion of
>>>> discard command only when segno is locate in range of [start_segno, end_segno]?
>>>>
>>>> But now, this condition can be true only when segno, start_segno, end_segno have
>>>> equal value.
>>>>
>>>> Please correct me if I'm wrong.
>>>
>>> Urg. I rewrote it to use block addresses.
>>>
>>> How about this?
>>>
>>> >From fe24461eedd62815e0c56317f010a3a6e3004434 Mon Sep 17 00:00:00 2001
>>> From: Jaegeuk Kim <jaegeuk@kernel.org>
>>> Date: Thu, 29 Dec 2016 14:07:53 -0800
>>> Subject: [PATCH] f2fs: relax async discard commands more
>>>
>>> This patch relaxes async discard commands to avoid waiting its end_io during
>>> checkpoint.
>>> Instead of waiting them during checkpoint, it will be done when actually reusing
>>> them.
>>>
>>> Test on initial partition of nvme drive.
>>>
>>>  # time fstrim /mnt/test
>>>
>>> Before : 6.158s
>>> After : 4.822s
>>
>> We should wait for completion of all discard IOs in fstrim, since we can't
>> guarantee discard command can be handled successfully by device after we return
>> success to caller. Right?
> 
> Do we have to guarantee? I think trim commands just try to give a hint to flash
> storage for its healthier status, since it doesn't hurt filesystem consistency
> and device/filesystem can freely ignore the commands as well.

In manual of fstrim/BLKDISCARD, there is no explicit description that we should
guarantee that discard ioctl being designed as synchronization, but trim
interface in both block layer and most filesystems is designed as
synchronization, so what about keep consistent with them now until there is
requirement? then we can add a new async trim interface in ioctl.

> If we want to guarantee discarding region, we may need to issue BLKSECDISCARD.

Hmm.. I know that BLKSECDISCARD will trigger erasing operation in lower layer
FTL, but now it is also being designed as synchronized interface.

Thanks,

> 
> Thanks,
> 
>>
>> Thanks,
>>
>>>
>>> Reviewed-by: Chao Yu <yuchao0@huawei.com>
>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>> ---
>>>  fs/f2fs/checkpoint.c |  7 ++-----
>>>  fs/f2fs/f2fs.h       |  4 +++-
>>>  fs/f2fs/segment.c    | 23 +++++++++++++++++++----
>>>  fs/f2fs/super.c      |  3 +++
>>>  4 files changed, 27 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>> index f73ee9534d83..1a9ba69a22ba 100644
>>> --- a/fs/f2fs/checkpoint.c
>>> +++ b/fs/f2fs/checkpoint.c
>>> @@ -1254,7 +1254,6 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>  		f2fs_bug_on(sbi, prefree_segments(sbi));
>>>  		flush_sit_entries(sbi, cpc);
>>>  		clear_prefree_segments(sbi, cpc);
>>> -		f2fs_wait_all_discard_bio(sbi);
>>>  		unblock_operations(sbi);
>>>  		goto out;
>>>  	}
>>> @@ -1273,12 +1272,10 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>  
>>>  	/* unlock all the fs_lock[] in do_checkpoint() */
>>>  	err = do_checkpoint(sbi, cpc);
>>> -	if (err) {
>>> +	if (err)
>>>  		release_discard_addrs(sbi);
>>> -	} else {
>>> +	else
>>>  		clear_prefree_segments(sbi, cpc);
>>> -		f2fs_wait_all_discard_bio(sbi);
>>> -	}
>>>  
>>>  	unblock_operations(sbi);
>>>  	stat_inc_cp_count(sbi->stat_info);
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index bdcfe2a9b532..e0db895fd84c 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -183,6 +183,8 @@ struct discard_entry {
>>>  
>>>  struct bio_entry {
>>>  	struct list_head list;
>>> +	block_t lstart;
>>> +	block_t len;
>>>  	struct bio *bio;
>>>  	struct completion event;
>>>  	int error;
>>> @@ -2111,7 +2113,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *, bool);
>>>  void invalidate_blocks(struct f2fs_sb_info *, block_t);
>>>  bool is_checkpointed_data(struct f2fs_sb_info *, block_t);
>>>  void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
>>> -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *);
>>> +void f2fs_wait_discard_bio(struct f2fs_sb_info *, block_t);
>>>  void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *);
>>>  void release_discard_addrs(struct f2fs_sb_info *);
>>>  int npages_for_summary_flush(struct f2fs_sb_info *, bool);
>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>> index 485b9118b418..672bcdefe836 100644
>>> --- a/fs/f2fs/segment.c
>>> +++ b/fs/f2fs/segment.c
>>> @@ -625,20 +625,23 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
>>>  }
>>>  
>>>  static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi,
>>> -							struct bio *bio)
>>> +			struct bio *bio, block_t lstart, block_t len)
>>>  {
>>>  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
>>>  	struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
>>>  
>>>  	INIT_LIST_HEAD(&be->list);
>>>  	be->bio = bio;
>>> +	be->lstart = lstart;
>>> +	be->len = len;
>>>  	init_completion(&be->event);
>>>  	list_add_tail(&be->list, wait_list);
>>>  
>>>  	return be;
>>>  }
>>>  
>>> -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
>>> +/* This should be covered by global mutex, &sit_i->sentry_lock */
>>> +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
>>>  {
>>>  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
>>>  	struct bio_entry *be, *tmp;
>>> @@ -647,7 +650,15 @@ void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
>>>  		struct bio *bio = be->bio;
>>>  		int err;
>>>  
>>> -		wait_for_completion_io(&be->event);
>>> +		if (!completion_done(&be->event)) {
>>> +			if ((be->lstart <= blkaddr &&
>>> +					blkaddr < be->lstart + be->len) ||
>>> +					blkaddr == NULL_ADDR)
>>> +				wait_for_completion_io(&be->event);
>>> +			else
>>> +				continue;
>>> +		}
>>> +
>>>  		err = be->error;
>>>  		if (err == -EOPNOTSUPP)
>>>  			err = 0;
>>> @@ -675,6 +686,7 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
>>>  		struct block_device *bdev, block_t blkstart, block_t blklen)
>>>  {
>>>  	struct bio *bio = NULL;
>>> +	block_t lblkstart = blkstart;
>>>  	int err;
>>>  
>>>  	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
>>> @@ -689,7 +701,8 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
>>>  				SECTOR_FROM_BLOCK(blklen),
>>>  				GFP_NOFS, 0, &bio);
>>>  	if (!err && bio) {
>>> -		struct bio_entry *be = __add_bio_entry(sbi, bio);
>>> +		struct bio_entry *be = __add_bio_entry(sbi, bio,
>>> +						lblkstart, blklen);
>>>  
>>>  		bio->bi_private = be;
>>>  		bio->bi_end_io = f2fs_submit_bio_wait_endio;
>>> @@ -1575,6 +1588,8 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>>>  
>>>  	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
>>>  
>>> +	f2fs_wait_discard_bio(sbi, *new_blkaddr);
>>> +
>>>  	/*
>>>  	 * __add_sum_entry should be resided under the curseg_mutex
>>>  	 * because, this function updates a summary entry in the
>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>> index f3697f97e527..461c29043aec 100644
>>> --- a/fs/f2fs/super.c
>>> +++ b/fs/f2fs/super.c
>>> @@ -770,6 +770,9 @@ static void f2fs_put_super(struct super_block *sb)
>>>  		write_checkpoint(sbi, &cpc);
>>>  	}
>>>  
>>> +	/* be sure to wait for any on-going discard commands */
>>> +	f2fs_wait_discard_bio(sbi, NULL_ADDR);
>>> +
>>>  	/* write_checkpoint can update stat informaion */
>>>  	f2fs_destroy_stats(sbi);
>>>  
>>>
>>
>>
>> ------------------------------------------------------------------------------
>> Check out the vibrant tech community on one of the world's most
>> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
>> _______________________________________________
>> Linux-f2fs-devel mailing list
>> Linux-f2fs-devel@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> 
> .
> 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 10/10] f2fs: return fs_trim if there is no candidate
  2017-02-22 21:26     ` Jaegeuk Kim
@ 2017-02-23  2:12       ` Chao Yu
  2017-02-23  2:47         ` Jaegeuk Kim
  0 siblings, 1 reply; 41+ messages in thread
From: Chao Yu @ 2017-02-23  2:12 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Jaegeuk,

On 2017/2/23 5:26, Jaegeuk Kim wrote:
> Hi Chao,
> 
> On 02/22, Chao Yu wrote:
>> On 2016/12/31 2:51, Jaegeuk Kim wrote:
>>> If there is no candidate to submit discard command during f2sf_trim_fs, let's
>>> return without checkpoint.
>>>
>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>>
>> Reviewed-by: Chao Yu <yuchao0@huawei.com>
>>
>> Please see comments below.
>>
>>> ---
>>>  fs/f2fs/checkpoint.c |  5 +++++
>>>  fs/f2fs/f2fs.h       |  1 +
>>>  fs/f2fs/segment.c    | 28 +++++++++++++++++++++++-----
>>>  3 files changed, 29 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>> index 917b5c5053ae..ccea40763d9d 100644
>>> --- a/fs/f2fs/checkpoint.c
>>> +++ b/fs/f2fs/checkpoint.c
>>> @@ -1249,6 +1249,11 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>  
>>>  	/* this is the case of multiple fstrims without any changes */
>>>  	if (cpc->reason == CP_DISCARD) {
>>> +		if (!exist_trim_candidates(sbi, cpc)) {
>>> +			unblock_operations(sbi);
>>> +			goto out;
>>> +		}
>>> +
>>>  		if (NM_I(sbi)->dirty_nat_cnt == 0 &&
>>>  				SIT_I(sbi)->dirty_sentries == 0 &&
>>>  				prefree_segments(sbi) == 0) {
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index f2f40fce9d31..04d113f0d6bf 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -2119,6 +2119,7 @@ void release_discard_addrs(struct f2fs_sb_info *);
>>>  int npages_for_summary_flush(struct f2fs_sb_info *, bool);
>>>  void allocate_new_segments(struct f2fs_sb_info *);
>>>  int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
>>> +bool exist_trim_candidates(struct f2fs_sb_info *, struct cp_control *);
>>>  struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
>>>  void update_meta_page(struct f2fs_sb_info *, void *, block_t);
>>>  void write_meta_page(struct f2fs_sb_info *, struct page *);
>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>> index 9a38424c3c1f..82078734f379 100644
>>> --- a/fs/f2fs/segment.c
>>> +++ b/fs/f2fs/segment.c
>>> @@ -836,7 +836,8 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
>>>  	SM_I(sbi)->nr_discards += end - start;
>>>  }
>>>  
>>> -static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>> +static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
>>> +							bool check_only)
>>>  {
>>>  	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
>>>  	int max_blocks = sbi->blocks_per_seg;
>>> @@ -850,12 +851,12 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>  	int i;
>>>  
>>>  	if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi))
>>> -		return;
>>> +		return false;
>>>  
>>>  	if (!force) {
>>>  		if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
>>>  		    SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards)
>>> -			return;
>>> +			return false;
>>>  	}
>>>  
>>>  	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
>>> @@ -873,8 +874,12 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>  					&& (end - start) < cpc->trim_minlen)
>>>  			continue;
>>>  
>>> +		if (check_only)
>>> +			return true;
>>> +
>>>  		__add_discard_entry(sbi, cpc, se, start, end);
>>>  	}
>>> +	return false;
>>>  }
>>>  
>>>  void release_discard_addrs(struct f2fs_sb_info *sbi)
>>> @@ -1455,6 +1460,19 @@ static const struct segment_allocation default_salloc_ops = {
>>>  	.allocate_segment = allocate_segment_by_default,
>>>  };
>>>  
>>> +bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>> +{
>>> +	__u64 trim_start = cpc->trim_start;
>>
>> bool has_candidate = false;
>>
>>> +
>>> +	mutex_lock(&SIT_I(sbi)->sentry_lock);
>>> +	for (; trim_start <= cpc->trim_end; trim_start++)
>>> +		if (add_discard_addrs(sbi, cpc, true))
>>> +			break;
>>
>> for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
>> 	if (add_discard_addrs(sbi, cpc, true)) {
>> 		has_candidate = true;
>> 		break;
>> 	}
> 
> Why do we need to do like this in which needs additional variable with multiple
> assignment?

Because add_discard_addrs will check segment which cpc->trim_start points to,
but if we do not increase cpc->trim_start in loop, we will always check one
segment, right?

Thanks,

> 
> Thanks,
> 
>>
>> cpc->trim_start = trim_start;
>>
>>> +	mutex_unlock(&SIT_I(sbi)->sentry_lock);
>>> +
>>> +	return trim_start <= cpc->trim_end;
>>
>> return has_candidate;
>>
>> Thanks,
>>
>>> +}
>>> +
>>>  int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
>>>  {
>>>  	__u64 start = F2FS_BYTES_TO_BLK(range->start);
>>> @@ -2251,7 +2269,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>  			/* add discard candidates */
>>>  			if (cpc->reason != CP_DISCARD) {
>>>  				cpc->trim_start = segno;
>>> -				add_discard_addrs(sbi, cpc);
>>> +				add_discard_addrs(sbi, cpc, false);
>>>  			}
>>>  
>>>  			if (to_journal) {
>>> @@ -2289,7 +2307,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>  		__u64 trim_start = cpc->trim_start;
>>>  
>>>  		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
>>> -			add_discard_addrs(sbi, cpc);
>>> +			add_discard_addrs(sbi, cpc, false);
>>>  
>>>  		cpc->trim_start = trim_start;
>>>  	}
>>>
>>
>>
>> ------------------------------------------------------------------------------
>> Check out the vibrant tech community on one of the world's most
>> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
>> _______________________________________________
>> Linux-f2fs-devel mailing list
>> Linux-f2fs-devel@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> 
> .
> 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 08/10] f2fs: relax async discard commands more
  2017-02-23  2:08                 ` Chao Yu
@ 2017-02-23  2:42                   ` Jaegeuk Kim
  0 siblings, 0 replies; 41+ messages in thread
From: Jaegeuk Kim @ 2017-02-23  2:42 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-fsdevel, linux-kernel, linux-f2fs-devel

On 02/23, Chao Yu wrote:
> On 2017/2/23 5:15, Jaegeuk Kim wrote:
> > On 02/22, Chao Yu wrote:
> >> On 2017/1/6 10:42, Jaegeuk Kim wrote:
> >>> Hi Chao,
> >>>
> >>> On 01/06, Chao Yu wrote:
> >>>> On 2017/1/6 3:46, Jaegeuk Kim wrote:
> >>>>> On 01/05, Chao Yu wrote:
> >>>>>> On 2017/1/4 17:29, Chao Yu wrote:
> >>>>>>> On 2016/12/31 2:51, Jaegeuk Kim wrote:
> >>>>>>>> This patch relaxes async discard commands to avoid waiting its end_io during
> >>>>>>>> checkpoint.
> >>>>>>>> Instead of waiting them during checkpoint, it will be done when actually reusing
> >>>>>>>> them.
> >>>>>>>>
> >>>>>>>> Test on initial partition of nvme drive.
> >>>>>>>>
> >>>>>>>>  # time fstrim /mnt/test
> >>>>>>>>
> >>>>>>>> Before : 6.158s
> >>>>>>>> After : 4.822s
> >>>>>>>>
> >>>>>>>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> >>>>>>>
> >>>>>>> Reviewed-by: Chao Yu <yuchao0@huawei.com>
> >>>>>>>
> >>>>>>> One comment below,
> >>>>>>
> >>>>>> I still have a comment on this patch.
> >>>>>>
> >>>>>>>> -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
> >>>>>>>> +/* This should be covered by global mutex, &sit_i->sentry_lock */
> >>>>>>>> +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, unsigned int segno)
> >>>>>>>>  {
> >>>>>>>>  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
> >>>>>>>>  	struct bio_entry *be, *tmp;
> >>>>>>>> @@ -646,7 +650,15 @@ void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
> >>>>>>>>  		struct bio *bio = be->bio;
> >>>>>>>>  		int err;
> >>>>>>>>  
> >>>>>>>> -		wait_for_completion_io(&be->event);
> >>>>>>>> +		if (!completion_done(&be->event)) {
> >>>>>>>> +			if ((be->start_segno >= segno &&
> >>>>>>>> +					be->end_segno <= segno) ||
> >>>>>>>
> >>>>>>> segno >= be->start_segno && segno < be->end_segno ?
> >>>>
> >>>> Still can not understand this judgment condition, we should wait completion of
> >>>> discard command only when segno is locate in range of [start_segno, end_segno]?
> >>>>
> >>>> But now, this condition can be true only when segno, start_segno, end_segno have
> >>>> equal value.
> >>>>
> >>>> Please correct me if I'm wrong.
> >>>
> >>> Urg. I rewrote it to use block addresses.
> >>>
> >>> How about this?
> >>>
> >>> >From fe24461eedd62815e0c56317f010a3a6e3004434 Mon Sep 17 00:00:00 2001
> >>> From: Jaegeuk Kim <jaegeuk@kernel.org>
> >>> Date: Thu, 29 Dec 2016 14:07:53 -0800
> >>> Subject: [PATCH] f2fs: relax async discard commands more
> >>>
> >>> This patch relaxes async discard commands to avoid waiting its end_io during
> >>> checkpoint.
> >>> Instead of waiting them during checkpoint, it will be done when actually reusing
> >>> them.
> >>>
> >>> Test on initial partition of nvme drive.
> >>>
> >>>  # time fstrim /mnt/test
> >>>
> >>> Before : 6.158s
> >>> After : 4.822s
> >>
> >> We should wait for completion of all discard IOs in fstrim, since we can't
> >> guarantee discard command can be handled successfully by device after we return
> >> success to caller. Right?
> > 
> > Do we have to guarantee? I think trim commands just try to give a hint to flash
> > storage for its healthier status, since it doesn't hurt filesystem consistency
> > and device/filesystem can freely ignore the commands as well.
> 
> In manual of fstrim/BLKDISCARD, there is no explicit description that we should
> guarantee that discard ioctl being designed as synchronization, but trim
> interface in both block layer and most filesystems is designed as
> synchronization, so what about keep consistent with them now until there is
> requirement? then we can add a new async trim interface in ioctl.

Well, I don't think we need to follow this. The only problem happens from sudden
power-cut, but after boot-up, use can trigger another fstrim.

Thanks,

> 
> > If we want to guarantee discarding region, we may need to issue BLKSECDISCARD.
> 
> Hmm.. I know that BLKSECDISCARD will trigger erasing operation in lower layer
> FTL, but now it is also being designed as synchronized interface.
> 
> Thanks,
> 
> > 
> > Thanks,
> > 
> >>
> >> Thanks,
> >>
> >>>
> >>> Reviewed-by: Chao Yu <yuchao0@huawei.com>
> >>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> >>> ---
> >>>  fs/f2fs/checkpoint.c |  7 ++-----
> >>>  fs/f2fs/f2fs.h       |  4 +++-
> >>>  fs/f2fs/segment.c    | 23 +++++++++++++++++++----
> >>>  fs/f2fs/super.c      |  3 +++
> >>>  4 files changed, 27 insertions(+), 10 deletions(-)
> >>>
> >>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> >>> index f73ee9534d83..1a9ba69a22ba 100644
> >>> --- a/fs/f2fs/checkpoint.c
> >>> +++ b/fs/f2fs/checkpoint.c
> >>> @@ -1254,7 +1254,6 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >>>  		f2fs_bug_on(sbi, prefree_segments(sbi));
> >>>  		flush_sit_entries(sbi, cpc);
> >>>  		clear_prefree_segments(sbi, cpc);
> >>> -		f2fs_wait_all_discard_bio(sbi);
> >>>  		unblock_operations(sbi);
> >>>  		goto out;
> >>>  	}
> >>> @@ -1273,12 +1272,10 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >>>  
> >>>  	/* unlock all the fs_lock[] in do_checkpoint() */
> >>>  	err = do_checkpoint(sbi, cpc);
> >>> -	if (err) {
> >>> +	if (err)
> >>>  		release_discard_addrs(sbi);
> >>> -	} else {
> >>> +	else
> >>>  		clear_prefree_segments(sbi, cpc);
> >>> -		f2fs_wait_all_discard_bio(sbi);
> >>> -	}
> >>>  
> >>>  	unblock_operations(sbi);
> >>>  	stat_inc_cp_count(sbi->stat_info);
> >>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>> index bdcfe2a9b532..e0db895fd84c 100644
> >>> --- a/fs/f2fs/f2fs.h
> >>> +++ b/fs/f2fs/f2fs.h
> >>> @@ -183,6 +183,8 @@ struct discard_entry {
> >>>  
> >>>  struct bio_entry {
> >>>  	struct list_head list;
> >>> +	block_t lstart;
> >>> +	block_t len;
> >>>  	struct bio *bio;
> >>>  	struct completion event;
> >>>  	int error;
> >>> @@ -2111,7 +2113,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *, bool);
> >>>  void invalidate_blocks(struct f2fs_sb_info *, block_t);
> >>>  bool is_checkpointed_data(struct f2fs_sb_info *, block_t);
> >>>  void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
> >>> -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *);
> >>> +void f2fs_wait_discard_bio(struct f2fs_sb_info *, block_t);
> >>>  void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *);
> >>>  void release_discard_addrs(struct f2fs_sb_info *);
> >>>  int npages_for_summary_flush(struct f2fs_sb_info *, bool);
> >>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>> index 485b9118b418..672bcdefe836 100644
> >>> --- a/fs/f2fs/segment.c
> >>> +++ b/fs/f2fs/segment.c
> >>> @@ -625,20 +625,23 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
> >>>  }
> >>>  
> >>>  static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi,
> >>> -							struct bio *bio)
> >>> +			struct bio *bio, block_t lstart, block_t len)
> >>>  {
> >>>  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
> >>>  	struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
> >>>  
> >>>  	INIT_LIST_HEAD(&be->list);
> >>>  	be->bio = bio;
> >>> +	be->lstart = lstart;
> >>> +	be->len = len;
> >>>  	init_completion(&be->event);
> >>>  	list_add_tail(&be->list, wait_list);
> >>>  
> >>>  	return be;
> >>>  }
> >>>  
> >>> -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
> >>> +/* This should be covered by global mutex, &sit_i->sentry_lock */
> >>> +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
> >>>  {
> >>>  	struct list_head *wait_list = &(SM_I(sbi)->wait_list);
> >>>  	struct bio_entry *be, *tmp;
> >>> @@ -647,7 +650,15 @@ void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
> >>>  		struct bio *bio = be->bio;
> >>>  		int err;
> >>>  
> >>> -		wait_for_completion_io(&be->event);
> >>> +		if (!completion_done(&be->event)) {
> >>> +			if ((be->lstart <= blkaddr &&
> >>> +					blkaddr < be->lstart + be->len) ||
> >>> +					blkaddr == NULL_ADDR)
> >>> +				wait_for_completion_io(&be->event);
> >>> +			else
> >>> +				continue;
> >>> +		}
> >>> +
> >>>  		err = be->error;
> >>>  		if (err == -EOPNOTSUPP)
> >>>  			err = 0;
> >>> @@ -675,6 +686,7 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
> >>>  		struct block_device *bdev, block_t blkstart, block_t blklen)
> >>>  {
> >>>  	struct bio *bio = NULL;
> >>> +	block_t lblkstart = blkstart;
> >>>  	int err;
> >>>  
> >>>  	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
> >>> @@ -689,7 +701,8 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
> >>>  				SECTOR_FROM_BLOCK(blklen),
> >>>  				GFP_NOFS, 0, &bio);
> >>>  	if (!err && bio) {
> >>> -		struct bio_entry *be = __add_bio_entry(sbi, bio);
> >>> +		struct bio_entry *be = __add_bio_entry(sbi, bio,
> >>> +						lblkstart, blklen);
> >>>  
> >>>  		bio->bi_private = be;
> >>>  		bio->bi_end_io = f2fs_submit_bio_wait_endio;
> >>> @@ -1575,6 +1588,8 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> >>>  
> >>>  	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
> >>>  
> >>> +	f2fs_wait_discard_bio(sbi, *new_blkaddr);
> >>> +
> >>>  	/*
> >>>  	 * __add_sum_entry should be resided under the curseg_mutex
> >>>  	 * because, this function updates a summary entry in the
> >>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >>> index f3697f97e527..461c29043aec 100644
> >>> --- a/fs/f2fs/super.c
> >>> +++ b/fs/f2fs/super.c
> >>> @@ -770,6 +770,9 @@ static void f2fs_put_super(struct super_block *sb)
> >>>  		write_checkpoint(sbi, &cpc);
> >>>  	}
> >>>  
> >>> +	/* be sure to wait for any on-going discard commands */
> >>> +	f2fs_wait_discard_bio(sbi, NULL_ADDR);
> >>> +
> >>>  	/* write_checkpoint can update stat informaion */
> >>>  	f2fs_destroy_stats(sbi);
> >>>  
> >>>
> >>
> >>
> >> ------------------------------------------------------------------------------
> >> Check out the vibrant tech community on one of the world's most
> >> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> >> _______________________________________________
> >> Linux-f2fs-devel mailing list
> >> Linux-f2fs-devel@lists.sourceforge.net
> >> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> > 
> > .
> > 
> 
> 
> ------------------------------------------------------------------------------
> Check out the vibrant tech community on one of the world's most
> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [f2fs-dev] [PATCH 10/10] f2fs: return fs_trim if there is no candidate
  2017-02-23  2:12       ` Chao Yu
@ 2017-02-23  2:47         ` Jaegeuk Kim
  0 siblings, 0 replies; 41+ messages in thread
From: Jaegeuk Kim @ 2017-02-23  2:47 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-fsdevel, linux-kernel, linux-f2fs-devel

On 02/23, Chao Yu wrote:
> Hi Jaegeuk,
> 
> On 2017/2/23 5:26, Jaegeuk Kim wrote:
...
> >>> +	__u64 trim_start = cpc->trim_start;
> >>
> >> bool has_candidate = false;
> >>
> >>> +
> >>> +	mutex_lock(&SIT_I(sbi)->sentry_lock);
> >>> +	for (; trim_start <= cpc->trim_end; trim_start++)
> >>> +		if (add_discard_addrs(sbi, cpc, true))
> >>> +			break;
> >>
> >> for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
> >> 	if (add_discard_addrs(sbi, cpc, true)) {
> >> 		has_candidate = true;
> >> 		break;
> >> 	}
> > 
> > Why do we need to do like this in which needs additional variable with multiple
> > assignment?
> 
> Because add_discard_addrs will check segment which cpc->trim_start points to,
> but if we do not increase cpc->trim_start in loop, we will always check one
> segment, right?

Yeah~ ;)

> 
> Thanks,
> 
> > 
> > Thanks,
> > 
> >>
> >> cpc->trim_start = trim_start;
> >>
> >>> +	mutex_unlock(&SIT_I(sbi)->sentry_lock);
> >>> +
> >>> +	return trim_start <= cpc->trim_end;
> >>
> >> return has_candidate;
> >>
> >> Thanks,
> >>
> >>> +}
> >>> +
> >>>  int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
> >>>  {
> >>>  	__u64 start = F2FS_BYTES_TO_BLK(range->start);
> >>> @@ -2251,7 +2269,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >>>  			/* add discard candidates */
> >>>  			if (cpc->reason != CP_DISCARD) {
> >>>  				cpc->trim_start = segno;
> >>> -				add_discard_addrs(sbi, cpc);
> >>> +				add_discard_addrs(sbi, cpc, false);
> >>>  			}
> >>>  
> >>>  			if (to_journal) {
> >>> @@ -2289,7 +2307,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >>>  		__u64 trim_start = cpc->trim_start;
> >>>  
> >>>  		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
> >>> -			add_discard_addrs(sbi, cpc);
> >>> +			add_discard_addrs(sbi, cpc, false);
> >>>  
> >>>  		cpc->trim_start = trim_start;
> >>>  	}
> >>>
> >>
> >>
> >> ------------------------------------------------------------------------------
> >> Check out the vibrant tech community on one of the world's most
> >> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> >> _______________________________________________
> >> Linux-f2fs-devel mailing list
> >> Linux-f2fs-devel@lists.sourceforge.net
> >> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> > 
> > .
> > 
> 
> 
> ------------------------------------------------------------------------------
> Check out the vibrant tech community on one of the world's most
> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

end of thread, other threads:[~2017-02-23  2:48 UTC | newest]

Thread overview: 41+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-12-30 18:51 [PATCH 01/10] f2fs: reassign new segment for mode=lfs Jaegeuk Kim
2016-12-30 18:51 ` [PATCH 02/10] f2fs: fix wrong tracepoints for op and op_flags Jaegeuk Kim
2017-01-04  3:25   ` [f2fs-dev] " Chao Yu
2016-12-30 18:51 ` [PATCH 03/10] f2fs: add submit_bio tracepoint Jaegeuk Kim
2017-01-04  3:32   ` [f2fs-dev] " Chao Yu
2017-01-04 23:35   ` [PATCH 03/10 v2] " Jaegeuk Kim
2017-01-12 11:07     ` Chao Yu
2016-12-30 18:51 ` [PATCH 04/10] f2fs: support IO alignment for DATA and NODE writes Jaegeuk Kim
2017-01-04  8:23   ` [f2fs-dev] " Chao Yu
2017-01-04 23:44     ` Jaegeuk Kim
2017-01-12 11:15       ` Chao Yu
2016-12-30 18:51 ` [PATCH 05/10] f2fs: get io size bit from mount option Jaegeuk Kim
2016-12-30 18:51 ` [PATCH 06/10] f2fs: show the max number of atomic operations Jaegeuk Kim
2017-01-04  8:45   ` Chao Yu
2017-01-04 23:50   ` [PATCH 06/10 v2] " Jaegeuk Kim
2017-01-12 11:15     ` Chao Yu
2016-12-30 18:51 ` [PATCH 07/10] f2fs: don't allow encrypted operations without keys Jaegeuk Kim
2017-01-04  8:56   ` [f2fs-dev] " Chao Yu
2016-12-30 18:51 ` [PATCH 08/10] f2fs: relax async discard commands more Jaegeuk Kim
2017-01-04  9:29   ` [f2fs-dev] " Chao Yu
2017-01-05  3:19     ` Chao Yu
2017-01-05  8:17       ` Chao Yu
2017-01-05 19:59         ` Jaegeuk Kim
2017-01-05 19:46       ` Jaegeuk Kim
2017-01-06  2:06         ` Chao Yu
2017-01-06  2:42           ` Jaegeuk Kim
2017-01-06  3:32             ` Chao Yu
2017-02-22  7:23             ` Chao Yu
2017-02-22 21:15               ` Jaegeuk Kim
2017-02-23  2:08                 ` Chao Yu
2017-02-23  2:42                   ` Jaegeuk Kim
2016-12-30 18:51 ` [PATCH 09/10] f2fs: avoid needless checkpoint in f2fs_trim_fs Jaegeuk Kim
2017-02-22  7:23   ` [f2fs-dev] " Chao Yu
2016-12-30 18:51 ` [PATCH 10/10] f2fs: return fs_trim if there is no candidate Jaegeuk Kim
2017-02-22  7:23   ` [f2fs-dev] " Chao Yu
2017-02-22 21:26     ` Jaegeuk Kim
2017-02-23  2:12       ` Chao Yu
2017-02-23  2:47         ` Jaegeuk Kim
2017-01-04  3:24 ` [f2fs-dev] [PATCH 01/10] f2fs: reassign new segment for mode=lfs Chao Yu
2017-01-04 22:48   ` Jaegeuk Kim
2017-01-12 11:03     ` Chao Yu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).