All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] f2fs: checkpoint disabling
@ 2018-09-26 19:40 Jaegeuk Kim
  2018-09-28 17:36 ` [PATCH v2] " Jaegeuk Kim
  0 siblings, 1 reply; 11+ messages in thread
From: Jaegeuk Kim @ 2018-09-26 19:40 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel; +Cc: Daniel Rosenberg, Jaegeuk Kim

From: Daniel Rosenberg <drosen@google.com>

Note that, it requires "f2fs: return correct errno in f2fs_gc".

This adds a lightweight non-persistent snapshotting scheme to f2fs.

To use, mount with the option checkpoint=disable, and to return to
normal operation, remount with checkpoint=enable. If the filesystem
is shut down before remounting with checkpoint=enable, it will revert
back to its apparent state when it was first mounted with
checkpoint=disable. This is useful for situations where you wish to be
able to roll back the state of the disk in case of some critical
failure.

Signed-off-by: Daniel Rosenberg <drosen@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/filesystems/f2fs.txt |   5 ++
 fs/f2fs/checkpoint.c               |  16 ++++
 fs/f2fs/data.c                     |  14 +++-
 fs/f2fs/debug.c                    |   3 +-
 fs/f2fs/f2fs.h                     |  15 +++-
 fs/f2fs/file.c                     |  12 +++
 fs/f2fs/gc.c                       |   9 ++-
 fs/f2fs/inode.c                    |   6 +-
 fs/f2fs/namei.c                    |  19 +++++
 fs/f2fs/segment.c                  |  75 +++++++++++++++++-
 fs/f2fs/segment.h                  |  15 ++++
 fs/f2fs/super.c                    | 119 ++++++++++++++++++++++++++++-
 include/linux/f2fs_fs.h            |   1 +
 13 files changed, 296 insertions(+), 13 deletions(-)

diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index bde3e91e5372..e46c2147ddf8 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -212,6 +212,11 @@ fsync_mode=%s          Control the policy of fsync. Currently supports "posix",
                        non-atomic files likewise "nobarrier" mount option.
 test_dummy_encryption  Enable dummy encryption, which provides a fake fscrypt
                        context. The fake fscrypt context is used by xfstests.
+checkpoint=%s          Set to "disable" to turn off checkpointing. Set to "enable"
+                       to reenable checkpointing. Is enabled by default. While
+                       disabled, any unmounting or unexpected shutdowns will cause
+                       the filesystem contents to appear as they did when the
+                       filesystem was mounted with that option.
 
 ================================================================================
 DEBUGFS ENTRIES
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index d312d2829d5a..01e0d8f5bbbe 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1214,6 +1214,11 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
 		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
 
+	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
+		__set_ckpt_flags(ckpt, CP_DISABLED_FLAG);
+	else
+		__clear_ckpt_flags(ckpt, CP_DISABLED_FLAG);
+
 	/* set this flag to activate crc|cp_ver for recovery */
 	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
 	__clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
@@ -1421,6 +1426,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	clear_sbi_flag(sbi, SBI_IS_DIRTY);
 	clear_sbi_flag(sbi, SBI_NEED_CP);
+	sbi->unusable_block_count = 0;
 	__set_cp_next_pack(sbi);
 
 	/*
@@ -1445,6 +1451,16 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	unsigned long long ckpt_ver;
 	int err = 0;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+		if (cpc->reason != CP_PAUSE) {
+			if (cpc->reason != CP_UMOUNT)
+				f2fs_msg(sbi->sb, KERN_WARNING,
+					"Wrong disabled checkpoint!");
+			return 0;
+		}
+		f2fs_msg(sbi->sb, KERN_WARNING,
+				"Start checkpoint disabled!");
+	}
 	mutex_lock(&sbi->cp_mutex);
 
 	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index cb3ebcae0398..dc66c1948d8c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -537,7 +537,8 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
 	if (fio->in_list)
 		goto next;
 out:
-	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
+	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
+				f2fs_is_checkpoint_ready(sbi))
 		__submit_merged_bio(io);
 	up_write(&io->io_rwsem);
 }
@@ -1695,6 +1696,10 @@ static inline bool check_inplace_update_policy(struct inode *inode,
 			is_inode_flag_set(inode, FI_NEED_IPU))
 		return true;
 
+	if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+			!f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
+		return true;
+
 	return false;
 }
 
@@ -1725,6 +1730,9 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
 			return true;
 		if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
 			return true;
+		if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+			f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
+			return true;
 	}
 	return false;
 }
@@ -2345,6 +2353,10 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
 
 	trace_f2fs_write_begin(inode, pos, len, flags);
 
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		goto fail;
+
 	if ((f2fs_is_atomic_file(inode) &&
 			!f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
 			is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index d3c402183e3c..cb9e7679124b 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -267,7 +267,8 @@ static int stat_show(struct seq_file *s, void *v)
 		seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n",
 			si->sbi->sb->s_bdev, i++,
 			f2fs_readonly(si->sbi->sb) ? "RO": "RW",
-			f2fs_cp_error(si->sbi) ? "Error": "Good");
+			is_set_ckpt_flags(si->sbi, CP_DISABLED_FLAG) ?
+			"Disabled": (f2fs_cp_error(si->sbi) ? "Error": "Good"));
 		seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
 			   si->sit_area_segs, si->nat_area_segs);
 		seq_printf(s, "[SSA: %d] [MAIN: %d",
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b32fd9f7da95..89ff070ee95a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -98,6 +98,7 @@ extern char *f2fs_fault_name[FAULT_MAX];
 #define F2FS_MOUNT_QUOTA		0x00400000
 #define F2FS_MOUNT_INLINE_XATTR_SIZE	0x00800000
 #define F2FS_MOUNT_RESERVE_ROOT		0x01000000
+#define F2FS_MOUNT_DISABLE_CHECKPOINT	0x02000000
 
 #define F2FS_OPTION(sbi)	((sbi)->mount_opt)
 #define clear_opt(sbi, option)	(F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -176,6 +177,7 @@ enum {
 #define	CP_RECOVERY	0x00000008
 #define	CP_DISCARD	0x00000010
 #define CP_TRIMMED	0x00000020
+#define CP_PAUSE	0x00000040
 
 #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
 #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
@@ -1089,6 +1091,7 @@ enum {
 	SBI_NEED_CP,				/* need to checkpoint */
 	SBI_IS_SHUTDOWN,			/* shutdown by ioctl */
 	SBI_IS_RECOVERED,			/* recovered orphan/data */
+	SBI_CP_DISABLED,			/* CP was disabled last mount */
 };
 
 enum {
@@ -1222,6 +1225,9 @@ struct f2fs_sb_info {
 	block_t reserved_blocks;		/* configurable reserved blocks */
 	block_t current_reserved_blocks;	/* current reserved blocks */
 
+	/* Additional tracking for no checkpoint mode */
+	block_t unusable_block_count;		/* # of blocks saved by last cp */
+
 	unsigned int nquota_files;		/* # of quota sysfile */
 
 	u32 s_next_generation;			/* for NFS support */
@@ -1729,7 +1735,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
 
 	if (!__allow_reserved_blocks(sbi, inode, true))
 		avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
-
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		avail_user_block_count -= sbi->unusable_block_count;
 	if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
 		diff = sbi->total_valid_block_count - avail_user_block_count;
 		if (diff > *count)
@@ -1936,6 +1943,8 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
 
 	if (!__allow_reserved_blocks(sbi, inode, false))
 		valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		valid_block_count += sbi->unusable_block_count;
 
 	if (unlikely(valid_block_count > sbi->user_block_count)) {
 		spin_unlock(&sbi->stat_lock);
@@ -2939,6 +2948,7 @@ void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
 bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
 					struct cp_control *cpc);
+void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
@@ -3469,7 +3479,8 @@ static inline bool f2fs_may_encrypt(struct inode *inode)
 static inline bool f2fs_force_buffered_io(struct inode *inode, int rw)
 {
 	return (f2fs_post_read_required(inode) ||
-			(rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) ||
+			(rw == WRITE && (test_opt(F2FS_I_SB(inode), LFS) ||
+			is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED))) ||
 			F2FS_I_SB(inode)->s_ndevs);
 }
 
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 357422a4c319..3f092bde0240 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -146,6 +146,9 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	enum cp_reason_type cp_reason = CP_NO_NEEDED;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return CP_NO_NEEDED;
+
 	if (!S_ISREG(inode->i_mode))
 		cp_reason = CP_NON_REGULAR;
 	else if (inode->i_nlink != 1)
@@ -2158,6 +2161,12 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
 	if (f2fs_readonly(sbi->sb))
 		return -EROFS;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+		f2fs_msg(sbi->sb, KERN_INFO,
+			"Skipping Checkpoint. Checkpoints currently disabled.");
+		return -EINVAL;
+	}
+
 	ret = mnt_want_write_file(filp);
 	if (ret)
 		return ret;
@@ -2529,6 +2538,9 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
 	if (f2fs_readonly(sbi->sb))
 		return -EROFS;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return -EINVAL;
+
 	if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
 							sizeof(range)))
 		return -EFAULT;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index c051dc4ddf1a..5d2fa37472be 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -362,6 +362,10 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
 
 		if (sec_usage_check(sbi, secno))
 			goto next;
+		/* Don't touch checkpointed data */
+		if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+					get_ckpt_valid_blocks(sbi, segno)))
+			goto next;
 		if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
 			goto next;
 
@@ -1169,7 +1173,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
 		 * threshold, we can make them free by checkpoint. Then, we
 		 * secure free segments which doesn't need fggc any more.
 		 */
-		if (prefree_segments(sbi)) {
+		if (prefree_segments(sbi) &&
+				!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
 			ret = f2fs_write_checkpoint(sbi, &cpc);
 			if (ret)
 				goto stop;
@@ -1221,7 +1226,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
 			segno = NULL_SEGNO;
 			goto gc_more;
 		}
-		if (gc_type == FG_GC)
+		if (gc_type == FG_GC && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))
 			ret = f2fs_write_checkpoint(sbi, &cpc);
 	}
 stop:
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 86e7333d60c1..4ee9d6c4b719 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -607,6 +607,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	if (!is_inode_flag_set(inode, FI_DIRTY_INODE))
 		return 0;
 
+	if (f2fs_is_checkpoint_ready(sbi))
+		return -ENOSPC;
+
 	/*
 	 * We need to balance fs here to prevent from producing dirty node pages
 	 * during the urgent cleaning time when runing out of free sections.
@@ -688,7 +691,8 @@ void f2fs_evict_inode(struct inode *inode)
 	stat_dec_inline_dir(inode);
 	stat_dec_inline_inode(inode);
 
-	if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG)))
+	if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG) &&
+				!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
 		f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
 	else
 		f2fs_inode_synced(inode);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index ded185baa9ae..a14632744a6a 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -16,6 +16,7 @@
 
 #include "f2fs.h"
 #include "node.h"
+#include "segment.h"
 #include "xattr.h"
 #include "acl.h"
 #include <trace/events/f2fs.h>
@@ -269,6 +270,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	err = dquot_initialize(dir);
 	if (err)
@@ -315,6 +319,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	err = fscrypt_prepare_link(old_dentry, dir, dentry);
 	if (err)
@@ -561,6 +568,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
 				      &disk_link);
@@ -690,6 +700,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	err = dquot_initialize(dir);
 	if (err)
@@ -824,6 +837,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
 			(!projid_eq(F2FS_I(new_dir)->i_projid,
@@ -1014,6 +1030,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
 			!projid_eq(F2FS_I(new_dir)->i_projid,
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 97a4fae75651..471aa8821d4c 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -176,6 +176,8 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
 		return false;
 	if (sbi->gc_mode == GC_URGENT)
 		return true;
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return true;
 
 	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
 			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
@@ -480,6 +482,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
 	if (need && excess_cached_nats(sbi))
 		f2fs_balance_fs_bg(sbi);
 
+	if (f2fs_is_checkpoint_ready(sbi))
+		return;
+
 	/*
 	 * We should do GC or end up with checkpoint, if there are so many dirty
 	 * dir/node pages without enough free segments.
@@ -796,7 +801,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 {
 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
-	unsigned short valid_blocks;
+	unsigned short valid_blocks, ckpt_valid_blocks;
 
 	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
 		return;
@@ -804,8 +809,10 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 	mutex_lock(&dirty_i->seglist_lock);
 
 	valid_blocks = get_valid_blocks(sbi, segno, false);
+	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
 
-	if (valid_blocks == 0) {
+	if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
+				ckpt_valid_blocks == sbi->blocks_per_seg)) {
 		__locate_dirty_segment(sbi, segno, PRE);
 		__remove_dirty_segment(sbi, segno, DIRTY);
 	} else if (valid_blocks < sbi->blocks_per_seg) {
@@ -818,6 +825,43 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 	mutex_unlock(&dirty_i->seglist_lock);
 }
 
+/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */
+void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	unsigned int segno;
+
+	mutex_lock(&dirty_i->seglist_lock);
+	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+		if (get_valid_blocks(sbi, segno, false))
+			continue;
+		if (IS_CURSEG(sbi, segno))
+			continue;
+		__locate_dirty_segment(sbi, segno, PRE);
+		__remove_dirty_segment(sbi, segno, DIRTY);
+	}
+	mutex_unlock(&dirty_i->seglist_lock);
+}
+
+/* This is only used by SBI_CP_DISABLED */
+static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	unsigned int segno = 0;
+
+	mutex_lock(&dirty_i->seglist_lock);
+	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+		if (get_valid_blocks(sbi, segno, false))
+			continue;
+		if (get_ckpt_valid_blocks(sbi, segno))
+			continue;
+		mutex_unlock(&dirty_i->seglist_lock);
+		return segno;
+	}
+	mutex_unlock(&dirty_i->seglist_lock);
+	return NULL_SEGNO;
+}
+
 static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
 		struct block_device *bdev, block_t lstart,
 		block_t start, block_t len)
@@ -2028,7 +2072,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
 			sbi->discard_blks--;
 
 		/* don't overwrite by SSR to keep node chain */
-		if (IS_NODESEG(se->type)) {
+		if (IS_NODESEG(se->type) &&
+				!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
 			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
 				se->ckpt_valid_blocks++;
 		}
@@ -2050,6 +2095,15 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
 			f2fs_bug_on(sbi, 1);
 			se->valid_blocks++;
 			del = 0;
+		} else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+			/*
+			 * If checkpoints are off, we must not reuse data that
+			 * was used in the previous checkpoint. If it was used
+			 * before, we must track that to know how much space we
+			 * really have.
+			 */
+			if (f2fs_test_bit(offset, se->ckpt_valid_map))
+				sbi->unusable_block_count++;
 		}
 
 		if (f2fs_test_and_clear_bit(offset, se->discard_map))
@@ -2332,6 +2386,9 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
 	if (sbi->segs_per_sec != 1)
 		return CURSEG_I(sbi, type)->segno;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return 0;
+
 	if (test_opt(sbi, NOHEAP) &&
 		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
 		return 0;
@@ -2475,6 +2532,15 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
 			return 1;
 		}
 	}
+
+	/* find valid_blocks=0 in dirty list */
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+		segno = get_free_segment(sbi);
+		if (segno != NULL_SEGNO) {
+			curseg->next_segno = segno;
+			return 1;
+		}
+	}
 	return 0;
 }
 
@@ -2492,7 +2558,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
 	else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
 					type == CURSEG_WARM_NODE)
 		new_curseg(sbi, type, false);
-	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
+	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
+			likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
 		new_curseg(sbi, type, false);
 	else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
 		change_curseg(sbi, type);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 086150028c6d..ab3465faddf1 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -339,6 +339,12 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
 		return get_seg_entry(sbi, segno)->valid_blocks;
 }
 
+static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
+				unsigned int segno)
+{
+	return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+}
+
 static inline void seg_info_from_raw_sit(struct seg_entry *se,
 					struct f2fs_sit_entry *rs)
 {
@@ -576,6 +582,15 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
 		reserved_sections(sbi) + needed);
 }
 
+static inline int f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi)
+{
+	if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return 0;
+	if (likely(!has_not_enough_free_secs(sbi, 0, 0)))
+		return 0;
+	return -ENOSPC;
+}
+
 static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
 {
 	return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index c47b1ef2685a..081d0d550468 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -136,6 +136,7 @@ enum {
 	Opt_alloc,
 	Opt_fsync,
 	Opt_test_dummy_encryption,
+	Opt_checkpoint,
 	Opt_err,
 };
 
@@ -194,6 +195,7 @@ static match_table_t f2fs_tokens = {
 	{Opt_alloc, "alloc_mode=%s"},
 	{Opt_fsync, "fsync_mode=%s"},
 	{Opt_test_dummy_encryption, "test_dummy_encryption"},
+	{Opt_checkpoint, "checkpoint=%s"},
 	{Opt_err, NULL},
 };
 
@@ -769,6 +771,23 @@ static int parse_options(struct super_block *sb, char *options)
 					"Test dummy encryption mount option ignored");
 #endif
 			break;
+		case Opt_checkpoint:
+			name = match_strdup(&args[0]);
+			if (!name)
+				return -ENOMEM;
+
+			if (strlen(name) == 6 &&
+					!strncmp(name, "enable", 6)) {
+				clear_opt(sbi, DISABLE_CHECKPOINT);
+			} else if (strlen(name) == 7 &&
+					!strncmp(name, "disable", 7)) {
+				set_opt(sbi, DISABLE_CHECKPOINT);
+			} else {
+				kfree(name);
+				return -EINVAL;
+			}
+			kfree(name);
+			break;
 		default:
 			f2fs_msg(sb, KERN_ERR,
 				"Unrecognized mount option \"%s\" or missing value",
@@ -827,6 +846,12 @@ static int parse_options(struct super_block *sb, char *options)
 		}
 	}
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) {
+		f2fs_msg(sb, KERN_ERR,
+				"LFS not compatible with checkpoint=disable\n");
+		return -EINVAL;
+	}
+
 	/* Not pass down write hints if the number of active logs is lesser
 	 * than NR_CURSEG_TYPE.
 	 */
@@ -1014,8 +1039,8 @@ static void f2fs_put_super(struct super_block *sb)
 	 * But, the previous checkpoint was not done by umount, it needs to do
 	 * clean checkpoint again.
 	 */
-	if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
-			!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+	if ((is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
+			!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG))) {
 		struct cp_control cpc = {
 			.reason = CP_UMOUNT,
 		};
@@ -1087,6 +1112,8 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return 0;
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return 0;
 
 	trace_f2fs_sync_fs(sb, sync);
 
@@ -1186,6 +1213,11 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_blocks = total_count - start_count;
 	buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
 						sbi->current_reserved_blocks;
+	if (unlikely(buf->f_bfree <= sbi->unusable_block_count))
+		buf->f_bfree = 0;
+	else
+		buf->f_bfree -= sbi->unusable_block_count;
+
 	if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks)
 		buf->f_bavail = buf->f_bfree -
 				F2FS_OPTION(sbi).root_reserved_blocks;
@@ -1365,6 +1397,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 	else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
 		seq_printf(seq, ",alloc_mode=%s", "reuse");
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		seq_puts(seq, ",checkpoint=disable");
+
 	if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
 		seq_printf(seq, ",fsync_mode=%s", "posix");
 	else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
@@ -1392,6 +1427,7 @@ static void default_options(struct f2fs_sb_info *sbi)
 	set_opt(sbi, INLINE_DENTRY);
 	set_opt(sbi, EXTENT_CACHE);
 	set_opt(sbi, NOHEAP);
+	clear_opt(sbi, DISABLE_CHECKPOINT);
 	sbi->sb->s_flags |= SB_LAZYTIME;
 	set_opt(sbi, FLUSH_MERGE);
 	set_opt(sbi, DISCARD);
@@ -1413,6 +1449,52 @@ static void default_options(struct f2fs_sb_info *sbi)
 #ifdef CONFIG_QUOTA
 static int f2fs_enable_quotas(struct super_block *sb);
 #endif
+
+static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
+{
+	struct cp_control cpc;
+	int err;
+
+	sbi->sb->s_flags |= SB_ACTIVE;
+
+	mutex_lock(&sbi->gc_mutex);
+	for (; dirty_segments(sbi) > 2 * NR_CURSEG_TYPE;) {
+		err = f2fs_gc(sbi, true, false, NULL_SEGNO);
+		if (err == -ENODATA)
+			break;
+		if (err && err != -EAGAIN) {
+			mutex_unlock(&sbi->gc_mutex);
+			return err;
+		}
+	}
+	mutex_unlock(&sbi->gc_mutex);
+
+	err = sync_filesystem(sbi->sb);
+	if (err)
+		return err;
+
+	mutex_lock(&sbi->gc_mutex);
+	cpc.reason = CP_PAUSE;
+	set_sbi_flag(sbi, SBI_CP_DISABLED);
+	f2fs_write_checkpoint(sbi, &cpc);
+
+	sbi->unusable_block_count = 0;
+	mutex_unlock(&sbi->gc_mutex);
+	return 0;
+}
+
+static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
+{
+	mutex_lock(&sbi->gc_mutex);
+	f2fs_dirty_to_prefree(sbi);
+
+	clear_sbi_flag(sbi, SBI_CP_DISABLED);
+	set_sbi_flag(sbi, SBI_IS_DIRTY);
+	mutex_unlock(&sbi->gc_mutex);
+
+	f2fs_sync_fs(sbi->sb, 1);
+}
+
 static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -1422,6 +1504,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	bool need_restart_gc = false;
 	bool need_stop_gc = false;
 	bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
+	bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
+	bool checkpoint_changed;
 #ifdef CONFIG_QUOTA
 	int i, j;
 #endif
@@ -1466,6 +1550,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	err = parse_options(sb, data);
 	if (err)
 		goto restore_opts;
+	checkpoint_changed =
+			disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT);
 
 	/*
 	 * Previous and new state of filesystem is RO,
@@ -1499,6 +1585,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 		goto restore_opts;
 	}
 
+	if ((*flags & MS_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
+		err = -EINVAL;
+		f2fs_msg(sbi->sb, KERN_WARNING,
+			"disabling checkpoint not compatible with read-only");
+		goto restore_opts;
+	}
+
 	/*
 	 * We stop the GC thread if FS is mounted as RO
 	 * or if background_gc = off is passed in mount
@@ -1527,6 +1620,16 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 		clear_sbi_flag(sbi, SBI_IS_CLOSE);
 	}
 
+	if (checkpoint_changed) {
+		if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+			err = f2fs_disable_checkpoint(sbi);
+			if (err)
+				goto restore_gc;
+		} else {
+			f2fs_enable_checkpoint(sbi);
+		}
+	}
+
 	/*
 	 * We stop issue flush thread if FS is mounted as RO
 	 * or if flush_merge is not passed in mount option.
@@ -3064,6 +3167,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	if (err)
 		goto free_meta;
 
+	if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)))
+		goto skip_recovery;
+
 	/* recover fsynced data */
 	if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
 		/*
@@ -3103,6 +3209,14 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	/* f2fs_recover_fsync_data() cleared this already */
 	clear_sbi_flag(sbi, SBI_POR_DOING);
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+		err = f2fs_disable_checkpoint(sbi);
+		if (err)
+			goto free_meta;
+	} else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) {
+		f2fs_enable_checkpoint(sbi);
+	}
+
 	/*
 	 * If filesystem is not mounted as read-only then
 	 * do start the gc_thread.
@@ -3131,6 +3245,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 				cur_cp_version(F2FS_CKPT(sbi)));
 	f2fs_update_time(sbi, CP_TIME);
 	f2fs_update_time(sbi, REQ_TIME);
+
 	return 0;
 
 free_meta:
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 1d4b196291d6..9e95bbce5dd4 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -115,6 +115,7 @@ struct f2fs_super_block {
 /*
  * For checkpoint
  */
+#define CP_DISABLED_FLAG		0x00001000
 #define CP_LARGE_NAT_BITMAP_FLAG	0x00000400
 #define CP_NOCRC_RECOVERY_FLAG	0x00000200
 #define CP_TRIMMED_FLAG		0x00000100
-- 
2.17.0.441.gb46fe60e1d-goog


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] f2fs: checkpoint disabling
  2018-09-26 19:40 [PATCH] f2fs: checkpoint disabling Jaegeuk Kim
@ 2018-09-28 17:36 ` Jaegeuk Kim
  0 siblings, 0 replies; 11+ messages in thread
From: Jaegeuk Kim @ 2018-09-28 17:36 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel; +Cc: Daniel Rosenberg

Note that, it requires "f2fs: return correct errno in f2fs_gc".

This adds a lightweight non-persistent snapshotting scheme to f2fs.

To use, mount with the option checkpoint=disable, and to return to
normal operation, remount with checkpoint=enable. If the filesystem
is shut down before remounting with checkpoint=enable, it will revert
back to its apparent state when it was first mounted with
checkpoint=disable. This is useful for situations where you wish to be
able to roll back the state of the disk in case of some critical
failure.

Signed-off-by: Daniel Rosenberg <drosen@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
Change log from v1:
 - fix fsync loop

 Documentation/filesystems/f2fs.txt |   5 ++
 fs/f2fs/checkpoint.c               |  16 ++++
 fs/f2fs/data.c                     |  14 +++-
 fs/f2fs/debug.c                    |   3 +-
 fs/f2fs/f2fs.h                     |  15 +++-
 fs/f2fs/file.c                     |  12 ++-
 fs/f2fs/gc.c                       |   9 ++-
 fs/f2fs/inode.c                    |   6 +-
 fs/f2fs/namei.c                    |  19 +++++
 fs/f2fs/segment.c                  |  75 +++++++++++++++++-
 fs/f2fs/segment.h                  |  15 ++++
 fs/f2fs/super.c                    | 118 ++++++++++++++++++++++++++++-
 include/linux/f2fs_fs.h            |   1 +
 13 files changed, 294 insertions(+), 14 deletions(-)

diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index bde3e91e5372..e46c2147ddf8 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -212,6 +212,11 @@ fsync_mode=%s          Control the policy of fsync. Currently supports "posix",
                        non-atomic files likewise "nobarrier" mount option.
 test_dummy_encryption  Enable dummy encryption, which provides a fake fscrypt
                        context. The fake fscrypt context is used by xfstests.
+checkpoint=%s          Set to "disable" to turn off checkpointing. Set to "enable"
+                       to reenable checkpointing. Is enabled by default. While
+                       disabled, any unmounting or unexpected shutdowns will cause
+                       the filesystem contents to appear as they did when the
+                       filesystem was mounted with that option.
 
 ================================================================================
 DEBUGFS ENTRIES
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index d312d2829d5a..01e0d8f5bbbe 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1214,6 +1214,11 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
 		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
 
+	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
+		__set_ckpt_flags(ckpt, CP_DISABLED_FLAG);
+	else
+		__clear_ckpt_flags(ckpt, CP_DISABLED_FLAG);
+
 	/* set this flag to activate crc|cp_ver for recovery */
 	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
 	__clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
@@ -1421,6 +1426,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	clear_sbi_flag(sbi, SBI_IS_DIRTY);
 	clear_sbi_flag(sbi, SBI_NEED_CP);
+	sbi->unusable_block_count = 0;
 	__set_cp_next_pack(sbi);
 
 	/*
@@ -1445,6 +1451,16 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	unsigned long long ckpt_ver;
 	int err = 0;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+		if (cpc->reason != CP_PAUSE) {
+			if (cpc->reason != CP_UMOUNT)
+				f2fs_msg(sbi->sb, KERN_WARNING,
+					"Wrong disabled checkpoint!");
+			return 0;
+		}
+		f2fs_msg(sbi->sb, KERN_WARNING,
+				"Start checkpoint disabled!");
+	}
 	mutex_lock(&sbi->cp_mutex);
 
 	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index cb3ebcae0398..dc66c1948d8c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -537,7 +537,8 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
 	if (fio->in_list)
 		goto next;
 out:
-	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
+	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
+				f2fs_is_checkpoint_ready(sbi))
 		__submit_merged_bio(io);
 	up_write(&io->io_rwsem);
 }
@@ -1695,6 +1696,10 @@ static inline bool check_inplace_update_policy(struct inode *inode,
 			is_inode_flag_set(inode, FI_NEED_IPU))
 		return true;
 
+	if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+			!f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
+		return true;
+
 	return false;
 }
 
@@ -1725,6 +1730,9 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
 			return true;
 		if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
 			return true;
+		if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+			f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
+			return true;
 	}
 	return false;
 }
@@ -2345,6 +2353,10 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
 
 	trace_f2fs_write_begin(inode, pos, len, flags);
 
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		goto fail;
+
 	if ((f2fs_is_atomic_file(inode) &&
 			!f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
 			is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index d3c402183e3c..cb9e7679124b 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -267,7 +267,8 @@ static int stat_show(struct seq_file *s, void *v)
 		seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n",
 			si->sbi->sb->s_bdev, i++,
 			f2fs_readonly(si->sbi->sb) ? "RO": "RW",
-			f2fs_cp_error(si->sbi) ? "Error": "Good");
+			is_set_ckpt_flags(si->sbi, CP_DISABLED_FLAG) ?
+			"Disabled": (f2fs_cp_error(si->sbi) ? "Error": "Good"));
 		seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
 			   si->sit_area_segs, si->nat_area_segs);
 		seq_printf(s, "[SSA: %d] [MAIN: %d",
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b32fd9f7da95..89ff070ee95a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -98,6 +98,7 @@ extern char *f2fs_fault_name[FAULT_MAX];
 #define F2FS_MOUNT_QUOTA		0x00400000
 #define F2FS_MOUNT_INLINE_XATTR_SIZE	0x00800000
 #define F2FS_MOUNT_RESERVE_ROOT		0x01000000
+#define F2FS_MOUNT_DISABLE_CHECKPOINT	0x02000000
 
 #define F2FS_OPTION(sbi)	((sbi)->mount_opt)
 #define clear_opt(sbi, option)	(F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -176,6 +177,7 @@ enum {
 #define	CP_RECOVERY	0x00000008
 #define	CP_DISCARD	0x00000010
 #define CP_TRIMMED	0x00000020
+#define CP_PAUSE	0x00000040
 
 #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
 #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
@@ -1089,6 +1091,7 @@ enum {
 	SBI_NEED_CP,				/* need to checkpoint */
 	SBI_IS_SHUTDOWN,			/* shutdown by ioctl */
 	SBI_IS_RECOVERED,			/* recovered orphan/data */
+	SBI_CP_DISABLED,			/* CP was disabled last mount */
 };
 
 enum {
@@ -1222,6 +1225,9 @@ struct f2fs_sb_info {
 	block_t reserved_blocks;		/* configurable reserved blocks */
 	block_t current_reserved_blocks;	/* current reserved blocks */
 
+	/* Additional tracking for no checkpoint mode */
+	block_t unusable_block_count;		/* # of blocks saved by last cp */
+
 	unsigned int nquota_files;		/* # of quota sysfile */
 
 	u32 s_next_generation;			/* for NFS support */
@@ -1729,7 +1735,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
 
 	if (!__allow_reserved_blocks(sbi, inode, true))
 		avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
-
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		avail_user_block_count -= sbi->unusable_block_count;
 	if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
 		diff = sbi->total_valid_block_count - avail_user_block_count;
 		if (diff > *count)
@@ -1936,6 +1943,8 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
 
 	if (!__allow_reserved_blocks(sbi, inode, false))
 		valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		valid_block_count += sbi->unusable_block_count;
 
 	if (unlikely(valid_block_count > sbi->user_block_count)) {
 		spin_unlock(&sbi->stat_lock);
@@ -2939,6 +2948,7 @@ void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
 bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
 					struct cp_control *cpc);
+void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
@@ -3469,7 +3479,8 @@ static inline bool f2fs_may_encrypt(struct inode *inode)
 static inline bool f2fs_force_buffered_io(struct inode *inode, int rw)
 {
 	return (f2fs_post_read_required(inode) ||
-			(rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) ||
+			(rw == WRITE && (test_opt(F2FS_I_SB(inode), LFS) ||
+			is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED))) ||
 			F2FS_I_SB(inode)->s_ndevs);
 }
 
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 357422a4c319..d68a1b00e3fd 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -211,7 +211,8 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
 	};
 	unsigned int seq_id = 0;
 
-	if (unlikely(f2fs_readonly(inode->i_sb)))
+	if (unlikely(f2fs_readonly(inode->i_sb) ||
+				is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
 		return 0;
 
 	trace_f2fs_sync_file_enter(inode);
@@ -2158,6 +2159,12 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
 	if (f2fs_readonly(sbi->sb))
 		return -EROFS;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+		f2fs_msg(sbi->sb, KERN_INFO,
+			"Skipping Checkpoint. Checkpoints currently disabled.");
+		return -EINVAL;
+	}
+
 	ret = mnt_want_write_file(filp);
 	if (ret)
 		return ret;
@@ -2529,6 +2536,9 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
 	if (f2fs_readonly(sbi->sb))
 		return -EROFS;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return -EINVAL;
+
 	if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
 							sizeof(range)))
 		return -EFAULT;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index c051dc4ddf1a..5d2fa37472be 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -362,6 +362,10 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
 
 		if (sec_usage_check(sbi, secno))
 			goto next;
+		/* Don't touch checkpointed data */
+		if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+					get_ckpt_valid_blocks(sbi, segno)))
+			goto next;
 		if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
 			goto next;
 
@@ -1169,7 +1173,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
 		 * threshold, we can make them free by checkpoint. Then, we
 		 * secure free segments which doesn't need fggc any more.
 		 */
-		if (prefree_segments(sbi)) {
+		if (prefree_segments(sbi) &&
+				!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
 			ret = f2fs_write_checkpoint(sbi, &cpc);
 			if (ret)
 				goto stop;
@@ -1221,7 +1226,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
 			segno = NULL_SEGNO;
 			goto gc_more;
 		}
-		if (gc_type == FG_GC)
+		if (gc_type == FG_GC && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))
 			ret = f2fs_write_checkpoint(sbi, &cpc);
 	}
 stop:
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 86e7333d60c1..4ee9d6c4b719 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -607,6 +607,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	if (!is_inode_flag_set(inode, FI_DIRTY_INODE))
 		return 0;
 
+	if (f2fs_is_checkpoint_ready(sbi))
+		return -ENOSPC;
+
 	/*
 	 * We need to balance fs here to prevent from producing dirty node pages
 	 * during the urgent cleaning time when runing out of free sections.
@@ -688,7 +691,8 @@ void f2fs_evict_inode(struct inode *inode)
 	stat_dec_inline_dir(inode);
 	stat_dec_inline_inode(inode);
 
-	if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG)))
+	if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG) &&
+				!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
 		f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
 	else
 		f2fs_inode_synced(inode);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index ded185baa9ae..a14632744a6a 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -16,6 +16,7 @@
 
 #include "f2fs.h"
 #include "node.h"
+#include "segment.h"
 #include "xattr.h"
 #include "acl.h"
 #include <trace/events/f2fs.h>
@@ -269,6 +270,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	err = dquot_initialize(dir);
 	if (err)
@@ -315,6 +319,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	err = fscrypt_prepare_link(old_dentry, dir, dentry);
 	if (err)
@@ -561,6 +568,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
 				      &disk_link);
@@ -690,6 +700,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	err = dquot_initialize(dir);
 	if (err)
@@ -824,6 +837,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
 			(!projid_eq(F2FS_I(new_dir)->i_projid,
@@ -1014,6 +1030,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
 			!projid_eq(F2FS_I(new_dir)->i_projid,
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 97a4fae75651..471aa8821d4c 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -176,6 +176,8 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
 		return false;
 	if (sbi->gc_mode == GC_URGENT)
 		return true;
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return true;
 
 	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
 			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
@@ -480,6 +482,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
 	if (need && excess_cached_nats(sbi))
 		f2fs_balance_fs_bg(sbi);
 
+	if (f2fs_is_checkpoint_ready(sbi))
+		return;
+
 	/*
 	 * We should do GC or end up with checkpoint, if there are so many dirty
 	 * dir/node pages without enough free segments.
@@ -796,7 +801,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 {
 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
-	unsigned short valid_blocks;
+	unsigned short valid_blocks, ckpt_valid_blocks;
 
 	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
 		return;
@@ -804,8 +809,10 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 	mutex_lock(&dirty_i->seglist_lock);
 
 	valid_blocks = get_valid_blocks(sbi, segno, false);
+	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
 
-	if (valid_blocks == 0) {
+	if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
+				ckpt_valid_blocks == sbi->blocks_per_seg)) {
 		__locate_dirty_segment(sbi, segno, PRE);
 		__remove_dirty_segment(sbi, segno, DIRTY);
 	} else if (valid_blocks < sbi->blocks_per_seg) {
@@ -818,6 +825,43 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 	mutex_unlock(&dirty_i->seglist_lock);
 }
 
+/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */
+void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	unsigned int segno;
+
+	mutex_lock(&dirty_i->seglist_lock);
+	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+		if (get_valid_blocks(sbi, segno, false))
+			continue;
+		if (IS_CURSEG(sbi, segno))
+			continue;
+		__locate_dirty_segment(sbi, segno, PRE);
+		__remove_dirty_segment(sbi, segno, DIRTY);
+	}
+	mutex_unlock(&dirty_i->seglist_lock);
+}
+
+/* This is only used by SBI_CP_DISABLED */
+static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	unsigned int segno = 0;
+
+	mutex_lock(&dirty_i->seglist_lock);
+	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+		if (get_valid_blocks(sbi, segno, false))
+			continue;
+		if (get_ckpt_valid_blocks(sbi, segno))
+			continue;
+		mutex_unlock(&dirty_i->seglist_lock);
+		return segno;
+	}
+	mutex_unlock(&dirty_i->seglist_lock);
+	return NULL_SEGNO;
+}
+
 static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
 		struct block_device *bdev, block_t lstart,
 		block_t start, block_t len)
@@ -2028,7 +2072,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
 			sbi->discard_blks--;
 
 		/* don't overwrite by SSR to keep node chain */
-		if (IS_NODESEG(se->type)) {
+		if (IS_NODESEG(se->type) &&
+				!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
 			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
 				se->ckpt_valid_blocks++;
 		}
@@ -2050,6 +2095,15 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
 			f2fs_bug_on(sbi, 1);
 			se->valid_blocks++;
 			del = 0;
+		} else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+			/*
+			 * If checkpoints are off, we must not reuse data that
+			 * was used in the previous checkpoint. If it was used
+			 * before, we must track that to know how much space we
+			 * really have.
+			 */
+			if (f2fs_test_bit(offset, se->ckpt_valid_map))
+				sbi->unusable_block_count++;
 		}
 
 		if (f2fs_test_and_clear_bit(offset, se->discard_map))
@@ -2332,6 +2386,9 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
 	if (sbi->segs_per_sec != 1)
 		return CURSEG_I(sbi, type)->segno;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return 0;
+
 	if (test_opt(sbi, NOHEAP) &&
 		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
 		return 0;
@@ -2475,6 +2532,15 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
 			return 1;
 		}
 	}
+
+	/* find valid_blocks=0 in dirty list */
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+		segno = get_free_segment(sbi);
+		if (segno != NULL_SEGNO) {
+			curseg->next_segno = segno;
+			return 1;
+		}
+	}
 	return 0;
 }
 
@@ -2492,7 +2558,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
 	else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
 					type == CURSEG_WARM_NODE)
 		new_curseg(sbi, type, false);
-	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
+	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
+			likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
 		new_curseg(sbi, type, false);
 	else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
 		change_curseg(sbi, type);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 086150028c6d..ab3465faddf1 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -339,6 +339,12 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
 		return get_seg_entry(sbi, segno)->valid_blocks;
 }
 
+static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
+				unsigned int segno)
+{
+	return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+}
+
 static inline void seg_info_from_raw_sit(struct seg_entry *se,
 					struct f2fs_sit_entry *rs)
 {
@@ -576,6 +582,15 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
 		reserved_sections(sbi) + needed);
 }
 
+static inline int f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi)
+{
+	if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return 0;
+	if (likely(!has_not_enough_free_secs(sbi, 0, 0)))
+		return 0;
+	return -ENOSPC;
+}
+
 static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
 {
 	return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index c47b1ef2685a..d698e11b06e4 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -136,6 +136,7 @@ enum {
 	Opt_alloc,
 	Opt_fsync,
 	Opt_test_dummy_encryption,
+	Opt_checkpoint,
 	Opt_err,
 };
 
@@ -194,6 +195,7 @@ static match_table_t f2fs_tokens = {
 	{Opt_alloc, "alloc_mode=%s"},
 	{Opt_fsync, "fsync_mode=%s"},
 	{Opt_test_dummy_encryption, "test_dummy_encryption"},
+	{Opt_checkpoint, "checkpoint=%s"},
 	{Opt_err, NULL},
 };
 
@@ -769,6 +771,23 @@ static int parse_options(struct super_block *sb, char *options)
 					"Test dummy encryption mount option ignored");
 #endif
 			break;
+		case Opt_checkpoint:
+			name = match_strdup(&args[0]);
+			if (!name)
+				return -ENOMEM;
+
+			if (strlen(name) == 6 &&
+					!strncmp(name, "enable", 6)) {
+				clear_opt(sbi, DISABLE_CHECKPOINT);
+			} else if (strlen(name) == 7 &&
+					!strncmp(name, "disable", 7)) {
+				set_opt(sbi, DISABLE_CHECKPOINT);
+			} else {
+				kfree(name);
+				return -EINVAL;
+			}
+			kfree(name);
+			break;
 		default:
 			f2fs_msg(sb, KERN_ERR,
 				"Unrecognized mount option \"%s\" or missing value",
@@ -827,6 +846,12 @@ static int parse_options(struct super_block *sb, char *options)
 		}
 	}
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) {
+		f2fs_msg(sb, KERN_ERR,
+				"LFS not compatible with checkpoint=disable\n");
+		return -EINVAL;
+	}
+
 	/* Not pass down write hints if the number of active logs is lesser
 	 * than NR_CURSEG_TYPE.
 	 */
@@ -1014,8 +1039,8 @@ static void f2fs_put_super(struct super_block *sb)
 	 * But, the previous checkpoint was not done by umount, it needs to do
 	 * clean checkpoint again.
 	 */
-	if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
-			!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+	if ((is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
+			!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG))) {
 		struct cp_control cpc = {
 			.reason = CP_UMOUNT,
 		};
@@ -1087,6 +1112,8 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return 0;
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return 0;
 
 	trace_f2fs_sync_fs(sb, sync);
 
@@ -1186,6 +1213,11 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_blocks = total_count - start_count;
 	buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
 						sbi->current_reserved_blocks;
+	if (unlikely(buf->f_bfree <= sbi->unusable_block_count))
+		buf->f_bfree = 0;
+	else
+		buf->f_bfree -= sbi->unusable_block_count;
+
 	if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks)
 		buf->f_bavail = buf->f_bfree -
 				F2FS_OPTION(sbi).root_reserved_blocks;
@@ -1365,6 +1397,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 	else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
 		seq_printf(seq, ",alloc_mode=%s", "reuse");
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		seq_puts(seq, ",checkpoint=disable");
+
 	if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
 		seq_printf(seq, ",fsync_mode=%s", "posix");
 	else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
@@ -1392,6 +1427,7 @@ static void default_options(struct f2fs_sb_info *sbi)
 	set_opt(sbi, INLINE_DENTRY);
 	set_opt(sbi, EXTENT_CACHE);
 	set_opt(sbi, NOHEAP);
+	clear_opt(sbi, DISABLE_CHECKPOINT);
 	sbi->sb->s_flags |= SB_LAZYTIME;
 	set_opt(sbi, FLUSH_MERGE);
 	set_opt(sbi, DISCARD);
@@ -1413,6 +1449,52 @@ static void default_options(struct f2fs_sb_info *sbi)
 #ifdef CONFIG_QUOTA
 static int f2fs_enable_quotas(struct super_block *sb);
 #endif
+
+static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
+{
+	struct cp_control cpc;
+	int err;
+
+	sbi->sb->s_flags |= SB_ACTIVE;
+
+	mutex_lock(&sbi->gc_mutex);
+	for (; dirty_segments(sbi) > 2 * NR_CURSEG_TYPE;) {
+		err = f2fs_gc(sbi, true, false, NULL_SEGNO);
+		if (err == -ENODATA)
+			break;
+		if (err && err != -EAGAIN) {
+			mutex_unlock(&sbi->gc_mutex);
+			return err;
+		}
+	}
+	mutex_unlock(&sbi->gc_mutex);
+
+	err = sync_filesystem(sbi->sb);
+	if (err)
+		return err;
+
+	mutex_lock(&sbi->gc_mutex);
+	cpc.reason = CP_PAUSE;
+	set_sbi_flag(sbi, SBI_CP_DISABLED);
+	f2fs_write_checkpoint(sbi, &cpc);
+
+	sbi->unusable_block_count = 0;
+	mutex_unlock(&sbi->gc_mutex);
+	return 0;
+}
+
+static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
+{
+	mutex_lock(&sbi->gc_mutex);
+	f2fs_dirty_to_prefree(sbi);
+
+	clear_sbi_flag(sbi, SBI_CP_DISABLED);
+	set_sbi_flag(sbi, SBI_IS_DIRTY);
+	mutex_unlock(&sbi->gc_mutex);
+
+	f2fs_sync_fs(sbi->sb, 1);
+}
+
 static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -1422,6 +1504,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	bool need_restart_gc = false;
 	bool need_stop_gc = false;
 	bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
+	bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
+	bool checkpoint_changed;
 #ifdef CONFIG_QUOTA
 	int i, j;
 #endif
@@ -1466,6 +1550,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	err = parse_options(sb, data);
 	if (err)
 		goto restore_opts;
+	checkpoint_changed =
+			disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT);
 
 	/*
 	 * Previous and new state of filesystem is RO,
@@ -1499,6 +1585,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 		goto restore_opts;
 	}
 
+	if ((*flags & MS_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
+		err = -EINVAL;
+		f2fs_msg(sbi->sb, KERN_WARNING,
+			"disabling checkpoint not compatible with read-only");
+		goto restore_opts;
+	}
+
 	/*
 	 * We stop the GC thread if FS is mounted as RO
 	 * or if background_gc = off is passed in mount
@@ -1527,6 +1620,16 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 		clear_sbi_flag(sbi, SBI_IS_CLOSE);
 	}
 
+	if (checkpoint_changed) {
+		if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+			err = f2fs_disable_checkpoint(sbi);
+			if (err)
+				goto restore_gc;
+		} else {
+			f2fs_enable_checkpoint(sbi);
+		}
+	}
+
 	/*
 	 * We stop issue flush thread if FS is mounted as RO
 	 * or if flush_merge is not passed in mount option.
@@ -3064,6 +3167,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	if (err)
 		goto free_meta;
 
+	if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)))
+		goto skip_recovery;
+
 	/* recover fsynced data */
 	if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
 		/*
@@ -3103,6 +3209,14 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	/* f2fs_recover_fsync_data() cleared this already */
 	clear_sbi_flag(sbi, SBI_POR_DOING);
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+		err = f2fs_disable_checkpoint(sbi);
+		if (err)
+			goto free_meta;
+	} else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) {
+		f2fs_enable_checkpoint(sbi);
+	}
+
 	/*
 	 * If filesystem is not mounted as read-only then
 	 * do start the gc_thread.
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 1d4b196291d6..9e95bbce5dd4 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -115,6 +115,7 @@ struct f2fs_super_block {
 /*
  * For checkpoint
  */
+#define CP_DISABLED_FLAG		0x00001000
 #define CP_LARGE_NAT_BITMAP_FLAG	0x00000400
 #define CP_NOCRC_RECOVERY_FLAG	0x00000200
 #define CP_TRIMMED_FLAG		0x00000100
-- 
2.19.0.605.g01d371f741-goog


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] f2fs: checkpoint disabling
  2018-07-26  0:19 ` Daniel Rosenberg
  (?)
@ 2018-08-04 22:31   ` Jaegeuk Kim
  -1 siblings, 0 replies; 11+ messages in thread
From: Jaegeuk Kim @ 2018-08-04 22:31 UTC (permalink / raw)
  To: Daniel Rosenberg
  Cc: Chao Yu, Jonathan Corbet, linux-f2fs-devel, linux-kernel,
	linux-doc, linux-fsdevel, kernel-team

Hi Daniel,

Could you please rebase the patch on top of f2fs/dev-test branch?
I'd like to review the code by playing with a little bit.

Thanks,

On 07/25, Daniel Rosenberg wrote:
> This adds a lightweight non-persistent snapshotting scheme to f2fs.
> 
> To use, mount with the option checkpoint=disable, and to return to
> normal operation, remount with checkpoint=enable. If the filesystem
> is shut down before remounting with checkpoint=enable, it will revert
> back to its apparent state when it was first mounted with
> checkpoint=disable. This is useful for situations where you wish to be
> able to roll back the state of the disk in case of some critical
> failure.
> 
> Signed-off-by: Daniel Rosenberg <drosen@google.com>
> ---
> 
> Included changes suggested by Jaegeuk and Chao.
> it now holds the gc lock around setting up free space tracking in
> f2fs_disable_checkpoint. I wasn't quite sure what was mean by
> the comment on should_update_outplace, but I've changed it to return
> true for NEW_ADDR as well. It may be better to instead skip over the
> reset of that block if old_blkaddr is NEW_ADDR, because I think that
> means the data has not yet been committed, and thus wouldn't be
> overwriting data from the previous checkpoint.
> 
> We're currently using this during updates to extend the period of time
> that we can safely roll back a faulty update. After performing an update
> and reboot, we'd mount in this mode. If there ends up being some sort of
> fatal error, data changes are automatically reverted, allowing us to
> revert to the state before the update without worrying about any incompatible
> changes that may've been made to data while running under the faulty system.
> It increases our ability to revert a faulty update from up to mounting user
> data partitions to a bit farther along.
> 
>  Documentation/filesystems/f2fs.txt |   5 ++
>  fs/f2fs/data.c                     |  23 ++++++
>  fs/f2fs/f2fs.h                     |  55 +++++++++++++-
>  fs/f2fs/file.c                     |  18 +++++
>  fs/f2fs/gc.c                       |   4 +
>  fs/f2fs/segment.c                  |  58 ++++++++++++---
>  fs/f2fs/segment.h                  |  26 +++++++
>  fs/f2fs/super.c                    | 116 +++++++++++++++++++++++++++--
>  8 files changed, 288 insertions(+), 17 deletions(-)
> 
> diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
> index 69f8de9957397..a026b353a99d4 100644
> --- a/Documentation/filesystems/f2fs.txt
> +++ b/Documentation/filesystems/f2fs.txt
> @@ -193,6 +193,11 @@ fsync_mode=%s          Control the policy of fsync. Currently supports "posix",
>                         non-atomic files likewise "nobarrier" mount option.
>  test_dummy_encryption  Enable dummy encryption, which provides a fake fscrypt
>                         context. The fake fscrypt context is used by xfstests.
> +checkpoint=%s          Set to "disable" to turn off checkpointing. Set to "enable"
> +                       to reenable checkpointing. Is enabled by default. While
> +                       disabled, any unmounting or unexpected shutdowns will cause
> +                       the filesystem contents to appear as they did when the
> +                       filesystem was mounted with that option.
>  
>  ================================================================================
>  DEBUGFS ENTRIES
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 6b8ca5011bfd6..8f32df9b7b8f3 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1690,6 +1690,19 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
>  
>  	if (test_opt(sbi, LFS))
>  		return true;
> +	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
> +		struct seg_entry *se;
> +		unsigned int segno, offset;
> +
> +		if (!fio || fio->old_blkaddr == NULL_ADDR ||
> +				fio->old_blkaddr == NEW_ADDR)
> +			return true;
> +		segno = GET_SEGNO(sbi, fio->old_blkaddr);
> +		se = get_seg_entry(sbi, segno);
> +		offset = GET_BLKOFF_FROM_SEG0(sbi, fio->old_blkaddr);
> +		if (f2fs_test_bit(offset, se->ckpt_valid_map))
> +			return true;
> +	}
>  	if (S_ISDIR(inode->i_mode))
>  		return true;
>  	if (f2fs_is_atomic_file(inode))
> @@ -1717,10 +1730,13 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
>  {
>  	struct page *page = fio->page;
>  	struct inode *inode = page->mapping->host;
> +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>  	struct dnode_of_data dn;
>  	struct extent_info ei = {0,0,0};
>  	struct node_info ni;
>  	bool ipu_force = false;
> +	bool need_tmp_grab = test_opt(sbi, DISABLE_CHECKPOINT);
> +	blkcnt_t tmp_block = 1;
>  	int err = 0;
>  
>  	set_new_dnode(&dn, inode, NULL, NULL, 0);
> @@ -1798,6 +1814,11 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
>  	if (err)
>  		goto out_writepage;
>  
> +	if (need_tmp_grab) {
> +		err = inc_valid_block_count(sbi, dn.inode, &tmp_block);
> +		if (err)
> +			goto out_writepage;
> +	}
>  	set_page_writeback(page);
>  	ClearPageError(page);
>  
> @@ -1807,6 +1828,8 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
>  	set_inode_flag(inode, FI_APPEND_WRITE);
>  	if (page->index == 0)
>  		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
> +	if (need_tmp_grab)
> +		dec_valid_block_count(sbi, dn.inode, tmp_block);
>  out_writepage:
>  	f2fs_put_dnode(&dn);
>  out:
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 0374f069520cf..f594957f9d0cd 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -97,6 +97,7 @@ extern char *f2fs_fault_name[FAULT_MAX];
>  #define F2FS_MOUNT_QUOTA		0x00400000
>  #define F2FS_MOUNT_INLINE_XATTR_SIZE	0x00800000
>  #define F2FS_MOUNT_RESERVE_ROOT		0x01000000
> +#define F2FS_MOUNT_DISABLE_CHECKPOINT	0x02000000
>  
>  #define F2FS_OPTION(sbi)	((sbi)->mount_opt)
>  #define clear_opt(sbi, option)	(F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
> @@ -175,6 +176,7 @@ enum {
>  #define	CP_RECOVERY	0x00000008
>  #define	CP_DISCARD	0x00000010
>  #define CP_TRIMMED	0x00000020
> +#define CP_PAUSE	0x00000040
>  
>  #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
>  #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
> @@ -1074,6 +1076,7 @@ enum {
>  	SBI_NEED_CP,				/* need to checkpoint */
>  	SBI_DISABLE_ATOMIC_WRITE,		/* turn off atomic write */
>  	SBI_IS_SHUTDOWN,			/* shutdown by ioctl */
> +	SBI_CP_DISABLED,			/* CP was disabled last mount */
>  };
>  
>  enum {
> @@ -1199,6 +1202,12 @@ struct f2fs_sb_info {
>  	block_t reserved_blocks;		/* configurable reserved blocks */
>  	block_t current_reserved_blocks;	/* current reserved blocks */
>  
> +	/* Additional tracking for no checkpoint mode */
> +	block_t unusable_block_count;		/* # of blocks saved by last cp */
> +	block_t free_ssr_data_block;
> +	block_t free_ssr_node_block;
> +	block_t free_segments;
> +
>  	unsigned int nquota_files;		/* # of quota sysfile */
>  
>  	u32 s_next_generation;			/* for NFS support */
> @@ -1650,7 +1659,7 @@ static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool);
>  static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
>  				 struct inode *inode, blkcnt_t *count)
>  {
> -	blkcnt_t diff = 0, release = 0;
> +	blkcnt_t diff = 0, release = 0, seg_diff = 0, seg_rel = 0;
>  	block_t avail_user_block_count;
>  	int ret;
>  
> @@ -1678,6 +1687,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
>  
>  	if (!__allow_reserved_blocks(sbi, inode, true))
>  		avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		avail_user_block_count -= sbi->unusable_block_count;
>  
>  	if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
>  		diff = sbi->total_valid_block_count - avail_user_block_count;
> @@ -1691,6 +1702,35 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
>  			goto enospc;
>  		}
>  	}
> +	if (likely(!test_opt(sbi, DISABLE_CHECKPOINT)))
> +		goto normal;
> +	if (unlikely(*count > sbi->free_ssr_data_block)) {
> +		/* We'll need to pull from free. */
> +		blkcnt_t needed = *count - sbi->free_ssr_data_block;
> +		blkcnt_t new_segs = ((needed - 1) >>
> +					sbi->log_blocks_per_seg) + 1;
> +
> +		/* Check if we have enough free */
> +		if (unlikely(new_segs > sbi->free_segments)) {
> +			seg_diff = new_segs - sbi->free_segments;
> +
> +			seg_rel = ((needed - 1) % sbi->log_blocks_per_seg) + 1;
> +			seg_rel += (seg_diff - 1) << sbi->log_blocks_per_seg;
> +			new_segs -= seg_diff;
> +			*count -= seg_rel;
> +			release += seg_rel;
> +			if (!*count) {
> +				spin_unlock(&sbi->stat_lock);
> +				goto enospc;
> +			}
> +		}
> +
> +		sbi->free_segments -= new_segs;
> +		sbi->free_ssr_data_block += new_segs << sbi->log_blocks_per_seg;
> +
> +	}
> +	sbi->free_ssr_data_block -= *count;
> +normal:
>  	spin_unlock(&sbi->stat_lock);
>  
>  	if (unlikely(release)) {
> @@ -1887,6 +1927,8 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
>  
>  	if (!__allow_reserved_blocks(sbi, inode, false))
>  		valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		valid_block_count += sbi->unusable_block_count;
>  
>  	if (unlikely(valid_block_count > sbi->user_block_count)) {
>  		spin_unlock(&sbi->stat_lock);
> @@ -1899,6 +1941,17 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
>  		goto enospc;
>  	}
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
> +		if (unlikely(!sbi->free_ssr_node_block)) {
> +			if (unlikely(!sbi->free_segments)) {
> +				spin_unlock(&sbi->stat_lock);
> +				goto enospc;
> +			}
> +			sbi->free_segments--;
> +		}
> +		sbi->free_ssr_node_block--;
> +	}
> +
>  	sbi->total_valid_node_count++;
>  	sbi->total_valid_block_count++;
>  	spin_unlock(&sbi->stat_lock);
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 3a5c35fa06039..20124f5d287b8 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -150,6 +150,9 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>  	enum cp_reason_type cp_reason = CP_NO_NEEDED;
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return CP_NO_NEEDED;
> +
>  	if (!S_ISREG(inode->i_mode))
>  		cp_reason = CP_NON_REGULAR;
>  	else if (inode->i_nlink != 1)
> @@ -2070,6 +2073,9 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
>  	if (f2fs_readonly(sbi->sb))
>  		return -EROFS;
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return -EINVAL;
> +
>  	ret = mnt_want_write_file(filp);
>  	if (ret)
>  		return ret;
> @@ -2112,6 +2118,9 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
>  		return -EINVAL;
>  	}
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return -EINVAL;
> +
>  	ret = mnt_want_write_file(filp);
>  	if (ret)
>  		return ret;
> @@ -2147,6 +2156,12 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
>  	if (f2fs_readonly(sbi->sb))
>  		return -EROFS;
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
> +		f2fs_msg(sbi->sb, KERN_INFO,
> +			"Skipping Checkpoint. Checkpoints currently disabled.");
> +		return -EINVAL;
> +	}
> +
>  	ret = mnt_want_write_file(filp);
>  	if (ret)
>  		return ret;
> @@ -2513,6 +2528,9 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
>  	if (f2fs_readonly(sbi->sb))
>  		return -EROFS;
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return -EINVAL;
> +
>  	if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
>  							sizeof(range)))
>  		return -EFAULT;
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index fe0da3e05ef14..1326f5b6aa99d 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -60,6 +60,9 @@ static int gc_thread_func(void *data)
>  		}
>  #endif
>  
> +		if (test_opt(sbi, DISABLE_CHECKPOINT))
> +			goto do_balance;
> +
>  		if (!sb_start_write_trylock(sbi->sb))
>  			continue;
>  
> @@ -105,6 +108,7 @@ static int gc_thread_func(void *data)
>  		trace_f2fs_background_gc(sbi->sb, wait_ms,
>  				prefree_segments(sbi), free_segments(sbi));
>  
> +do_balance:
>  		/* balancing f2fs's metadata periodically */
>  		f2fs_balance_fs_bg(sbi);
>  next:
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 17354089b4ab2..3d5d8098d4ead 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -179,6 +179,10 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
>  		return false;
>  	if (sbi->gc_mode == GC_URGENT)
>  		return true;
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return true;
> +	if (sbi->gc_mode == GC_URGENT)
> +		return true;
>  
>  	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
>  			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
> @@ -485,6 +489,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
>  	 * We should do GC or end up with checkpoint, if there are so many dirty
>  	 * dir/node pages without enough free segments.
>  	 */
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return;
>  	if (has_not_enough_free_secs(sbi, 0, 0)) {
>  		mutex_lock(&sbi->gc_mutex);
>  		f2fs_gc(sbi, false, false, NULL_SEGNO);
> @@ -525,8 +531,10 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
>  			f2fs_sync_dirty_inodes(sbi, FILE_INODE);
>  			blk_finish_plug(&plug);
>  		}
> -		f2fs_sync_fs(sbi->sb, true);
> -		stat_inc_bg_cp_count(sbi->stat_info);
> +		if (!test_opt(sbi, DISABLE_CHECKPOINT)) {
> +			f2fs_sync_fs(sbi->sb, true);
> +			stat_inc_bg_cp_count(sbi->stat_info);
> +		}
>  	}
>  }
>  
> @@ -741,8 +749,8 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
>  	return ret;
>  }
>  
> -static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
> -		enum dirty_type dirty_type)
> +void __locate_dirty_segment(struct f2fs_sb_info *sbi,
> +		unsigned int segno, enum dirty_type dirty_type)
>  {
>  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
>  
> @@ -766,8 +774,8 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
>  	}
>  }
>  
> -static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
> -		enum dirty_type dirty_type)
> +void __remove_dirty_segment(struct f2fs_sb_info *sbi,
> +		unsigned int segno, enum dirty_type dirty_type)
>  {
>  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
>  
> @@ -787,6 +795,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
>  	}
>  }
>  
> +
>  /*
>   * Should not occur error such as -ENOMEM.
>   * Adding dirty entry into seglist is not critical operation.
> @@ -795,7 +804,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
>  static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
>  {
>  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
> -	unsigned short valid_blocks;
> +	unsigned short valid_blocks, ckpt_valid_blocks;
>  
>  	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
>  		return;
> @@ -803,8 +812,10 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
>  	mutex_lock(&dirty_i->seglist_lock);
>  
>  	valid_blocks = get_valid_blocks(sbi, segno, false);
> +	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
>  
> -	if (valid_blocks == 0) {
> +	if (valid_blocks == 0 && (ckpt_valid_blocks == sbi->blocks_per_seg ||
> +					!test_opt(sbi, DISABLE_CHECKPOINT))) {
>  		__locate_dirty_segment(sbi, segno, PRE);
>  		__remove_dirty_segment(sbi, segno, DIRTY);
>  	} else if (valid_blocks < sbi->blocks_per_seg) {
> @@ -1926,7 +1937,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
>  			sbi->discard_blks--;
>  
>  		/* don't overwrite by SSR to keep node chain */
> -		if (IS_NODESEG(se->type)) {
> +		if (IS_NODESEG(se->type) &&
> +				!test_opt(sbi, DISABLE_CHECKPOINT)) {
>  			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
>  				se->ckpt_valid_blocks++;
>  		}
> @@ -1948,6 +1960,25 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
>  			f2fs_bug_on(sbi, 1);
>  			se->valid_blocks++;
>  			del = 0;
> +		} else {
> +			/* If checkpoints are off, we must not reuse data that
> +			 * was used in the previous checkpoint. If it was used
> +			 * before, we must track that to know how much space we
> +			 * really have
> +			 */
> +			if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
> +				spin_lock(&sbi->stat_lock);
> +				sbi->unusable_block_count++;
> +				spin_unlock(&sbi->stat_lock);
> +			} else {
> +				spin_lock(&sbi->stat_lock);
> +				if (IS_DATASEG(se->type))
> +					sbi->free_ssr_data_block++;
> +				else
> +					sbi->free_ssr_node_block++;
> +				spin_unlock(&sbi->stat_lock);
> +			}
> +
>  		}
>  
>  		if (f2fs_discard_en(sbi) &&
> @@ -2237,7 +2268,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>  		return SIT_I(sbi)->last_victim[ALLOC_NEXT];
>  
>  	/* find segments from 0 to reuse freed segments */
> -	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
> +	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE
> +			|| test_opt(sbi, DISABLE_CHECKPOINT))
>  		return 0;
>  
>  	return CURSEG_I(sbi, type)->segno;
> @@ -2389,7 +2421,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
>  	else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
>  					type == CURSEG_WARM_NODE)
>  		new_curseg(sbi, type, false);
> -	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
> +	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
> +			!test_opt(sbi, DISABLE_CHECKPOINT))
>  		new_curseg(sbi, type, false);
>  	else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
>  		change_curseg(sbi, type);
> @@ -3568,6 +3601,9 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  			sit_i->dirty_sentries--;
>  			ses->entry_cnt--;
>  		}
> +		spin_lock(&sbi->stat_lock);
> +		sbi->unusable_block_count = 0;
> +		spin_unlock(&sbi->stat_lock);
>  
>  		if (to_journal)
>  			up_write(&curseg->journal_rwsem);
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index 50495515f0a0d..37222e133d52a 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -342,6 +342,12 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
>  		return get_seg_entry(sbi, segno)->valid_blocks;
>  }
>  
> +static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
> +				unsigned int segno)
> +{
> +	return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
> +}
> +
>  static inline void seg_info_from_raw_sit(struct seg_entry *se,
>  					struct f2fs_sit_entry *rs)
>  {
> @@ -524,6 +530,26 @@ static inline unsigned int dirty_segments(struct f2fs_sb_info *sbi)
>  		DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_NODE];
>  }
>  
> +void __locate_dirty_segment(struct f2fs_sb_info *sbi,
> +		unsigned int segno, enum dirty_type dirty_type);
> +
> +void __remove_dirty_segment(struct f2fs_sb_info *sbi,
> +		unsigned int segno, enum dirty_type dirty_type);
> +
> +/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */
> +static inline void dirty_to_prefree(struct f2fs_sb_info *sbi)
> +{
> +	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
> +	unsigned int segno;
> +
> +	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
> +		if (!get_valid_blocks(sbi, segno, false)) {
> +			__locate_dirty_segment(sbi, segno, PRE);
> +			__remove_dirty_segment(sbi, segno, DIRTY);
> +		}
> +	}
> +}
> +
>  static inline int overprovision_segments(struct f2fs_sb_info *sbi)
>  {
>  	return SM_I(sbi)->ovp_segments;
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 7187885da2515..7568ae00319cb 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -132,6 +132,7 @@ enum {
>  	Opt_alloc,
>  	Opt_fsync,
>  	Opt_test_dummy_encryption,
> +	Opt_checkpoint,
>  	Opt_err,
>  };
>  
> @@ -189,6 +190,7 @@ static match_table_t f2fs_tokens = {
>  	{Opt_alloc, "alloc_mode=%s"},
>  	{Opt_fsync, "fsync_mode=%s"},
>  	{Opt_test_dummy_encryption, "test_dummy_encryption"},
> +	{Opt_checkpoint, "checkpoint=%s"},
>  	{Opt_err, NULL},
>  };
>  
> @@ -764,6 +766,23 @@ static int parse_options(struct super_block *sb, char *options)
>  					"Test dummy encryption mount option ignored");
>  #endif
>  			break;
> +		case Opt_checkpoint:
> +			name = match_strdup(&args[0]);
> +			if (!name)
> +				return -ENOMEM;
> +
> +			if (strlen(name) == 6 &&
> +					!strncmp(name, "enable", 6)) {
> +				clear_opt(sbi, DISABLE_CHECKPOINT);
> +			} else if (strlen(name) == 7 &&
> +					!strncmp(name, "disable", 7)) {
> +				set_opt(sbi, DISABLE_CHECKPOINT);
> +			} else {
> +				kfree(name);
> +				return -EINVAL;
> +			}
> +			kfree(name);
> +			break;
>  		default:
>  			f2fs_msg(sb, KERN_ERR,
>  				"Unrecognized mount option \"%s\" or missing value",
> @@ -809,6 +828,12 @@ static int parse_options(struct super_block *sb, char *options)
>  		}
>  	}
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) {
> +		f2fs_msg(sb, KERN_ERR,
> +				"LFS not compatible with checkpoint=disable\n");
> +		return -EINVAL;
> +	}
> +
>  	/* Not pass down write hints if the number of active logs is lesser
>  	 * than NR_CURSEG_TYPE.
>  	 */
> @@ -996,8 +1021,9 @@ static void f2fs_put_super(struct super_block *sb)
>  	 * But, the previous checkpoint was not done by umount, it needs to do
>  	 * clean checkpoint again.
>  	 */
> -	if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
> -			!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
> +	if ((is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
> +			!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) &&
> +			!test_opt(sbi, DISABLE_CHECKPOINT)) {
>  		struct cp_control cpc = {
>  			.reason = CP_UMOUNT,
>  		};
> @@ -1007,7 +1033,8 @@ static void f2fs_put_super(struct super_block *sb)
>  	/* be sure to wait for any on-going discard commands */
>  	dropped = f2fs_wait_discard_bios(sbi);
>  
> -	if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped) {
> +	if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped &&
> +			!test_opt(sbi, DISABLE_CHECKPOINT)) {
>  		struct cp_control cpc = {
>  			.reason = CP_UMOUNT | CP_TRIMMED,
>  		};
> @@ -1064,6 +1091,8 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
>  
>  	if (unlikely(f2fs_cp_error(sbi)))
>  		return 0;
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return 0;
>  
>  	trace_f2fs_sync_fs(sb, sync);
>  
> @@ -1162,7 +1191,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
>  
>  	buf->f_blocks = total_count - start_count;
>  	buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
> -						sbi->current_reserved_blocks;
> +						sbi->current_reserved_blocks -
> +						sbi->unusable_block_count;
>  	if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks)
>  		buf->f_bavail = buf->f_bfree -
>  				F2FS_OPTION(sbi).root_reserved_blocks;
> @@ -1338,6 +1368,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>  	else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
>  		seq_printf(seq, ",alloc_mode=%s", "reuse");
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		seq_puts(seq, ",checkpoint=disable");
> +
>  	if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
>  		seq_printf(seq, ",fsync_mode=%s", "posix");
>  	else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
> @@ -1365,6 +1398,7 @@ static void default_options(struct f2fs_sb_info *sbi)
>  	set_opt(sbi, INLINE_DENTRY);
>  	set_opt(sbi, EXTENT_CACHE);
>  	set_opt(sbi, NOHEAP);
> +	clear_opt(sbi, DISABLE_CHECKPOINT);
>  	sbi->sb->s_flags |= SB_LAZYTIME;
>  	set_opt(sbi, FLUSH_MERGE);
>  	if (blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev)))
> @@ -1387,6 +1421,60 @@ static void default_options(struct f2fs_sb_info *sbi)
>  #ifdef CONFIG_QUOTA
>  static int f2fs_enable_quotas(struct super_block *sb);
>  #endif
> +
> +static void f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
> +{
> +	struct cp_control cpc;
> +	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
> +	unsigned int segno;
> +	int type;
> +
> +	set_sbi_flag(sbi, SBI_CP_DISABLED);
> +
> +	cpc.reason = CP_PAUSE;
> +
> +	mutex_lock(&sbi->gc_mutex);
> +	f2fs_write_checkpoint(sbi, &cpc);
> +
> +	mutex_lock(&dirty_i->seglist_lock);
> +	for (type = 0; type < NR_CURSEG_TYPE; type++) {
> +		for_each_set_bit(segno, dirty_i->dirty_segmap[type],
> +							MAIN_SEGS(sbi)) {
> +			if (IS_DATASEG(type))
> +				sbi->free_ssr_data_block +=
> +					get_valid_blocks(sbi, segno, false);
> +			else
> +				sbi->free_ssr_node_block +=
> +					get_valid_blocks(sbi, segno, false);
> +		}
> +	}
> +	sbi->free_segments = FREE_I(sbi)->free_segments;
> +	mutex_unlock(&dirty_i->seglist_lock);
> +	mutex_unlock(&sbi->gc_mutex);
> +}
> +
> +static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
> +{
> +	struct super_block *sb = sbi->sb;
> +	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
> +
> +	clear_sbi_flag(sbi, SBI_CP_DISABLED);
> +	writeback_inodes_sb(sb, WB_REASON_SYNC);
> +	sync_inodes_sb(sb);
> +
> +	mutex_lock(&dirty_i->seglist_lock);
> +	dirty_to_prefree(sbi);
> +	sbi->free_segments = 0;
> +	sbi->free_ssr_data_block = 0;
> +	sbi->free_ssr_node_block = 0;
> +	mutex_unlock(&dirty_i->seglist_lock);
> +
> +	set_sbi_flag(sbi, SBI_IS_DIRTY);
> +	set_sbi_flag(sbi, SBI_IS_CLOSE);
> +	f2fs_sync_fs(sb, 1);
> +	clear_sbi_flag(sbi, SBI_IS_CLOSE);
> +}
> +
>  static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>  {
>  	struct f2fs_sb_info *sbi = F2FS_SB(sb);
> @@ -1396,6 +1484,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>  	bool need_restart_gc = false;
>  	bool need_stop_gc = false;
>  	bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
> +	bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
> +	bool checkpoint_changed;
>  #ifdef CONFIG_QUOTA
>  	int i, j;
>  #endif
> @@ -1440,6 +1530,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>  	err = parse_options(sb, data);
>  	if (err)
>  		goto restore_opts;
> +	checkpoint_changed =
> +			disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT);
>  
>  	/*
>  	 * Previous and new state of filesystem is RO,
> @@ -1501,6 +1593,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>  		clear_sbi_flag(sbi, SBI_IS_CLOSE);
>  	}
>  
> +	if (checkpoint_changed) {
> +		if (test_opt(sbi, DISABLE_CHECKPOINT))
> +			f2fs_disable_checkpoint(sbi);
> +		else
> +			f2fs_enable_checkpoint(sbi);
> +	}
> +
>  	/*
>  	 * We stop issue flush thread if FS is mounted as RO
>  	 * or if flush_merge is not passed in mount option.
> @@ -2987,7 +3086,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>  		goto free_meta;
>  
>  	/* recover fsynced data */
> -	if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
> +	if (!test_opt(sbi, DISABLE_ROLL_FORWARD) &&
> +			!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
>  		/*
>  		 * mount should be failed, when device has readonly mode, and
>  		 * previous checkpoint was not done by clean system shutdown.
> @@ -3053,6 +3153,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>  				cur_cp_version(F2FS_CKPT(sbi)));
>  	f2fs_update_time(sbi, CP_TIME);
>  	f2fs_update_time(sbi, REQ_TIME);
> +
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		f2fs_disable_checkpoint(sbi);
> +	else if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
> +		f2fs_enable_checkpoint(sbi);
> +
>  	return 0;
>  
>  free_meta:
> -- 
> 2.18.0.233.g985f88cf7e-goog

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] f2fs: checkpoint disabling
@ 2018-08-04 22:31   ` Jaegeuk Kim
  0 siblings, 0 replies; 11+ messages in thread
From: Jaegeuk Kim @ 2018-08-04 22:31 UTC (permalink / raw)
  To: Daniel Rosenberg
  Cc: Chao Yu, Jonathan Corbet, linux-f2fs-devel, linux-kernel,
	linux-doc, linux-fsdevel, kernel-team

Hi Daniel,

Could you please rebase the patch on top of f2fs/dev-test branch?
I'd like to review the code by playing with a little bit.

Thanks,

On 07/25, Daniel Rosenberg wrote:
> This adds a lightweight non-persistent snapshotting scheme to f2fs.
> 
> To use, mount with the option checkpoint=disable, and to return to
> normal operation, remount with checkpoint=enable. If the filesystem
> is shut down before remounting with checkpoint=enable, it will revert
> back to its apparent state when it was first mounted with
> checkpoint=disable. This is useful for situations where you wish to be
> able to roll back the state of the disk in case of some critical
> failure.
> 
> Signed-off-by: Daniel Rosenberg <drosen@google.com>
> ---
> 
> Included changes suggested by Jaegeuk and Chao.
> it now holds the gc lock around setting up free space tracking in
> f2fs_disable_checkpoint. I wasn't quite sure what was mean by
> the comment on should_update_outplace, but I've changed it to return
> true for NEW_ADDR as well. It may be better to instead skip over the
> reset of that block if old_blkaddr is NEW_ADDR, because I think that
> means the data has not yet been committed, and thus wouldn't be
> overwriting data from the previous checkpoint.
> 
> We're currently using this during updates to extend the period of time
> that we can safely roll back a faulty update. After performing an update
> and reboot, we'd mount in this mode. If there ends up being some sort of
> fatal error, data changes are automatically reverted, allowing us to
> revert to the state before the update without worrying about any incompatible
> changes that may've been made to data while running under the faulty system.
> It increases our ability to revert a faulty update from up to mounting user
> data partitions to a bit farther along.
> 
>  Documentation/filesystems/f2fs.txt |   5 ++
>  fs/f2fs/data.c                     |  23 ++++++
>  fs/f2fs/f2fs.h                     |  55 +++++++++++++-
>  fs/f2fs/file.c                     |  18 +++++
>  fs/f2fs/gc.c                       |   4 +
>  fs/f2fs/segment.c                  |  58 ++++++++++++---
>  fs/f2fs/segment.h                  |  26 +++++++
>  fs/f2fs/super.c                    | 116 +++++++++++++++++++++++++++--
>  8 files changed, 288 insertions(+), 17 deletions(-)
> 
> diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
> index 69f8de9957397..a026b353a99d4 100644
> --- a/Documentation/filesystems/f2fs.txt
> +++ b/Documentation/filesystems/f2fs.txt
> @@ -193,6 +193,11 @@ fsync_mode=%s          Control the policy of fsync. Currently supports "posix",
>                         non-atomic files likewise "nobarrier" mount option.
>  test_dummy_encryption  Enable dummy encryption, which provides a fake fscrypt
>                         context. The fake fscrypt context is used by xfstests.
> +checkpoint=%s          Set to "disable" to turn off checkpointing. Set to "enable"
> +                       to reenable checkpointing. Is enabled by default. While
> +                       disabled, any unmounting or unexpected shutdowns will cause
> +                       the filesystem contents to appear as they did when the
> +                       filesystem was mounted with that option.
>  
>  ================================================================================
>  DEBUGFS ENTRIES
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 6b8ca5011bfd6..8f32df9b7b8f3 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1690,6 +1690,19 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
>  
>  	if (test_opt(sbi, LFS))
>  		return true;
> +	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
> +		struct seg_entry *se;
> +		unsigned int segno, offset;
> +
> +		if (!fio || fio->old_blkaddr == NULL_ADDR ||
> +				fio->old_blkaddr == NEW_ADDR)
> +			return true;
> +		segno = GET_SEGNO(sbi, fio->old_blkaddr);
> +		se = get_seg_entry(sbi, segno);
> +		offset = GET_BLKOFF_FROM_SEG0(sbi, fio->old_blkaddr);
> +		if (f2fs_test_bit(offset, se->ckpt_valid_map))
> +			return true;
> +	}
>  	if (S_ISDIR(inode->i_mode))
>  		return true;
>  	if (f2fs_is_atomic_file(inode))
> @@ -1717,10 +1730,13 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
>  {
>  	struct page *page = fio->page;
>  	struct inode *inode = page->mapping->host;
> +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>  	struct dnode_of_data dn;
>  	struct extent_info ei = {0,0,0};
>  	struct node_info ni;
>  	bool ipu_force = false;
> +	bool need_tmp_grab = test_opt(sbi, DISABLE_CHECKPOINT);
> +	blkcnt_t tmp_block = 1;
>  	int err = 0;
>  
>  	set_new_dnode(&dn, inode, NULL, NULL, 0);
> @@ -1798,6 +1814,11 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
>  	if (err)
>  		goto out_writepage;
>  
> +	if (need_tmp_grab) {
> +		err = inc_valid_block_count(sbi, dn.inode, &tmp_block);
> +		if (err)
> +			goto out_writepage;
> +	}
>  	set_page_writeback(page);
>  	ClearPageError(page);
>  
> @@ -1807,6 +1828,8 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
>  	set_inode_flag(inode, FI_APPEND_WRITE);
>  	if (page->index == 0)
>  		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
> +	if (need_tmp_grab)
> +		dec_valid_block_count(sbi, dn.inode, tmp_block);
>  out_writepage:
>  	f2fs_put_dnode(&dn);
>  out:
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 0374f069520cf..f594957f9d0cd 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -97,6 +97,7 @@ extern char *f2fs_fault_name[FAULT_MAX];
>  #define F2FS_MOUNT_QUOTA		0x00400000
>  #define F2FS_MOUNT_INLINE_XATTR_SIZE	0x00800000
>  #define F2FS_MOUNT_RESERVE_ROOT		0x01000000
> +#define F2FS_MOUNT_DISABLE_CHECKPOINT	0x02000000
>  
>  #define F2FS_OPTION(sbi)	((sbi)->mount_opt)
>  #define clear_opt(sbi, option)	(F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
> @@ -175,6 +176,7 @@ enum {
>  #define	CP_RECOVERY	0x00000008
>  #define	CP_DISCARD	0x00000010
>  #define CP_TRIMMED	0x00000020
> +#define CP_PAUSE	0x00000040
>  
>  #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
>  #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
> @@ -1074,6 +1076,7 @@ enum {
>  	SBI_NEED_CP,				/* need to checkpoint */
>  	SBI_DISABLE_ATOMIC_WRITE,		/* turn off atomic write */
>  	SBI_IS_SHUTDOWN,			/* shutdown by ioctl */
> +	SBI_CP_DISABLED,			/* CP was disabled last mount */
>  };
>  
>  enum {
> @@ -1199,6 +1202,12 @@ struct f2fs_sb_info {
>  	block_t reserved_blocks;		/* configurable reserved blocks */
>  	block_t current_reserved_blocks;	/* current reserved blocks */
>  
> +	/* Additional tracking for no checkpoint mode */
> +	block_t unusable_block_count;		/* # of blocks saved by last cp */
> +	block_t free_ssr_data_block;
> +	block_t free_ssr_node_block;
> +	block_t free_segments;
> +
>  	unsigned int nquota_files;		/* # of quota sysfile */
>  
>  	u32 s_next_generation;			/* for NFS support */
> @@ -1650,7 +1659,7 @@ static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool);
>  static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
>  				 struct inode *inode, blkcnt_t *count)
>  {
> -	blkcnt_t diff = 0, release = 0;
> +	blkcnt_t diff = 0, release = 0, seg_diff = 0, seg_rel = 0;
>  	block_t avail_user_block_count;
>  	int ret;
>  
> @@ -1678,6 +1687,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
>  
>  	if (!__allow_reserved_blocks(sbi, inode, true))
>  		avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		avail_user_block_count -= sbi->unusable_block_count;
>  
>  	if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
>  		diff = sbi->total_valid_block_count - avail_user_block_count;
> @@ -1691,6 +1702,35 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
>  			goto enospc;
>  		}
>  	}
> +	if (likely(!test_opt(sbi, DISABLE_CHECKPOINT)))
> +		goto normal;
> +	if (unlikely(*count > sbi->free_ssr_data_block)) {
> +		/* We'll need to pull from free. */
> +		blkcnt_t needed = *count - sbi->free_ssr_data_block;
> +		blkcnt_t new_segs = ((needed - 1) >>
> +					sbi->log_blocks_per_seg) + 1;
> +
> +		/* Check if we have enough free */
> +		if (unlikely(new_segs > sbi->free_segments)) {
> +			seg_diff = new_segs - sbi->free_segments;
> +
> +			seg_rel = ((needed - 1) % sbi->log_blocks_per_seg) + 1;
> +			seg_rel += (seg_diff - 1) << sbi->log_blocks_per_seg;
> +			new_segs -= seg_diff;
> +			*count -= seg_rel;
> +			release += seg_rel;
> +			if (!*count) {
> +				spin_unlock(&sbi->stat_lock);
> +				goto enospc;
> +			}
> +		}
> +
> +		sbi->free_segments -= new_segs;
> +		sbi->free_ssr_data_block += new_segs << sbi->log_blocks_per_seg;
> +
> +	}
> +	sbi->free_ssr_data_block -= *count;
> +normal:
>  	spin_unlock(&sbi->stat_lock);
>  
>  	if (unlikely(release)) {
> @@ -1887,6 +1927,8 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
>  
>  	if (!__allow_reserved_blocks(sbi, inode, false))
>  		valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		valid_block_count += sbi->unusable_block_count;
>  
>  	if (unlikely(valid_block_count > sbi->user_block_count)) {
>  		spin_unlock(&sbi->stat_lock);
> @@ -1899,6 +1941,17 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
>  		goto enospc;
>  	}
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
> +		if (unlikely(!sbi->free_ssr_node_block)) {
> +			if (unlikely(!sbi->free_segments)) {
> +				spin_unlock(&sbi->stat_lock);
> +				goto enospc;
> +			}
> +			sbi->free_segments--;
> +		}
> +		sbi->free_ssr_node_block--;
> +	}
> +
>  	sbi->total_valid_node_count++;
>  	sbi->total_valid_block_count++;
>  	spin_unlock(&sbi->stat_lock);
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 3a5c35fa06039..20124f5d287b8 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -150,6 +150,9 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>  	enum cp_reason_type cp_reason = CP_NO_NEEDED;
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return CP_NO_NEEDED;
> +
>  	if (!S_ISREG(inode->i_mode))
>  		cp_reason = CP_NON_REGULAR;
>  	else if (inode->i_nlink != 1)
> @@ -2070,6 +2073,9 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
>  	if (f2fs_readonly(sbi->sb))
>  		return -EROFS;
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return -EINVAL;
> +
>  	ret = mnt_want_write_file(filp);
>  	if (ret)
>  		return ret;
> @@ -2112,6 +2118,9 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
>  		return -EINVAL;
>  	}
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return -EINVAL;
> +
>  	ret = mnt_want_write_file(filp);
>  	if (ret)
>  		return ret;
> @@ -2147,6 +2156,12 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
>  	if (f2fs_readonly(sbi->sb))
>  		return -EROFS;
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
> +		f2fs_msg(sbi->sb, KERN_INFO,
> +			"Skipping Checkpoint. Checkpoints currently disabled.");
> +		return -EINVAL;
> +	}
> +
>  	ret = mnt_want_write_file(filp);
>  	if (ret)
>  		return ret;
> @@ -2513,6 +2528,9 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
>  	if (f2fs_readonly(sbi->sb))
>  		return -EROFS;
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return -EINVAL;
> +
>  	if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
>  							sizeof(range)))
>  		return -EFAULT;
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index fe0da3e05ef14..1326f5b6aa99d 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -60,6 +60,9 @@ static int gc_thread_func(void *data)
>  		}
>  #endif
>  
> +		if (test_opt(sbi, DISABLE_CHECKPOINT))
> +			goto do_balance;
> +
>  		if (!sb_start_write_trylock(sbi->sb))
>  			continue;
>  
> @@ -105,6 +108,7 @@ static int gc_thread_func(void *data)
>  		trace_f2fs_background_gc(sbi->sb, wait_ms,
>  				prefree_segments(sbi), free_segments(sbi));
>  
> +do_balance:
>  		/* balancing f2fs's metadata periodically */
>  		f2fs_balance_fs_bg(sbi);
>  next:
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 17354089b4ab2..3d5d8098d4ead 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -179,6 +179,10 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
>  		return false;
>  	if (sbi->gc_mode == GC_URGENT)
>  		return true;
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return true;
> +	if (sbi->gc_mode == GC_URGENT)
> +		return true;
>  
>  	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
>  			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
> @@ -485,6 +489,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
>  	 * We should do GC or end up with checkpoint, if there are so many dirty
>  	 * dir/node pages without enough free segments.
>  	 */
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return;
>  	if (has_not_enough_free_secs(sbi, 0, 0)) {
>  		mutex_lock(&sbi->gc_mutex);
>  		f2fs_gc(sbi, false, false, NULL_SEGNO);
> @@ -525,8 +531,10 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
>  			f2fs_sync_dirty_inodes(sbi, FILE_INODE);
>  			blk_finish_plug(&plug);
>  		}
> -		f2fs_sync_fs(sbi->sb, true);
> -		stat_inc_bg_cp_count(sbi->stat_info);
> +		if (!test_opt(sbi, DISABLE_CHECKPOINT)) {
> +			f2fs_sync_fs(sbi->sb, true);
> +			stat_inc_bg_cp_count(sbi->stat_info);
> +		}
>  	}
>  }
>  
> @@ -741,8 +749,8 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
>  	return ret;
>  }
>  
> -static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
> -		enum dirty_type dirty_type)
> +void __locate_dirty_segment(struct f2fs_sb_info *sbi,
> +		unsigned int segno, enum dirty_type dirty_type)
>  {
>  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
>  
> @@ -766,8 +774,8 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
>  	}
>  }
>  
> -static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
> -		enum dirty_type dirty_type)
> +void __remove_dirty_segment(struct f2fs_sb_info *sbi,
> +		unsigned int segno, enum dirty_type dirty_type)
>  {
>  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
>  
> @@ -787,6 +795,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
>  	}
>  }
>  
> +
>  /*
>   * Should not occur error such as -ENOMEM.
>   * Adding dirty entry into seglist is not critical operation.
> @@ -795,7 +804,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
>  static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
>  {
>  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
> -	unsigned short valid_blocks;
> +	unsigned short valid_blocks, ckpt_valid_blocks;
>  
>  	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
>  		return;
> @@ -803,8 +812,10 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
>  	mutex_lock(&dirty_i->seglist_lock);
>  
>  	valid_blocks = get_valid_blocks(sbi, segno, false);
> +	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
>  
> -	if (valid_blocks == 0) {
> +	if (valid_blocks == 0 && (ckpt_valid_blocks == sbi->blocks_per_seg ||
> +					!test_opt(sbi, DISABLE_CHECKPOINT))) {
>  		__locate_dirty_segment(sbi, segno, PRE);
>  		__remove_dirty_segment(sbi, segno, DIRTY);
>  	} else if (valid_blocks < sbi->blocks_per_seg) {
> @@ -1926,7 +1937,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
>  			sbi->discard_blks--;
>  
>  		/* don't overwrite by SSR to keep node chain */
> -		if (IS_NODESEG(se->type)) {
> +		if (IS_NODESEG(se->type) &&
> +				!test_opt(sbi, DISABLE_CHECKPOINT)) {
>  			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
>  				se->ckpt_valid_blocks++;
>  		}
> @@ -1948,6 +1960,25 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
>  			f2fs_bug_on(sbi, 1);
>  			se->valid_blocks++;
>  			del = 0;
> +		} else {
> +			/* If checkpoints are off, we must not reuse data that
> +			 * was used in the previous checkpoint. If it was used
> +			 * before, we must track that to know how much space we
> +			 * really have
> +			 */
> +			if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
> +				spin_lock(&sbi->stat_lock);
> +				sbi->unusable_block_count++;
> +				spin_unlock(&sbi->stat_lock);
> +			} else {
> +				spin_lock(&sbi->stat_lock);
> +				if (IS_DATASEG(se->type))
> +					sbi->free_ssr_data_block++;
> +				else
> +					sbi->free_ssr_node_block++;
> +				spin_unlock(&sbi->stat_lock);
> +			}
> +
>  		}
>  
>  		if (f2fs_discard_en(sbi) &&
> @@ -2237,7 +2268,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>  		return SIT_I(sbi)->last_victim[ALLOC_NEXT];
>  
>  	/* find segments from 0 to reuse freed segments */
> -	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
> +	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE
> +			|| test_opt(sbi, DISABLE_CHECKPOINT))
>  		return 0;
>  
>  	return CURSEG_I(sbi, type)->segno;
> @@ -2389,7 +2421,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
>  	else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
>  					type == CURSEG_WARM_NODE)
>  		new_curseg(sbi, type, false);
> -	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
> +	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
> +			!test_opt(sbi, DISABLE_CHECKPOINT))
>  		new_curseg(sbi, type, false);
>  	else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
>  		change_curseg(sbi, type);
> @@ -3568,6 +3601,9 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  			sit_i->dirty_sentries--;
>  			ses->entry_cnt--;
>  		}
> +		spin_lock(&sbi->stat_lock);
> +		sbi->unusable_block_count = 0;
> +		spin_unlock(&sbi->stat_lock);
>  
>  		if (to_journal)
>  			up_write(&curseg->journal_rwsem);
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index 50495515f0a0d..37222e133d52a 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -342,6 +342,12 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
>  		return get_seg_entry(sbi, segno)->valid_blocks;
>  }
>  
> +static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
> +				unsigned int segno)
> +{
> +	return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
> +}
> +
>  static inline void seg_info_from_raw_sit(struct seg_entry *se,
>  					struct f2fs_sit_entry *rs)
>  {
> @@ -524,6 +530,26 @@ static inline unsigned int dirty_segments(struct f2fs_sb_info *sbi)
>  		DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_NODE];
>  }
>  
> +void __locate_dirty_segment(struct f2fs_sb_info *sbi,
> +		unsigned int segno, enum dirty_type dirty_type);
> +
> +void __remove_dirty_segment(struct f2fs_sb_info *sbi,
> +		unsigned int segno, enum dirty_type dirty_type);
> +
> +/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */
> +static inline void dirty_to_prefree(struct f2fs_sb_info *sbi)
> +{
> +	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
> +	unsigned int segno;
> +
> +	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
> +		if (!get_valid_blocks(sbi, segno, false)) {
> +			__locate_dirty_segment(sbi, segno, PRE);
> +			__remove_dirty_segment(sbi, segno, DIRTY);
> +		}
> +	}
> +}
> +
>  static inline int overprovision_segments(struct f2fs_sb_info *sbi)
>  {
>  	return SM_I(sbi)->ovp_segments;
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 7187885da2515..7568ae00319cb 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -132,6 +132,7 @@ enum {
>  	Opt_alloc,
>  	Opt_fsync,
>  	Opt_test_dummy_encryption,
> +	Opt_checkpoint,
>  	Opt_err,
>  };
>  
> @@ -189,6 +190,7 @@ static match_table_t f2fs_tokens = {
>  	{Opt_alloc, "alloc_mode=%s"},
>  	{Opt_fsync, "fsync_mode=%s"},
>  	{Opt_test_dummy_encryption, "test_dummy_encryption"},
> +	{Opt_checkpoint, "checkpoint=%s"},
>  	{Opt_err, NULL},
>  };
>  
> @@ -764,6 +766,23 @@ static int parse_options(struct super_block *sb, char *options)
>  					"Test dummy encryption mount option ignored");
>  #endif
>  			break;
> +		case Opt_checkpoint:
> +			name = match_strdup(&args[0]);
> +			if (!name)
> +				return -ENOMEM;
> +
> +			if (strlen(name) == 6 &&
> +					!strncmp(name, "enable", 6)) {
> +				clear_opt(sbi, DISABLE_CHECKPOINT);
> +			} else if (strlen(name) == 7 &&
> +					!strncmp(name, "disable", 7)) {
> +				set_opt(sbi, DISABLE_CHECKPOINT);
> +			} else {
> +				kfree(name);
> +				return -EINVAL;
> +			}
> +			kfree(name);
> +			break;
>  		default:
>  			f2fs_msg(sb, KERN_ERR,
>  				"Unrecognized mount option \"%s\" or missing value",
> @@ -809,6 +828,12 @@ static int parse_options(struct super_block *sb, char *options)
>  		}
>  	}
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) {
> +		f2fs_msg(sb, KERN_ERR,
> +				"LFS not compatible with checkpoint=disable\n");
> +		return -EINVAL;
> +	}
> +
>  	/* Not pass down write hints if the number of active logs is lesser
>  	 * than NR_CURSEG_TYPE.
>  	 */
> @@ -996,8 +1021,9 @@ static void f2fs_put_super(struct super_block *sb)
>  	 * But, the previous checkpoint was not done by umount, it needs to do
>  	 * clean checkpoint again.
>  	 */
> -	if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
> -			!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
> +	if ((is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
> +			!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) &&
> +			!test_opt(sbi, DISABLE_CHECKPOINT)) {
>  		struct cp_control cpc = {
>  			.reason = CP_UMOUNT,
>  		};
> @@ -1007,7 +1033,8 @@ static void f2fs_put_super(struct super_block *sb)
>  	/* be sure to wait for any on-going discard commands */
>  	dropped = f2fs_wait_discard_bios(sbi);
>  
> -	if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped) {
> +	if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped &&
> +			!test_opt(sbi, DISABLE_CHECKPOINT)) {
>  		struct cp_control cpc = {
>  			.reason = CP_UMOUNT | CP_TRIMMED,
>  		};
> @@ -1064,6 +1091,8 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
>  
>  	if (unlikely(f2fs_cp_error(sbi)))
>  		return 0;
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return 0;
>  
>  	trace_f2fs_sync_fs(sb, sync);
>  
> @@ -1162,7 +1191,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
>  
>  	buf->f_blocks = total_count - start_count;
>  	buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
> -						sbi->current_reserved_blocks;
> +						sbi->current_reserved_blocks -
> +						sbi->unusable_block_count;
>  	if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks)
>  		buf->f_bavail = buf->f_bfree -
>  				F2FS_OPTION(sbi).root_reserved_blocks;
> @@ -1338,6 +1368,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>  	else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
>  		seq_printf(seq, ",alloc_mode=%s", "reuse");
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		seq_puts(seq, ",checkpoint=disable");
> +
>  	if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
>  		seq_printf(seq, ",fsync_mode=%s", "posix");
>  	else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
> @@ -1365,6 +1398,7 @@ static void default_options(struct f2fs_sb_info *sbi)
>  	set_opt(sbi, INLINE_DENTRY);
>  	set_opt(sbi, EXTENT_CACHE);
>  	set_opt(sbi, NOHEAP);
> +	clear_opt(sbi, DISABLE_CHECKPOINT);
>  	sbi->sb->s_flags |= SB_LAZYTIME;
>  	set_opt(sbi, FLUSH_MERGE);
>  	if (blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev)))
> @@ -1387,6 +1421,60 @@ static void default_options(struct f2fs_sb_info *sbi)
>  #ifdef CONFIG_QUOTA
>  static int f2fs_enable_quotas(struct super_block *sb);
>  #endif
> +
> +static void f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
> +{
> +	struct cp_control cpc;
> +	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
> +	unsigned int segno;
> +	int type;
> +
> +	set_sbi_flag(sbi, SBI_CP_DISABLED);
> +
> +	cpc.reason = CP_PAUSE;
> +
> +	mutex_lock(&sbi->gc_mutex);
> +	f2fs_write_checkpoint(sbi, &cpc);
> +
> +	mutex_lock(&dirty_i->seglist_lock);
> +	for (type = 0; type < NR_CURSEG_TYPE; type++) {
> +		for_each_set_bit(segno, dirty_i->dirty_segmap[type],
> +							MAIN_SEGS(sbi)) {
> +			if (IS_DATASEG(type))
> +				sbi->free_ssr_data_block +=
> +					get_valid_blocks(sbi, segno, false);
> +			else
> +				sbi->free_ssr_node_block +=
> +					get_valid_blocks(sbi, segno, false);
> +		}
> +	}
> +	sbi->free_segments = FREE_I(sbi)->free_segments;
> +	mutex_unlock(&dirty_i->seglist_lock);
> +	mutex_unlock(&sbi->gc_mutex);
> +}
> +
> +static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
> +{
> +	struct super_block *sb = sbi->sb;
> +	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
> +
> +	clear_sbi_flag(sbi, SBI_CP_DISABLED);
> +	writeback_inodes_sb(sb, WB_REASON_SYNC);
> +	sync_inodes_sb(sb);
> +
> +	mutex_lock(&dirty_i->seglist_lock);
> +	dirty_to_prefree(sbi);
> +	sbi->free_segments = 0;
> +	sbi->free_ssr_data_block = 0;
> +	sbi->free_ssr_node_block = 0;
> +	mutex_unlock(&dirty_i->seglist_lock);
> +
> +	set_sbi_flag(sbi, SBI_IS_DIRTY);
> +	set_sbi_flag(sbi, SBI_IS_CLOSE);
> +	f2fs_sync_fs(sb, 1);
> +	clear_sbi_flag(sbi, SBI_IS_CLOSE);
> +}
> +
>  static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>  {
>  	struct f2fs_sb_info *sbi = F2FS_SB(sb);
> @@ -1396,6 +1484,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>  	bool need_restart_gc = false;
>  	bool need_stop_gc = false;
>  	bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
> +	bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
> +	bool checkpoint_changed;
>  #ifdef CONFIG_QUOTA
>  	int i, j;
>  #endif
> @@ -1440,6 +1530,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>  	err = parse_options(sb, data);
>  	if (err)
>  		goto restore_opts;
> +	checkpoint_changed =
> +			disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT);
>  
>  	/*
>  	 * Previous and new state of filesystem is RO,
> @@ -1501,6 +1593,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>  		clear_sbi_flag(sbi, SBI_IS_CLOSE);
>  	}
>  
> +	if (checkpoint_changed) {
> +		if (test_opt(sbi, DISABLE_CHECKPOINT))
> +			f2fs_disable_checkpoint(sbi);
> +		else
> +			f2fs_enable_checkpoint(sbi);
> +	}
> +
>  	/*
>  	 * We stop issue flush thread if FS is mounted as RO
>  	 * or if flush_merge is not passed in mount option.
> @@ -2987,7 +3086,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>  		goto free_meta;
>  
>  	/* recover fsynced data */
> -	if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
> +	if (!test_opt(sbi, DISABLE_ROLL_FORWARD) &&
> +			!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
>  		/*
>  		 * mount should be failed, when device has readonly mode, and
>  		 * previous checkpoint was not done by clean system shutdown.
> @@ -3053,6 +3153,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>  				cur_cp_version(F2FS_CKPT(sbi)));
>  	f2fs_update_time(sbi, CP_TIME);
>  	f2fs_update_time(sbi, REQ_TIME);
> +
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		f2fs_disable_checkpoint(sbi);
> +	else if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
> +		f2fs_enable_checkpoint(sbi);
> +
>  	return 0;
>  
>  free_meta:
> -- 
> 2.18.0.233.g985f88cf7e-goog
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] f2fs: checkpoint disabling
@ 2018-08-04 22:31   ` Jaegeuk Kim
  0 siblings, 0 replies; 11+ messages in thread
From: Jaegeuk Kim @ 2018-08-04 22:31 UTC (permalink / raw)
  To: Daniel Rosenberg
  Cc: Jonathan Corbet, linux-doc, linux-kernel, linux-f2fs-devel,
	linux-fsdevel, kernel-team

Hi Daniel,

Could you please rebase the patch on top of f2fs/dev-test branch?
I'd like to review the code by playing with a little bit.

Thanks,

On 07/25, Daniel Rosenberg wrote:
> This adds a lightweight non-persistent snapshotting scheme to f2fs.
> 
> To use, mount with the option checkpoint=disable, and to return to
> normal operation, remount with checkpoint=enable. If the filesystem
> is shut down before remounting with checkpoint=enable, it will revert
> back to its apparent state when it was first mounted with
> checkpoint=disable. This is useful for situations where you wish to be
> able to roll back the state of the disk in case of some critical
> failure.
> 
> Signed-off-by: Daniel Rosenberg <drosen@google.com>
> ---
> 
> Included changes suggested by Jaegeuk and Chao.
> it now holds the gc lock around setting up free space tracking in
> f2fs_disable_checkpoint. I wasn't quite sure what was mean by
> the comment on should_update_outplace, but I've changed it to return
> true for NEW_ADDR as well. It may be better to instead skip over the
> reset of that block if old_blkaddr is NEW_ADDR, because I think that
> means the data has not yet been committed, and thus wouldn't be
> overwriting data from the previous checkpoint.
> 
> We're currently using this during updates to extend the period of time
> that we can safely roll back a faulty update. After performing an update
> and reboot, we'd mount in this mode. If there ends up being some sort of
> fatal error, data changes are automatically reverted, allowing us to
> revert to the state before the update without worrying about any incompatible
> changes that may've been made to data while running under the faulty system.
> It increases our ability to revert a faulty update from up to mounting user
> data partitions to a bit farther along.
> 
>  Documentation/filesystems/f2fs.txt |   5 ++
>  fs/f2fs/data.c                     |  23 ++++++
>  fs/f2fs/f2fs.h                     |  55 +++++++++++++-
>  fs/f2fs/file.c                     |  18 +++++
>  fs/f2fs/gc.c                       |   4 +
>  fs/f2fs/segment.c                  |  58 ++++++++++++---
>  fs/f2fs/segment.h                  |  26 +++++++
>  fs/f2fs/super.c                    | 116 +++++++++++++++++++++++++++--
>  8 files changed, 288 insertions(+), 17 deletions(-)
> 
> diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
> index 69f8de9957397..a026b353a99d4 100644
> --- a/Documentation/filesystems/f2fs.txt
> +++ b/Documentation/filesystems/f2fs.txt
> @@ -193,6 +193,11 @@ fsync_mode=%s          Control the policy of fsync. Currently supports "posix",
>                         non-atomic files likewise "nobarrier" mount option.
>  test_dummy_encryption  Enable dummy encryption, which provides a fake fscrypt
>                         context. The fake fscrypt context is used by xfstests.
> +checkpoint=%s          Set to "disable" to turn off checkpointing. Set to "enable"
> +                       to reenable checkpointing. Is enabled by default. While
> +                       disabled, any unmounting or unexpected shutdowns will cause
> +                       the filesystem contents to appear as they did when the
> +                       filesystem was mounted with that option.
>  
>  ================================================================================
>  DEBUGFS ENTRIES
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 6b8ca5011bfd6..8f32df9b7b8f3 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1690,6 +1690,19 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
>  
>  	if (test_opt(sbi, LFS))
>  		return true;
> +	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
> +		struct seg_entry *se;
> +		unsigned int segno, offset;
> +
> +		if (!fio || fio->old_blkaddr == NULL_ADDR ||
> +				fio->old_blkaddr == NEW_ADDR)
> +			return true;
> +		segno = GET_SEGNO(sbi, fio->old_blkaddr);
> +		se = get_seg_entry(sbi, segno);
> +		offset = GET_BLKOFF_FROM_SEG0(sbi, fio->old_blkaddr);
> +		if (f2fs_test_bit(offset, se->ckpt_valid_map))
> +			return true;
> +	}
>  	if (S_ISDIR(inode->i_mode))
>  		return true;
>  	if (f2fs_is_atomic_file(inode))
> @@ -1717,10 +1730,13 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
>  {
>  	struct page *page = fio->page;
>  	struct inode *inode = page->mapping->host;
> +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>  	struct dnode_of_data dn;
>  	struct extent_info ei = {0,0,0};
>  	struct node_info ni;
>  	bool ipu_force = false;
> +	bool need_tmp_grab = test_opt(sbi, DISABLE_CHECKPOINT);
> +	blkcnt_t tmp_block = 1;
>  	int err = 0;
>  
>  	set_new_dnode(&dn, inode, NULL, NULL, 0);
> @@ -1798,6 +1814,11 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
>  	if (err)
>  		goto out_writepage;
>  
> +	if (need_tmp_grab) {
> +		err = inc_valid_block_count(sbi, dn.inode, &tmp_block);
> +		if (err)
> +			goto out_writepage;
> +	}
>  	set_page_writeback(page);
>  	ClearPageError(page);
>  
> @@ -1807,6 +1828,8 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
>  	set_inode_flag(inode, FI_APPEND_WRITE);
>  	if (page->index == 0)
>  		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
> +	if (need_tmp_grab)
> +		dec_valid_block_count(sbi, dn.inode, tmp_block);
>  out_writepage:
>  	f2fs_put_dnode(&dn);
>  out:
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 0374f069520cf..f594957f9d0cd 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -97,6 +97,7 @@ extern char *f2fs_fault_name[FAULT_MAX];
>  #define F2FS_MOUNT_QUOTA		0x00400000
>  #define F2FS_MOUNT_INLINE_XATTR_SIZE	0x00800000
>  #define F2FS_MOUNT_RESERVE_ROOT		0x01000000
> +#define F2FS_MOUNT_DISABLE_CHECKPOINT	0x02000000
>  
>  #define F2FS_OPTION(sbi)	((sbi)->mount_opt)
>  #define clear_opt(sbi, option)	(F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
> @@ -175,6 +176,7 @@ enum {
>  #define	CP_RECOVERY	0x00000008
>  #define	CP_DISCARD	0x00000010
>  #define CP_TRIMMED	0x00000020
> +#define CP_PAUSE	0x00000040
>  
>  #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
>  #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
> @@ -1074,6 +1076,7 @@ enum {
>  	SBI_NEED_CP,				/* need to checkpoint */
>  	SBI_DISABLE_ATOMIC_WRITE,		/* turn off atomic write */
>  	SBI_IS_SHUTDOWN,			/* shutdown by ioctl */
> +	SBI_CP_DISABLED,			/* CP was disabled last mount */
>  };
>  
>  enum {
> @@ -1199,6 +1202,12 @@ struct f2fs_sb_info {
>  	block_t reserved_blocks;		/* configurable reserved blocks */
>  	block_t current_reserved_blocks;	/* current reserved blocks */
>  
> +	/* Additional tracking for no checkpoint mode */
> +	block_t unusable_block_count;		/* # of blocks saved by last cp */
> +	block_t free_ssr_data_block;
> +	block_t free_ssr_node_block;
> +	block_t free_segments;
> +
>  	unsigned int nquota_files;		/* # of quota sysfile */
>  
>  	u32 s_next_generation;			/* for NFS support */
> @@ -1650,7 +1659,7 @@ static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool);
>  static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
>  				 struct inode *inode, blkcnt_t *count)
>  {
> -	blkcnt_t diff = 0, release = 0;
> +	blkcnt_t diff = 0, release = 0, seg_diff = 0, seg_rel = 0;
>  	block_t avail_user_block_count;
>  	int ret;
>  
> @@ -1678,6 +1687,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
>  
>  	if (!__allow_reserved_blocks(sbi, inode, true))
>  		avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		avail_user_block_count -= sbi->unusable_block_count;
>  
>  	if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
>  		diff = sbi->total_valid_block_count - avail_user_block_count;
> @@ -1691,6 +1702,35 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
>  			goto enospc;
>  		}
>  	}
> +	if (likely(!test_opt(sbi, DISABLE_CHECKPOINT)))
> +		goto normal;
> +	if (unlikely(*count > sbi->free_ssr_data_block)) {
> +		/* We'll need to pull from free. */
> +		blkcnt_t needed = *count - sbi->free_ssr_data_block;
> +		blkcnt_t new_segs = ((needed - 1) >>
> +					sbi->log_blocks_per_seg) + 1;
> +
> +		/* Check if we have enough free */
> +		if (unlikely(new_segs > sbi->free_segments)) {
> +			seg_diff = new_segs - sbi->free_segments;
> +
> +			seg_rel = ((needed - 1) % sbi->log_blocks_per_seg) + 1;
> +			seg_rel += (seg_diff - 1) << sbi->log_blocks_per_seg;
> +			new_segs -= seg_diff;
> +			*count -= seg_rel;
> +			release += seg_rel;
> +			if (!*count) {
> +				spin_unlock(&sbi->stat_lock);
> +				goto enospc;
> +			}
> +		}
> +
> +		sbi->free_segments -= new_segs;
> +		sbi->free_ssr_data_block += new_segs << sbi->log_blocks_per_seg;
> +
> +	}
> +	sbi->free_ssr_data_block -= *count;
> +normal:
>  	spin_unlock(&sbi->stat_lock);
>  
>  	if (unlikely(release)) {
> @@ -1887,6 +1927,8 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
>  
>  	if (!__allow_reserved_blocks(sbi, inode, false))
>  		valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		valid_block_count += sbi->unusable_block_count;
>  
>  	if (unlikely(valid_block_count > sbi->user_block_count)) {
>  		spin_unlock(&sbi->stat_lock);
> @@ -1899,6 +1941,17 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
>  		goto enospc;
>  	}
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
> +		if (unlikely(!sbi->free_ssr_node_block)) {
> +			if (unlikely(!sbi->free_segments)) {
> +				spin_unlock(&sbi->stat_lock);
> +				goto enospc;
> +			}
> +			sbi->free_segments--;
> +		}
> +		sbi->free_ssr_node_block--;
> +	}
> +
>  	sbi->total_valid_node_count++;
>  	sbi->total_valid_block_count++;
>  	spin_unlock(&sbi->stat_lock);
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 3a5c35fa06039..20124f5d287b8 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -150,6 +150,9 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>  	enum cp_reason_type cp_reason = CP_NO_NEEDED;
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return CP_NO_NEEDED;
> +
>  	if (!S_ISREG(inode->i_mode))
>  		cp_reason = CP_NON_REGULAR;
>  	else if (inode->i_nlink != 1)
> @@ -2070,6 +2073,9 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
>  	if (f2fs_readonly(sbi->sb))
>  		return -EROFS;
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return -EINVAL;
> +
>  	ret = mnt_want_write_file(filp);
>  	if (ret)
>  		return ret;
> @@ -2112,6 +2118,9 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
>  		return -EINVAL;
>  	}
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return -EINVAL;
> +
>  	ret = mnt_want_write_file(filp);
>  	if (ret)
>  		return ret;
> @@ -2147,6 +2156,12 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
>  	if (f2fs_readonly(sbi->sb))
>  		return -EROFS;
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
> +		f2fs_msg(sbi->sb, KERN_INFO,
> +			"Skipping Checkpoint. Checkpoints currently disabled.");
> +		return -EINVAL;
> +	}
> +
>  	ret = mnt_want_write_file(filp);
>  	if (ret)
>  		return ret;
> @@ -2513,6 +2528,9 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
>  	if (f2fs_readonly(sbi->sb))
>  		return -EROFS;
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return -EINVAL;
> +
>  	if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
>  							sizeof(range)))
>  		return -EFAULT;
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index fe0da3e05ef14..1326f5b6aa99d 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -60,6 +60,9 @@ static int gc_thread_func(void *data)
>  		}
>  #endif
>  
> +		if (test_opt(sbi, DISABLE_CHECKPOINT))
> +			goto do_balance;
> +
>  		if (!sb_start_write_trylock(sbi->sb))
>  			continue;
>  
> @@ -105,6 +108,7 @@ static int gc_thread_func(void *data)
>  		trace_f2fs_background_gc(sbi->sb, wait_ms,
>  				prefree_segments(sbi), free_segments(sbi));
>  
> +do_balance:
>  		/* balancing f2fs's metadata periodically */
>  		f2fs_balance_fs_bg(sbi);
>  next:
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 17354089b4ab2..3d5d8098d4ead 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -179,6 +179,10 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
>  		return false;
>  	if (sbi->gc_mode == GC_URGENT)
>  		return true;
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return true;
> +	if (sbi->gc_mode == GC_URGENT)
> +		return true;
>  
>  	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
>  			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
> @@ -485,6 +489,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
>  	 * We should do GC or end up with checkpoint, if there are so many dirty
>  	 * dir/node pages without enough free segments.
>  	 */
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return;
>  	if (has_not_enough_free_secs(sbi, 0, 0)) {
>  		mutex_lock(&sbi->gc_mutex);
>  		f2fs_gc(sbi, false, false, NULL_SEGNO);
> @@ -525,8 +531,10 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
>  			f2fs_sync_dirty_inodes(sbi, FILE_INODE);
>  			blk_finish_plug(&plug);
>  		}
> -		f2fs_sync_fs(sbi->sb, true);
> -		stat_inc_bg_cp_count(sbi->stat_info);
> +		if (!test_opt(sbi, DISABLE_CHECKPOINT)) {
> +			f2fs_sync_fs(sbi->sb, true);
> +			stat_inc_bg_cp_count(sbi->stat_info);
> +		}
>  	}
>  }
>  
> @@ -741,8 +749,8 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
>  	return ret;
>  }
>  
> -static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
> -		enum dirty_type dirty_type)
> +void __locate_dirty_segment(struct f2fs_sb_info *sbi,
> +		unsigned int segno, enum dirty_type dirty_type)
>  {
>  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
>  
> @@ -766,8 +774,8 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
>  	}
>  }
>  
> -static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
> -		enum dirty_type dirty_type)
> +void __remove_dirty_segment(struct f2fs_sb_info *sbi,
> +		unsigned int segno, enum dirty_type dirty_type)
>  {
>  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
>  
> @@ -787,6 +795,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
>  	}
>  }
>  
> +
>  /*
>   * Should not occur error such as -ENOMEM.
>   * Adding dirty entry into seglist is not critical operation.
> @@ -795,7 +804,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
>  static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
>  {
>  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
> -	unsigned short valid_blocks;
> +	unsigned short valid_blocks, ckpt_valid_blocks;
>  
>  	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
>  		return;
> @@ -803,8 +812,10 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
>  	mutex_lock(&dirty_i->seglist_lock);
>  
>  	valid_blocks = get_valid_blocks(sbi, segno, false);
> +	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
>  
> -	if (valid_blocks == 0) {
> +	if (valid_blocks == 0 && (ckpt_valid_blocks == sbi->blocks_per_seg ||
> +					!test_opt(sbi, DISABLE_CHECKPOINT))) {
>  		__locate_dirty_segment(sbi, segno, PRE);
>  		__remove_dirty_segment(sbi, segno, DIRTY);
>  	} else if (valid_blocks < sbi->blocks_per_seg) {
> @@ -1926,7 +1937,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
>  			sbi->discard_blks--;
>  
>  		/* don't overwrite by SSR to keep node chain */
> -		if (IS_NODESEG(se->type)) {
> +		if (IS_NODESEG(se->type) &&
> +				!test_opt(sbi, DISABLE_CHECKPOINT)) {
>  			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
>  				se->ckpt_valid_blocks++;
>  		}
> @@ -1948,6 +1960,25 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
>  			f2fs_bug_on(sbi, 1);
>  			se->valid_blocks++;
>  			del = 0;
> +		} else {
> +			/* If checkpoints are off, we must not reuse data that
> +			 * was used in the previous checkpoint. If it was used
> +			 * before, we must track that to know how much space we
> +			 * really have
> +			 */
> +			if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
> +				spin_lock(&sbi->stat_lock);
> +				sbi->unusable_block_count++;
> +				spin_unlock(&sbi->stat_lock);
> +			} else {
> +				spin_lock(&sbi->stat_lock);
> +				if (IS_DATASEG(se->type))
> +					sbi->free_ssr_data_block++;
> +				else
> +					sbi->free_ssr_node_block++;
> +				spin_unlock(&sbi->stat_lock);
> +			}
> +
>  		}
>  
>  		if (f2fs_discard_en(sbi) &&
> @@ -2237,7 +2268,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
>  		return SIT_I(sbi)->last_victim[ALLOC_NEXT];
>  
>  	/* find segments from 0 to reuse freed segments */
> -	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
> +	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE
> +			|| test_opt(sbi, DISABLE_CHECKPOINT))
>  		return 0;
>  
>  	return CURSEG_I(sbi, type)->segno;
> @@ -2389,7 +2421,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
>  	else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
>  					type == CURSEG_WARM_NODE)
>  		new_curseg(sbi, type, false);
> -	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
> +	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
> +			!test_opt(sbi, DISABLE_CHECKPOINT))
>  		new_curseg(sbi, type, false);
>  	else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
>  		change_curseg(sbi, type);
> @@ -3568,6 +3601,9 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  			sit_i->dirty_sentries--;
>  			ses->entry_cnt--;
>  		}
> +		spin_lock(&sbi->stat_lock);
> +		sbi->unusable_block_count = 0;
> +		spin_unlock(&sbi->stat_lock);
>  
>  		if (to_journal)
>  			up_write(&curseg->journal_rwsem);
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index 50495515f0a0d..37222e133d52a 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -342,6 +342,12 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
>  		return get_seg_entry(sbi, segno)->valid_blocks;
>  }
>  
> +static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
> +				unsigned int segno)
> +{
> +	return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
> +}
> +
>  static inline void seg_info_from_raw_sit(struct seg_entry *se,
>  					struct f2fs_sit_entry *rs)
>  {
> @@ -524,6 +530,26 @@ static inline unsigned int dirty_segments(struct f2fs_sb_info *sbi)
>  		DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_NODE];
>  }
>  
> +void __locate_dirty_segment(struct f2fs_sb_info *sbi,
> +		unsigned int segno, enum dirty_type dirty_type);
> +
> +void __remove_dirty_segment(struct f2fs_sb_info *sbi,
> +		unsigned int segno, enum dirty_type dirty_type);
> +
> +/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */
> +static inline void dirty_to_prefree(struct f2fs_sb_info *sbi)
> +{
> +	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
> +	unsigned int segno;
> +
> +	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
> +		if (!get_valid_blocks(sbi, segno, false)) {
> +			__locate_dirty_segment(sbi, segno, PRE);
> +			__remove_dirty_segment(sbi, segno, DIRTY);
> +		}
> +	}
> +}
> +
>  static inline int overprovision_segments(struct f2fs_sb_info *sbi)
>  {
>  	return SM_I(sbi)->ovp_segments;
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 7187885da2515..7568ae00319cb 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -132,6 +132,7 @@ enum {
>  	Opt_alloc,
>  	Opt_fsync,
>  	Opt_test_dummy_encryption,
> +	Opt_checkpoint,
>  	Opt_err,
>  };
>  
> @@ -189,6 +190,7 @@ static match_table_t f2fs_tokens = {
>  	{Opt_alloc, "alloc_mode=%s"},
>  	{Opt_fsync, "fsync_mode=%s"},
>  	{Opt_test_dummy_encryption, "test_dummy_encryption"},
> +	{Opt_checkpoint, "checkpoint=%s"},
>  	{Opt_err, NULL},
>  };
>  
> @@ -764,6 +766,23 @@ static int parse_options(struct super_block *sb, char *options)
>  					"Test dummy encryption mount option ignored");
>  #endif
>  			break;
> +		case Opt_checkpoint:
> +			name = match_strdup(&args[0]);
> +			if (!name)
> +				return -ENOMEM;
> +
> +			if (strlen(name) == 6 &&
> +					!strncmp(name, "enable", 6)) {
> +				clear_opt(sbi, DISABLE_CHECKPOINT);
> +			} else if (strlen(name) == 7 &&
> +					!strncmp(name, "disable", 7)) {
> +				set_opt(sbi, DISABLE_CHECKPOINT);
> +			} else {
> +				kfree(name);
> +				return -EINVAL;
> +			}
> +			kfree(name);
> +			break;
>  		default:
>  			f2fs_msg(sb, KERN_ERR,
>  				"Unrecognized mount option \"%s\" or missing value",
> @@ -809,6 +828,12 @@ static int parse_options(struct super_block *sb, char *options)
>  		}
>  	}
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) {
> +		f2fs_msg(sb, KERN_ERR,
> +				"LFS not compatible with checkpoint=disable\n");
> +		return -EINVAL;
> +	}
> +
>  	/* Not pass down write hints if the number of active logs is lesser
>  	 * than NR_CURSEG_TYPE.
>  	 */
> @@ -996,8 +1021,9 @@ static void f2fs_put_super(struct super_block *sb)
>  	 * But, the previous checkpoint was not done by umount, it needs to do
>  	 * clean checkpoint again.
>  	 */
> -	if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
> -			!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
> +	if ((is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
> +			!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) &&
> +			!test_opt(sbi, DISABLE_CHECKPOINT)) {
>  		struct cp_control cpc = {
>  			.reason = CP_UMOUNT,
>  		};
> @@ -1007,7 +1033,8 @@ static void f2fs_put_super(struct super_block *sb)
>  	/* be sure to wait for any on-going discard commands */
>  	dropped = f2fs_wait_discard_bios(sbi);
>  
> -	if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped) {
> +	if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped &&
> +			!test_opt(sbi, DISABLE_CHECKPOINT)) {
>  		struct cp_control cpc = {
>  			.reason = CP_UMOUNT | CP_TRIMMED,
>  		};
> @@ -1064,6 +1091,8 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
>  
>  	if (unlikely(f2fs_cp_error(sbi)))
>  		return 0;
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		return 0;
>  
>  	trace_f2fs_sync_fs(sb, sync);
>  
> @@ -1162,7 +1191,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
>  
>  	buf->f_blocks = total_count - start_count;
>  	buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
> -						sbi->current_reserved_blocks;
> +						sbi->current_reserved_blocks -
> +						sbi->unusable_block_count;
>  	if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks)
>  		buf->f_bavail = buf->f_bfree -
>  				F2FS_OPTION(sbi).root_reserved_blocks;
> @@ -1338,6 +1368,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>  	else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
>  		seq_printf(seq, ",alloc_mode=%s", "reuse");
>  
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		seq_puts(seq, ",checkpoint=disable");
> +
>  	if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
>  		seq_printf(seq, ",fsync_mode=%s", "posix");
>  	else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
> @@ -1365,6 +1398,7 @@ static void default_options(struct f2fs_sb_info *sbi)
>  	set_opt(sbi, INLINE_DENTRY);
>  	set_opt(sbi, EXTENT_CACHE);
>  	set_opt(sbi, NOHEAP);
> +	clear_opt(sbi, DISABLE_CHECKPOINT);
>  	sbi->sb->s_flags |= SB_LAZYTIME;
>  	set_opt(sbi, FLUSH_MERGE);
>  	if (blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev)))
> @@ -1387,6 +1421,60 @@ static void default_options(struct f2fs_sb_info *sbi)
>  #ifdef CONFIG_QUOTA
>  static int f2fs_enable_quotas(struct super_block *sb);
>  #endif
> +
> +static void f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
> +{
> +	struct cp_control cpc;
> +	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
> +	unsigned int segno;
> +	int type;
> +
> +	set_sbi_flag(sbi, SBI_CP_DISABLED);
> +
> +	cpc.reason = CP_PAUSE;
> +
> +	mutex_lock(&sbi->gc_mutex);
> +	f2fs_write_checkpoint(sbi, &cpc);
> +
> +	mutex_lock(&dirty_i->seglist_lock);
> +	for (type = 0; type < NR_CURSEG_TYPE; type++) {
> +		for_each_set_bit(segno, dirty_i->dirty_segmap[type],
> +							MAIN_SEGS(sbi)) {
> +			if (IS_DATASEG(type))
> +				sbi->free_ssr_data_block +=
> +					get_valid_blocks(sbi, segno, false);
> +			else
> +				sbi->free_ssr_node_block +=
> +					get_valid_blocks(sbi, segno, false);
> +		}
> +	}
> +	sbi->free_segments = FREE_I(sbi)->free_segments;
> +	mutex_unlock(&dirty_i->seglist_lock);
> +	mutex_unlock(&sbi->gc_mutex);
> +}
> +
> +static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
> +{
> +	struct super_block *sb = sbi->sb;
> +	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
> +
> +	clear_sbi_flag(sbi, SBI_CP_DISABLED);
> +	writeback_inodes_sb(sb, WB_REASON_SYNC);
> +	sync_inodes_sb(sb);
> +
> +	mutex_lock(&dirty_i->seglist_lock);
> +	dirty_to_prefree(sbi);
> +	sbi->free_segments = 0;
> +	sbi->free_ssr_data_block = 0;
> +	sbi->free_ssr_node_block = 0;
> +	mutex_unlock(&dirty_i->seglist_lock);
> +
> +	set_sbi_flag(sbi, SBI_IS_DIRTY);
> +	set_sbi_flag(sbi, SBI_IS_CLOSE);
> +	f2fs_sync_fs(sb, 1);
> +	clear_sbi_flag(sbi, SBI_IS_CLOSE);
> +}
> +
>  static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>  {
>  	struct f2fs_sb_info *sbi = F2FS_SB(sb);
> @@ -1396,6 +1484,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>  	bool need_restart_gc = false;
>  	bool need_stop_gc = false;
>  	bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
> +	bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
> +	bool checkpoint_changed;
>  #ifdef CONFIG_QUOTA
>  	int i, j;
>  #endif
> @@ -1440,6 +1530,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>  	err = parse_options(sb, data);
>  	if (err)
>  		goto restore_opts;
> +	checkpoint_changed =
> +			disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT);
>  
>  	/*
>  	 * Previous and new state of filesystem is RO,
> @@ -1501,6 +1593,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>  		clear_sbi_flag(sbi, SBI_IS_CLOSE);
>  	}
>  
> +	if (checkpoint_changed) {
> +		if (test_opt(sbi, DISABLE_CHECKPOINT))
> +			f2fs_disable_checkpoint(sbi);
> +		else
> +			f2fs_enable_checkpoint(sbi);
> +	}
> +
>  	/*
>  	 * We stop issue flush thread if FS is mounted as RO
>  	 * or if flush_merge is not passed in mount option.
> @@ -2987,7 +3086,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>  		goto free_meta;
>  
>  	/* recover fsynced data */
> -	if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
> +	if (!test_opt(sbi, DISABLE_ROLL_FORWARD) &&
> +			!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
>  		/*
>  		 * mount should be failed, when device has readonly mode, and
>  		 * previous checkpoint was not done by clean system shutdown.
> @@ -3053,6 +3153,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>  				cur_cp_version(F2FS_CKPT(sbi)));
>  	f2fs_update_time(sbi, CP_TIME);
>  	f2fs_update_time(sbi, REQ_TIME);
> +
> +	if (test_opt(sbi, DISABLE_CHECKPOINT))
> +		f2fs_disable_checkpoint(sbi);
> +	else if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
> +		f2fs_enable_checkpoint(sbi);
> +
>  	return 0;
>  
>  free_meta:
> -- 
> 2.18.0.233.g985f88cf7e-goog

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] f2fs: checkpoint disabling
  2018-07-26  0:19 ` Daniel Rosenberg
@ 2018-07-28  0:19   ` kbuild test robot
  -1 siblings, 0 replies; 11+ messages in thread
From: kbuild test robot @ 2018-07-28  0:19 UTC (permalink / raw)
  To: Daniel Rosenberg
  Cc: kbuild-all, Jaegeuk Kim, Chao Yu, Jonathan Corbet,
	linux-f2fs-devel, linux-kernel, linux-doc, linux-fsdevel,
	kernel-team, Daniel Rosenberg

[-- Attachment #1: Type: text/plain, Size: 877 bytes --]

Hi Daniel,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on f2fs/dev-test]
[cannot apply to v4.18-rc6 next-20180727]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Daniel-Rosenberg/f2fs-checkpoint-disabling/20180728-035921
base:   https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git dev-test
config: i386-randconfig-a0-201829 (attached as .config)
compiler: gcc-4.9 (Debian 4.9.4-2) 4.9.4
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

>> ERROR: "__umoddi3" [fs/f2fs/f2fs.ko] undefined!

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 28736 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] f2fs: checkpoint disabling
@ 2018-07-28  0:19   ` kbuild test robot
  0 siblings, 0 replies; 11+ messages in thread
From: kbuild test robot @ 2018-07-28  0:19 UTC (permalink / raw)
  Cc: kbuild-all, Jaegeuk Kim, Chao Yu, Jonathan Corbet,
	linux-f2fs-devel, linux-kernel, linux-doc, linux-fsdevel,
	kernel-team, Daniel Rosenberg

[-- Attachment #1: Type: text/plain, Size: 877 bytes --]

Hi Daniel,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on f2fs/dev-test]
[cannot apply to v4.18-rc6 next-20180727]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Daniel-Rosenberg/f2fs-checkpoint-disabling/20180728-035921
base:   https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git dev-test
config: i386-randconfig-a0-201829 (attached as .config)
compiler: gcc-4.9 (Debian 4.9.4-2) 4.9.4
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

>> ERROR: "__umoddi3" [fs/f2fs/f2fs.ko] undefined!

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 28736 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] f2fs: checkpoint disabling
  2018-07-26  0:19 ` Daniel Rosenberg
@ 2018-07-27 23:22   ` kbuild test robot
  -1 siblings, 0 replies; 11+ messages in thread
From: kbuild test robot @ 2018-07-27 23:22 UTC (permalink / raw)
  To: Daniel Rosenberg
  Cc: kbuild-all, Jaegeuk Kim, Chao Yu, Jonathan Corbet,
	linux-f2fs-devel, linux-kernel, linux-doc, linux-fsdevel,
	kernel-team, Daniel Rosenberg

[-- Attachment #1: Type: text/plain, Size: 4264 bytes --]

Hi Daniel,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on f2fs/dev-test]
[cannot apply to v4.18-rc6 next-20180727]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Daniel-Rosenberg/f2fs-checkpoint-disabling/20180728-035921
base:   https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git dev-test
config: i386-randconfig-x0-07280409 (attached as .config)
compiler: gcc-5 (Debian 5.5.0-3) 5.4.1 20171010
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

   fs/f2fs/data.o: In function `inc_valid_block_count':
>> fs/f2fs/f2fs.h:1717: undefined reference to `__umoddi3'

vim +1717 fs/f2fs/f2fs.h

  1657	
  1658	static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool);
  1659	static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
  1660					 struct inode *inode, blkcnt_t *count)
  1661	{
  1662		blkcnt_t diff = 0, release = 0, seg_diff = 0, seg_rel = 0;
  1663		block_t avail_user_block_count;
  1664		int ret;
  1665	
  1666		ret = dquot_reserve_block(inode, *count);
  1667		if (ret)
  1668			return ret;
  1669	
  1670	#ifdef CONFIG_F2FS_FAULT_INJECTION
  1671		if (time_to_inject(sbi, FAULT_BLOCK)) {
  1672			f2fs_show_injection_info(FAULT_BLOCK);
  1673			release = *count;
  1674			goto enospc;
  1675		}
  1676	#endif
  1677		/*
  1678		 * let's increase this in prior to actual block count change in order
  1679		 * for f2fs_sync_file to avoid data races when deciding checkpoint.
  1680		 */
  1681		percpu_counter_add(&sbi->alloc_valid_block_count, (*count));
  1682	
  1683		spin_lock(&sbi->stat_lock);
  1684		sbi->total_valid_block_count += (block_t)(*count);
  1685		avail_user_block_count = sbi->user_block_count -
  1686						sbi->current_reserved_blocks;
  1687	
  1688		if (!__allow_reserved_blocks(sbi, inode, true))
  1689			avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
  1690		if (test_opt(sbi, DISABLE_CHECKPOINT))
  1691			avail_user_block_count -= sbi->unusable_block_count;
  1692	
  1693		if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
  1694			diff = sbi->total_valid_block_count - avail_user_block_count;
  1695			if (diff > *count)
  1696				diff = *count;
  1697			*count -= diff;
  1698			release = diff;
  1699			sbi->total_valid_block_count -= diff;
  1700			if (!*count) {
  1701				spin_unlock(&sbi->stat_lock);
  1702				goto enospc;
  1703			}
  1704		}
  1705		if (likely(!test_opt(sbi, DISABLE_CHECKPOINT)))
  1706			goto normal;
  1707		if (unlikely(*count > sbi->free_ssr_data_block)) {
  1708			/* We'll need to pull from free. */
  1709			blkcnt_t needed = *count - sbi->free_ssr_data_block;
  1710			blkcnt_t new_segs = ((needed - 1) >>
  1711						sbi->log_blocks_per_seg) + 1;
  1712	
  1713			/* Check if we have enough free */
  1714			if (unlikely(new_segs > sbi->free_segments)) {
  1715				seg_diff = new_segs - sbi->free_segments;
  1716	
> 1717				seg_rel = ((needed - 1) % sbi->log_blocks_per_seg) + 1;
  1718				seg_rel += (seg_diff - 1) << sbi->log_blocks_per_seg;
  1719				new_segs -= seg_diff;
  1720				*count -= seg_rel;
  1721				release += seg_rel;
  1722				if (!*count) {
  1723					spin_unlock(&sbi->stat_lock);
  1724					goto enospc;
  1725				}
  1726			}
  1727	
  1728			sbi->free_segments -= new_segs;
  1729			sbi->free_ssr_data_block += new_segs << sbi->log_blocks_per_seg;
  1730	
  1731		}
  1732		sbi->free_ssr_data_block -= *count;
  1733	normal:
  1734		spin_unlock(&sbi->stat_lock);
  1735	
  1736		if (unlikely(release)) {
  1737			percpu_counter_sub(&sbi->alloc_valid_block_count, release);
  1738			dquot_release_reservation_block(inode, release);
  1739		}
  1740		f2fs_i_blocks_write(inode, *count, true, true);
  1741		return 0;
  1742	
  1743	enospc:
  1744		percpu_counter_sub(&sbi->alloc_valid_block_count, release);
  1745		dquot_release_reservation_block(inode, release);
  1746		return -ENOSPC;
  1747	}
  1748	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 30480 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] f2fs: checkpoint disabling
@ 2018-07-27 23:22   ` kbuild test robot
  0 siblings, 0 replies; 11+ messages in thread
From: kbuild test robot @ 2018-07-27 23:22 UTC (permalink / raw)
  Cc: kbuild-all, Jaegeuk Kim, Chao Yu, Jonathan Corbet,
	linux-f2fs-devel, linux-kernel, linux-doc, linux-fsdevel,
	kernel-team, Daniel Rosenberg

[-- Attachment #1: Type: text/plain, Size: 4264 bytes --]

Hi Daniel,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on f2fs/dev-test]
[cannot apply to v4.18-rc6 next-20180727]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Daniel-Rosenberg/f2fs-checkpoint-disabling/20180728-035921
base:   https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git dev-test
config: i386-randconfig-x0-07280409 (attached as .config)
compiler: gcc-5 (Debian 5.5.0-3) 5.4.1 20171010
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

   fs/f2fs/data.o: In function `inc_valid_block_count':
>> fs/f2fs/f2fs.h:1717: undefined reference to `__umoddi3'

vim +1717 fs/f2fs/f2fs.h

  1657	
  1658	static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool);
  1659	static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
  1660					 struct inode *inode, blkcnt_t *count)
  1661	{
  1662		blkcnt_t diff = 0, release = 0, seg_diff = 0, seg_rel = 0;
  1663		block_t avail_user_block_count;
  1664		int ret;
  1665	
  1666		ret = dquot_reserve_block(inode, *count);
  1667		if (ret)
  1668			return ret;
  1669	
  1670	#ifdef CONFIG_F2FS_FAULT_INJECTION
  1671		if (time_to_inject(sbi, FAULT_BLOCK)) {
  1672			f2fs_show_injection_info(FAULT_BLOCK);
  1673			release = *count;
  1674			goto enospc;
  1675		}
  1676	#endif
  1677		/*
  1678		 * let's increase this in prior to actual block count change in order
  1679		 * for f2fs_sync_file to avoid data races when deciding checkpoint.
  1680		 */
  1681		percpu_counter_add(&sbi->alloc_valid_block_count, (*count));
  1682	
  1683		spin_lock(&sbi->stat_lock);
  1684		sbi->total_valid_block_count += (block_t)(*count);
  1685		avail_user_block_count = sbi->user_block_count -
  1686						sbi->current_reserved_blocks;
  1687	
  1688		if (!__allow_reserved_blocks(sbi, inode, true))
  1689			avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
  1690		if (test_opt(sbi, DISABLE_CHECKPOINT))
  1691			avail_user_block_count -= sbi->unusable_block_count;
  1692	
  1693		if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
  1694			diff = sbi->total_valid_block_count - avail_user_block_count;
  1695			if (diff > *count)
  1696				diff = *count;
  1697			*count -= diff;
  1698			release = diff;
  1699			sbi->total_valid_block_count -= diff;
  1700			if (!*count) {
  1701				spin_unlock(&sbi->stat_lock);
  1702				goto enospc;
  1703			}
  1704		}
  1705		if (likely(!test_opt(sbi, DISABLE_CHECKPOINT)))
  1706			goto normal;
  1707		if (unlikely(*count > sbi->free_ssr_data_block)) {
  1708			/* We'll need to pull from free. */
  1709			blkcnt_t needed = *count - sbi->free_ssr_data_block;
  1710			blkcnt_t new_segs = ((needed - 1) >>
  1711						sbi->log_blocks_per_seg) + 1;
  1712	
  1713			/* Check if we have enough free */
  1714			if (unlikely(new_segs > sbi->free_segments)) {
  1715				seg_diff = new_segs - sbi->free_segments;
  1716	
> 1717				seg_rel = ((needed - 1) % sbi->log_blocks_per_seg) + 1;
  1718				seg_rel += (seg_diff - 1) << sbi->log_blocks_per_seg;
  1719				new_segs -= seg_diff;
  1720				*count -= seg_rel;
  1721				release += seg_rel;
  1722				if (!*count) {
  1723					spin_unlock(&sbi->stat_lock);
  1724					goto enospc;
  1725				}
  1726			}
  1727	
  1728			sbi->free_segments -= new_segs;
  1729			sbi->free_ssr_data_block += new_segs << sbi->log_blocks_per_seg;
  1730	
  1731		}
  1732		sbi->free_ssr_data_block -= *count;
  1733	normal:
  1734		spin_unlock(&sbi->stat_lock);
  1735	
  1736		if (unlikely(release)) {
  1737			percpu_counter_sub(&sbi->alloc_valid_block_count, release);
  1738			dquot_release_reservation_block(inode, release);
  1739		}
  1740		f2fs_i_blocks_write(inode, *count, true, true);
  1741		return 0;
  1742	
  1743	enospc:
  1744		percpu_counter_sub(&sbi->alloc_valid_block_count, release);
  1745		dquot_release_reservation_block(inode, release);
  1746		return -ENOSPC;
  1747	}
  1748	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 30480 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2] f2fs: checkpoint disabling
@ 2018-07-26  0:19 ` Daniel Rosenberg
  0 siblings, 0 replies; 11+ messages in thread
From: Daniel Rosenberg @ 2018-07-26  0:19 UTC (permalink / raw)
  To: Jaegeuk Kim, Chao Yu, Jonathan Corbet, linux-f2fs-devel
  Cc: linux-kernel, linux-doc, linux-fsdevel, kernel-team, Daniel Rosenberg

This adds a lightweight non-persistent snapshotting scheme to f2fs.

To use, mount with the option checkpoint=disable, and to return to
normal operation, remount with checkpoint=enable. If the filesystem
is shut down before remounting with checkpoint=enable, it will revert
back to its apparent state when it was first mounted with
checkpoint=disable. This is useful for situations where you wish to be
able to roll back the state of the disk in case of some critical
failure.

Signed-off-by: Daniel Rosenberg <drosen@google.com>
---

Included changes suggested by Jaegeuk and Chao.
it now holds the gc lock around setting up free space tracking in
f2fs_disable_checkpoint. I wasn't quite sure what was mean by
the comment on should_update_outplace, but I've changed it to return
true for NEW_ADDR as well. It may be better to instead skip over the
reset of that block if old_blkaddr is NEW_ADDR, because I think that
means the data has not yet been committed, and thus wouldn't be
overwriting data from the previous checkpoint.

We're currently using this during updates to extend the period of time
that we can safely roll back a faulty update. After performing an update
and reboot, we'd mount in this mode. If there ends up being some sort of
fatal error, data changes are automatically reverted, allowing us to
revert to the state before the update without worrying about any incompatible
changes that may've been made to data while running under the faulty system.
It increases our ability to revert a faulty update from up to mounting user
data partitions to a bit farther along.

 Documentation/filesystems/f2fs.txt |   5 ++
 fs/f2fs/data.c                     |  23 ++++++
 fs/f2fs/f2fs.h                     |  55 +++++++++++++-
 fs/f2fs/file.c                     |  18 +++++
 fs/f2fs/gc.c                       |   4 +
 fs/f2fs/segment.c                  |  58 ++++++++++++---
 fs/f2fs/segment.h                  |  26 +++++++
 fs/f2fs/super.c                    | 116 +++++++++++++++++++++++++++--
 8 files changed, 288 insertions(+), 17 deletions(-)

diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 69f8de9957397..a026b353a99d4 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -193,6 +193,11 @@ fsync_mode=%s          Control the policy of fsync. Currently supports "posix",
                        non-atomic files likewise "nobarrier" mount option.
 test_dummy_encryption  Enable dummy encryption, which provides a fake fscrypt
                        context. The fake fscrypt context is used by xfstests.
+checkpoint=%s          Set to "disable" to turn off checkpointing. Set to "enable"
+                       to reenable checkpointing. Is enabled by default. While
+                       disabled, any unmounting or unexpected shutdowns will cause
+                       the filesystem contents to appear as they did when the
+                       filesystem was mounted with that option.
 
 ================================================================================
 DEBUGFS ENTRIES
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 6b8ca5011bfd6..8f32df9b7b8f3 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1690,6 +1690,19 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
 
 	if (test_opt(sbi, LFS))
 		return true;
+	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+		struct seg_entry *se;
+		unsigned int segno, offset;
+
+		if (!fio || fio->old_blkaddr == NULL_ADDR ||
+				fio->old_blkaddr == NEW_ADDR)
+			return true;
+		segno = GET_SEGNO(sbi, fio->old_blkaddr);
+		se = get_seg_entry(sbi, segno);
+		offset = GET_BLKOFF_FROM_SEG0(sbi, fio->old_blkaddr);
+		if (f2fs_test_bit(offset, se->ckpt_valid_map))
+			return true;
+	}
 	if (S_ISDIR(inode->i_mode))
 		return true;
 	if (f2fs_is_atomic_file(inode))
@@ -1717,10 +1730,13 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
 {
 	struct page *page = fio->page;
 	struct inode *inode = page->mapping->host;
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct dnode_of_data dn;
 	struct extent_info ei = {0,0,0};
 	struct node_info ni;
 	bool ipu_force = false;
+	bool need_tmp_grab = test_opt(sbi, DISABLE_CHECKPOINT);
+	blkcnt_t tmp_block = 1;
 	int err = 0;
 
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -1798,6 +1814,11 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
 	if (err)
 		goto out_writepage;
 
+	if (need_tmp_grab) {
+		err = inc_valid_block_count(sbi, dn.inode, &tmp_block);
+		if (err)
+			goto out_writepage;
+	}
 	set_page_writeback(page);
 	ClearPageError(page);
 
@@ -1807,6 +1828,8 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
 	set_inode_flag(inode, FI_APPEND_WRITE);
 	if (page->index == 0)
 		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
+	if (need_tmp_grab)
+		dec_valid_block_count(sbi, dn.inode, tmp_block);
 out_writepage:
 	f2fs_put_dnode(&dn);
 out:
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 0374f069520cf..f594957f9d0cd 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -97,6 +97,7 @@ extern char *f2fs_fault_name[FAULT_MAX];
 #define F2FS_MOUNT_QUOTA		0x00400000
 #define F2FS_MOUNT_INLINE_XATTR_SIZE	0x00800000
 #define F2FS_MOUNT_RESERVE_ROOT		0x01000000
+#define F2FS_MOUNT_DISABLE_CHECKPOINT	0x02000000
 
 #define F2FS_OPTION(sbi)	((sbi)->mount_opt)
 #define clear_opt(sbi, option)	(F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -175,6 +176,7 @@ enum {
 #define	CP_RECOVERY	0x00000008
 #define	CP_DISCARD	0x00000010
 #define CP_TRIMMED	0x00000020
+#define CP_PAUSE	0x00000040
 
 #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
 #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
@@ -1074,6 +1076,7 @@ enum {
 	SBI_NEED_CP,				/* need to checkpoint */
 	SBI_DISABLE_ATOMIC_WRITE,		/* turn off atomic write */
 	SBI_IS_SHUTDOWN,			/* shutdown by ioctl */
+	SBI_CP_DISABLED,			/* CP was disabled last mount */
 };
 
 enum {
@@ -1199,6 +1202,12 @@ struct f2fs_sb_info {
 	block_t reserved_blocks;		/* configurable reserved blocks */
 	block_t current_reserved_blocks;	/* current reserved blocks */
 
+	/* Additional tracking for no checkpoint mode */
+	block_t unusable_block_count;		/* # of blocks saved by last cp */
+	block_t free_ssr_data_block;
+	block_t free_ssr_node_block;
+	block_t free_segments;
+
 	unsigned int nquota_files;		/* # of quota sysfile */
 
 	u32 s_next_generation;			/* for NFS support */
@@ -1650,7 +1659,7 @@ static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool);
 static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
 				 struct inode *inode, blkcnt_t *count)
 {
-	blkcnt_t diff = 0, release = 0;
+	blkcnt_t diff = 0, release = 0, seg_diff = 0, seg_rel = 0;
 	block_t avail_user_block_count;
 	int ret;
 
@@ -1678,6 +1687,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
 
 	if (!__allow_reserved_blocks(sbi, inode, true))
 		avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		avail_user_block_count -= sbi->unusable_block_count;
 
 	if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
 		diff = sbi->total_valid_block_count - avail_user_block_count;
@@ -1691,6 +1702,35 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
 			goto enospc;
 		}
 	}
+	if (likely(!test_opt(sbi, DISABLE_CHECKPOINT)))
+		goto normal;
+	if (unlikely(*count > sbi->free_ssr_data_block)) {
+		/* We'll need to pull from free. */
+		blkcnt_t needed = *count - sbi->free_ssr_data_block;
+		blkcnt_t new_segs = ((needed - 1) >>
+					sbi->log_blocks_per_seg) + 1;
+
+		/* Check if we have enough free */
+		if (unlikely(new_segs > sbi->free_segments)) {
+			seg_diff = new_segs - sbi->free_segments;
+
+			seg_rel = ((needed - 1) % sbi->log_blocks_per_seg) + 1;
+			seg_rel += (seg_diff - 1) << sbi->log_blocks_per_seg;
+			new_segs -= seg_diff;
+			*count -= seg_rel;
+			release += seg_rel;
+			if (!*count) {
+				spin_unlock(&sbi->stat_lock);
+				goto enospc;
+			}
+		}
+
+		sbi->free_segments -= new_segs;
+		sbi->free_ssr_data_block += new_segs << sbi->log_blocks_per_seg;
+
+	}
+	sbi->free_ssr_data_block -= *count;
+normal:
 	spin_unlock(&sbi->stat_lock);
 
 	if (unlikely(release)) {
@@ -1887,6 +1927,8 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
 
 	if (!__allow_reserved_blocks(sbi, inode, false))
 		valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		valid_block_count += sbi->unusable_block_count;
 
 	if (unlikely(valid_block_count > sbi->user_block_count)) {
 		spin_unlock(&sbi->stat_lock);
@@ -1899,6 +1941,17 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
 		goto enospc;
 	}
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+		if (unlikely(!sbi->free_ssr_node_block)) {
+			if (unlikely(!sbi->free_segments)) {
+				spin_unlock(&sbi->stat_lock);
+				goto enospc;
+			}
+			sbi->free_segments--;
+		}
+		sbi->free_ssr_node_block--;
+	}
+
 	sbi->total_valid_node_count++;
 	sbi->total_valid_block_count++;
 	spin_unlock(&sbi->stat_lock);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 3a5c35fa06039..20124f5d287b8 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -150,6 +150,9 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	enum cp_reason_type cp_reason = CP_NO_NEEDED;
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		return CP_NO_NEEDED;
+
 	if (!S_ISREG(inode->i_mode))
 		cp_reason = CP_NON_REGULAR;
 	else if (inode->i_nlink != 1)
@@ -2070,6 +2073,9 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
 	if (f2fs_readonly(sbi->sb))
 		return -EROFS;
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		return -EINVAL;
+
 	ret = mnt_want_write_file(filp);
 	if (ret)
 		return ret;
@@ -2112,6 +2118,9 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
 		return -EINVAL;
 	}
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		return -EINVAL;
+
 	ret = mnt_want_write_file(filp);
 	if (ret)
 		return ret;
@@ -2147,6 +2156,12 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
 	if (f2fs_readonly(sbi->sb))
 		return -EROFS;
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+		f2fs_msg(sbi->sb, KERN_INFO,
+			"Skipping Checkpoint. Checkpoints currently disabled.");
+		return -EINVAL;
+	}
+
 	ret = mnt_want_write_file(filp);
 	if (ret)
 		return ret;
@@ -2513,6 +2528,9 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
 	if (f2fs_readonly(sbi->sb))
 		return -EROFS;
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		return -EINVAL;
+
 	if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
 							sizeof(range)))
 		return -EFAULT;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index fe0da3e05ef14..1326f5b6aa99d 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -60,6 +60,9 @@ static int gc_thread_func(void *data)
 		}
 #endif
 
+		if (test_opt(sbi, DISABLE_CHECKPOINT))
+			goto do_balance;
+
 		if (!sb_start_write_trylock(sbi->sb))
 			continue;
 
@@ -105,6 +108,7 @@ static int gc_thread_func(void *data)
 		trace_f2fs_background_gc(sbi->sb, wait_ms,
 				prefree_segments(sbi), free_segments(sbi));
 
+do_balance:
 		/* balancing f2fs's metadata periodically */
 		f2fs_balance_fs_bg(sbi);
 next:
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 17354089b4ab2..3d5d8098d4ead 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -179,6 +179,10 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
 		return false;
 	if (sbi->gc_mode == GC_URGENT)
 		return true;
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		return true;
+	if (sbi->gc_mode == GC_URGENT)
+		return true;
 
 	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
 			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
@@ -485,6 +489,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
 	 * We should do GC or end up with checkpoint, if there are so many dirty
 	 * dir/node pages without enough free segments.
 	 */
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		return;
 	if (has_not_enough_free_secs(sbi, 0, 0)) {
 		mutex_lock(&sbi->gc_mutex);
 		f2fs_gc(sbi, false, false, NULL_SEGNO);
@@ -525,8 +531,10 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
 			f2fs_sync_dirty_inodes(sbi, FILE_INODE);
 			blk_finish_plug(&plug);
 		}
-		f2fs_sync_fs(sbi->sb, true);
-		stat_inc_bg_cp_count(sbi->stat_info);
+		if (!test_opt(sbi, DISABLE_CHECKPOINT)) {
+			f2fs_sync_fs(sbi->sb, true);
+			stat_inc_bg_cp_count(sbi->stat_info);
+		}
 	}
 }
 
@@ -741,8 +749,8 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
 	return ret;
 }
 
-static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
-		enum dirty_type dirty_type)
+void __locate_dirty_segment(struct f2fs_sb_info *sbi,
+		unsigned int segno, enum dirty_type dirty_type)
 {
 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
 
@@ -766,8 +774,8 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
 	}
 }
 
-static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
-		enum dirty_type dirty_type)
+void __remove_dirty_segment(struct f2fs_sb_info *sbi,
+		unsigned int segno, enum dirty_type dirty_type)
 {
 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
 
@@ -787,6 +795,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
 	}
 }
 
+
 /*
  * Should not occur error such as -ENOMEM.
  * Adding dirty entry into seglist is not critical operation.
@@ -795,7 +804,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 {
 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
-	unsigned short valid_blocks;
+	unsigned short valid_blocks, ckpt_valid_blocks;
 
 	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
 		return;
@@ -803,8 +812,10 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 	mutex_lock(&dirty_i->seglist_lock);
 
 	valid_blocks = get_valid_blocks(sbi, segno, false);
+	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
 
-	if (valid_blocks == 0) {
+	if (valid_blocks == 0 && (ckpt_valid_blocks == sbi->blocks_per_seg ||
+					!test_opt(sbi, DISABLE_CHECKPOINT))) {
 		__locate_dirty_segment(sbi, segno, PRE);
 		__remove_dirty_segment(sbi, segno, DIRTY);
 	} else if (valid_blocks < sbi->blocks_per_seg) {
@@ -1926,7 +1937,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
 			sbi->discard_blks--;
 
 		/* don't overwrite by SSR to keep node chain */
-		if (IS_NODESEG(se->type)) {
+		if (IS_NODESEG(se->type) &&
+				!test_opt(sbi, DISABLE_CHECKPOINT)) {
 			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
 				se->ckpt_valid_blocks++;
 		}
@@ -1948,6 +1960,25 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
 			f2fs_bug_on(sbi, 1);
 			se->valid_blocks++;
 			del = 0;
+		} else {
+			/* If checkpoints are off, we must not reuse data that
+			 * was used in the previous checkpoint. If it was used
+			 * before, we must track that to know how much space we
+			 * really have
+			 */
+			if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
+				spin_lock(&sbi->stat_lock);
+				sbi->unusable_block_count++;
+				spin_unlock(&sbi->stat_lock);
+			} else {
+				spin_lock(&sbi->stat_lock);
+				if (IS_DATASEG(se->type))
+					sbi->free_ssr_data_block++;
+				else
+					sbi->free_ssr_node_block++;
+				spin_unlock(&sbi->stat_lock);
+			}
+
 		}
 
 		if (f2fs_discard_en(sbi) &&
@@ -2237,7 +2268,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
 		return SIT_I(sbi)->last_victim[ALLOC_NEXT];
 
 	/* find segments from 0 to reuse freed segments */
-	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
+	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE
+			|| test_opt(sbi, DISABLE_CHECKPOINT))
 		return 0;
 
 	return CURSEG_I(sbi, type)->segno;
@@ -2389,7 +2421,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
 	else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
 					type == CURSEG_WARM_NODE)
 		new_curseg(sbi, type, false);
-	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
+	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
+			!test_opt(sbi, DISABLE_CHECKPOINT))
 		new_curseg(sbi, type, false);
 	else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
 		change_curseg(sbi, type);
@@ -3568,6 +3601,9 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 			sit_i->dirty_sentries--;
 			ses->entry_cnt--;
 		}
+		spin_lock(&sbi->stat_lock);
+		sbi->unusable_block_count = 0;
+		spin_unlock(&sbi->stat_lock);
 
 		if (to_journal)
 			up_write(&curseg->journal_rwsem);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 50495515f0a0d..37222e133d52a 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -342,6 +342,12 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
 		return get_seg_entry(sbi, segno)->valid_blocks;
 }
 
+static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
+				unsigned int segno)
+{
+	return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+}
+
 static inline void seg_info_from_raw_sit(struct seg_entry *se,
 					struct f2fs_sit_entry *rs)
 {
@@ -524,6 +530,26 @@ static inline unsigned int dirty_segments(struct f2fs_sb_info *sbi)
 		DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_NODE];
 }
 
+void __locate_dirty_segment(struct f2fs_sb_info *sbi,
+		unsigned int segno, enum dirty_type dirty_type);
+
+void __remove_dirty_segment(struct f2fs_sb_info *sbi,
+		unsigned int segno, enum dirty_type dirty_type);
+
+/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */
+static inline void dirty_to_prefree(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	unsigned int segno;
+
+	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+		if (!get_valid_blocks(sbi, segno, false)) {
+			__locate_dirty_segment(sbi, segno, PRE);
+			__remove_dirty_segment(sbi, segno, DIRTY);
+		}
+	}
+}
+
 static inline int overprovision_segments(struct f2fs_sb_info *sbi)
 {
 	return SM_I(sbi)->ovp_segments;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 7187885da2515..7568ae00319cb 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -132,6 +132,7 @@ enum {
 	Opt_alloc,
 	Opt_fsync,
 	Opt_test_dummy_encryption,
+	Opt_checkpoint,
 	Opt_err,
 };
 
@@ -189,6 +190,7 @@ static match_table_t f2fs_tokens = {
 	{Opt_alloc, "alloc_mode=%s"},
 	{Opt_fsync, "fsync_mode=%s"},
 	{Opt_test_dummy_encryption, "test_dummy_encryption"},
+	{Opt_checkpoint, "checkpoint=%s"},
 	{Opt_err, NULL},
 };
 
@@ -764,6 +766,23 @@ static int parse_options(struct super_block *sb, char *options)
 					"Test dummy encryption mount option ignored");
 #endif
 			break;
+		case Opt_checkpoint:
+			name = match_strdup(&args[0]);
+			if (!name)
+				return -ENOMEM;
+
+			if (strlen(name) == 6 &&
+					!strncmp(name, "enable", 6)) {
+				clear_opt(sbi, DISABLE_CHECKPOINT);
+			} else if (strlen(name) == 7 &&
+					!strncmp(name, "disable", 7)) {
+				set_opt(sbi, DISABLE_CHECKPOINT);
+			} else {
+				kfree(name);
+				return -EINVAL;
+			}
+			kfree(name);
+			break;
 		default:
 			f2fs_msg(sb, KERN_ERR,
 				"Unrecognized mount option \"%s\" or missing value",
@@ -809,6 +828,12 @@ static int parse_options(struct super_block *sb, char *options)
 		}
 	}
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) {
+		f2fs_msg(sb, KERN_ERR,
+				"LFS not compatible with checkpoint=disable\n");
+		return -EINVAL;
+	}
+
 	/* Not pass down write hints if the number of active logs is lesser
 	 * than NR_CURSEG_TYPE.
 	 */
@@ -996,8 +1021,9 @@ static void f2fs_put_super(struct super_block *sb)
 	 * But, the previous checkpoint was not done by umount, it needs to do
 	 * clean checkpoint again.
 	 */
-	if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
-			!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+	if ((is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
+			!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) &&
+			!test_opt(sbi, DISABLE_CHECKPOINT)) {
 		struct cp_control cpc = {
 			.reason = CP_UMOUNT,
 		};
@@ -1007,7 +1033,8 @@ static void f2fs_put_super(struct super_block *sb)
 	/* be sure to wait for any on-going discard commands */
 	dropped = f2fs_wait_discard_bios(sbi);
 
-	if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped) {
+	if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped &&
+			!test_opt(sbi, DISABLE_CHECKPOINT)) {
 		struct cp_control cpc = {
 			.reason = CP_UMOUNT | CP_TRIMMED,
 		};
@@ -1064,6 +1091,8 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return 0;
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		return 0;
 
 	trace_f2fs_sync_fs(sb, sync);
 
@@ -1162,7 +1191,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 	buf->f_blocks = total_count - start_count;
 	buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
-						sbi->current_reserved_blocks;
+						sbi->current_reserved_blocks -
+						sbi->unusable_block_count;
 	if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks)
 		buf->f_bavail = buf->f_bfree -
 				F2FS_OPTION(sbi).root_reserved_blocks;
@@ -1338,6 +1368,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 	else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
 		seq_printf(seq, ",alloc_mode=%s", "reuse");
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		seq_puts(seq, ",checkpoint=disable");
+
 	if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
 		seq_printf(seq, ",fsync_mode=%s", "posix");
 	else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
@@ -1365,6 +1398,7 @@ static void default_options(struct f2fs_sb_info *sbi)
 	set_opt(sbi, INLINE_DENTRY);
 	set_opt(sbi, EXTENT_CACHE);
 	set_opt(sbi, NOHEAP);
+	clear_opt(sbi, DISABLE_CHECKPOINT);
 	sbi->sb->s_flags |= SB_LAZYTIME;
 	set_opt(sbi, FLUSH_MERGE);
 	if (blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev)))
@@ -1387,6 +1421,60 @@ static void default_options(struct f2fs_sb_info *sbi)
 #ifdef CONFIG_QUOTA
 static int f2fs_enable_quotas(struct super_block *sb);
 #endif
+
+static void f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
+{
+	struct cp_control cpc;
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	unsigned int segno;
+	int type;
+
+	set_sbi_flag(sbi, SBI_CP_DISABLED);
+
+	cpc.reason = CP_PAUSE;
+
+	mutex_lock(&sbi->gc_mutex);
+	f2fs_write_checkpoint(sbi, &cpc);
+
+	mutex_lock(&dirty_i->seglist_lock);
+	for (type = 0; type < NR_CURSEG_TYPE; type++) {
+		for_each_set_bit(segno, dirty_i->dirty_segmap[type],
+							MAIN_SEGS(sbi)) {
+			if (IS_DATASEG(type))
+				sbi->free_ssr_data_block +=
+					get_valid_blocks(sbi, segno, false);
+			else
+				sbi->free_ssr_node_block +=
+					get_valid_blocks(sbi, segno, false);
+		}
+	}
+	sbi->free_segments = FREE_I(sbi)->free_segments;
+	mutex_unlock(&dirty_i->seglist_lock);
+	mutex_unlock(&sbi->gc_mutex);
+}
+
+static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
+{
+	struct super_block *sb = sbi->sb;
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+
+	clear_sbi_flag(sbi, SBI_CP_DISABLED);
+	writeback_inodes_sb(sb, WB_REASON_SYNC);
+	sync_inodes_sb(sb);
+
+	mutex_lock(&dirty_i->seglist_lock);
+	dirty_to_prefree(sbi);
+	sbi->free_segments = 0;
+	sbi->free_ssr_data_block = 0;
+	sbi->free_ssr_node_block = 0;
+	mutex_unlock(&dirty_i->seglist_lock);
+
+	set_sbi_flag(sbi, SBI_IS_DIRTY);
+	set_sbi_flag(sbi, SBI_IS_CLOSE);
+	f2fs_sync_fs(sb, 1);
+	clear_sbi_flag(sbi, SBI_IS_CLOSE);
+}
+
 static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -1396,6 +1484,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	bool need_restart_gc = false;
 	bool need_stop_gc = false;
 	bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
+	bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
+	bool checkpoint_changed;
 #ifdef CONFIG_QUOTA
 	int i, j;
 #endif
@@ -1440,6 +1530,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	err = parse_options(sb, data);
 	if (err)
 		goto restore_opts;
+	checkpoint_changed =
+			disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT);
 
 	/*
 	 * Previous and new state of filesystem is RO,
@@ -1501,6 +1593,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 		clear_sbi_flag(sbi, SBI_IS_CLOSE);
 	}
 
+	if (checkpoint_changed) {
+		if (test_opt(sbi, DISABLE_CHECKPOINT))
+			f2fs_disable_checkpoint(sbi);
+		else
+			f2fs_enable_checkpoint(sbi);
+	}
+
 	/*
 	 * We stop issue flush thread if FS is mounted as RO
 	 * or if flush_merge is not passed in mount option.
@@ -2987,7 +3086,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 		goto free_meta;
 
 	/* recover fsynced data */
-	if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
+	if (!test_opt(sbi, DISABLE_ROLL_FORWARD) &&
+			!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
 		/*
 		 * mount should be failed, when device has readonly mode, and
 		 * previous checkpoint was not done by clean system shutdown.
@@ -3053,6 +3153,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 				cur_cp_version(F2FS_CKPT(sbi)));
 	f2fs_update_time(sbi, CP_TIME);
 	f2fs_update_time(sbi, REQ_TIME);
+
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		f2fs_disable_checkpoint(sbi);
+	else if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
+		f2fs_enable_checkpoint(sbi);
+
 	return 0;
 
 free_meta:
-- 
2.18.0.233.g985f88cf7e-goog


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v2] f2fs: checkpoint disabling
@ 2018-07-26  0:19 ` Daniel Rosenberg
  0 siblings, 0 replies; 11+ messages in thread
From: Daniel Rosenberg @ 2018-07-26  0:19 UTC (permalink / raw)
  To: Jaegeuk Kim, Chao Yu, Jonathan Corbet, linux-f2fs-devel
  Cc: linux-kernel, linux-doc, linux-fsdevel, kernel-team, Daniel Rosenberg

This adds a lightweight non-persistent snapshotting scheme to f2fs.

To use, mount with the option checkpoint=disable, and to return to
normal operation, remount with checkpoint=enable. If the filesystem
is shut down before remounting with checkpoint=enable, it will revert
back to its apparent state when it was first mounted with
checkpoint=disable. This is useful for situations where you wish to be
able to roll back the state of the disk in case of some critical
failure.

Signed-off-by: Daniel Rosenberg <drosen@google.com>
---

Included changes suggested by Jaegeuk and Chao.
it now holds the gc lock around setting up free space tracking in
f2fs_disable_checkpoint. I wasn't quite sure what was mean by
the comment on should_update_outplace, but I've changed it to return
true for NEW_ADDR as well. It may be better to instead skip over the
reset of that block if old_blkaddr is NEW_ADDR, because I think that
means the data has not yet been committed, and thus wouldn't be
overwriting data from the previous checkpoint.

We're currently using this during updates to extend the period of time
that we can safely roll back a faulty update. After performing an update
and reboot, we'd mount in this mode. If there ends up being some sort of
fatal error, data changes are automatically reverted, allowing us to
revert to the state before the update without worrying about any incompatible
changes that may've been made to data while running under the faulty system.
It increases our ability to revert a faulty update from up to mounting user
data partitions to a bit farther along.

 Documentation/filesystems/f2fs.txt |   5 ++
 fs/f2fs/data.c                     |  23 ++++++
 fs/f2fs/f2fs.h                     |  55 +++++++++++++-
 fs/f2fs/file.c                     |  18 +++++
 fs/f2fs/gc.c                       |   4 +
 fs/f2fs/segment.c                  |  58 ++++++++++++---
 fs/f2fs/segment.h                  |  26 +++++++
 fs/f2fs/super.c                    | 116 +++++++++++++++++++++++++++--
 8 files changed, 288 insertions(+), 17 deletions(-)

diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 69f8de9957397..a026b353a99d4 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -193,6 +193,11 @@ fsync_mode=%s          Control the policy of fsync. Currently supports "posix",
                        non-atomic files likewise "nobarrier" mount option.
 test_dummy_encryption  Enable dummy encryption, which provides a fake fscrypt
                        context. The fake fscrypt context is used by xfstests.
+checkpoint=%s          Set to "disable" to turn off checkpointing. Set to "enable"
+                       to reenable checkpointing. Is enabled by default. While
+                       disabled, any unmounting or unexpected shutdowns will cause
+                       the filesystem contents to appear as they did when the
+                       filesystem was mounted with that option.
 
 ================================================================================
 DEBUGFS ENTRIES
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 6b8ca5011bfd6..8f32df9b7b8f3 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1690,6 +1690,19 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
 
 	if (test_opt(sbi, LFS))
 		return true;
+	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+		struct seg_entry *se;
+		unsigned int segno, offset;
+
+		if (!fio || fio->old_blkaddr == NULL_ADDR ||
+				fio->old_blkaddr == NEW_ADDR)
+			return true;
+		segno = GET_SEGNO(sbi, fio->old_blkaddr);
+		se = get_seg_entry(sbi, segno);
+		offset = GET_BLKOFF_FROM_SEG0(sbi, fio->old_blkaddr);
+		if (f2fs_test_bit(offset, se->ckpt_valid_map))
+			return true;
+	}
 	if (S_ISDIR(inode->i_mode))
 		return true;
 	if (f2fs_is_atomic_file(inode))
@@ -1717,10 +1730,13 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
 {
 	struct page *page = fio->page;
 	struct inode *inode = page->mapping->host;
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct dnode_of_data dn;
 	struct extent_info ei = {0,0,0};
 	struct node_info ni;
 	bool ipu_force = false;
+	bool need_tmp_grab = test_opt(sbi, DISABLE_CHECKPOINT);
+	blkcnt_t tmp_block = 1;
 	int err = 0;
 
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -1798,6 +1814,11 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
 	if (err)
 		goto out_writepage;
 
+	if (need_tmp_grab) {
+		err = inc_valid_block_count(sbi, dn.inode, &tmp_block);
+		if (err)
+			goto out_writepage;
+	}
 	set_page_writeback(page);
 	ClearPageError(page);
 
@@ -1807,6 +1828,8 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
 	set_inode_flag(inode, FI_APPEND_WRITE);
 	if (page->index == 0)
 		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
+	if (need_tmp_grab)
+		dec_valid_block_count(sbi, dn.inode, tmp_block);
 out_writepage:
 	f2fs_put_dnode(&dn);
 out:
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 0374f069520cf..f594957f9d0cd 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -97,6 +97,7 @@ extern char *f2fs_fault_name[FAULT_MAX];
 #define F2FS_MOUNT_QUOTA		0x00400000
 #define F2FS_MOUNT_INLINE_XATTR_SIZE	0x00800000
 #define F2FS_MOUNT_RESERVE_ROOT		0x01000000
+#define F2FS_MOUNT_DISABLE_CHECKPOINT	0x02000000
 
 #define F2FS_OPTION(sbi)	((sbi)->mount_opt)
 #define clear_opt(sbi, option)	(F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -175,6 +176,7 @@ enum {
 #define	CP_RECOVERY	0x00000008
 #define	CP_DISCARD	0x00000010
 #define CP_TRIMMED	0x00000020
+#define CP_PAUSE	0x00000040
 
 #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
 #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
@@ -1074,6 +1076,7 @@ enum {
 	SBI_NEED_CP,				/* need to checkpoint */
 	SBI_DISABLE_ATOMIC_WRITE,		/* turn off atomic write */
 	SBI_IS_SHUTDOWN,			/* shutdown by ioctl */
+	SBI_CP_DISABLED,			/* CP was disabled last mount */
 };
 
 enum {
@@ -1199,6 +1202,12 @@ struct f2fs_sb_info {
 	block_t reserved_blocks;		/* configurable reserved blocks */
 	block_t current_reserved_blocks;	/* current reserved blocks */
 
+	/* Additional tracking for no checkpoint mode */
+	block_t unusable_block_count;		/* # of blocks saved by last cp */
+	block_t free_ssr_data_block;
+	block_t free_ssr_node_block;
+	block_t free_segments;
+
 	unsigned int nquota_files;		/* # of quota sysfile */
 
 	u32 s_next_generation;			/* for NFS support */
@@ -1650,7 +1659,7 @@ static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool);
 static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
 				 struct inode *inode, blkcnt_t *count)
 {
-	blkcnt_t diff = 0, release = 0;
+	blkcnt_t diff = 0, release = 0, seg_diff = 0, seg_rel = 0;
 	block_t avail_user_block_count;
 	int ret;
 
@@ -1678,6 +1687,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
 
 	if (!__allow_reserved_blocks(sbi, inode, true))
 		avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		avail_user_block_count -= sbi->unusable_block_count;
 
 	if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
 		diff = sbi->total_valid_block_count - avail_user_block_count;
@@ -1691,6 +1702,35 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
 			goto enospc;
 		}
 	}
+	if (likely(!test_opt(sbi, DISABLE_CHECKPOINT)))
+		goto normal;
+	if (unlikely(*count > sbi->free_ssr_data_block)) {
+		/* We'll need to pull from free. */
+		blkcnt_t needed = *count - sbi->free_ssr_data_block;
+		blkcnt_t new_segs = ((needed - 1) >>
+					sbi->log_blocks_per_seg) + 1;
+
+		/* Check if we have enough free */
+		if (unlikely(new_segs > sbi->free_segments)) {
+			seg_diff = new_segs - sbi->free_segments;
+
+			seg_rel = ((needed - 1) % sbi->log_blocks_per_seg) + 1;
+			seg_rel += (seg_diff - 1) << sbi->log_blocks_per_seg;
+			new_segs -= seg_diff;
+			*count -= seg_rel;
+			release += seg_rel;
+			if (!*count) {
+				spin_unlock(&sbi->stat_lock);
+				goto enospc;
+			}
+		}
+
+		sbi->free_segments -= new_segs;
+		sbi->free_ssr_data_block += new_segs << sbi->log_blocks_per_seg;
+
+	}
+	sbi->free_ssr_data_block -= *count;
+normal:
 	spin_unlock(&sbi->stat_lock);
 
 	if (unlikely(release)) {
@@ -1887,6 +1927,8 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
 
 	if (!__allow_reserved_blocks(sbi, inode, false))
 		valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		valid_block_count += sbi->unusable_block_count;
 
 	if (unlikely(valid_block_count > sbi->user_block_count)) {
 		spin_unlock(&sbi->stat_lock);
@@ -1899,6 +1941,17 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
 		goto enospc;
 	}
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+		if (unlikely(!sbi->free_ssr_node_block)) {
+			if (unlikely(!sbi->free_segments)) {
+				spin_unlock(&sbi->stat_lock);
+				goto enospc;
+			}
+			sbi->free_segments--;
+		}
+		sbi->free_ssr_node_block--;
+	}
+
 	sbi->total_valid_node_count++;
 	sbi->total_valid_block_count++;
 	spin_unlock(&sbi->stat_lock);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 3a5c35fa06039..20124f5d287b8 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -150,6 +150,9 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	enum cp_reason_type cp_reason = CP_NO_NEEDED;
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		return CP_NO_NEEDED;
+
 	if (!S_ISREG(inode->i_mode))
 		cp_reason = CP_NON_REGULAR;
 	else if (inode->i_nlink != 1)
@@ -2070,6 +2073,9 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
 	if (f2fs_readonly(sbi->sb))
 		return -EROFS;
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		return -EINVAL;
+
 	ret = mnt_want_write_file(filp);
 	if (ret)
 		return ret;
@@ -2112,6 +2118,9 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
 		return -EINVAL;
 	}
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		return -EINVAL;
+
 	ret = mnt_want_write_file(filp);
 	if (ret)
 		return ret;
@@ -2147,6 +2156,12 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
 	if (f2fs_readonly(sbi->sb))
 		return -EROFS;
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+		f2fs_msg(sbi->sb, KERN_INFO,
+			"Skipping Checkpoint. Checkpoints currently disabled.");
+		return -EINVAL;
+	}
+
 	ret = mnt_want_write_file(filp);
 	if (ret)
 		return ret;
@@ -2513,6 +2528,9 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
 	if (f2fs_readonly(sbi->sb))
 		return -EROFS;
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		return -EINVAL;
+
 	if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
 							sizeof(range)))
 		return -EFAULT;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index fe0da3e05ef14..1326f5b6aa99d 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -60,6 +60,9 @@ static int gc_thread_func(void *data)
 		}
 #endif
 
+		if (test_opt(sbi, DISABLE_CHECKPOINT))
+			goto do_balance;
+
 		if (!sb_start_write_trylock(sbi->sb))
 			continue;
 
@@ -105,6 +108,7 @@ static int gc_thread_func(void *data)
 		trace_f2fs_background_gc(sbi->sb, wait_ms,
 				prefree_segments(sbi), free_segments(sbi));
 
+do_balance:
 		/* balancing f2fs's metadata periodically */
 		f2fs_balance_fs_bg(sbi);
 next:
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 17354089b4ab2..3d5d8098d4ead 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -179,6 +179,10 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
 		return false;
 	if (sbi->gc_mode == GC_URGENT)
 		return true;
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		return true;
+	if (sbi->gc_mode == GC_URGENT)
+		return true;
 
 	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
 			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
@@ -485,6 +489,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
 	 * We should do GC or end up with checkpoint, if there are so many dirty
 	 * dir/node pages without enough free segments.
 	 */
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		return;
 	if (has_not_enough_free_secs(sbi, 0, 0)) {
 		mutex_lock(&sbi->gc_mutex);
 		f2fs_gc(sbi, false, false, NULL_SEGNO);
@@ -525,8 +531,10 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
 			f2fs_sync_dirty_inodes(sbi, FILE_INODE);
 			blk_finish_plug(&plug);
 		}
-		f2fs_sync_fs(sbi->sb, true);
-		stat_inc_bg_cp_count(sbi->stat_info);
+		if (!test_opt(sbi, DISABLE_CHECKPOINT)) {
+			f2fs_sync_fs(sbi->sb, true);
+			stat_inc_bg_cp_count(sbi->stat_info);
+		}
 	}
 }
 
@@ -741,8 +749,8 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
 	return ret;
 }
 
-static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
-		enum dirty_type dirty_type)
+void __locate_dirty_segment(struct f2fs_sb_info *sbi,
+		unsigned int segno, enum dirty_type dirty_type)
 {
 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
 
@@ -766,8 +774,8 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
 	}
 }
 
-static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
-		enum dirty_type dirty_type)
+void __remove_dirty_segment(struct f2fs_sb_info *sbi,
+		unsigned int segno, enum dirty_type dirty_type)
 {
 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
 
@@ -787,6 +795,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
 	}
 }
 
+
 /*
  * Should not occur error such as -ENOMEM.
  * Adding dirty entry into seglist is not critical operation.
@@ -795,7 +804,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 {
 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
-	unsigned short valid_blocks;
+	unsigned short valid_blocks, ckpt_valid_blocks;
 
 	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
 		return;
@@ -803,8 +812,10 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 	mutex_lock(&dirty_i->seglist_lock);
 
 	valid_blocks = get_valid_blocks(sbi, segno, false);
+	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
 
-	if (valid_blocks == 0) {
+	if (valid_blocks == 0 && (ckpt_valid_blocks == sbi->blocks_per_seg ||
+					!test_opt(sbi, DISABLE_CHECKPOINT))) {
 		__locate_dirty_segment(sbi, segno, PRE);
 		__remove_dirty_segment(sbi, segno, DIRTY);
 	} else if (valid_blocks < sbi->blocks_per_seg) {
@@ -1926,7 +1937,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
 			sbi->discard_blks--;
 
 		/* don't overwrite by SSR to keep node chain */
-		if (IS_NODESEG(se->type)) {
+		if (IS_NODESEG(se->type) &&
+				!test_opt(sbi, DISABLE_CHECKPOINT)) {
 			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
 				se->ckpt_valid_blocks++;
 		}
@@ -1948,6 +1960,25 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
 			f2fs_bug_on(sbi, 1);
 			se->valid_blocks++;
 			del = 0;
+		} else {
+			/* If checkpoints are off, we must not reuse data that
+			 * was used in the previous checkpoint. If it was used
+			 * before, we must track that to know how much space we
+			 * really have
+			 */
+			if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
+				spin_lock(&sbi->stat_lock);
+				sbi->unusable_block_count++;
+				spin_unlock(&sbi->stat_lock);
+			} else {
+				spin_lock(&sbi->stat_lock);
+				if (IS_DATASEG(se->type))
+					sbi->free_ssr_data_block++;
+				else
+					sbi->free_ssr_node_block++;
+				spin_unlock(&sbi->stat_lock);
+			}
+
 		}
 
 		if (f2fs_discard_en(sbi) &&
@@ -2237,7 +2268,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
 		return SIT_I(sbi)->last_victim[ALLOC_NEXT];
 
 	/* find segments from 0 to reuse freed segments */
-	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
+	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE
+			|| test_opt(sbi, DISABLE_CHECKPOINT))
 		return 0;
 
 	return CURSEG_I(sbi, type)->segno;
@@ -2389,7 +2421,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
 	else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
 					type == CURSEG_WARM_NODE)
 		new_curseg(sbi, type, false);
-	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
+	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
+			!test_opt(sbi, DISABLE_CHECKPOINT))
 		new_curseg(sbi, type, false);
 	else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
 		change_curseg(sbi, type);
@@ -3568,6 +3601,9 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 			sit_i->dirty_sentries--;
 			ses->entry_cnt--;
 		}
+		spin_lock(&sbi->stat_lock);
+		sbi->unusable_block_count = 0;
+		spin_unlock(&sbi->stat_lock);
 
 		if (to_journal)
 			up_write(&curseg->journal_rwsem);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 50495515f0a0d..37222e133d52a 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -342,6 +342,12 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
 		return get_seg_entry(sbi, segno)->valid_blocks;
 }
 
+static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
+				unsigned int segno)
+{
+	return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+}
+
 static inline void seg_info_from_raw_sit(struct seg_entry *se,
 					struct f2fs_sit_entry *rs)
 {
@@ -524,6 +530,26 @@ static inline unsigned int dirty_segments(struct f2fs_sb_info *sbi)
 		DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_NODE];
 }
 
+void __locate_dirty_segment(struct f2fs_sb_info *sbi,
+		unsigned int segno, enum dirty_type dirty_type);
+
+void __remove_dirty_segment(struct f2fs_sb_info *sbi,
+		unsigned int segno, enum dirty_type dirty_type);
+
+/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */
+static inline void dirty_to_prefree(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	unsigned int segno;
+
+	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+		if (!get_valid_blocks(sbi, segno, false)) {
+			__locate_dirty_segment(sbi, segno, PRE);
+			__remove_dirty_segment(sbi, segno, DIRTY);
+		}
+	}
+}
+
 static inline int overprovision_segments(struct f2fs_sb_info *sbi)
 {
 	return SM_I(sbi)->ovp_segments;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 7187885da2515..7568ae00319cb 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -132,6 +132,7 @@ enum {
 	Opt_alloc,
 	Opt_fsync,
 	Opt_test_dummy_encryption,
+	Opt_checkpoint,
 	Opt_err,
 };
 
@@ -189,6 +190,7 @@ static match_table_t f2fs_tokens = {
 	{Opt_alloc, "alloc_mode=%s"},
 	{Opt_fsync, "fsync_mode=%s"},
 	{Opt_test_dummy_encryption, "test_dummy_encryption"},
+	{Opt_checkpoint, "checkpoint=%s"},
 	{Opt_err, NULL},
 };
 
@@ -764,6 +766,23 @@ static int parse_options(struct super_block *sb, char *options)
 					"Test dummy encryption mount option ignored");
 #endif
 			break;
+		case Opt_checkpoint:
+			name = match_strdup(&args[0]);
+			if (!name)
+				return -ENOMEM;
+
+			if (strlen(name) == 6 &&
+					!strncmp(name, "enable", 6)) {
+				clear_opt(sbi, DISABLE_CHECKPOINT);
+			} else if (strlen(name) == 7 &&
+					!strncmp(name, "disable", 7)) {
+				set_opt(sbi, DISABLE_CHECKPOINT);
+			} else {
+				kfree(name);
+				return -EINVAL;
+			}
+			kfree(name);
+			break;
 		default:
 			f2fs_msg(sb, KERN_ERR,
 				"Unrecognized mount option \"%s\" or missing value",
@@ -809,6 +828,12 @@ static int parse_options(struct super_block *sb, char *options)
 		}
 	}
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) {
+		f2fs_msg(sb, KERN_ERR,
+				"LFS not compatible with checkpoint=disable\n");
+		return -EINVAL;
+	}
+
 	/* Not pass down write hints if the number of active logs is lesser
 	 * than NR_CURSEG_TYPE.
 	 */
@@ -996,8 +1021,9 @@ static void f2fs_put_super(struct super_block *sb)
 	 * But, the previous checkpoint was not done by umount, it needs to do
 	 * clean checkpoint again.
 	 */
-	if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
-			!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+	if ((is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
+			!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) &&
+			!test_opt(sbi, DISABLE_CHECKPOINT)) {
 		struct cp_control cpc = {
 			.reason = CP_UMOUNT,
 		};
@@ -1007,7 +1033,8 @@ static void f2fs_put_super(struct super_block *sb)
 	/* be sure to wait for any on-going discard commands */
 	dropped = f2fs_wait_discard_bios(sbi);
 
-	if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped) {
+	if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped &&
+			!test_opt(sbi, DISABLE_CHECKPOINT)) {
 		struct cp_control cpc = {
 			.reason = CP_UMOUNT | CP_TRIMMED,
 		};
@@ -1064,6 +1091,8 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return 0;
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		return 0;
 
 	trace_f2fs_sync_fs(sb, sync);
 
@@ -1162,7 +1191,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 	buf->f_blocks = total_count - start_count;
 	buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
-						sbi->current_reserved_blocks;
+						sbi->current_reserved_blocks -
+						sbi->unusable_block_count;
 	if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks)
 		buf->f_bavail = buf->f_bfree -
 				F2FS_OPTION(sbi).root_reserved_blocks;
@@ -1338,6 +1368,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 	else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
 		seq_printf(seq, ",alloc_mode=%s", "reuse");
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		seq_puts(seq, ",checkpoint=disable");
+
 	if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
 		seq_printf(seq, ",fsync_mode=%s", "posix");
 	else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
@@ -1365,6 +1398,7 @@ static void default_options(struct f2fs_sb_info *sbi)
 	set_opt(sbi, INLINE_DENTRY);
 	set_opt(sbi, EXTENT_CACHE);
 	set_opt(sbi, NOHEAP);
+	clear_opt(sbi, DISABLE_CHECKPOINT);
 	sbi->sb->s_flags |= SB_LAZYTIME;
 	set_opt(sbi, FLUSH_MERGE);
 	if (blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev)))
@@ -1387,6 +1421,60 @@ static void default_options(struct f2fs_sb_info *sbi)
 #ifdef CONFIG_QUOTA
 static int f2fs_enable_quotas(struct super_block *sb);
 #endif
+
+static void f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
+{
+	struct cp_control cpc;
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	unsigned int segno;
+	int type;
+
+	set_sbi_flag(sbi, SBI_CP_DISABLED);
+
+	cpc.reason = CP_PAUSE;
+
+	mutex_lock(&sbi->gc_mutex);
+	f2fs_write_checkpoint(sbi, &cpc);
+
+	mutex_lock(&dirty_i->seglist_lock);
+	for (type = 0; type < NR_CURSEG_TYPE; type++) {
+		for_each_set_bit(segno, dirty_i->dirty_segmap[type],
+							MAIN_SEGS(sbi)) {
+			if (IS_DATASEG(type))
+				sbi->free_ssr_data_block +=
+					get_valid_blocks(sbi, segno, false);
+			else
+				sbi->free_ssr_node_block +=
+					get_valid_blocks(sbi, segno, false);
+		}
+	}
+	sbi->free_segments = FREE_I(sbi)->free_segments;
+	mutex_unlock(&dirty_i->seglist_lock);
+	mutex_unlock(&sbi->gc_mutex);
+}
+
+static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
+{
+	struct super_block *sb = sbi->sb;
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+
+	clear_sbi_flag(sbi, SBI_CP_DISABLED);
+	writeback_inodes_sb(sb, WB_REASON_SYNC);
+	sync_inodes_sb(sb);
+
+	mutex_lock(&dirty_i->seglist_lock);
+	dirty_to_prefree(sbi);
+	sbi->free_segments = 0;
+	sbi->free_ssr_data_block = 0;
+	sbi->free_ssr_node_block = 0;
+	mutex_unlock(&dirty_i->seglist_lock);
+
+	set_sbi_flag(sbi, SBI_IS_DIRTY);
+	set_sbi_flag(sbi, SBI_IS_CLOSE);
+	f2fs_sync_fs(sb, 1);
+	clear_sbi_flag(sbi, SBI_IS_CLOSE);
+}
+
 static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -1396,6 +1484,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	bool need_restart_gc = false;
 	bool need_stop_gc = false;
 	bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
+	bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
+	bool checkpoint_changed;
 #ifdef CONFIG_QUOTA
 	int i, j;
 #endif
@@ -1440,6 +1530,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	err = parse_options(sb, data);
 	if (err)
 		goto restore_opts;
+	checkpoint_changed =
+			disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT);
 
 	/*
 	 * Previous and new state of filesystem is RO,
@@ -1501,6 +1593,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 		clear_sbi_flag(sbi, SBI_IS_CLOSE);
 	}
 
+	if (checkpoint_changed) {
+		if (test_opt(sbi, DISABLE_CHECKPOINT))
+			f2fs_disable_checkpoint(sbi);
+		else
+			f2fs_enable_checkpoint(sbi);
+	}
+
 	/*
 	 * We stop issue flush thread if FS is mounted as RO
 	 * or if flush_merge is not passed in mount option.
@@ -2987,7 +3086,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 		goto free_meta;
 
 	/* recover fsynced data */
-	if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
+	if (!test_opt(sbi, DISABLE_ROLL_FORWARD) &&
+			!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
 		/*
 		 * mount should be failed, when device has readonly mode, and
 		 * previous checkpoint was not done by clean system shutdown.
@@ -3053,6 +3153,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 				cur_cp_version(F2FS_CKPT(sbi)));
 	f2fs_update_time(sbi, CP_TIME);
 	f2fs_update_time(sbi, REQ_TIME);
+
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		f2fs_disable_checkpoint(sbi);
+	else if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
+		f2fs_enable_checkpoint(sbi);
+
 	return 0;
 
 free_meta:
-- 
2.18.0.233.g985f88cf7e-goog

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2018-09-28 17:36 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-09-26 19:40 [PATCH] f2fs: checkpoint disabling Jaegeuk Kim
2018-09-28 17:36 ` [PATCH v2] " Jaegeuk Kim
  -- strict thread matches above, loose matches on Subject: below --
2018-07-26  0:19 Daniel Rosenberg
2018-07-26  0:19 ` Daniel Rosenberg
2018-07-27 23:22 ` kbuild test robot
2018-07-27 23:22   ` kbuild test robot
2018-07-28  0:19 ` kbuild test robot
2018-07-28  0:19   ` kbuild test robot
2018-08-04 22:31 ` Jaegeuk Kim
2018-08-04 22:31   ` Jaegeuk Kim
2018-08-04 22:31   ` Jaegeuk Kim

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.