All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v11] f2fs: guarantee journalled quota data by checkpoint
@ 2018-09-20 12:05 Chao Yu
  2018-10-01  0:06 ` Jaegeuk Kim
  2019-03-06  6:49   ` Chao Yu
  0 siblings, 2 replies; 18+ messages in thread
From: Chao Yu @ 2018-09-20 12:05 UTC (permalink / raw)
  To: jaegeuk; +Cc: linux-f2fs-devel, linux-kernel, Chao Yu, Weichao Guo

From: Chao Yu <yuchao0@huawei.com>

For journalled quota mode, let checkpoint flush dquot dirty data and
quota file data to guarantee persistence of all quota sysfiles in the
last checkpoint. This way, we can avoid corrupting quota sysfiles when
encountering sudden power-off (SPO).

The implementation is as below:

1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there are
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
 a) flush dquot metadata into quota file.
 b) flush quota file to storage to keep file usage consistent.

2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
 a) checkpoint will skip syncing dquot metadata.
 b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
    hint for fsck repairing.

3. add a global state SBI_QUOTA_SKIP_FLUSH: in checkpoint, if quota
data updating is very heavy, it may cause a hung task in
block_operations(). To avoid this, if our retry count exceeds the
threshold, let's just skip flushing and retry in the next checkpoint.

Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
---
v11:
- transfer quota data if fsynced inode's i_{u,g}id changed during
recovery.
 fs/f2fs/checkpoint.c    |  56 +++++++++++++++++--
 fs/f2fs/data.c          |  18 ++++--
 fs/f2fs/f2fs.h          |  50 ++++++++++++++---
 fs/f2fs/file.c          |  31 ++++++++---
 fs/f2fs/inline.c        |   4 +-
 fs/f2fs/inode.c         |  11 +++-
 fs/f2fs/namei.c         |   4 --
 fs/f2fs/recovery.c      |  43 +++++++++++++-
 fs/f2fs/super.c         | 120 ++++++++++++++++++++++++++++++++++++----
 include/linux/f2fs_fs.h |   1 +
 10 files changed, 289 insertions(+), 49 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index d312d2829d5a..d624d7983197 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1083,6 +1083,21 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi)
 	ckpt->next_free_nid = cpu_to_le32(last_nid);
 }
 
+static bool __need_flush_quota(struct f2fs_sb_info *sbi)
+{
+	if (!is_journalled_quota(sbi))
+		return false;
+	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
+		return false;
+	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
+		return false;
+	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
+		return true;
+	if (get_pages(sbi, F2FS_DIRTY_QDATA))
+		return true;
+	return false;
+}
+
 /*
  * Freeze all the FS-operations for checkpoint.
  */
@@ -1094,12 +1109,30 @@ static int block_operations(struct f2fs_sb_info *sbi)
 		.for_reclaim = 0,
 	};
 	struct blk_plug plug;
-	int err = 0;
+	int err = 0, cnt = 0;
 
 	blk_start_plug(&plug);
 
-retry_flush_dents:
+retry_flush_quotas:
+	if (__need_flush_quota(sbi)) {
+		if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
+			set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
+			f2fs_lock_all(sbi);
+			goto retry_flush_dents;
+		}
+		clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
+
+		f2fs_quota_sync(sbi->sb, -1);
+	}
+
 	f2fs_lock_all(sbi);
+	if (__need_flush_quota(sbi)) {
+		f2fs_unlock_all(sbi);
+		cond_resched();
+		goto retry_flush_quotas;
+	}
+
+retry_flush_dents:
 	/* write all the dirty dentry pages */
 	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
 		f2fs_unlock_all(sbi);
@@ -1107,7 +1140,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
 		if (err)
 			goto out;
 		cond_resched();
-		goto retry_flush_dents;
+		goto retry_flush_quotas;
 	}
 
 	/*
@@ -1116,6 +1149,12 @@ static int block_operations(struct f2fs_sb_info *sbi)
 	 */
 	down_write(&sbi->node_change);
 
+	if (__need_flush_quota(sbi)) {
+		up_write(&sbi->node_change);
+		f2fs_unlock_all(sbi);
+		goto retry_flush_quotas;
+	}
+
 	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
 		up_write(&sbi->node_change);
 		f2fs_unlock_all(sbi);
@@ -1123,7 +1162,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
 		if (err)
 			goto out;
 		cond_resched();
-		goto retry_flush_dents;
+		goto retry_flush_quotas;
 	}
 
 retry_flush_nodes:
@@ -1214,6 +1253,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
 		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
 
+	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
+		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
+	else
+		__clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
+
+	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
+		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
+
 	/* set this flag to activate crc|cp_ver for recovery */
 	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
 	__clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
@@ -1421,6 +1468,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	clear_sbi_flag(sbi, SBI_IS_DIRTY);
 	clear_sbi_flag(sbi, SBI_NEED_CP);
+	clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
 	__set_cp_next_pack(sbi);
 
 	/*
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 57c0823d22e0..b96f8588d565 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -46,7 +46,7 @@ static bool __is_cp_guaranteed(struct page *page)
 			inode->i_ino ==  F2FS_NODE_INO(sbi) ||
 			S_ISDIR(inode->i_mode) ||
 			(S_ISREG(inode->i_mode) &&
-			is_inode_flag_set(inode, FI_ATOMIC_FILE)) ||
+			(f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
 			is_cold_data(page))
 		return true;
 	return false;
@@ -975,7 +975,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
 	return err;
 }
 
-static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
+void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
 {
 	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
 		if (lock)
@@ -1716,6 +1716,8 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
 		return true;
 	if (S_ISDIR(inode->i_mode))
 		return true;
+	if (IS_NOQUOTA(inode))
+		return true;
 	if (f2fs_is_atomic_file(inode))
 		return true;
 	if (fio) {
@@ -1960,7 +1962,7 @@ static int __write_data_page(struct page *page, bool *submitted,
 	}
 
 	unlock_page(page);
-	if (!S_ISDIR(inode->i_mode))
+	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode))
 		f2fs_balance_fs(sbi, need_balance_fs);
 
 	if (unlikely(f2fs_cp_error(sbi))) {
@@ -2151,6 +2153,8 @@ static inline bool __should_serialize_io(struct inode *inode,
 {
 	if (!S_ISREG(inode->i_mode))
 		return false;
+	if (IS_NOQUOTA(inode))
+		return false;
 	if (wbc->sync_mode != WB_SYNC_ALL)
 		return true;
 	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
@@ -2180,7 +2184,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
 	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
 		goto skip_write;
 
-	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
+	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
+			wbc->sync_mode == WB_SYNC_NONE &&
 			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
 			f2fs_available_free_memory(sbi, DIRTY_DENTS))
 		goto skip_write;
@@ -2245,7 +2250,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
 		down_write(&F2FS_I(inode)->i_mmap_sem);
 
 		truncate_pagecache(inode, i_size);
-		f2fs_truncate_blocks(inode, i_size, true);
+		f2fs_truncate_blocks(inode, i_size, true, true);
 
 		up_write(&F2FS_I(inode)->i_mmap_sem);
 		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -2380,7 +2385,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
 	if (err)
 		goto fail;
 
-	if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
+	if (need_balance && !IS_NOQUOTA(inode) &&
+			has_not_enough_free_secs(sbi, 0, 0)) {
 		unlock_page(page);
 		f2fs_balance_fs(sbi, true);
 		lock_page(page);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 917b2ca76aac..b5e400be73e0 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -525,6 +525,9 @@ enum {
 
 #define DEFAULT_RETRY_IO_COUNT	8	/* maximum retry read IO count */
 
+/* maximum retry quota flush count */
+#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT		8
+
 #define F2FS_LINK_MAX	0xffffffff	/* maximum link count per file */
 
 #define MAX_DIR_RA_PAGES	4	/* maximum ra pages of dir */
@@ -1088,6 +1091,9 @@ enum {
 	SBI_NEED_CP,				/* need to checkpoint */
 	SBI_IS_SHUTDOWN,			/* shutdown by ioctl */
 	SBI_IS_RECOVERED,			/* recovered orphan/data */
+	SBI_QUOTA_NEED_FLUSH,			/* need to flush quota info in CP */
+	SBI_QUOTA_SKIP_FLUSH,			/* skip flushing quota in current CP */
+	SBI_QUOTA_NEED_REPAIR,			/* quota file may be corrupted */
 };
 
 enum {
@@ -1891,12 +1897,18 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
 {
 	block_t	valid_block_count;
 	unsigned int valid_node_count;
-	bool quota = inode && !is_inode;
+	int err;
 
-	if (quota) {
-		int ret = dquot_reserve_block(inode, 1);
-		if (ret)
-			return ret;
+	if (is_inode) {
+		if (inode) {
+			err = dquot_alloc_inode(inode);
+			if (err)
+				return err;
+		}
+	} else {
+		err = dquot_reserve_block(inode, 1);
+		if (err)
+			return err;
 	}
 
 	if (time_to_inject(sbi, FAULT_BLOCK)) {
@@ -1938,8 +1950,12 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
 	return 0;
 
 enospc:
-	if (quota)
+	if (is_inode) {
+		if (inode)
+			dquot_free_inode(inode);
+	} else {
 		dquot_release_reservation_block(inode, 1);
+	}
 	return -ENOSPC;
 }
 
@@ -1960,7 +1976,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
 
 	spin_unlock(&sbi->stat_lock);
 
-	if (!is_inode)
+	if (is_inode)
+		dquot_free_inode(inode);
+	else
 		f2fs_i_blocks_write(inode, 1, false, true);
 }
 
@@ -2739,7 +2757,8 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
  */
 int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
 void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
-int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
+int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
+							bool buf_write);
 int f2fs_truncate(struct inode *inode);
 int f2fs_getattr(const struct path *path, struct kstat *stat,
 			u32 request_mask, unsigned int flags);
@@ -2827,6 +2846,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
 int f2fs_inode_dirtied(struct inode *inode, bool sync);
 void f2fs_inode_synced(struct inode *inode);
 int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
+int f2fs_quota_sync(struct super_block *sb, int type);
 void f2fs_quota_off_umount(struct super_block *sb);
 int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
 int f2fs_sync_fs(struct super_block *sb, int sync);
@@ -3025,6 +3045,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
 struct page *f2fs_get_new_data_page(struct inode *inode,
 			struct page *ipage, pgoff_t index, bool new_i_size);
 int f2fs_do_write_data_page(struct f2fs_io_info *fio);
+void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
 int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 			int create, int flag);
 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
@@ -3456,3 +3477,16 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
 #endif
 
 #endif
+
+static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
+{
+#ifdef CONFIG_QUOTA
+	if (f2fs_sb_has_quota_ino(sbi->sb))
+		return true;
+	if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
+		F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
+		F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
+		return true;
+#endif
+	return false;
+}
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 357422a4c319..a75f3e145bf1 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -586,7 +586,8 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
 	return 0;
 }
 
-int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
+int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
+							bool buf_write)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct dnode_of_data dn;
@@ -594,6 +595,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
 	int count = 0, err = 0;
 	struct page *ipage;
 	bool truncate_page = false;
+	int flag = buf_write ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO;
 
 	trace_f2fs_truncate_blocks_enter(inode, from);
 
@@ -603,7 +605,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
 		goto free_partial;
 
 	if (lock)
-		f2fs_lock_op(sbi);
+		__do_map_lock(sbi, flag, true);
 
 	ipage = f2fs_get_node_page(sbi, inode->i_ino);
 	if (IS_ERR(ipage)) {
@@ -641,7 +643,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
 	err = f2fs_truncate_inode_blocks(inode, free_from);
 out:
 	if (lock)
-		f2fs_unlock_op(sbi);
+		__do_map_lock(sbi, flag, false);
 free_partial:
 	/* lastly zero out the first data page */
 	if (!err)
@@ -676,7 +678,7 @@ int f2fs_truncate(struct inode *inode)
 			return err;
 	}
 
-	err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
+	err = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
 	if (err)
 		return err;
 
@@ -785,9 +787,24 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
 		!uid_eq(attr->ia_uid, inode->i_uid)) ||
 		(attr->ia_valid & ATTR_GID &&
 		!gid_eq(attr->ia_gid, inode->i_gid))) {
+		f2fs_lock_op(F2FS_I_SB(inode));
 		err = dquot_transfer(inode, attr);
-		if (err)
+		if (err) {
+			set_sbi_flag(F2FS_I_SB(inode),
+					SBI_QUOTA_NEED_REPAIR);
+			f2fs_unlock_op(F2FS_I_SB(inode));
 			return err;
+		}
+		/*
+		 * update uid/gid under lock_op(), so that dquot and inode can
+		 * be updated atomically.
+		 */
+		if (attr->ia_valid & ATTR_UID)
+			inode->i_uid = attr->ia_uid;
+		if (attr->ia_valid & ATTR_GID)
+			inode->i_gid = attr->ia_gid;
+		f2fs_mark_inode_dirty_sync(inode, true);
+		f2fs_unlock_op(F2FS_I_SB(inode));
 	}
 
 	if (attr->ia_valid & ATTR_SIZE) {
@@ -1242,7 +1259,7 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	new_size = i_size_read(inode) - len;
 	truncate_pagecache(inode, new_size);
 
-	ret = f2fs_truncate_blocks(inode, new_size, true);
+	ret = f2fs_truncate_blocks(inode, new_size, true, false);
 	up_write(&F2FS_I(inode)->i_mmap_sem);
 	if (!ret)
 		f2fs_i_size_write(inode, new_size);
@@ -1427,7 +1444,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
 	f2fs_balance_fs(sbi, true);
 
 	down_write(&F2FS_I(inode)->i_mmap_sem);
-	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
+	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
 	up_write(&F2FS_I(inode)->i_mmap_sem);
 	if (ret)
 		return ret;
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 425d740f87fd..cb31a719b048 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -298,7 +298,7 @@ bool f2fs_recover_inline_data(struct inode *inode, struct page *npage)
 		clear_inode_flag(inode, FI_INLINE_DATA);
 		f2fs_put_page(ipage, 1);
 	} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
-		if (f2fs_truncate_blocks(inode, 0, false))
+		if (f2fs_truncate_blocks(inode, 0, false, false))
 			return false;
 		goto process_inline;
 	}
@@ -470,7 +470,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
 	return 0;
 punch_dentry_pages:
 	truncate_inode_pages(&dir->i_data, 0);
-	f2fs_truncate_blocks(dir, 0, false);
+	f2fs_truncate_blocks(dir, 0, false, false);
 	f2fs_remove_dirty_inode(dir);
 	return err;
 }
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 86e7333d60c1..3c278e63d1a3 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -645,7 +645,11 @@ void f2fs_evict_inode(struct inode *inode)
 	if (inode->i_nlink || is_bad_inode(inode))
 		goto no_delete;
 
-	dquot_initialize(inode);
+	err = dquot_initialize(inode);
+	if (err) {
+		err = 0;
+		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+	}
 
 	f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
 	f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
@@ -677,9 +681,10 @@ void f2fs_evict_inode(struct inode *inode)
 		goto retry;
 	}
 
-	if (err)
+	if (err) {
 		f2fs_update_inode_page(inode);
-	dquot_free_inode(inode);
+		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+	}
 	sb_end_intwrite(inode->i_sb);
 no_delete:
 	dquot_drop(inode);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 9ad451ac2cec..b65491a63115 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -71,10 +71,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
 	if (err)
 		goto fail_drop;
 
-	err = dquot_alloc_inode(inode);
-	if (err)
-		goto fail_drop;
-
 	set_inode_flag(inode, FI_NEW_INODE);
 
 	/* If the directory encrypted, then we should encrypt the inode. */
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 41f2c0fe6d8e..70f05650191e 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -191,6 +191,33 @@ static int recover_dentry(struct inode *inode, struct page *ipage,
 	return err;
 }
 
+static int recover_quota_data(struct inode *inode, struct page *page)
+{
+	struct f2fs_inode *raw = F2FS_INODE(page);
+	struct iattr attr;
+	uid_t i_uid = le32_to_cpu(raw->i_uid);
+	gid_t i_gid = le32_to_cpu(raw->i_gid);
+	int err;
+
+	memset(&attr, 0, sizeof(attr));
+
+	attr.ia_uid = make_kuid(inode->i_sb->s_user_ns, i_uid);
+	attr.ia_gid = make_kgid(inode->i_sb->s_user_ns, i_gid);
+
+	if (!uid_eq(attr.ia_uid, inode->i_uid))
+		attr.ia_valid |= ATTR_UID;
+	if (!gid_eq(attr.ia_gid, inode->i_gid))
+		attr.ia_valid |= ATTR_GID;
+
+	if (!attr.ia_valid)
+		return 0;
+
+	err = dquot_transfer(inode, &attr);
+	if (err)
+		set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR);
+	return err;
+}
+
 static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
 {
 	if (ri->i_inline & F2FS_PIN_FILE)
@@ -203,12 +230,18 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
 		clear_inode_flag(inode, FI_DATA_EXIST);
 }
 
-static void recover_inode(struct inode *inode, struct page *page)
+static int recover_inode(struct inode *inode, struct page *page)
 {
 	struct f2fs_inode *raw = F2FS_INODE(page);
 	char *name;
+	int err;
 
 	inode->i_mode = le16_to_cpu(raw->i_mode);
+
+	err = recover_quota_data(inode, page);
+	if (err)
+		return err;
+
 	i_uid_write(inode, le32_to_cpu(raw->i_uid));
 	i_gid_write(inode, le32_to_cpu(raw->i_gid));
 	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
@@ -231,6 +264,7 @@ static void recover_inode(struct inode *inode, struct page *page)
 	f2fs_msg(inode->i_sb, KERN_NOTICE,
 		"recover_inode: ino = %x, name = %s, inline = %x",
 			ino_of_node(page), name, raw->i_inline);
+	return 0;
 }
 
 static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
@@ -597,8 +631,11 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
 		 * In this case, we can lose the latest inode(x).
 		 * So, call recover_inode for the inode update.
 		 */
-		if (IS_INODE(page))
-			recover_inode(entry->inode, page);
+		if (IS_INODE(page)) {
+			err = recover_inode(entry->inode, page);
+			if (err)
+				break;
+		}
 		if (entry->last_dentry == blkaddr) {
 			err = recover_dentry(entry->inode, page, dir_list);
 			if (err) {
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 945468968d4e..3a46c9b81188 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1689,6 +1689,13 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode)
 
 static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
 {
+
+	if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
+		f2fs_msg(sbi->sb, KERN_ERR,
+			"quota sysfile may be corrupted, skip loading it");
+		return 0;
+	}
+
 	return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type],
 					F2FS_OPTION(sbi).s_jquota_fmt, type);
 }
@@ -1759,7 +1766,14 @@ static int f2fs_enable_quotas(struct super_block *sb)
 		test_opt(F2FS_SB(sb), PRJQUOTA),
 	};
 
-	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
+	if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) {
+		f2fs_msg(sb, KERN_ERR,
+			"quota file may be corrupted, skip loading it");
+		return 0;
+	}
+
+	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
+
 	for (type = 0; type < MAXQUOTAS; type++) {
 		qf_inum = f2fs_qf_ino(sb, type);
 		if (qf_inum) {
@@ -1773,6 +1787,8 @@ static int f2fs_enable_quotas(struct super_block *sb)
 					"fsck to fix.", type, err);
 				for (type--; type >= 0; type--)
 					dquot_quota_off(sb, type);
+				set_sbi_flag(F2FS_SB(sb),
+						SBI_QUOTA_NEED_REPAIR);
 				return err;
 			}
 		}
@@ -1780,35 +1796,51 @@ static int f2fs_enable_quotas(struct super_block *sb)
 	return 0;
 }
 
-static int f2fs_quota_sync(struct super_block *sb, int type)
+int f2fs_quota_sync(struct super_block *sb, int type)
 {
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
 	struct quota_info *dqopt = sb_dqopt(sb);
 	int cnt;
 	int ret;
 
 	ret = dquot_writeback_dquots(sb, type);
 	if (ret)
-		return ret;
+		goto out;
 
 	/*
 	 * Now when everything is written we can discard the pagecache so
 	 * that userspace sees the changes.
 	 */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		struct address_space *mapping;
+
 		if (type != -1 && cnt != type)
 			continue;
 		if (!sb_has_quota_active(sb, cnt))
 			continue;
 
-		ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping);
+		mapping = dqopt->files[cnt]->i_mapping;
+
+		ret = filemap_fdatawrite(mapping);
+		if (ret)
+			goto out;
+
+		/* if we are using journalled quota */
+		if (is_journalled_quota(sbi))
+			continue;
+
+		ret = filemap_fdatawait(mapping);
 		if (ret)
-			return ret;
+			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
 
 		inode_lock(dqopt->files[cnt]);
 		truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
 		inode_unlock(dqopt->files[cnt]);
 	}
-	return 0;
+out:
+	if (ret)
+		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+	return ret;
 }
 
 static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
@@ -1877,11 +1909,67 @@ void f2fs_quota_off_umount(struct super_block *sb)
 				"Fail to turn off disk quota "
 				"(type: %d, err: %d, ret:%d), Please "
 				"run fsck to fix it.", type, err, ret);
-			set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK);
+			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
 		}
 	}
 }
 
+static int f2fs_dquot_commit(struct dquot *dquot)
+{
+	int ret;
+
+	ret = dquot_commit(dquot);
+	if (ret < 0)
+		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
+	return ret;
+}
+
+static int f2fs_dquot_acquire(struct dquot *dquot)
+{
+	int ret;
+
+	ret = dquot_acquire(dquot);
+	if (ret < 0)
+		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
+
+	return ret;
+}
+
+static int f2fs_dquot_release(struct dquot *dquot)
+{
+	int ret;
+
+	ret = dquot_release(dquot);
+	if (ret < 0)
+		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
+	return ret;
+}
+
+static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
+{
+	struct super_block *sb = dquot->dq_sb;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	int ret;
+
+	ret = dquot_mark_dquot_dirty(dquot);
+
+	/* if we are using journalled quota */
+	if (is_journalled_quota(sbi))
+		set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
+
+	return ret;
+}
+
+static int f2fs_dquot_commit_info(struct super_block *sb, int type)
+{
+	int ret;
+
+	ret = dquot_commit_info(sb, type);
+	if (ret < 0)
+		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+	return ret;
+}
+
 static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
 {
 	*projid = F2FS_I(inode)->i_projid;
@@ -1890,11 +1978,11 @@ static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
 
 static const struct dquot_operations f2fs_quota_operations = {
 	.get_reserved_space = f2fs_get_reserved_space,
-	.write_dquot	= dquot_commit,
-	.acquire_dquot	= dquot_acquire,
-	.release_dquot	= dquot_release,
-	.mark_dirty	= dquot_mark_dquot_dirty,
-	.write_info	= dquot_commit_info,
+	.write_dquot	= f2fs_dquot_commit,
+	.acquire_dquot	= f2fs_dquot_acquire,
+	.release_dquot	= f2fs_dquot_release,
+	.mark_dirty	= f2fs_dquot_mark_dquot_dirty,
+	.write_info	= f2fs_dquot_commit_info,
 	.alloc_dquot	= dquot_alloc,
 	.destroy_dquot	= dquot_destroy,
 	.get_projid	= f2fs_get_projid,
@@ -1912,6 +2000,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = {
 	.get_nextdqblk	= dquot_get_next_dqblk,
 };
 #else
+int f2fs_quota_sync(struct super_block *sb, int type)
+{
+	return 0;
+}
+
 void f2fs_quota_off_umount(struct super_block *sb)
 {
 }
@@ -2951,6 +3044,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 		goto free_meta_inode;
 	}
 
+	if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG))
+		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+
 	/* Initialize device list */
 	err = f2fs_scan_devices(sbi);
 	if (err) {
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 1d4b196291d6..aaca9f5d5f5c 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -115,6 +115,7 @@ struct f2fs_super_block {
 /*
  * For checkpoint
  */
+#define CP_QUOTA_NEED_FSCK_FLAG		0x00000800
 #define CP_LARGE_NAT_BITMAP_FLAG	0x00000400
 #define CP_NOCRC_RECOVERY_FLAG	0x00000200
 #define CP_TRIMMED_FLAG		0x00000100
-- 
2.18.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v11] f2fs: guarantee journalled quota data by checkpoint
  2018-09-20 12:05 [PATCH v11] f2fs: guarantee journalled quota data by checkpoint Chao Yu
@ 2018-10-01  0:06 ` Jaegeuk Kim
  2018-10-01  0:46   ` Chao Yu
  2019-03-06  6:49   ` Chao Yu
  1 sibling, 1 reply; 18+ messages in thread
From: Jaegeuk Kim @ 2018-10-01  0:06 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-f2fs-devel, linux-kernel, Chao Yu, Weichao Guo

Hi Chao,

This fails on fsstress with godown without fault injection. Could you please
test a bit? I assumed that this patch should give no fsck failure along with
valid checkpoint having no flag.

BTW, I'm in doubt that f2fs_lock_all covers entire quota modification. What
about prepare_write_begin() -> f2fs_get_block() ... -> inc_valid_block_count()?

On 09/20, Chao Yu wrote:
> From: Chao Yu <yuchao0@huawei.com>
> 
> For journalled quota mode, let checkpoint flush dquot dirty data
> and quota file data to guarantee persistence of all quota sysfiles in
> the last checkpoint; this way, we can avoid corrupting quota sysfiles
> when encountering SPO.
> 
> The implementation is as below:
> 
> 1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there are
> cached dquot metadata changes in quota subsystem, and later checkpoint
> should:
>  a) flush dquot metadata into quota file.
>  b) flush quota file to storage to keep file usage consistent.
> 
> 2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
> operation failed due to -EIO or -ENOSPC, so later,
>  a) checkpoint will skip syncing dquot metadata.
>  b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
>     hint for fsck repairing.
> 
> 3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
> data updating is very heavy, it may cause hungtask in block_operations().
> To avoid this, if our retry count exceeds the threshold, let's just skip
> flushing and retry in next checkpoint().
> 
> Signed-off-by: Weichao Guo <guoweichao@huawei.com>
> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> ---
> v11:
> - transfer quota data if fsynced inode's i_{u,g}id changed during
> recovery.
>  fs/f2fs/checkpoint.c    |  56 +++++++++++++++++--
>  fs/f2fs/data.c          |  18 ++++--
>  fs/f2fs/f2fs.h          |  50 ++++++++++++++---
>  fs/f2fs/file.c          |  31 ++++++++---
>  fs/f2fs/inline.c        |   4 +-
>  fs/f2fs/inode.c         |  11 +++-
>  fs/f2fs/namei.c         |   4 --
>  fs/f2fs/recovery.c      |  43 +++++++++++++-
>  fs/f2fs/super.c         | 120 ++++++++++++++++++++++++++++++++++++----
>  include/linux/f2fs_fs.h |   1 +
>  10 files changed, 289 insertions(+), 49 deletions(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index d312d2829d5a..d624d7983197 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1083,6 +1083,21 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi)
>  	ckpt->next_free_nid = cpu_to_le32(last_nid);
>  }
>  
> +static bool __need_flush_quota(struct f2fs_sb_info *sbi)
> +{
> +	if (!is_journalled_quota(sbi))
> +		return false;
> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
> +		return false;
> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
> +		return false;
> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
> +		return true;
> +	if (get_pages(sbi, F2FS_DIRTY_QDATA))
> +		return true;
> +	return false;
> +}
> +
>  /*
>   * Freeze all the FS-operations for checkpoint.
>   */
> @@ -1094,12 +1109,30 @@ static int block_operations(struct f2fs_sb_info *sbi)
>  		.for_reclaim = 0,
>  	};
>  	struct blk_plug plug;
> -	int err = 0;
> +	int err = 0, cnt = 0;
>  
>  	blk_start_plug(&plug);
>  
> -retry_flush_dents:
> +retry_flush_quotas:
> +	if (__need_flush_quota(sbi)) {
> +		if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
> +			set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
> +			f2fs_lock_all(sbi);
> +			goto retry_flush_dents;
> +		}
> +		clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
> +
> +		f2fs_quota_sync(sbi->sb, -1);
> +	}
> +
>  	f2fs_lock_all(sbi);
> +	if (__need_flush_quota(sbi)) {
> +		f2fs_unlock_all(sbi);
> +		cond_resched();
> +		goto retry_flush_quotas;
> +	}
> +
> +retry_flush_dents:
>  	/* write all the dirty dentry pages */
>  	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
>  		f2fs_unlock_all(sbi);
> @@ -1107,7 +1140,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
>  		if (err)
>  			goto out;
>  		cond_resched();
> -		goto retry_flush_dents;
> +		goto retry_flush_quotas;
>  	}
>  
>  	/*
> @@ -1116,6 +1149,12 @@ static int block_operations(struct f2fs_sb_info *sbi)
>  	 */
>  	down_write(&sbi->node_change);
>  
> +	if (__need_flush_quota(sbi)) {
> +		up_write(&sbi->node_change);
> +		f2fs_unlock_all(sbi);
> +		goto retry_flush_quotas;
> +	}
> +
>  	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
>  		up_write(&sbi->node_change);
>  		f2fs_unlock_all(sbi);
> @@ -1123,7 +1162,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
>  		if (err)
>  			goto out;
>  		cond_resched();
> -		goto retry_flush_dents;
> +		goto retry_flush_quotas;
>  	}
>  
>  retry_flush_nodes:
> @@ -1214,6 +1253,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
>  		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
>  
> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
> +		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
> +	else
> +		__clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
> +
> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
> +		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
> +
>  	/* set this flag to activate crc|cp_ver for recovery */
>  	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
>  	__clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
> @@ -1421,6 +1468,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  
>  	clear_sbi_flag(sbi, SBI_IS_DIRTY);
>  	clear_sbi_flag(sbi, SBI_NEED_CP);
> +	clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
>  	__set_cp_next_pack(sbi);
>  
>  	/*
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 57c0823d22e0..b96f8588d565 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -46,7 +46,7 @@ static bool __is_cp_guaranteed(struct page *page)
>  			inode->i_ino ==  F2FS_NODE_INO(sbi) ||
>  			S_ISDIR(inode->i_mode) ||
>  			(S_ISREG(inode->i_mode) &&
> -			is_inode_flag_set(inode, FI_ATOMIC_FILE)) ||
> +			(f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
>  			is_cold_data(page))
>  		return true;
>  	return false;
> @@ -975,7 +975,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
>  	return err;
>  }
>  
> -static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
> +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
>  {
>  	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
>  		if (lock)
> @@ -1716,6 +1716,8 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
>  		return true;
>  	if (S_ISDIR(inode->i_mode))
>  		return true;
> +	if (IS_NOQUOTA(inode))
> +		return true;
>  	if (f2fs_is_atomic_file(inode))
>  		return true;
>  	if (fio) {
> @@ -1960,7 +1962,7 @@ static int __write_data_page(struct page *page, bool *submitted,
>  	}
>  
>  	unlock_page(page);
> -	if (!S_ISDIR(inode->i_mode))
> +	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode))
>  		f2fs_balance_fs(sbi, need_balance_fs);
>  
>  	if (unlikely(f2fs_cp_error(sbi))) {
> @@ -2151,6 +2153,8 @@ static inline bool __should_serialize_io(struct inode *inode,
>  {
>  	if (!S_ISREG(inode->i_mode))
>  		return false;
> +	if (IS_NOQUOTA(inode))
> +		return false;
>  	if (wbc->sync_mode != WB_SYNC_ALL)
>  		return true;
>  	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
> @@ -2180,7 +2184,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>  	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
>  		goto skip_write;
>  
> -	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
> +	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
> +			wbc->sync_mode == WB_SYNC_NONE &&
>  			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
>  			f2fs_available_free_memory(sbi, DIRTY_DENTS))
>  		goto skip_write;
> @@ -2245,7 +2250,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
>  		down_write(&F2FS_I(inode)->i_mmap_sem);
>  
>  		truncate_pagecache(inode, i_size);
> -		f2fs_truncate_blocks(inode, i_size, true);
> +		f2fs_truncate_blocks(inode, i_size, true, true);
>  
>  		up_write(&F2FS_I(inode)->i_mmap_sem);
>  		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> @@ -2380,7 +2385,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
>  	if (err)
>  		goto fail;
>  
> -	if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
> +	if (need_balance && !IS_NOQUOTA(inode) &&
> +			has_not_enough_free_secs(sbi, 0, 0)) {
>  		unlock_page(page);
>  		f2fs_balance_fs(sbi, true);
>  		lock_page(page);
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 917b2ca76aac..b5e400be73e0 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -525,6 +525,9 @@ enum {
>  
>  #define DEFAULT_RETRY_IO_COUNT	8	/* maximum retry read IO count */
>  
> +/* maximum retry quota flush count */
> +#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT		8
> +
>  #define F2FS_LINK_MAX	0xffffffff	/* maximum link count per file */
>  
>  #define MAX_DIR_RA_PAGES	4	/* maximum ra pages of dir */
> @@ -1088,6 +1091,9 @@ enum {
>  	SBI_NEED_CP,				/* need to checkpoint */
>  	SBI_IS_SHUTDOWN,			/* shutdown by ioctl */
>  	SBI_IS_RECOVERED,			/* recovered orphan/data */
> +	SBI_QUOTA_NEED_FLUSH,			/* need to flush quota info in CP */
> +	SBI_QUOTA_SKIP_FLUSH,			/* skip flushing quota in current CP */
> +	SBI_QUOTA_NEED_REPAIR,			/* quota file may be corrupted */
>  };
>  
>  enum {
> @@ -1891,12 +1897,18 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
>  {
>  	block_t	valid_block_count;
>  	unsigned int valid_node_count;
> -	bool quota = inode && !is_inode;
> +	int err;
>  
> -	if (quota) {
> -		int ret = dquot_reserve_block(inode, 1);
> -		if (ret)
> -			return ret;
> +	if (is_inode) {
> +		if (inode) {
> +			err = dquot_alloc_inode(inode);
> +			if (err)
> +				return err;
> +		}
> +	} else {
> +		err = dquot_reserve_block(inode, 1);
> +		if (err)
> +			return err;
>  	}
>  
>  	if (time_to_inject(sbi, FAULT_BLOCK)) {
> @@ -1938,8 +1950,12 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
>  	return 0;
>  
>  enospc:
> -	if (quota)
> +	if (is_inode) {
> +		if (inode)
> +			dquot_free_inode(inode);
> +	} else {
>  		dquot_release_reservation_block(inode, 1);
> +	}
>  	return -ENOSPC;
>  }
>  
> @@ -1960,7 +1976,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
>  
>  	spin_unlock(&sbi->stat_lock);
>  
> -	if (!is_inode)
> +	if (is_inode)
> +		dquot_free_inode(inode);
> +	else
>  		f2fs_i_blocks_write(inode, 1, false, true);
>  }
>  
> @@ -2739,7 +2757,8 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
>   */
>  int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
>  void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
> -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
> +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
> +							bool buf_write);
>  int f2fs_truncate(struct inode *inode);
>  int f2fs_getattr(const struct path *path, struct kstat *stat,
>  			u32 request_mask, unsigned int flags);
> @@ -2827,6 +2846,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
>  int f2fs_inode_dirtied(struct inode *inode, bool sync);
>  void f2fs_inode_synced(struct inode *inode);
>  int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
> +int f2fs_quota_sync(struct super_block *sb, int type);
>  void f2fs_quota_off_umount(struct super_block *sb);
>  int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
>  int f2fs_sync_fs(struct super_block *sb, int sync);
> @@ -3025,6 +3045,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
>  struct page *f2fs_get_new_data_page(struct inode *inode,
>  			struct page *ipage, pgoff_t index, bool new_i_size);
>  int f2fs_do_write_data_page(struct f2fs_io_info *fio);
> +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
>  int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
>  			int create, int flag);
>  int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
> @@ -3456,3 +3477,16 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
>  #endif
>  
>  #endif
> +
> +static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
> +{
> +#ifdef CONFIG_QUOTA
> +	if (f2fs_sb_has_quota_ino(sbi->sb))
> +		return true;
> +	if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
> +		F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
> +		F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
> +		return true;
> +#endif
> +	return false;
> +}
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 357422a4c319..a75f3e145bf1 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -586,7 +586,8 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
>  	return 0;
>  }
>  
> -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
> +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
> +							bool buf_write)
>  {
>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>  	struct dnode_of_data dn;
> @@ -594,6 +595,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
>  	int count = 0, err = 0;
>  	struct page *ipage;
>  	bool truncate_page = false;
> +	int flag = buf_write ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO;
>  
>  	trace_f2fs_truncate_blocks_enter(inode, from);
>  
> @@ -603,7 +605,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
>  		goto free_partial;
>  
>  	if (lock)
> -		f2fs_lock_op(sbi);
> +		__do_map_lock(sbi, flag, true);
>  
>  	ipage = f2fs_get_node_page(sbi, inode->i_ino);
>  	if (IS_ERR(ipage)) {
> @@ -641,7 +643,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
>  	err = f2fs_truncate_inode_blocks(inode, free_from);
>  out:
>  	if (lock)
> -		f2fs_unlock_op(sbi);
> +		__do_map_lock(sbi, flag, false);
>  free_partial:
>  	/* lastly zero out the first data page */
>  	if (!err)
> @@ -676,7 +678,7 @@ int f2fs_truncate(struct inode *inode)
>  			return err;
>  	}
>  
> -	err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
> +	err = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
>  	if (err)
>  		return err;
>  
> @@ -785,9 +787,24 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
>  		!uid_eq(attr->ia_uid, inode->i_uid)) ||
>  		(attr->ia_valid & ATTR_GID &&
>  		!gid_eq(attr->ia_gid, inode->i_gid))) {
> +		f2fs_lock_op(F2FS_I_SB(inode));
>  		err = dquot_transfer(inode, attr);
> -		if (err)
> +		if (err) {
> +			set_sbi_flag(F2FS_I_SB(inode),
> +					SBI_QUOTA_NEED_REPAIR);
> +			f2fs_unlock_op(F2FS_I_SB(inode));
>  			return err;
> +		}
> +		/*
> +		 * update uid/gid under lock_op(), so that dquot and inode can
> +		 * be updated atomically.
> +		 */
> +		if (attr->ia_valid & ATTR_UID)
> +			inode->i_uid = attr->ia_uid;
> +		if (attr->ia_valid & ATTR_GID)
> +			inode->i_gid = attr->ia_gid;
> +		f2fs_mark_inode_dirty_sync(inode, true);
> +		f2fs_unlock_op(F2FS_I_SB(inode));
>  	}
>  
>  	if (attr->ia_valid & ATTR_SIZE) {
> @@ -1242,7 +1259,7 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
>  	new_size = i_size_read(inode) - len;
>  	truncate_pagecache(inode, new_size);
>  
> -	ret = f2fs_truncate_blocks(inode, new_size, true);
> +	ret = f2fs_truncate_blocks(inode, new_size, true, false);
>  	up_write(&F2FS_I(inode)->i_mmap_sem);
>  	if (!ret)
>  		f2fs_i_size_write(inode, new_size);
> @@ -1427,7 +1444,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
>  	f2fs_balance_fs(sbi, true);
>  
>  	down_write(&F2FS_I(inode)->i_mmap_sem);
> -	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
> +	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
>  	up_write(&F2FS_I(inode)->i_mmap_sem);
>  	if (ret)
>  		return ret;
> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
> index 425d740f87fd..cb31a719b048 100644
> --- a/fs/f2fs/inline.c
> +++ b/fs/f2fs/inline.c
> @@ -298,7 +298,7 @@ bool f2fs_recover_inline_data(struct inode *inode, struct page *npage)
>  		clear_inode_flag(inode, FI_INLINE_DATA);
>  		f2fs_put_page(ipage, 1);
>  	} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
> -		if (f2fs_truncate_blocks(inode, 0, false))
> +		if (f2fs_truncate_blocks(inode, 0, false, false))
>  			return false;
>  		goto process_inline;
>  	}
> @@ -470,7 +470,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
>  	return 0;
>  punch_dentry_pages:
>  	truncate_inode_pages(&dir->i_data, 0);
> -	f2fs_truncate_blocks(dir, 0, false);
> +	f2fs_truncate_blocks(dir, 0, false, false);
>  	f2fs_remove_dirty_inode(dir);
>  	return err;
>  }
> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> index 86e7333d60c1..3c278e63d1a3 100644
> --- a/fs/f2fs/inode.c
> +++ b/fs/f2fs/inode.c
> @@ -645,7 +645,11 @@ void f2fs_evict_inode(struct inode *inode)
>  	if (inode->i_nlink || is_bad_inode(inode))
>  		goto no_delete;
>  
> -	dquot_initialize(inode);
> +	err = dquot_initialize(inode);
> +	if (err) {
> +		err = 0;
> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
> +	}
>  
>  	f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
>  	f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
> @@ -677,9 +681,10 @@ void f2fs_evict_inode(struct inode *inode)
>  		goto retry;
>  	}
>  
> -	if (err)
> +	if (err) {
>  		f2fs_update_inode_page(inode);
> -	dquot_free_inode(inode);
> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
> +	}
>  	sb_end_intwrite(inode->i_sb);
>  no_delete:
>  	dquot_drop(inode);
> diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
> index 9ad451ac2cec..b65491a63115 100644
> --- a/fs/f2fs/namei.c
> +++ b/fs/f2fs/namei.c
> @@ -71,10 +71,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
>  	if (err)
>  		goto fail_drop;
>  
> -	err = dquot_alloc_inode(inode);
> -	if (err)
> -		goto fail_drop;
> -
>  	set_inode_flag(inode, FI_NEW_INODE);
>  
>  	/* If the directory encrypted, then we should encrypt the inode. */
> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> index 41f2c0fe6d8e..70f05650191e 100644
> --- a/fs/f2fs/recovery.c
> +++ b/fs/f2fs/recovery.c
> @@ -191,6 +191,33 @@ static int recover_dentry(struct inode *inode, struct page *ipage,
>  	return err;
>  }
>  
> +static int recover_quota_data(struct inode *inode, struct page *page)
> +{
> +	struct f2fs_inode *raw = F2FS_INODE(page);
> +	struct iattr attr;
> +	uid_t i_uid = le32_to_cpu(raw->i_uid);
> +	gid_t i_gid = le32_to_cpu(raw->i_gid);
> +	int err;
> +
> +	memset(&attr, 0, sizeof(attr));
> +
> +	attr.ia_uid = make_kuid(inode->i_sb->s_user_ns, i_uid);
> +	attr.ia_gid = make_kgid(inode->i_sb->s_user_ns, i_gid);
> +
> +	if (!uid_eq(attr.ia_uid, inode->i_uid))
> +		attr.ia_valid |= ATTR_UID;
> +	if (!gid_eq(attr.ia_gid, inode->i_gid))
> +		attr.ia_valid |= ATTR_GID;
> +
> +	if (!attr.ia_valid)
> +		return 0;
> +
> +	err = dquot_transfer(inode, &attr);
> +	if (err)
> +		set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR);
> +	return err;
> +}
> +
>  static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
>  {
>  	if (ri->i_inline & F2FS_PIN_FILE)
> @@ -203,12 +230,18 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
>  		clear_inode_flag(inode, FI_DATA_EXIST);
>  }
>  
> -static void recover_inode(struct inode *inode, struct page *page)
> +static int recover_inode(struct inode *inode, struct page *page)
>  {
>  	struct f2fs_inode *raw = F2FS_INODE(page);
>  	char *name;
> +	int err;
>  
>  	inode->i_mode = le16_to_cpu(raw->i_mode);
> +
> +	err = recover_quota_data(inode, page);
> +	if (err)
> +		return err;
> +
>  	i_uid_write(inode, le32_to_cpu(raw->i_uid));
>  	i_gid_write(inode, le32_to_cpu(raw->i_gid));
>  	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
> @@ -231,6 +264,7 @@ static void recover_inode(struct inode *inode, struct page *page)
>  	f2fs_msg(inode->i_sb, KERN_NOTICE,
>  		"recover_inode: ino = %x, name = %s, inline = %x",
>  			ino_of_node(page), name, raw->i_inline);
> +	return 0;
>  }
>  
>  static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
> @@ -597,8 +631,11 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
>  		 * In this case, we can lose the latest inode(x).
>  		 * So, call recover_inode for the inode update.
>  		 */
> -		if (IS_INODE(page))
> -			recover_inode(entry->inode, page);
> +		if (IS_INODE(page)) {
> +			err = recover_inode(entry->inode, page);
> +			if (err)
> +				break;
> +		}
>  		if (entry->last_dentry == blkaddr) {
>  			err = recover_dentry(entry->inode, page, dir_list);
>  			if (err) {
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 945468968d4e..3a46c9b81188 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1689,6 +1689,13 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode)
>  
>  static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
>  {
> +
> +	if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
> +		f2fs_msg(sbi->sb, KERN_ERR,
> +			"quota sysfile may be corrupted, skip loading it");
> +		return 0;
> +	}
> +
>  	return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type],
>  					F2FS_OPTION(sbi).s_jquota_fmt, type);
>  }
> @@ -1759,7 +1766,14 @@ static int f2fs_enable_quotas(struct super_block *sb)
>  		test_opt(F2FS_SB(sb), PRJQUOTA),
>  	};
>  
> -	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
> +	if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) {
> +		f2fs_msg(sb, KERN_ERR,
> +			"quota file may be corrupted, skip loading it");
> +		return 0;
> +	}
> +
> +	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
> +
>  	for (type = 0; type < MAXQUOTAS; type++) {
>  		qf_inum = f2fs_qf_ino(sb, type);
>  		if (qf_inum) {
> @@ -1773,6 +1787,8 @@ static int f2fs_enable_quotas(struct super_block *sb)
>  					"fsck to fix.", type, err);
>  				for (type--; type >= 0; type--)
>  					dquot_quota_off(sb, type);
> +				set_sbi_flag(F2FS_SB(sb),
> +						SBI_QUOTA_NEED_REPAIR);
>  				return err;
>  			}
>  		}
> @@ -1780,35 +1796,51 @@ static int f2fs_enable_quotas(struct super_block *sb)
>  	return 0;
>  }
>  
> -static int f2fs_quota_sync(struct super_block *sb, int type)
> +int f2fs_quota_sync(struct super_block *sb, int type)
>  {
> +	struct f2fs_sb_info *sbi = F2FS_SB(sb);
>  	struct quota_info *dqopt = sb_dqopt(sb);
>  	int cnt;
>  	int ret;
>  
>  	ret = dquot_writeback_dquots(sb, type);
>  	if (ret)
> -		return ret;
> +		goto out;
>  
>  	/*
>  	 * Now when everything is written we can discard the pagecache so
>  	 * that userspace sees the changes.
>  	 */
>  	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> +		struct address_space *mapping;
> +
>  		if (type != -1 && cnt != type)
>  			continue;
>  		if (!sb_has_quota_active(sb, cnt))
>  			continue;
>  
> -		ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping);
> +		mapping = dqopt->files[cnt]->i_mapping;
> +
> +		ret = filemap_fdatawrite(mapping);
> +		if (ret)
> +			goto out;
> +
> +		/* if we are using journalled quota */
> +		if (is_journalled_quota(sbi))
> +			continue;
> +
> +		ret = filemap_fdatawait(mapping);
>  		if (ret)
> -			return ret;
> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>  
>  		inode_lock(dqopt->files[cnt]);
>  		truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
>  		inode_unlock(dqopt->files[cnt]);
>  	}
> -	return 0;
> +out:
> +	if (ret)
> +		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
> +	return ret;
>  }
>  
>  static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
> @@ -1877,11 +1909,67 @@ void f2fs_quota_off_umount(struct super_block *sb)
>  				"Fail to turn off disk quota "
>  				"(type: %d, err: %d, ret:%d), Please "
>  				"run fsck to fix it.", type, err, ret);
> -			set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK);
> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>  		}
>  	}
>  }
>  
> +static int f2fs_dquot_commit(struct dquot *dquot)
> +{
> +	int ret;
> +
> +	ret = dquot_commit(dquot);
> +	if (ret < 0)
> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
> +	return ret;
> +}
> +
> +static int f2fs_dquot_acquire(struct dquot *dquot)
> +{
> +	int ret;
> +
> +	ret = dquot_acquire(dquot);
> +	if (ret < 0)
> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
> +
> +	return ret;
> +}
> +
> +static int f2fs_dquot_release(struct dquot *dquot)
> +{
> +	int ret;
> +
> +	ret = dquot_release(dquot);
> +	if (ret < 0)
> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
> +	return ret;
> +}
> +
> +static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
> +{
> +	struct super_block *sb = dquot->dq_sb;
> +	struct f2fs_sb_info *sbi = F2FS_SB(sb);
> +	int ret;
> +
> +	ret = dquot_mark_dquot_dirty(dquot);
> +
> +	/* if we are using journalled quota */
> +	if (is_journalled_quota(sbi))
> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
> +
> +	return ret;
> +}
> +
> +static int f2fs_dquot_commit_info(struct super_block *sb, int type)
> +{
> +	int ret;
> +
> +	ret = dquot_commit_info(sb, type);
> +	if (ret < 0)
> +		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
> +	return ret;
> +}
> +
>  static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
>  {
>  	*projid = F2FS_I(inode)->i_projid;
> @@ -1890,11 +1978,11 @@ static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
>  
>  static const struct dquot_operations f2fs_quota_operations = {
>  	.get_reserved_space = f2fs_get_reserved_space,
> -	.write_dquot	= dquot_commit,
> -	.acquire_dquot	= dquot_acquire,
> -	.release_dquot	= dquot_release,
> -	.mark_dirty	= dquot_mark_dquot_dirty,
> -	.write_info	= dquot_commit_info,
> +	.write_dquot	= f2fs_dquot_commit,
> +	.acquire_dquot	= f2fs_dquot_acquire,
> +	.release_dquot	= f2fs_dquot_release,
> +	.mark_dirty	= f2fs_dquot_mark_dquot_dirty,
> +	.write_info	= f2fs_dquot_commit_info,
>  	.alloc_dquot	= dquot_alloc,
>  	.destroy_dquot	= dquot_destroy,
>  	.get_projid	= f2fs_get_projid,
> @@ -1912,6 +2000,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = {
>  	.get_nextdqblk	= dquot_get_next_dqblk,
>  };
>  #else
> +int f2fs_quota_sync(struct super_block *sb, int type)
> +{
> +	return 0;
> +}
> +
>  void f2fs_quota_off_umount(struct super_block *sb)
>  {
>  }
> @@ -2951,6 +3044,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>  		goto free_meta_inode;
>  	}
>  
> +	if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG))
> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
> +
>  	/* Initialize device list */
>  	err = f2fs_scan_devices(sbi);
>  	if (err) {
> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
> index 1d4b196291d6..aaca9f5d5f5c 100644
> --- a/include/linux/f2fs_fs.h
> +++ b/include/linux/f2fs_fs.h
> @@ -115,6 +115,7 @@ struct f2fs_super_block {
>  /*
>   * For checkpoint
>   */
> +#define CP_QUOTA_NEED_FSCK_FLAG		0x00000800
>  #define CP_LARGE_NAT_BITMAP_FLAG	0x00000400
>  #define CP_NOCRC_RECOVERY_FLAG	0x00000200
>  #define CP_TRIMMED_FLAG		0x00000100
> -- 
> 2.18.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v11] f2fs: guarantee journalled quota data by checkpoint
  2018-10-01  0:06 ` Jaegeuk Kim
@ 2018-10-01  0:46   ` Chao Yu
  2018-10-01  1:29     ` Jaegeuk Kim
  0 siblings, 1 reply; 18+ messages in thread
From: Chao Yu @ 2018-10-01  0:46 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-f2fs-devel, linux-kernel, Chao Yu, Weichao Guo

Hi Jaegeuk,

On 2018-10-1 8:06, Jaegeuk Kim wrote:
> Hi Chao,
> 
> This fails on fsstress with godown without fault injection. Could you please
> test a bit? I assumed that this patch should give no fsck failure along with
> valid checkpoint having no flag.

Okay, let me reproduce with that case.

> 
> BTW, I'm in doubt that f2fs_lock_all covers entire quota modification. What
> about prepare_write_begin() -> f2fs_get_block() ... -> inc_valid_block_count()?

If quota data is changed in the above path, we will detect that with the condition below:

block_operations()

	down_write(&sbi->node_change);

	if (__need_flush_quota(sbi)) {
		up_write(&sbi->node_change);
		f2fs_unlock_all(sbi);
		goto retry_flush_quotas;
	}

So there is no problem?

Thanks,

> 
> On 09/20, Chao Yu wrote:
>> From: Chao Yu <yuchao0@huawei.com>
>>
>> For journalled quota mode, let checkpoint to flush dquot dirty data
>> and quota file data to guarantee persistence of all quota sysfile in
>> last checkpoint, by this way, we can avoid corrupting quota sysfile
>> when encountering SPO.
>>
>> The implementation is as below:
>>
>> 1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
>> cached dquot metadata changes in quota subsystem, and later checkpoint
>> should:
>>  a) flush dquot metadata into quota file.
>>  b) flush quota file to storage to keep file usage be consistent.
>>
>> 2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
>> operation failed due to -EIO or -ENOSPC, so later,
>>  a) checkpoint will skip syncing dquot metadata.
>>  b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
>>     hint for fsck repairing.
>>
>> 3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
>> data updating is very heavy, it may cause hungtask in block_operation().
>> To avoid this, if our retry time exceed threshold, let's just skip
>> flushing and retry in next checkpoint().
>>
>> Signed-off-by: Weichao Guo <guoweichao@huawei.com>
>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>> ---
>> v11:
>> - transfer quota data if fsynced inode's i_{u,g}id changed during
>> recovery.
>>  fs/f2fs/checkpoint.c    |  56 +++++++++++++++++--
>>  fs/f2fs/data.c          |  18 ++++--
>>  fs/f2fs/f2fs.h          |  50 ++++++++++++++---
>>  fs/f2fs/file.c          |  31 ++++++++---
>>  fs/f2fs/inline.c        |   4 +-
>>  fs/f2fs/inode.c         |  11 +++-
>>  fs/f2fs/namei.c         |   4 --
>>  fs/f2fs/recovery.c      |  43 +++++++++++++-
>>  fs/f2fs/super.c         | 120 ++++++++++++++++++++++++++++++++++++----
>>  include/linux/f2fs_fs.h |   1 +
>>  10 files changed, 289 insertions(+), 49 deletions(-)
>>
>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>> index d312d2829d5a..d624d7983197 100644
>> --- a/fs/f2fs/checkpoint.c
>> +++ b/fs/f2fs/checkpoint.c
>> @@ -1083,6 +1083,21 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi)
>>  	ckpt->next_free_nid = cpu_to_le32(last_nid);
>>  }
>>  
>> +static bool __need_flush_quota(struct f2fs_sb_info *sbi)
>> +{
>> +	if (!is_journalled_quota(sbi))
>> +		return false;
>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
>> +		return false;
>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
>> +		return false;
>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
>> +		return true;
>> +	if (get_pages(sbi, F2FS_DIRTY_QDATA))
>> +		return true;
>> +	return false;
>> +}
>> +
>>  /*
>>   * Freeze all the FS-operations for checkpoint.
>>   */
>> @@ -1094,12 +1109,30 @@ static int block_operations(struct f2fs_sb_info *sbi)
>>  		.for_reclaim = 0,
>>  	};
>>  	struct blk_plug plug;
>> -	int err = 0;
>> +	int err = 0, cnt = 0;
>>  
>>  	blk_start_plug(&plug);
>>  
>> -retry_flush_dents:
>> +retry_flush_quotas:
>> +	if (__need_flush_quota(sbi)) {
>> +		if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
>> +			set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
>> +			f2fs_lock_all(sbi);
>> +			goto retry_flush_dents;
>> +		}
>> +		clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
>> +
>> +		f2fs_quota_sync(sbi->sb, -1);
>> +	}
>> +
>>  	f2fs_lock_all(sbi);
>> +	if (__need_flush_quota(sbi)) {
>> +		f2fs_unlock_all(sbi);
>> +		cond_resched();
>> +		goto retry_flush_quotas;
>> +	}
>> +
>> +retry_flush_dents:
>>  	/* write all the dirty dentry pages */
>>  	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
>>  		f2fs_unlock_all(sbi);
>> @@ -1107,7 +1140,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
>>  		if (err)
>>  			goto out;
>>  		cond_resched();
>> -		goto retry_flush_dents;
>> +		goto retry_flush_quotas;
>>  	}
>>  
>>  	/*
>> @@ -1116,6 +1149,12 @@ static int block_operations(struct f2fs_sb_info *sbi)
>>  	 */
>>  	down_write(&sbi->node_change);
>>  
>> +	if (__need_flush_quota(sbi)) {
>> +		up_write(&sbi->node_change);
>> +		f2fs_unlock_all(sbi);
>> +		goto retry_flush_quotas;
>> +	}
>> +
>>  	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
>>  		up_write(&sbi->node_change);
>>  		f2fs_unlock_all(sbi);
>> @@ -1123,7 +1162,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
>>  		if (err)
>>  			goto out;
>>  		cond_resched();
>> -		goto retry_flush_dents;
>> +		goto retry_flush_quotas;
>>  	}
>>  
>>  retry_flush_nodes:
>> @@ -1214,6 +1253,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>  	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
>>  		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
>>  
>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
>> +		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
>> +	else
>> +		__clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
>> +
>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
>> +		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
>> +
>>  	/* set this flag to activate crc|cp_ver for recovery */
>>  	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
>>  	__clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
>> @@ -1421,6 +1468,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>  
>>  	clear_sbi_flag(sbi, SBI_IS_DIRTY);
>>  	clear_sbi_flag(sbi, SBI_NEED_CP);
>> +	clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
>>  	__set_cp_next_pack(sbi);
>>  
>>  	/*
>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>> index 57c0823d22e0..b96f8588d565 100644
>> --- a/fs/f2fs/data.c
>> +++ b/fs/f2fs/data.c
>> @@ -46,7 +46,7 @@ static bool __is_cp_guaranteed(struct page *page)
>>  			inode->i_ino ==  F2FS_NODE_INO(sbi) ||
>>  			S_ISDIR(inode->i_mode) ||
>>  			(S_ISREG(inode->i_mode) &&
>> -			is_inode_flag_set(inode, FI_ATOMIC_FILE)) ||
>> +			(f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
>>  			is_cold_data(page))
>>  		return true;
>>  	return false;
>> @@ -975,7 +975,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
>>  	return err;
>>  }
>>  
>> -static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
>> +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
>>  {
>>  	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
>>  		if (lock)
>> @@ -1716,6 +1716,8 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
>>  		return true;
>>  	if (S_ISDIR(inode->i_mode))
>>  		return true;
>> +	if (IS_NOQUOTA(inode))
>> +		return true;
>>  	if (f2fs_is_atomic_file(inode))
>>  		return true;
>>  	if (fio) {
>> @@ -1960,7 +1962,7 @@ static int __write_data_page(struct page *page, bool *submitted,
>>  	}
>>  
>>  	unlock_page(page);
>> -	if (!S_ISDIR(inode->i_mode))
>> +	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode))
>>  		f2fs_balance_fs(sbi, need_balance_fs);
>>  
>>  	if (unlikely(f2fs_cp_error(sbi))) {
>> @@ -2151,6 +2153,8 @@ static inline bool __should_serialize_io(struct inode *inode,
>>  {
>>  	if (!S_ISREG(inode->i_mode))
>>  		return false;
>> +	if (IS_NOQUOTA(inode))
>> +		return false;
>>  	if (wbc->sync_mode != WB_SYNC_ALL)
>>  		return true;
>>  	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
>> @@ -2180,7 +2184,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>  	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
>>  		goto skip_write;
>>  
>> -	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
>> +	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
>> +			wbc->sync_mode == WB_SYNC_NONE &&
>>  			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
>>  			f2fs_available_free_memory(sbi, DIRTY_DENTS))
>>  		goto skip_write;
>> @@ -2245,7 +2250,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
>>  		down_write(&F2FS_I(inode)->i_mmap_sem);
>>  
>>  		truncate_pagecache(inode, i_size);
>> -		f2fs_truncate_blocks(inode, i_size, true);
>> +		f2fs_truncate_blocks(inode, i_size, true, true);
>>  
>>  		up_write(&F2FS_I(inode)->i_mmap_sem);
>>  		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
>> @@ -2380,7 +2385,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
>>  	if (err)
>>  		goto fail;
>>  
>> -	if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
>> +	if (need_balance && !IS_NOQUOTA(inode) &&
>> +			has_not_enough_free_secs(sbi, 0, 0)) {
>>  		unlock_page(page);
>>  		f2fs_balance_fs(sbi, true);
>>  		lock_page(page);
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index 917b2ca76aac..b5e400be73e0 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -525,6 +525,9 @@ enum {
>>  
>>  #define DEFAULT_RETRY_IO_COUNT	8	/* maximum retry read IO count */
>>  
>> +/* maximum retry quota flush count */
>> +#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT		8
>> +
>>  #define F2FS_LINK_MAX	0xffffffff	/* maximum link count per file */
>>  
>>  #define MAX_DIR_RA_PAGES	4	/* maximum ra pages of dir */
>> @@ -1088,6 +1091,9 @@ enum {
>>  	SBI_NEED_CP,				/* need to checkpoint */
>>  	SBI_IS_SHUTDOWN,			/* shutdown by ioctl */
>>  	SBI_IS_RECOVERED,			/* recovered orphan/data */
>> +	SBI_QUOTA_NEED_FLUSH,			/* need to flush quota info in CP */
>> +	SBI_QUOTA_SKIP_FLUSH,			/* skip flushing quota in current CP */
>> +	SBI_QUOTA_NEED_REPAIR,			/* quota file may be corrupted */
>>  };
>>  
>>  enum {
>> @@ -1891,12 +1897,18 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
>>  {
>>  	block_t	valid_block_count;
>>  	unsigned int valid_node_count;
>> -	bool quota = inode && !is_inode;
>> +	int err;
>>  
>> -	if (quota) {
>> -		int ret = dquot_reserve_block(inode, 1);
>> -		if (ret)
>> -			return ret;
>> +	if (is_inode) {
>> +		if (inode) {
>> +			err = dquot_alloc_inode(inode);
>> +			if (err)
>> +				return err;
>> +		}
>> +	} else {
>> +		err = dquot_reserve_block(inode, 1);
>> +		if (err)
>> +			return err;
>>  	}
>>  
>>  	if (time_to_inject(sbi, FAULT_BLOCK)) {
>> @@ -1938,8 +1950,12 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
>>  	return 0;
>>  
>>  enospc:
>> -	if (quota)
>> +	if (is_inode) {
>> +		if (inode)
>> +			dquot_free_inode(inode);
>> +	} else {
>>  		dquot_release_reservation_block(inode, 1);
>> +	}
>>  	return -ENOSPC;
>>  }
>>  
>> @@ -1960,7 +1976,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
>>  
>>  	spin_unlock(&sbi->stat_lock);
>>  
>> -	if (!is_inode)
>> +	if (is_inode)
>> +		dquot_free_inode(inode);
>> +	else
>>  		f2fs_i_blocks_write(inode, 1, false, true);
>>  }
>>  
>> @@ -2739,7 +2757,8 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
>>   */
>>  int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
>>  void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
>> -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
>> +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
>> +							bool buf_write);
>>  int f2fs_truncate(struct inode *inode);
>>  int f2fs_getattr(const struct path *path, struct kstat *stat,
>>  			u32 request_mask, unsigned int flags);
>> @@ -2827,6 +2846,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
>>  int f2fs_inode_dirtied(struct inode *inode, bool sync);
>>  void f2fs_inode_synced(struct inode *inode);
>>  int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
>> +int f2fs_quota_sync(struct super_block *sb, int type);
>>  void f2fs_quota_off_umount(struct super_block *sb);
>>  int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
>>  int f2fs_sync_fs(struct super_block *sb, int sync);
>> @@ -3025,6 +3045,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
>>  struct page *f2fs_get_new_data_page(struct inode *inode,
>>  			struct page *ipage, pgoff_t index, bool new_i_size);
>>  int f2fs_do_write_data_page(struct f2fs_io_info *fio);
>> +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
>>  int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
>>  			int create, int flag);
>>  int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
>> @@ -3456,3 +3477,16 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
>>  #endif
>>  
>>  #endif
>> +
>> +static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
>> +{
>> +#ifdef CONFIG_QUOTA
>> +	if (f2fs_sb_has_quota_ino(sbi->sb))
>> +		return true;
>> +	if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
>> +		F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
>> +		F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
>> +		return true;
>> +#endif
>> +	return false;
>> +}
>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>> index 357422a4c319..a75f3e145bf1 100644
>> --- a/fs/f2fs/file.c
>> +++ b/fs/f2fs/file.c
>> @@ -586,7 +586,8 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
>>  	return 0;
>>  }
>>  
>> -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
>> +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
>> +							bool buf_write)
>>  {
>>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>  	struct dnode_of_data dn;
>> @@ -594,6 +595,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
>>  	int count = 0, err = 0;
>>  	struct page *ipage;
>>  	bool truncate_page = false;
>> +	int flag = buf_write ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO;
>>  
>>  	trace_f2fs_truncate_blocks_enter(inode, from);
>>  
>> @@ -603,7 +605,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
>>  		goto free_partial;
>>  
>>  	if (lock)
>> -		f2fs_lock_op(sbi);
>> +		__do_map_lock(sbi, flag, true);
>>  
>>  	ipage = f2fs_get_node_page(sbi, inode->i_ino);
>>  	if (IS_ERR(ipage)) {
>> @@ -641,7 +643,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
>>  	err = f2fs_truncate_inode_blocks(inode, free_from);
>>  out:
>>  	if (lock)
>> -		f2fs_unlock_op(sbi);
>> +		__do_map_lock(sbi, flag, false);
>>  free_partial:
>>  	/* lastly zero out the first data page */
>>  	if (!err)
>> @@ -676,7 +678,7 @@ int f2fs_truncate(struct inode *inode)
>>  			return err;
>>  	}
>>  
>> -	err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
>> +	err = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
>>  	if (err)
>>  		return err;
>>  
>> @@ -785,9 +787,24 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
>>  		!uid_eq(attr->ia_uid, inode->i_uid)) ||
>>  		(attr->ia_valid & ATTR_GID &&
>>  		!gid_eq(attr->ia_gid, inode->i_gid))) {
>> +		f2fs_lock_op(F2FS_I_SB(inode));
>>  		err = dquot_transfer(inode, attr);
>> -		if (err)
>> +		if (err) {
>> +			set_sbi_flag(F2FS_I_SB(inode),
>> +					SBI_QUOTA_NEED_REPAIR);
>> +			f2fs_unlock_op(F2FS_I_SB(inode));
>>  			return err;
>> +		}
>> +		/*
>> +		 * update uid/gid under lock_op(), so that dquot and inode can
>> +		 * be updated atomically.
>> +		 */
>> +		if (attr->ia_valid & ATTR_UID)
>> +			inode->i_uid = attr->ia_uid;
>> +		if (attr->ia_valid & ATTR_GID)
>> +			inode->i_gid = attr->ia_gid;
>> +		f2fs_mark_inode_dirty_sync(inode, true);
>> +		f2fs_unlock_op(F2FS_I_SB(inode));
>>  	}
>>  
>>  	if (attr->ia_valid & ATTR_SIZE) {
>> @@ -1242,7 +1259,7 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
>>  	new_size = i_size_read(inode) - len;
>>  	truncate_pagecache(inode, new_size);
>>  
>> -	ret = f2fs_truncate_blocks(inode, new_size, true);
>> +	ret = f2fs_truncate_blocks(inode, new_size, true, false);
>>  	up_write(&F2FS_I(inode)->i_mmap_sem);
>>  	if (!ret)
>>  		f2fs_i_size_write(inode, new_size);
>> @@ -1427,7 +1444,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
>>  	f2fs_balance_fs(sbi, true);
>>  
>>  	down_write(&F2FS_I(inode)->i_mmap_sem);
>> -	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
>> +	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
>>  	up_write(&F2FS_I(inode)->i_mmap_sem);
>>  	if (ret)
>>  		return ret;
>> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
>> index 425d740f87fd..cb31a719b048 100644
>> --- a/fs/f2fs/inline.c
>> +++ b/fs/f2fs/inline.c
>> @@ -298,7 +298,7 @@ bool f2fs_recover_inline_data(struct inode *inode, struct page *npage)
>>  		clear_inode_flag(inode, FI_INLINE_DATA);
>>  		f2fs_put_page(ipage, 1);
>>  	} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
>> -		if (f2fs_truncate_blocks(inode, 0, false))
>> +		if (f2fs_truncate_blocks(inode, 0, false, false))
>>  			return false;
>>  		goto process_inline;
>>  	}
>> @@ -470,7 +470,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
>>  	return 0;
>>  punch_dentry_pages:
>>  	truncate_inode_pages(&dir->i_data, 0);
>> -	f2fs_truncate_blocks(dir, 0, false);
>> +	f2fs_truncate_blocks(dir, 0, false, false);
>>  	f2fs_remove_dirty_inode(dir);
>>  	return err;
>>  }
>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
>> index 86e7333d60c1..3c278e63d1a3 100644
>> --- a/fs/f2fs/inode.c
>> +++ b/fs/f2fs/inode.c
>> @@ -645,7 +645,11 @@ void f2fs_evict_inode(struct inode *inode)
>>  	if (inode->i_nlink || is_bad_inode(inode))
>>  		goto no_delete;
>>  
>> -	dquot_initialize(inode);
>> +	err = dquot_initialize(inode);
>> +	if (err) {
>> +		err = 0;
>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
>> +	}
>>  
>>  	f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
>>  	f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
>> @@ -677,9 +681,10 @@ void f2fs_evict_inode(struct inode *inode)
>>  		goto retry;
>>  	}
>>  
>> -	if (err)
>> +	if (err) {
>>  		f2fs_update_inode_page(inode);
>> -	dquot_free_inode(inode);
>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
>> +	}
>>  	sb_end_intwrite(inode->i_sb);
>>  no_delete:
>>  	dquot_drop(inode);
>> diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
>> index 9ad451ac2cec..b65491a63115 100644
>> --- a/fs/f2fs/namei.c
>> +++ b/fs/f2fs/namei.c
>> @@ -71,10 +71,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
>>  	if (err)
>>  		goto fail_drop;
>>  
>> -	err = dquot_alloc_inode(inode);
>> -	if (err)
>> -		goto fail_drop;
>> -
>>  	set_inode_flag(inode, FI_NEW_INODE);
>>  
>>  	/* If the directory encrypted, then we should encrypt the inode. */
>> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
>> index 41f2c0fe6d8e..70f05650191e 100644
>> --- a/fs/f2fs/recovery.c
>> +++ b/fs/f2fs/recovery.c
>> @@ -191,6 +191,33 @@ static int recover_dentry(struct inode *inode, struct page *ipage,
>>  	return err;
>>  }
>>  
>> +static int recover_quota_data(struct inode *inode, struct page *page)
>> +{
>> +	struct f2fs_inode *raw = F2FS_INODE(page);
>> +	struct iattr attr;
>> +	uid_t i_uid = le32_to_cpu(raw->i_uid);
>> +	gid_t i_gid = le32_to_cpu(raw->i_gid);
>> +	int err;
>> +
>> +	memset(&attr, 0, sizeof(attr));
>> +
>> +	attr.ia_uid = make_kuid(inode->i_sb->s_user_ns, i_uid);
>> +	attr.ia_gid = make_kgid(inode->i_sb->s_user_ns, i_gid);
>> +
>> +	if (!uid_eq(attr.ia_uid, inode->i_uid))
>> +		attr.ia_valid |= ATTR_UID;
>> +	if (!gid_eq(attr.ia_gid, inode->i_gid))
>> +		attr.ia_valid |= ATTR_GID;
>> +
>> +	if (!attr.ia_valid)
>> +		return 0;
>> +
>> +	err = dquot_transfer(inode, &attr);
>> +	if (err)
>> +		set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR);
>> +	return err;
>> +}
>> +
>>  static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
>>  {
>>  	if (ri->i_inline & F2FS_PIN_FILE)
>> @@ -203,12 +230,18 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
>>  		clear_inode_flag(inode, FI_DATA_EXIST);
>>  }
>>  
>> -static void recover_inode(struct inode *inode, struct page *page)
>> +static int recover_inode(struct inode *inode, struct page *page)
>>  {
>>  	struct f2fs_inode *raw = F2FS_INODE(page);
>>  	char *name;
>> +	int err;
>>  
>>  	inode->i_mode = le16_to_cpu(raw->i_mode);
>> +
>> +	err = recover_quota_data(inode, page);
>> +	if (err)
>> +		return err;
>> +
>>  	i_uid_write(inode, le32_to_cpu(raw->i_uid));
>>  	i_gid_write(inode, le32_to_cpu(raw->i_gid));
>>  	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
>> @@ -231,6 +264,7 @@ static void recover_inode(struct inode *inode, struct page *page)
>>  	f2fs_msg(inode->i_sb, KERN_NOTICE,
>>  		"recover_inode: ino = %x, name = %s, inline = %x",
>>  			ino_of_node(page), name, raw->i_inline);
>> +	return 0;
>>  }
>>  
>>  static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
>> @@ -597,8 +631,11 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
>>  		 * In this case, we can lose the latest inode(x).
>>  		 * So, call recover_inode for the inode update.
>>  		 */
>> -		if (IS_INODE(page))
>> -			recover_inode(entry->inode, page);
>> +		if (IS_INODE(page)) {
>> +			err = recover_inode(entry->inode, page);
>> +			if (err)
>> +				break;
>> +		}
>>  		if (entry->last_dentry == blkaddr) {
>>  			err = recover_dentry(entry->inode, page, dir_list);
>>  			if (err) {
>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>> index 945468968d4e..3a46c9b81188 100644
>> --- a/fs/f2fs/super.c
>> +++ b/fs/f2fs/super.c
>> @@ -1689,6 +1689,13 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode)
>>  
>>  static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
>>  {
>> +
>> +	if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
>> +		f2fs_msg(sbi->sb, KERN_ERR,
>> +			"quota sysfile may be corrupted, skip loading it");
>> +		return 0;
>> +	}
>> +
>>  	return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type],
>>  					F2FS_OPTION(sbi).s_jquota_fmt, type);
>>  }
>> @@ -1759,7 +1766,14 @@ static int f2fs_enable_quotas(struct super_block *sb)
>>  		test_opt(F2FS_SB(sb), PRJQUOTA),
>>  	};
>>  
>> -	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
>> +	if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) {
>> +		f2fs_msg(sb, KERN_ERR,
>> +			"quota file may be corrupted, skip loading it");
>> +		return 0;
>> +	}
>> +
>> +	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
>> +
>>  	for (type = 0; type < MAXQUOTAS; type++) {
>>  		qf_inum = f2fs_qf_ino(sb, type);
>>  		if (qf_inum) {
>> @@ -1773,6 +1787,8 @@ static int f2fs_enable_quotas(struct super_block *sb)
>>  					"fsck to fix.", type, err);
>>  				for (type--; type >= 0; type--)
>>  					dquot_quota_off(sb, type);
>> +				set_sbi_flag(F2FS_SB(sb),
>> +						SBI_QUOTA_NEED_REPAIR);
>>  				return err;
>>  			}
>>  		}
>> @@ -1780,35 +1796,51 @@ static int f2fs_enable_quotas(struct super_block *sb)
>>  	return 0;
>>  }
>>  
>> -static int f2fs_quota_sync(struct super_block *sb, int type)
>> +int f2fs_quota_sync(struct super_block *sb, int type)
>>  {
>> +	struct f2fs_sb_info *sbi = F2FS_SB(sb);
>>  	struct quota_info *dqopt = sb_dqopt(sb);
>>  	int cnt;
>>  	int ret;
>>  
>>  	ret = dquot_writeback_dquots(sb, type);
>>  	if (ret)
>> -		return ret;
>> +		goto out;
>>  
>>  	/*
>>  	 * Now when everything is written we can discard the pagecache so
>>  	 * that userspace sees the changes.
>>  	 */
>>  	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
>> +		struct address_space *mapping;
>> +
>>  		if (type != -1 && cnt != type)
>>  			continue;
>>  		if (!sb_has_quota_active(sb, cnt))
>>  			continue;
>>  
>> -		ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping);
>> +		mapping = dqopt->files[cnt]->i_mapping;
>> +
>> +		ret = filemap_fdatawrite(mapping);
>> +		if (ret)
>> +			goto out;
>> +
>> +		/* if we are using journalled quota */
>> +		if (is_journalled_quota(sbi))
>> +			continue;
>> +
>> +		ret = filemap_fdatawait(mapping);
>>  		if (ret)
>> -			return ret;
>> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>>  
>>  		inode_lock(dqopt->files[cnt]);
>>  		truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
>>  		inode_unlock(dqopt->files[cnt]);
>>  	}
>> -	return 0;
>> +out:
>> +	if (ret)
>> +		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>> +	return ret;
>>  }
>>  
>>  static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
>> @@ -1877,11 +1909,67 @@ void f2fs_quota_off_umount(struct super_block *sb)
>>  				"Fail to turn off disk quota "
>>  				"(type: %d, err: %d, ret:%d), Please "
>>  				"run fsck to fix it.", type, err, ret);
>> -			set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK);
>> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>>  		}
>>  	}
>>  }
>>  
>> +static int f2fs_dquot_commit(struct dquot *dquot)
>> +{
>> +	int ret;
>> +
>> +	ret = dquot_commit(dquot);
>> +	if (ret < 0)
>> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
>> +	return ret;
>> +}
>> +
>> +static int f2fs_dquot_acquire(struct dquot *dquot)
>> +{
>> +	int ret;
>> +
>> +	ret = dquot_acquire(dquot);
>> +	if (ret < 0)
>> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
>> +
>> +	return ret;
>> +}
>> +
>> +static int f2fs_dquot_release(struct dquot *dquot)
>> +{
>> +	int ret;
>> +
>> +	ret = dquot_release(dquot);
>> +	if (ret < 0)
>> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
>> +	return ret;
>> +}
>> +
>> +static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
>> +{
>> +	struct super_block *sb = dquot->dq_sb;
>> +	struct f2fs_sb_info *sbi = F2FS_SB(sb);
>> +	int ret;
>> +
>> +	ret = dquot_mark_dquot_dirty(dquot);
>> +
>> +	/* if we are using journalled quota */
>> +	if (is_journalled_quota(sbi))
>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
>> +
>> +	return ret;
>> +}
>> +
>> +static int f2fs_dquot_commit_info(struct super_block *sb, int type)
>> +{
>> +	int ret;
>> +
>> +	ret = dquot_commit_info(sb, type);
>> +	if (ret < 0)
>> +		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>> +	return ret;
>> +}
>> +
>>  static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
>>  {
>>  	*projid = F2FS_I(inode)->i_projid;
>> @@ -1890,11 +1978,11 @@ static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
>>  
>>  static const struct dquot_operations f2fs_quota_operations = {
>>  	.get_reserved_space = f2fs_get_reserved_space,
>> -	.write_dquot	= dquot_commit,
>> -	.acquire_dquot	= dquot_acquire,
>> -	.release_dquot	= dquot_release,
>> -	.mark_dirty	= dquot_mark_dquot_dirty,
>> -	.write_info	= dquot_commit_info,
>> +	.write_dquot	= f2fs_dquot_commit,
>> +	.acquire_dquot	= f2fs_dquot_acquire,
>> +	.release_dquot	= f2fs_dquot_release,
>> +	.mark_dirty	= f2fs_dquot_mark_dquot_dirty,
>> +	.write_info	= f2fs_dquot_commit_info,
>>  	.alloc_dquot	= dquot_alloc,
>>  	.destroy_dquot	= dquot_destroy,
>>  	.get_projid	= f2fs_get_projid,
>> @@ -1912,6 +2000,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = {
>>  	.get_nextdqblk	= dquot_get_next_dqblk,
>>  };
>>  #else
>> +int f2fs_quota_sync(struct super_block *sb, int type)
>> +{
>> +	return 0;
>> +}
>> +
>>  void f2fs_quota_off_umount(struct super_block *sb)
>>  {
>>  }
>> @@ -2951,6 +3044,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>  		goto free_meta_inode;
>>  	}
>>  
>> +	if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG))
>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
>> +
>>  	/* Initialize device list */
>>  	err = f2fs_scan_devices(sbi);
>>  	if (err) {
>> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
>> index 1d4b196291d6..aaca9f5d5f5c 100644
>> --- a/include/linux/f2fs_fs.h
>> +++ b/include/linux/f2fs_fs.h
>> @@ -115,6 +115,7 @@ struct f2fs_super_block {
>>  /*
>>   * For checkpoint
>>   */
>> +#define CP_QUOTA_NEED_FSCK_FLAG		0x00000800
>>  #define CP_LARGE_NAT_BITMAP_FLAG	0x00000400
>>  #define CP_NOCRC_RECOVERY_FLAG	0x00000200
>>  #define CP_TRIMMED_FLAG		0x00000100
>> -- 
>> 2.18.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v11] f2fs: guarantee journalled quota data by checkpoint
  2018-10-01  0:46   ` Chao Yu
@ 2018-10-01  1:29     ` Jaegeuk Kim
  2018-10-01  1:39       ` Chao Yu
  0 siblings, 1 reply; 18+ messages in thread
From: Jaegeuk Kim @ 2018-10-01  1:29 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-f2fs-devel, linux-kernel, Chao Yu, Weichao Guo

On 10/01, Chao Yu wrote:
> Hi Jaegeuk,
> 
> On 2018-10-1 8:06, Jaegeuk Kim wrote:
> > Hi Chao,
> > 
> > This fails on fsstress with godown without fault injection. Could you please
> > test a bit? I assumed that this patch should give no fsck failure along with
> > valid checkpoint having no flag.
> 
> Okay, let me reproduce with that case.
> 
> > 
> > BTW, I'm in doubt that f2fs_lock_all covers entire quota modification. What
> > about prepare_write_begin() -> f2fs_get_block() ... -> inc_valid_block_count()?
> 
> If quota data changed in the above path, we will detect that with the
> condition below:
> 
> block_operations()
> 
> 	down_write(&sbi->node_change);
> 
> 	if (__need_flush_quota(sbi)) {
> 		up_write(&sbi->node_change);
> 		f2fs_unlock_all(sbi);
> 		goto retry_flush_quotas;
> 	}
> 
> So there is no problem?

We may need to check whether quota is dirty, since we have no way to
detect that through f2fs structures?

> 
> Thanks,
> 
> > 
> > On 09/20, Chao Yu wrote:
> >> From: Chao Yu <yuchao0@huawei.com>
> >>
> >> For journalled quota mode, let checkpoint flush dquot dirty data
> >> and quota file data to guarantee persistence of all quota sysfiles in
> >> the last checkpoint; this way, we can avoid corrupting quota sysfiles
> >> when encountering SPO.
> >>
> >> The implementation is as below:
> >>
> >> 1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there are
> >> cached dquot metadata changes in the quota subsystem, and a later
> >> checkpoint should:
> >>  a) flush dquot metadata into quota file.
> >>  b) flush quota file to storage to keep file usage consistent.
> >>
> >> 2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
> >> operation failed due to -EIO or -ENOSPC, so later,
> >>  a) checkpoint will skip syncing dquot metadata.
> >>  b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
> >>     hint for fsck repairing.
> >>
> >> 3. add a global state SBI_QUOTA_SKIP_FLUSH: in checkpoint, if quota
> >> data updating is very heavy, it may cause a hung task in
> >> block_operations(). To avoid this, if our retry count exceeds the
> >> threshold, let's just skip flushing and retry in the next checkpoint.
> >>
> >> Signed-off-by: Weichao Guo <guoweichao@huawei.com>
> >> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> >> ---
> >> v11:
> >> - transfer quota data if fsynced inode's i_{u,g}id changed during
> >> recovery.
> >>  fs/f2fs/checkpoint.c    |  56 +++++++++++++++++--
> >>  fs/f2fs/data.c          |  18 ++++--
> >>  fs/f2fs/f2fs.h          |  50 ++++++++++++++---
> >>  fs/f2fs/file.c          |  31 ++++++++---
> >>  fs/f2fs/inline.c        |   4 +-
> >>  fs/f2fs/inode.c         |  11 +++-
> >>  fs/f2fs/namei.c         |   4 --
> >>  fs/f2fs/recovery.c      |  43 +++++++++++++-
> >>  fs/f2fs/super.c         | 120 ++++++++++++++++++++++++++++++++++++----
> >>  include/linux/f2fs_fs.h |   1 +
> >>  10 files changed, 289 insertions(+), 49 deletions(-)
> >>
> >> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> >> index d312d2829d5a..d624d7983197 100644
> >> --- a/fs/f2fs/checkpoint.c
> >> +++ b/fs/f2fs/checkpoint.c
> >> @@ -1083,6 +1083,21 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi)
> >>  	ckpt->next_free_nid = cpu_to_le32(last_nid);
> >>  }
> >>  
> >> +static bool __need_flush_quota(struct f2fs_sb_info *sbi)
> >> +{
> >> +	if (!is_journalled_quota(sbi))
> >> +		return false;
> >> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
> >> +		return false;
> >> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
> >> +		return false;
> >> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
> >> +		return true;
> >> +	if (get_pages(sbi, F2FS_DIRTY_QDATA))
> >> +		return true;
> >> +	return false;
> >> +}
> >> +
> >>  /*
> >>   * Freeze all the FS-operations for checkpoint.
> >>   */
> >> @@ -1094,12 +1109,30 @@ static int block_operations(struct f2fs_sb_info *sbi)
> >>  		.for_reclaim = 0,
> >>  	};
> >>  	struct blk_plug plug;
> >> -	int err = 0;
> >> +	int err = 0, cnt = 0;
> >>  
> >>  	blk_start_plug(&plug);
> >>  
> >> -retry_flush_dents:
> >> +retry_flush_quotas:
> >> +	if (__need_flush_quota(sbi)) {
> >> +		if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
> >> +			set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
> >> +			f2fs_lock_all(sbi);
> >> +			goto retry_flush_dents;
> >> +		}
> >> +		clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
> >> +
> >> +		f2fs_quota_sync(sbi->sb, -1);
> >> +	}
> >> +
> >>  	f2fs_lock_all(sbi);
> >> +	if (__need_flush_quota(sbi)) {
> >> +		f2fs_unlock_all(sbi);
> >> +		cond_resched();
> >> +		goto retry_flush_quotas;
> >> +	}
> >> +
> >> +retry_flush_dents:
> >>  	/* write all the dirty dentry pages */
> >>  	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
> >>  		f2fs_unlock_all(sbi);
> >> @@ -1107,7 +1140,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
> >>  		if (err)
> >>  			goto out;
> >>  		cond_resched();
> >> -		goto retry_flush_dents;
> >> +		goto retry_flush_quotas;
> >>  	}
> >>  
> >>  	/*
> >> @@ -1116,6 +1149,12 @@ static int block_operations(struct f2fs_sb_info *sbi)
> >>  	 */
> >>  	down_write(&sbi->node_change);
> >>  
> >> +	if (__need_flush_quota(sbi)) {
> >> +		up_write(&sbi->node_change);
> >> +		f2fs_unlock_all(sbi);
> >> +		goto retry_flush_quotas;
> >> +	}
> >> +
> >>  	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
> >>  		up_write(&sbi->node_change);
> >>  		f2fs_unlock_all(sbi);
> >> @@ -1123,7 +1162,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
> >>  		if (err)
> >>  			goto out;
> >>  		cond_resched();
> >> -		goto retry_flush_dents;
> >> +		goto retry_flush_quotas;
> >>  	}
> >>  
> >>  retry_flush_nodes:
> >> @@ -1214,6 +1253,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >>  	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
> >>  		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
> >>  
> >> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
> >> +		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
> >> +	else
> >> +		__clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
> >> +
> >> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
> >> +		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
> >> +
> >>  	/* set this flag to activate crc|cp_ver for recovery */
> >>  	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
> >>  	__clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
> >> @@ -1421,6 +1468,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >>  
> >>  	clear_sbi_flag(sbi, SBI_IS_DIRTY);
> >>  	clear_sbi_flag(sbi, SBI_NEED_CP);
> >> +	clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
> >>  	__set_cp_next_pack(sbi);
> >>  
> >>  	/*
> >> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> >> index 57c0823d22e0..b96f8588d565 100644
> >> --- a/fs/f2fs/data.c
> >> +++ b/fs/f2fs/data.c
> >> @@ -46,7 +46,7 @@ static bool __is_cp_guaranteed(struct page *page)
> >>  			inode->i_ino ==  F2FS_NODE_INO(sbi) ||
> >>  			S_ISDIR(inode->i_mode) ||
> >>  			(S_ISREG(inode->i_mode) &&
> >> -			is_inode_flag_set(inode, FI_ATOMIC_FILE)) ||
> >> +			(f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
> >>  			is_cold_data(page))
> >>  		return true;
> >>  	return false;
> >> @@ -975,7 +975,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
> >>  	return err;
> >>  }
> >>  
> >> -static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
> >> +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
> >>  {
> >>  	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
> >>  		if (lock)
> >> @@ -1716,6 +1716,8 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
> >>  		return true;
> >>  	if (S_ISDIR(inode->i_mode))
> >>  		return true;
> >> +	if (IS_NOQUOTA(inode))
> >> +		return true;
> >>  	if (f2fs_is_atomic_file(inode))
> >>  		return true;
> >>  	if (fio) {
> >> @@ -1960,7 +1962,7 @@ static int __write_data_page(struct page *page, bool *submitted,
> >>  	}
> >>  
> >>  	unlock_page(page);
> >> -	if (!S_ISDIR(inode->i_mode))
> >> +	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode))
> >>  		f2fs_balance_fs(sbi, need_balance_fs);
> >>  
> >>  	if (unlikely(f2fs_cp_error(sbi))) {
> >> @@ -2151,6 +2153,8 @@ static inline bool __should_serialize_io(struct inode *inode,
> >>  {
> >>  	if (!S_ISREG(inode->i_mode))
> >>  		return false;
> >> +	if (IS_NOQUOTA(inode))
> >> +		return false;
> >>  	if (wbc->sync_mode != WB_SYNC_ALL)
> >>  		return true;
> >>  	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
> >> @@ -2180,7 +2184,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> >>  	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
> >>  		goto skip_write;
> >>  
> >> -	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
> >> +	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
> >> +			wbc->sync_mode == WB_SYNC_NONE &&
> >>  			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
> >>  			f2fs_available_free_memory(sbi, DIRTY_DENTS))
> >>  		goto skip_write;
> >> @@ -2245,7 +2250,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
> >>  		down_write(&F2FS_I(inode)->i_mmap_sem);
> >>  
> >>  		truncate_pagecache(inode, i_size);
> >> -		f2fs_truncate_blocks(inode, i_size, true);
> >> +		f2fs_truncate_blocks(inode, i_size, true, true);
> >>  
> >>  		up_write(&F2FS_I(inode)->i_mmap_sem);
> >>  		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> >> @@ -2380,7 +2385,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
> >>  	if (err)
> >>  		goto fail;
> >>  
> >> -	if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
> >> +	if (need_balance && !IS_NOQUOTA(inode) &&
> >> +			has_not_enough_free_secs(sbi, 0, 0)) {
> >>  		unlock_page(page);
> >>  		f2fs_balance_fs(sbi, true);
> >>  		lock_page(page);
> >> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >> index 917b2ca76aac..b5e400be73e0 100644
> >> --- a/fs/f2fs/f2fs.h
> >> +++ b/fs/f2fs/f2fs.h
> >> @@ -525,6 +525,9 @@ enum {
> >>  
> >>  #define DEFAULT_RETRY_IO_COUNT	8	/* maximum retry read IO count */
> >>  
> >> +/* maximum retry quota flush count */
> >> +#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT		8
> >> +
> >>  #define F2FS_LINK_MAX	0xffffffff	/* maximum link count per file */
> >>  
> >>  #define MAX_DIR_RA_PAGES	4	/* maximum ra pages of dir */
> >> @@ -1088,6 +1091,9 @@ enum {
> >>  	SBI_NEED_CP,				/* need to checkpoint */
> >>  	SBI_IS_SHUTDOWN,			/* shutdown by ioctl */
> >>  	SBI_IS_RECOVERED,			/* recovered orphan/data */
> >> +	SBI_QUOTA_NEED_FLUSH,			/* need to flush quota info in CP */
> >> +	SBI_QUOTA_SKIP_FLUSH,			/* skip flushing quota in current CP */
> >> +	SBI_QUOTA_NEED_REPAIR,			/* quota file may be corrupted */
> >>  };
> >>  
> >>  enum {
> >> @@ -1891,12 +1897,18 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
> >>  {
> >>  	block_t	valid_block_count;
> >>  	unsigned int valid_node_count;
> >> -	bool quota = inode && !is_inode;
> >> +	int err;
> >>  
> >> -	if (quota) {
> >> -		int ret = dquot_reserve_block(inode, 1);
> >> -		if (ret)
> >> -			return ret;
> >> +	if (is_inode) {
> >> +		if (inode) {
> >> +			err = dquot_alloc_inode(inode);
> >> +			if (err)
> >> +				return err;
> >> +		}
> >> +	} else {
> >> +		err = dquot_reserve_block(inode, 1);
> >> +		if (err)
> >> +			return err;
> >>  	}
> >>  
> >>  	if (time_to_inject(sbi, FAULT_BLOCK)) {
> >> @@ -1938,8 +1950,12 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
> >>  	return 0;
> >>  
> >>  enospc:
> >> -	if (quota)
> >> +	if (is_inode) {
> >> +		if (inode)
> >> +			dquot_free_inode(inode);
> >> +	} else {
> >>  		dquot_release_reservation_block(inode, 1);
> >> +	}
> >>  	return -ENOSPC;
> >>  }
> >>  
> >> @@ -1960,7 +1976,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
> >>  
> >>  	spin_unlock(&sbi->stat_lock);
> >>  
> >> -	if (!is_inode)
> >> +	if (is_inode)
> >> +		dquot_free_inode(inode);
> >> +	else
> >>  		f2fs_i_blocks_write(inode, 1, false, true);
> >>  }
> >>  
> >> @@ -2739,7 +2757,8 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
> >>   */
> >>  int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
> >>  void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
> >> -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
> >> +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
> >> +							bool buf_write);
> >>  int f2fs_truncate(struct inode *inode);
> >>  int f2fs_getattr(const struct path *path, struct kstat *stat,
> >>  			u32 request_mask, unsigned int flags);
> >> @@ -2827,6 +2846,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
> >>  int f2fs_inode_dirtied(struct inode *inode, bool sync);
> >>  void f2fs_inode_synced(struct inode *inode);
> >>  int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
> >> +int f2fs_quota_sync(struct super_block *sb, int type);
> >>  void f2fs_quota_off_umount(struct super_block *sb);
> >>  int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
> >>  int f2fs_sync_fs(struct super_block *sb, int sync);
> >> @@ -3025,6 +3045,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
> >>  struct page *f2fs_get_new_data_page(struct inode *inode,
> >>  			struct page *ipage, pgoff_t index, bool new_i_size);
> >>  int f2fs_do_write_data_page(struct f2fs_io_info *fio);
> >> +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
> >>  int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
> >>  			int create, int flag);
> >>  int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
> >> @@ -3456,3 +3477,16 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
> >>  #endif
> >>  
> >>  #endif
> >> +
> >> +static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
> >> +{
> >> +#ifdef CONFIG_QUOTA
> >> +	if (f2fs_sb_has_quota_ino(sbi->sb))
> >> +		return true;
> >> +	if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
> >> +		F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
> >> +		F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
> >> +		return true;
> >> +#endif
> >> +	return false;
> >> +}
> >> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> >> index 357422a4c319..a75f3e145bf1 100644
> >> --- a/fs/f2fs/file.c
> >> +++ b/fs/f2fs/file.c
> >> @@ -586,7 +586,8 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
> >>  	return 0;
> >>  }
> >>  
> >> -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
> >> +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
> >> +							bool buf_write)
> >>  {
> >>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> >>  	struct dnode_of_data dn;
> >> @@ -594,6 +595,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
> >>  	int count = 0, err = 0;
> >>  	struct page *ipage;
> >>  	bool truncate_page = false;
> >> +	int flag = buf_write ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO;
> >>  
> >>  	trace_f2fs_truncate_blocks_enter(inode, from);
> >>  
> >> @@ -603,7 +605,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
> >>  		goto free_partial;
> >>  
> >>  	if (lock)
> >> -		f2fs_lock_op(sbi);
> >> +		__do_map_lock(sbi, flag, true);
> >>  
> >>  	ipage = f2fs_get_node_page(sbi, inode->i_ino);
> >>  	if (IS_ERR(ipage)) {
> >> @@ -641,7 +643,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
> >>  	err = f2fs_truncate_inode_blocks(inode, free_from);
> >>  out:
> >>  	if (lock)
> >> -		f2fs_unlock_op(sbi);
> >> +		__do_map_lock(sbi, flag, false);
> >>  free_partial:
> >>  	/* lastly zero out the first data page */
> >>  	if (!err)
> >> @@ -676,7 +678,7 @@ int f2fs_truncate(struct inode *inode)
> >>  			return err;
> >>  	}
> >>  
> >> -	err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
> >> +	err = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
> >>  	if (err)
> >>  		return err;
> >>  
> >> @@ -785,9 +787,24 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
> >>  		!uid_eq(attr->ia_uid, inode->i_uid)) ||
> >>  		(attr->ia_valid & ATTR_GID &&
> >>  		!gid_eq(attr->ia_gid, inode->i_gid))) {
> >> +		f2fs_lock_op(F2FS_I_SB(inode));
> >>  		err = dquot_transfer(inode, attr);
> >> -		if (err)
> >> +		if (err) {
> >> +			set_sbi_flag(F2FS_I_SB(inode),
> >> +					SBI_QUOTA_NEED_REPAIR);
> >> +			f2fs_unlock_op(F2FS_I_SB(inode));
> >>  			return err;
> >> +		}
> >> +		/*
> >> +		 * update uid/gid under lock_op(), so that dquot and inode can
> >> +		 * be updated atomically.
> >> +		 */
> >> +		if (attr->ia_valid & ATTR_UID)
> >> +			inode->i_uid = attr->ia_uid;
> >> +		if (attr->ia_valid & ATTR_GID)
> >> +			inode->i_gid = attr->ia_gid;
> >> +		f2fs_mark_inode_dirty_sync(inode, true);
> >> +		f2fs_unlock_op(F2FS_I_SB(inode));
> >>  	}
> >>  
> >>  	if (attr->ia_valid & ATTR_SIZE) {
> >> @@ -1242,7 +1259,7 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
> >>  	new_size = i_size_read(inode) - len;
> >>  	truncate_pagecache(inode, new_size);
> >>  
> >> -	ret = f2fs_truncate_blocks(inode, new_size, true);
> >> +	ret = f2fs_truncate_blocks(inode, new_size, true, false);
> >>  	up_write(&F2FS_I(inode)->i_mmap_sem);
> >>  	if (!ret)
> >>  		f2fs_i_size_write(inode, new_size);
> >> @@ -1427,7 +1444,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
> >>  	f2fs_balance_fs(sbi, true);
> >>  
> >>  	down_write(&F2FS_I(inode)->i_mmap_sem);
> >> -	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
> >> +	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
> >>  	up_write(&F2FS_I(inode)->i_mmap_sem);
> >>  	if (ret)
> >>  		return ret;
> >> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
> >> index 425d740f87fd..cb31a719b048 100644
> >> --- a/fs/f2fs/inline.c
> >> +++ b/fs/f2fs/inline.c
> >> @@ -298,7 +298,7 @@ bool f2fs_recover_inline_data(struct inode *inode, struct page *npage)
> >>  		clear_inode_flag(inode, FI_INLINE_DATA);
> >>  		f2fs_put_page(ipage, 1);
> >>  	} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
> >> -		if (f2fs_truncate_blocks(inode, 0, false))
> >> +		if (f2fs_truncate_blocks(inode, 0, false, false))
> >>  			return false;
> >>  		goto process_inline;
> >>  	}
> >> @@ -470,7 +470,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
> >>  	return 0;
> >>  punch_dentry_pages:
> >>  	truncate_inode_pages(&dir->i_data, 0);
> >> -	f2fs_truncate_blocks(dir, 0, false);
> >> +	f2fs_truncate_blocks(dir, 0, false, false);
> >>  	f2fs_remove_dirty_inode(dir);
> >>  	return err;
> >>  }
> >> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> >> index 86e7333d60c1..3c278e63d1a3 100644
> >> --- a/fs/f2fs/inode.c
> >> +++ b/fs/f2fs/inode.c
> >> @@ -645,7 +645,11 @@ void f2fs_evict_inode(struct inode *inode)
> >>  	if (inode->i_nlink || is_bad_inode(inode))
> >>  		goto no_delete;
> >>  
> >> -	dquot_initialize(inode);
> >> +	err = dquot_initialize(inode);
> >> +	if (err) {
> >> +		err = 0;
> >> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
> >> +	}
> >>  
> >>  	f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
> >>  	f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
> >> @@ -677,9 +681,10 @@ void f2fs_evict_inode(struct inode *inode)
> >>  		goto retry;
> >>  	}
> >>  
> >> -	if (err)
> >> +	if (err) {
> >>  		f2fs_update_inode_page(inode);
> >> -	dquot_free_inode(inode);
> >> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
> >> +	}
> >>  	sb_end_intwrite(inode->i_sb);
> >>  no_delete:
> >>  	dquot_drop(inode);
> >> diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
> >> index 9ad451ac2cec..b65491a63115 100644
> >> --- a/fs/f2fs/namei.c
> >> +++ b/fs/f2fs/namei.c
> >> @@ -71,10 +71,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
> >>  	if (err)
> >>  		goto fail_drop;
> >>  
> >> -	err = dquot_alloc_inode(inode);
> >> -	if (err)
> >> -		goto fail_drop;
> >> -
> >>  	set_inode_flag(inode, FI_NEW_INODE);
> >>  
> >>  	/* If the directory encrypted, then we should encrypt the inode. */
> >> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> >> index 41f2c0fe6d8e..70f05650191e 100644
> >> --- a/fs/f2fs/recovery.c
> >> +++ b/fs/f2fs/recovery.c
> >> @@ -191,6 +191,33 @@ static int recover_dentry(struct inode *inode, struct page *ipage,
> >>  	return err;
> >>  }
> >>  
> >> +static int recover_quota_data(struct inode *inode, struct page *page)
> >> +{
> >> +	struct f2fs_inode *raw = F2FS_INODE(page);
> >> +	struct iattr attr;
> >> +	uid_t i_uid = le32_to_cpu(raw->i_uid);
> >> +	gid_t i_gid = le32_to_cpu(raw->i_gid);
> >> +	int err;
> >> +
> >> +	memset(&attr, 0, sizeof(attr));
> >> +
> >> +	attr.ia_uid = make_kuid(inode->i_sb->s_user_ns, i_uid);
> >> +	attr.ia_gid = make_kgid(inode->i_sb->s_user_ns, i_gid);
> >> +
> >> +	if (!uid_eq(attr.ia_uid, inode->i_uid))
> >> +		attr.ia_valid |= ATTR_UID;
> >> +	if (!gid_eq(attr.ia_gid, inode->i_gid))
> >> +		attr.ia_valid |= ATTR_GID;
> >> +
> >> +	if (!attr.ia_valid)
> >> +		return 0;
> >> +
> >> +	err = dquot_transfer(inode, &attr);
> >> +	if (err)
> >> +		set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR);
> >> +	return err;
> >> +}
> >> +
> >>  static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
> >>  {
> >>  	if (ri->i_inline & F2FS_PIN_FILE)
> >> @@ -203,12 +230,18 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
> >>  		clear_inode_flag(inode, FI_DATA_EXIST);
> >>  }
> >>  
> >> -static void recover_inode(struct inode *inode, struct page *page)
> >> +static int recover_inode(struct inode *inode, struct page *page)
> >>  {
> >>  	struct f2fs_inode *raw = F2FS_INODE(page);
> >>  	char *name;
> >> +	int err;
> >>  
> >>  	inode->i_mode = le16_to_cpu(raw->i_mode);
> >> +
> >> +	err = recover_quota_data(inode, page);
> >> +	if (err)
> >> +		return err;
> >> +
> >>  	i_uid_write(inode, le32_to_cpu(raw->i_uid));
> >>  	i_gid_write(inode, le32_to_cpu(raw->i_gid));
> >>  	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
> >> @@ -231,6 +264,7 @@ static void recover_inode(struct inode *inode, struct page *page)
> >>  	f2fs_msg(inode->i_sb, KERN_NOTICE,
> >>  		"recover_inode: ino = %x, name = %s, inline = %x",
> >>  			ino_of_node(page), name, raw->i_inline);
> >> +	return 0;
> >>  }
> >>  
> >>  static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
> >> @@ -597,8 +631,11 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
> >>  		 * In this case, we can lose the latest inode(x).
> >>  		 * So, call recover_inode for the inode update.
> >>  		 */
> >> -		if (IS_INODE(page))
> >> -			recover_inode(entry->inode, page);
> >> +		if (IS_INODE(page)) {
> >> +			err = recover_inode(entry->inode, page);
> >> +			if (err)
> >> +				break;
> >> +		}
> >>  		if (entry->last_dentry == blkaddr) {
> >>  			err = recover_dentry(entry->inode, page, dir_list);
> >>  			if (err) {
> >> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >> index 945468968d4e..3a46c9b81188 100644
> >> --- a/fs/f2fs/super.c
> >> +++ b/fs/f2fs/super.c
> >> @@ -1689,6 +1689,13 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode)
> >>  
> >>  static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
> >>  {
> >> +
> >> +	if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
> >> +		f2fs_msg(sbi->sb, KERN_ERR,
> >> +			"quota sysfile may be corrupted, skip loading it");
> >> +		return 0;
> >> +	}
> >> +
> >>  	return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type],
> >>  					F2FS_OPTION(sbi).s_jquota_fmt, type);
> >>  }
> >> @@ -1759,7 +1766,14 @@ static int f2fs_enable_quotas(struct super_block *sb)
> >>  		test_opt(F2FS_SB(sb), PRJQUOTA),
> >>  	};
> >>  
> >> -	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
> >> +	if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) {
> >> +		f2fs_msg(sb, KERN_ERR,
> >> +			"quota file may be corrupted, skip loading it");
> >> +		return 0;
> >> +	}
> >> +
> >> +	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
> >> +
> >>  	for (type = 0; type < MAXQUOTAS; type++) {
> >>  		qf_inum = f2fs_qf_ino(sb, type);
> >>  		if (qf_inum) {
> >> @@ -1773,6 +1787,8 @@ static int f2fs_enable_quotas(struct super_block *sb)
> >>  					"fsck to fix.", type, err);
> >>  				for (type--; type >= 0; type--)
> >>  					dquot_quota_off(sb, type);
> >> +				set_sbi_flag(F2FS_SB(sb),
> >> +						SBI_QUOTA_NEED_REPAIR);
> >>  				return err;
> >>  			}
> >>  		}
> >> @@ -1780,35 +1796,51 @@ static int f2fs_enable_quotas(struct super_block *sb)
> >>  	return 0;
> >>  }
> >>  
> >> -static int f2fs_quota_sync(struct super_block *sb, int type)
> >> +int f2fs_quota_sync(struct super_block *sb, int type)
> >>  {
> >> +	struct f2fs_sb_info *sbi = F2FS_SB(sb);
> >>  	struct quota_info *dqopt = sb_dqopt(sb);
> >>  	int cnt;
> >>  	int ret;
> >>  
> >>  	ret = dquot_writeback_dquots(sb, type);
> >>  	if (ret)
> >> -		return ret;
> >> +		goto out;
> >>  
> >>  	/*
> >>  	 * Now when everything is written we can discard the pagecache so
> >>  	 * that userspace sees the changes.
> >>  	 */
> >>  	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> >> +		struct address_space *mapping;
> >> +
> >>  		if (type != -1 && cnt != type)
> >>  			continue;
> >>  		if (!sb_has_quota_active(sb, cnt))
> >>  			continue;
> >>  
> >> -		ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping);
> >> +		mapping = dqopt->files[cnt]->i_mapping;
> >> +
> >> +		ret = filemap_fdatawrite(mapping);
> >> +		if (ret)
> >> +			goto out;
> >> +
> >> +		/* if we are using journalled quota */
> >> +		if (is_journalled_quota(sbi))
> >> +			continue;
> >> +
> >> +		ret = filemap_fdatawait(mapping);
> >>  		if (ret)
> >> -			return ret;
> >> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
> >>  
> >>  		inode_lock(dqopt->files[cnt]);
> >>  		truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
> >>  		inode_unlock(dqopt->files[cnt]);
> >>  	}
> >> -	return 0;
> >> +out:
> >> +	if (ret)
> >> +		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
> >> +	return ret;
> >>  }
> >>  
> >>  static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
> >> @@ -1877,11 +1909,67 @@ void f2fs_quota_off_umount(struct super_block *sb)
> >>  				"Fail to turn off disk quota "
> >>  				"(type: %d, err: %d, ret:%d), Please "
> >>  				"run fsck to fix it.", type, err, ret);
> >> -			set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK);
> >> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
> >>  		}
> >>  	}
> >>  }
> >>  
> >> +static int f2fs_dquot_commit(struct dquot *dquot)
> >> +{
> >> +	int ret;
> >> +
> >> +	ret = dquot_commit(dquot);
> >> +	if (ret < 0)
> >> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
> >> +	return ret;
> >> +}
> >> +
> >> +static int f2fs_dquot_acquire(struct dquot *dquot)
> >> +{
> >> +	int ret;
> >> +
> >> +	ret = dquot_acquire(dquot);
> >> +	if (ret < 0)
> >> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
> >> +
> >> +	return ret;
> >> +}
> >> +
> >> +static int f2fs_dquot_release(struct dquot *dquot)
> >> +{
> >> +	int ret;
> >> +
> >> +	ret = dquot_release(dquot);
> >> +	if (ret < 0)
> >> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
> >> +	return ret;
> >> +}
> >> +
> >> +static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
> >> +{
> >> +	struct super_block *sb = dquot->dq_sb;
> >> +	struct f2fs_sb_info *sbi = F2FS_SB(sb);
> >> +	int ret;
> >> +
> >> +	ret = dquot_mark_dquot_dirty(dquot);
> >> +
> >> +	/* if we are using journalled quota */
> >> +	if (is_journalled_quota(sbi))
> >> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
> >> +
> >> +	return ret;
> >> +}
> >> +
> >> +static int f2fs_dquot_commit_info(struct super_block *sb, int type)
> >> +{
> >> +	int ret;
> >> +
> >> +	ret = dquot_commit_info(sb, type);
> >> +	if (ret < 0)
> >> +		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
> >> +	return ret;
> >> +}
> >> +
> >>  static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
> >>  {
> >>  	*projid = F2FS_I(inode)->i_projid;
> >> @@ -1890,11 +1978,11 @@ static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
> >>  
> >>  static const struct dquot_operations f2fs_quota_operations = {
> >>  	.get_reserved_space = f2fs_get_reserved_space,
> >> -	.write_dquot	= dquot_commit,
> >> -	.acquire_dquot	= dquot_acquire,
> >> -	.release_dquot	= dquot_release,
> >> -	.mark_dirty	= dquot_mark_dquot_dirty,
> >> -	.write_info	= dquot_commit_info,
> >> +	.write_dquot	= f2fs_dquot_commit,
> >> +	.acquire_dquot	= f2fs_dquot_acquire,
> >> +	.release_dquot	= f2fs_dquot_release,
> >> +	.mark_dirty	= f2fs_dquot_mark_dquot_dirty,
> >> +	.write_info	= f2fs_dquot_commit_info,
> >>  	.alloc_dquot	= dquot_alloc,
> >>  	.destroy_dquot	= dquot_destroy,
> >>  	.get_projid	= f2fs_get_projid,
> >> @@ -1912,6 +2000,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = {
> >>  	.get_nextdqblk	= dquot_get_next_dqblk,
> >>  };
> >>  #else
> >> +int f2fs_quota_sync(struct super_block *sb, int type)
> >> +{
> >> +	return 0;
> >> +}
> >> +
> >>  void f2fs_quota_off_umount(struct super_block *sb)
> >>  {
> >>  }
> >> @@ -2951,6 +3044,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> >>  		goto free_meta_inode;
> >>  	}
> >>  
> >> +	if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG))
> >> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
> >> +
> >>  	/* Initialize device list */
> >>  	err = f2fs_scan_devices(sbi);
> >>  	if (err) {
> >> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
> >> index 1d4b196291d6..aaca9f5d5f5c 100644
> >> --- a/include/linux/f2fs_fs.h
> >> +++ b/include/linux/f2fs_fs.h
> >> @@ -115,6 +115,7 @@ struct f2fs_super_block {
> >>  /*
> >>   * For checkpoint
> >>   */
> >> +#define CP_QUOTA_NEED_FSCK_FLAG		0x00000800
> >>  #define CP_LARGE_NAT_BITMAP_FLAG	0x00000400
> >>  #define CP_NOCRC_RECOVERY_FLAG	0x00000200
> >>  #define CP_TRIMMED_FLAG		0x00000100
> >> -- 
> >> 2.18.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v11] f2fs: guarantee journalled quota data by checkpoint
  2018-10-01  1:29     ` Jaegeuk Kim
@ 2018-10-01  1:39       ` Chao Yu
  2018-10-01  1:49           ` Jaegeuk Kim
  0 siblings, 1 reply; 18+ messages in thread
From: Chao Yu @ 2018-10-01  1:39 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-f2fs-devel, linux-kernel, Chao Yu, Weichao Guo

On 2018-10-1 9:29, Jaegeuk Kim wrote:
> On 10/01, Chao Yu wrote:
>> Hi Jaegeuk,
>>
>> On 2018-10-1 8:06, Jaegeuk Kim wrote:
>>> Hi Chao,
>>>
>>> This fails on fsstress with godown without fault injection. Could you please
>>> test a bit? I assumed that this patch should give no fsck failure along with
>>> valid checkpoint having no flag.
>>
>> Okay, let me reproduce with that case.
>>
>>>
>>> BTW, I'm in doubt that f2fs_lock_all covers entire quota modification. What
>>> about prepare_write_begin() -> f2fs_get_block() ... -> inc_valid_block_count()?
>>
>> If quota data changed in above path, we will detect that in below condition:
>>
>> block_operation()
>>
>> 	down_write(&sbi->node_change);
>>
>> 	if (__need_flush_quota(sbi)) {
>> 		up_write(&sbi->node_change);
>> 		f2fs_unlock_all(sbi);
>> 		goto retry_flush_quotas;
>> 	}
>>
>> So there is no problem?
> 
> We may need to check quota is dirty, since we have no way to detect by
> f2fs structures?

The condition below can check that.

static bool __need_flush_quota(struct f2fs_sb_info *sbi)
{
...
	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
		return true;
	if (get_pages(sbi, F2FS_DIRTY_QDATA))
		return true;
...
}

static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
{
...
	ret = dquot_mark_dquot_dirty(dquot);

	/* if we are using journalled quota */
	if (is_journalled_quota(sbi))
		set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
...
}

Thanks,

> 
>>
>> Thanks,
>>
>>>
>>> On 09/20, Chao Yu wrote:
>>>> From: Chao Yu <yuchao0@huawei.com>
>>>>
>>>> For journalled quota mode, let checkpoint flush dirty dquot data
>>>> and quota file data to guarantee persistence of all quota sysfiles in
>>>> the last checkpoint. This way, we can avoid corrupting quota sysfiles
>>>> when encountering SPO (sudden power-off).
>>>>
>>>> The implementation is as below:
>>>>
>>>> 1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there are
>>>> cached dquot metadata changes in the quota subsystem, and a later
>>>> checkpoint should:
>>>>  a) flush dquot metadata into the quota file.
>>>>  b) flush the quota file to storage to keep file usage consistent.
>>>>
>>>> 2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
>>>> operation failed due to -EIO or -ENOSPC, so later,
>>>>  a) checkpoint will skip syncing dquot metadata.
>>>>  b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
>>>>     hint for fsck repairing.
>>>>
>>>> 3. add a global state SBI_QUOTA_SKIP_FLUSH: during checkpoint, if quota
>>>> data updating is very heavy, it may cause a hung task in
>>>> block_operations(). To avoid this, if our retry count exceeds the
>>>> threshold (DEFAULT_RETRY_QUOTA_FLUSH_COUNT), let's just skip flushing
>>>> and retry in the next checkpoint.
>>>>
>>>> Signed-off-by: Weichao Guo <guoweichao@huawei.com>
>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>>> ---
>>>> v11:
>>>> - transfer quota data if fsynced inode's i_{u,g}id changed during
>>>> recovery.
>>>>  fs/f2fs/checkpoint.c    |  56 +++++++++++++++++--
>>>>  fs/f2fs/data.c          |  18 ++++--
>>>>  fs/f2fs/f2fs.h          |  50 ++++++++++++++---
>>>>  fs/f2fs/file.c          |  31 ++++++++---
>>>>  fs/f2fs/inline.c        |   4 +-
>>>>  fs/f2fs/inode.c         |  11 +++-
>>>>  fs/f2fs/namei.c         |   4 --
>>>>  fs/f2fs/recovery.c      |  43 +++++++++++++-
>>>>  fs/f2fs/super.c         | 120 ++++++++++++++++++++++++++++++++++++----
>>>>  include/linux/f2fs_fs.h |   1 +
>>>>  10 files changed, 289 insertions(+), 49 deletions(-)
>>>>
>>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>>> index d312d2829d5a..d624d7983197 100644
>>>> --- a/fs/f2fs/checkpoint.c
>>>> +++ b/fs/f2fs/checkpoint.c
>>>> @@ -1083,6 +1083,21 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi)
>>>>  	ckpt->next_free_nid = cpu_to_le32(last_nid);
>>>>  }
>>>>  
>>>> +static bool __need_flush_quota(struct f2fs_sb_info *sbi)
>>>> +{
>>>> +	if (!is_journalled_quota(sbi))
>>>> +		return false;
>>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
>>>> +		return false;
>>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
>>>> +		return false;
>>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
>>>> +		return true;
>>>> +	if (get_pages(sbi, F2FS_DIRTY_QDATA))
>>>> +		return true;
>>>> +	return false;
>>>> +}
>>>> +
>>>>  /*
>>>>   * Freeze all the FS-operations for checkpoint.
>>>>   */
>>>> @@ -1094,12 +1109,30 @@ static int block_operations(struct f2fs_sb_info *sbi)
>>>>  		.for_reclaim = 0,
>>>>  	};
>>>>  	struct blk_plug plug;
>>>> -	int err = 0;
>>>> +	int err = 0, cnt = 0;
>>>>  
>>>>  	blk_start_plug(&plug);
>>>>  
>>>> -retry_flush_dents:
>>>> +retry_flush_quotas:
>>>> +	if (__need_flush_quota(sbi)) {
>>>> +		if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
>>>> +			set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
>>>> +			f2fs_lock_all(sbi);
>>>> +			goto retry_flush_dents;
>>>> +		}
>>>> +		clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
>>>> +
>>>> +		f2fs_quota_sync(sbi->sb, -1);
>>>> +	}
>>>> +
>>>>  	f2fs_lock_all(sbi);
>>>> +	if (__need_flush_quota(sbi)) {
>>>> +		f2fs_unlock_all(sbi);
>>>> +		cond_resched();
>>>> +		goto retry_flush_quotas;
>>>> +	}
>>>> +
>>>> +retry_flush_dents:
>>>>  	/* write all the dirty dentry pages */
>>>>  	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
>>>>  		f2fs_unlock_all(sbi);
>>>> @@ -1107,7 +1140,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
>>>>  		if (err)
>>>>  			goto out;
>>>>  		cond_resched();
>>>> -		goto retry_flush_dents;
>>>> +		goto retry_flush_quotas;
>>>>  	}
>>>>  
>>>>  	/*
>>>> @@ -1116,6 +1149,12 @@ static int block_operations(struct f2fs_sb_info *sbi)
>>>>  	 */
>>>>  	down_write(&sbi->node_change);
>>>>  
>>>> +	if (__need_flush_quota(sbi)) {
>>>> +		up_write(&sbi->node_change);
>>>> +		f2fs_unlock_all(sbi);
>>>> +		goto retry_flush_quotas;
>>>> +	}
>>>> +
>>>>  	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
>>>>  		up_write(&sbi->node_change);
>>>>  		f2fs_unlock_all(sbi);
>>>> @@ -1123,7 +1162,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
>>>>  		if (err)
>>>>  			goto out;
>>>>  		cond_resched();
>>>> -		goto retry_flush_dents;
>>>> +		goto retry_flush_quotas;
>>>>  	}
>>>>  
>>>>  retry_flush_nodes:
>>>> @@ -1214,6 +1253,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>>  	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
>>>>  		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
>>>>  
>>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
>>>> +		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
>>>> +	else
>>>> +		__clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
>>>> +
>>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
>>>> +		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
>>>> +
>>>>  	/* set this flag to activate crc|cp_ver for recovery */
>>>>  	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
>>>>  	__clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
>>>> @@ -1421,6 +1468,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>>  
>>>>  	clear_sbi_flag(sbi, SBI_IS_DIRTY);
>>>>  	clear_sbi_flag(sbi, SBI_NEED_CP);
>>>> +	clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
>>>>  	__set_cp_next_pack(sbi);
>>>>  
>>>>  	/*
>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>> index 57c0823d22e0..b96f8588d565 100644
>>>> --- a/fs/f2fs/data.c
>>>> +++ b/fs/f2fs/data.c
>>>> @@ -46,7 +46,7 @@ static bool __is_cp_guaranteed(struct page *page)
>>>>  			inode->i_ino ==  F2FS_NODE_INO(sbi) ||
>>>>  			S_ISDIR(inode->i_mode) ||
>>>>  			(S_ISREG(inode->i_mode) &&
>>>> -			is_inode_flag_set(inode, FI_ATOMIC_FILE)) ||
>>>> +			(f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
>>>>  			is_cold_data(page))
>>>>  		return true;
>>>>  	return false;
>>>> @@ -975,7 +975,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
>>>>  	return err;
>>>>  }
>>>>  
>>>> -static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
>>>> +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
>>>>  {
>>>>  	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
>>>>  		if (lock)
>>>> @@ -1716,6 +1716,8 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
>>>>  		return true;
>>>>  	if (S_ISDIR(inode->i_mode))
>>>>  		return true;
>>>> +	if (IS_NOQUOTA(inode))
>>>> +		return true;
>>>>  	if (f2fs_is_atomic_file(inode))
>>>>  		return true;
>>>>  	if (fio) {
>>>> @@ -1960,7 +1962,7 @@ static int __write_data_page(struct page *page, bool *submitted,
>>>>  	}
>>>>  
>>>>  	unlock_page(page);
>>>> -	if (!S_ISDIR(inode->i_mode))
>>>> +	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode))
>>>>  		f2fs_balance_fs(sbi, need_balance_fs);
>>>>  
>>>>  	if (unlikely(f2fs_cp_error(sbi))) {
>>>> @@ -2151,6 +2153,8 @@ static inline bool __should_serialize_io(struct inode *inode,
>>>>  {
>>>>  	if (!S_ISREG(inode->i_mode))
>>>>  		return false;
>>>> +	if (IS_NOQUOTA(inode))
>>>> +		return false;
>>>>  	if (wbc->sync_mode != WB_SYNC_ALL)
>>>>  		return true;
>>>>  	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
>>>> @@ -2180,7 +2184,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>>  	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
>>>>  		goto skip_write;
>>>>  
>>>> -	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
>>>> +	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
>>>> +			wbc->sync_mode == WB_SYNC_NONE &&
>>>>  			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
>>>>  			f2fs_available_free_memory(sbi, DIRTY_DENTS))
>>>>  		goto skip_write;
>>>> @@ -2245,7 +2250,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
>>>>  		down_write(&F2FS_I(inode)->i_mmap_sem);
>>>>  
>>>>  		truncate_pagecache(inode, i_size);
>>>> -		f2fs_truncate_blocks(inode, i_size, true);
>>>> +		f2fs_truncate_blocks(inode, i_size, true, true);
>>>>  
>>>>  		up_write(&F2FS_I(inode)->i_mmap_sem);
>>>>  		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
>>>> @@ -2380,7 +2385,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
>>>>  	if (err)
>>>>  		goto fail;
>>>>  
>>>> -	if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
>>>> +	if (need_balance && !IS_NOQUOTA(inode) &&
>>>> +			has_not_enough_free_secs(sbi, 0, 0)) {
>>>>  		unlock_page(page);
>>>>  		f2fs_balance_fs(sbi, true);
>>>>  		lock_page(page);
>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>> index 917b2ca76aac..b5e400be73e0 100644
>>>> --- a/fs/f2fs/f2fs.h
>>>> +++ b/fs/f2fs/f2fs.h
>>>> @@ -525,6 +525,9 @@ enum {
>>>>  
>>>>  #define DEFAULT_RETRY_IO_COUNT	8	/* maximum retry read IO count */
>>>>  
>>>> +/* maximum retry quota flush count */
>>>> +#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT		8
>>>> +
>>>>  #define F2FS_LINK_MAX	0xffffffff	/* maximum link count per file */
>>>>  
>>>>  #define MAX_DIR_RA_PAGES	4	/* maximum ra pages of dir */
>>>> @@ -1088,6 +1091,9 @@ enum {
>>>>  	SBI_NEED_CP,				/* need to checkpoint */
>>>>  	SBI_IS_SHUTDOWN,			/* shutdown by ioctl */
>>>>  	SBI_IS_RECOVERED,			/* recovered orphan/data */
>>>> +	SBI_QUOTA_NEED_FLUSH,			/* need to flush quota info in CP */
>>>> +	SBI_QUOTA_SKIP_FLUSH,			/* skip flushing quota in current CP */
>>>> +	SBI_QUOTA_NEED_REPAIR,			/* quota file may be corrupted */
>>>>  };
>>>>  
>>>>  enum {
>>>> @@ -1891,12 +1897,18 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
>>>>  {
>>>>  	block_t	valid_block_count;
>>>>  	unsigned int valid_node_count;
>>>> -	bool quota = inode && !is_inode;
>>>> +	int err;
>>>>  
>>>> -	if (quota) {
>>>> -		int ret = dquot_reserve_block(inode, 1);
>>>> -		if (ret)
>>>> -			return ret;
>>>> +	if (is_inode) {
>>>> +		if (inode) {
>>>> +			err = dquot_alloc_inode(inode);
>>>> +			if (err)
>>>> +				return err;
>>>> +		}
>>>> +	} else {
>>>> +		err = dquot_reserve_block(inode, 1);
>>>> +		if (err)
>>>> +			return err;
>>>>  	}
>>>>  
>>>>  	if (time_to_inject(sbi, FAULT_BLOCK)) {
>>>> @@ -1938,8 +1950,12 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
>>>>  	return 0;
>>>>  
>>>>  enospc:
>>>> -	if (quota)
>>>> +	if (is_inode) {
>>>> +		if (inode)
>>>> +			dquot_free_inode(inode);
>>>> +	} else {
>>>>  		dquot_release_reservation_block(inode, 1);
>>>> +	}
>>>>  	return -ENOSPC;
>>>>  }
>>>>  
>>>> @@ -1960,7 +1976,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
>>>>  
>>>>  	spin_unlock(&sbi->stat_lock);
>>>>  
>>>> -	if (!is_inode)
>>>> +	if (is_inode)
>>>> +		dquot_free_inode(inode);
>>>> +	else
>>>>  		f2fs_i_blocks_write(inode, 1, false, true);
>>>>  }
>>>>  
>>>> @@ -2739,7 +2757,8 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
>>>>   */
>>>>  int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
>>>>  void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
>>>> -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
>>>> +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
>>>> +							bool buf_write);
>>>>  int f2fs_truncate(struct inode *inode);
>>>>  int f2fs_getattr(const struct path *path, struct kstat *stat,
>>>>  			u32 request_mask, unsigned int flags);
>>>> @@ -2827,6 +2846,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
>>>>  int f2fs_inode_dirtied(struct inode *inode, bool sync);
>>>>  void f2fs_inode_synced(struct inode *inode);
>>>>  int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
>>>> +int f2fs_quota_sync(struct super_block *sb, int type);
>>>>  void f2fs_quota_off_umount(struct super_block *sb);
>>>>  int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
>>>>  int f2fs_sync_fs(struct super_block *sb, int sync);
>>>> @@ -3025,6 +3045,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
>>>>  struct page *f2fs_get_new_data_page(struct inode *inode,
>>>>  			struct page *ipage, pgoff_t index, bool new_i_size);
>>>>  int f2fs_do_write_data_page(struct f2fs_io_info *fio);
>>>> +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
>>>>  int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
>>>>  			int create, int flag);
>>>>  int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
>>>> @@ -3456,3 +3477,16 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
>>>>  #endif
>>>>  
>>>>  #endif
>>>> +
>>>> +static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
>>>> +{
>>>> +#ifdef CONFIG_QUOTA
>>>> +	if (f2fs_sb_has_quota_ino(sbi->sb))
>>>> +		return true;
>>>> +	if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
>>>> +		F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
>>>> +		F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
>>>> +		return true;
>>>> +#endif
>>>> +	return false;
>>>> +}
>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>> index 357422a4c319..a75f3e145bf1 100644
>>>> --- a/fs/f2fs/file.c
>>>> +++ b/fs/f2fs/file.c
>>>> @@ -586,7 +586,8 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
>>>>  	return 0;
>>>>  }
>>>>  
>>>> -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
>>>> +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
>>>> +							bool buf_write)
>>>>  {
>>>>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>>>  	struct dnode_of_data dn;
>>>> @@ -594,6 +595,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
>>>>  	int count = 0, err = 0;
>>>>  	struct page *ipage;
>>>>  	bool truncate_page = false;
>>>> +	int flag = buf_write ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO;
>>>>  
>>>>  	trace_f2fs_truncate_blocks_enter(inode, from);
>>>>  
>>>> @@ -603,7 +605,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
>>>>  		goto free_partial;
>>>>  
>>>>  	if (lock)
>>>> -		f2fs_lock_op(sbi);
>>>> +		__do_map_lock(sbi, flag, true);
>>>>  
>>>>  	ipage = f2fs_get_node_page(sbi, inode->i_ino);
>>>>  	if (IS_ERR(ipage)) {
>>>> @@ -641,7 +643,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
>>>>  	err = f2fs_truncate_inode_blocks(inode, free_from);
>>>>  out:
>>>>  	if (lock)
>>>> -		f2fs_unlock_op(sbi);
>>>> +		__do_map_lock(sbi, flag, false);
>>>>  free_partial:
>>>>  	/* lastly zero out the first data page */
>>>>  	if (!err)
>>>> @@ -676,7 +678,7 @@ int f2fs_truncate(struct inode *inode)
>>>>  			return err;
>>>>  	}
>>>>  
>>>> -	err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
>>>> +	err = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
>>>>  	if (err)
>>>>  		return err;
>>>>  
>>>> @@ -785,9 +787,24 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
>>>>  		!uid_eq(attr->ia_uid, inode->i_uid)) ||
>>>>  		(attr->ia_valid & ATTR_GID &&
>>>>  		!gid_eq(attr->ia_gid, inode->i_gid))) {
>>>> +		f2fs_lock_op(F2FS_I_SB(inode));
>>>>  		err = dquot_transfer(inode, attr);
>>>> -		if (err)
>>>> +		if (err) {
>>>> +			set_sbi_flag(F2FS_I_SB(inode),
>>>> +					SBI_QUOTA_NEED_REPAIR);
>>>> +			f2fs_unlock_op(F2FS_I_SB(inode));
>>>>  			return err;
>>>> +		}
>>>> +		/*
>>>> +		 * update uid/gid under lock_op(), so that dquot and inode can
>>>> +		 * be updated atomically.
>>>> +		 */
>>>> +		if (attr->ia_valid & ATTR_UID)
>>>> +			inode->i_uid = attr->ia_uid;
>>>> +		if (attr->ia_valid & ATTR_GID)
>>>> +			inode->i_gid = attr->ia_gid;
>>>> +		f2fs_mark_inode_dirty_sync(inode, true);
>>>> +		f2fs_unlock_op(F2FS_I_SB(inode));
>>>>  	}
>>>>  
>>>>  	if (attr->ia_valid & ATTR_SIZE) {
>>>> @@ -1242,7 +1259,7 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
>>>>  	new_size = i_size_read(inode) - len;
>>>>  	truncate_pagecache(inode, new_size);
>>>>  
>>>> -	ret = f2fs_truncate_blocks(inode, new_size, true);
>>>> +	ret = f2fs_truncate_blocks(inode, new_size, true, false);
>>>>  	up_write(&F2FS_I(inode)->i_mmap_sem);
>>>>  	if (!ret)
>>>>  		f2fs_i_size_write(inode, new_size);
>>>> @@ -1427,7 +1444,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
>>>>  	f2fs_balance_fs(sbi, true);
>>>>  
>>>>  	down_write(&F2FS_I(inode)->i_mmap_sem);
>>>> -	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
>>>> +	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
>>>>  	up_write(&F2FS_I(inode)->i_mmap_sem);
>>>>  	if (ret)
>>>>  		return ret;
>>>> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
>>>> index 425d740f87fd..cb31a719b048 100644
>>>> --- a/fs/f2fs/inline.c
>>>> +++ b/fs/f2fs/inline.c
>>>> @@ -298,7 +298,7 @@ bool f2fs_recover_inline_data(struct inode *inode, struct page *npage)
>>>>  		clear_inode_flag(inode, FI_INLINE_DATA);
>>>>  		f2fs_put_page(ipage, 1);
>>>>  	} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
>>>> -		if (f2fs_truncate_blocks(inode, 0, false))
>>>> +		if (f2fs_truncate_blocks(inode, 0, false, false))
>>>>  			return false;
>>>>  		goto process_inline;
>>>>  	}
>>>> @@ -470,7 +470,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
>>>>  	return 0;
>>>>  punch_dentry_pages:
>>>>  	truncate_inode_pages(&dir->i_data, 0);
>>>> -	f2fs_truncate_blocks(dir, 0, false);
>>>> +	f2fs_truncate_blocks(dir, 0, false, false);
>>>>  	f2fs_remove_dirty_inode(dir);
>>>>  	return err;
>>>>  }
>>>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
>>>> index 86e7333d60c1..3c278e63d1a3 100644
>>>> --- a/fs/f2fs/inode.c
>>>> +++ b/fs/f2fs/inode.c
>>>> @@ -645,7 +645,11 @@ void f2fs_evict_inode(struct inode *inode)
>>>>  	if (inode->i_nlink || is_bad_inode(inode))
>>>>  		goto no_delete;
>>>>  
>>>> -	dquot_initialize(inode);
>>>> +	err = dquot_initialize(inode);
>>>> +	if (err) {
>>>> +		err = 0;
>>>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
>>>> +	}
>>>>  
>>>>  	f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
>>>>  	f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
>>>> @@ -677,9 +681,10 @@ void f2fs_evict_inode(struct inode *inode)
>>>>  		goto retry;
>>>>  	}
>>>>  
>>>> -	if (err)
>>>> +	if (err) {
>>>>  		f2fs_update_inode_page(inode);
>>>> -	dquot_free_inode(inode);
>>>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
>>>> +	}
>>>>  	sb_end_intwrite(inode->i_sb);
>>>>  no_delete:
>>>>  	dquot_drop(inode);
>>>> diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
>>>> index 9ad451ac2cec..b65491a63115 100644
>>>> --- a/fs/f2fs/namei.c
>>>> +++ b/fs/f2fs/namei.c
>>>> @@ -71,10 +71,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
>>>>  	if (err)
>>>>  		goto fail_drop;
>>>>  
>>>> -	err = dquot_alloc_inode(inode);
>>>> -	if (err)
>>>> -		goto fail_drop;
>>>> -
>>>>  	set_inode_flag(inode, FI_NEW_INODE);
>>>>  
>>>>  	/* If the directory encrypted, then we should encrypt the inode. */
>>>> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
>>>> index 41f2c0fe6d8e..70f05650191e 100644
>>>> --- a/fs/f2fs/recovery.c
>>>> +++ b/fs/f2fs/recovery.c
>>>> @@ -191,6 +191,33 @@ static int recover_dentry(struct inode *inode, struct page *ipage,
>>>>  	return err;
>>>>  }
>>>>  
>>>> +static int recover_quota_data(struct inode *inode, struct page *page)
>>>> +{
>>>> +	struct f2fs_inode *raw = F2FS_INODE(page);
>>>> +	struct iattr attr;
>>>> +	uid_t i_uid = le32_to_cpu(raw->i_uid);
>>>> +	gid_t i_gid = le32_to_cpu(raw->i_gid);
>>>> +	int err;
>>>> +
>>>> +	memset(&attr, 0, sizeof(attr));
>>>> +
>>>> +	attr.ia_uid = make_kuid(inode->i_sb->s_user_ns, i_uid);
>>>> +	attr.ia_gid = make_kgid(inode->i_sb->s_user_ns, i_gid);
>>>> +
>>>> +	if (!uid_eq(attr.ia_uid, inode->i_uid))
>>>> +		attr.ia_valid |= ATTR_UID;
>>>> +	if (!gid_eq(attr.ia_gid, inode->i_gid))
>>>> +		attr.ia_valid |= ATTR_GID;
>>>> +
>>>> +	if (!attr.ia_valid)
>>>> +		return 0;
>>>> +
>>>> +	err = dquot_transfer(inode, &attr);
>>>> +	if (err)
>>>> +		set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR);
>>>> +	return err;
>>>> +}
>>>> +
>>>>  static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
>>>>  {
>>>>  	if (ri->i_inline & F2FS_PIN_FILE)
>>>> @@ -203,12 +230,18 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
>>>>  		clear_inode_flag(inode, FI_DATA_EXIST);
>>>>  }
>>>>  
>>>> -static void recover_inode(struct inode *inode, struct page *page)
>>>> +static int recover_inode(struct inode *inode, struct page *page)
>>>>  {
>>>>  	struct f2fs_inode *raw = F2FS_INODE(page);
>>>>  	char *name;
>>>> +	int err;
>>>>  
>>>>  	inode->i_mode = le16_to_cpu(raw->i_mode);
>>>> +
>>>> +	err = recover_quota_data(inode, page);
>>>> +	if (err)
>>>> +		return err;
>>>> +
>>>>  	i_uid_write(inode, le32_to_cpu(raw->i_uid));
>>>>  	i_gid_write(inode, le32_to_cpu(raw->i_gid));
>>>>  	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
>>>> @@ -231,6 +264,7 @@ static void recover_inode(struct inode *inode, struct page *page)
>>>>  	f2fs_msg(inode->i_sb, KERN_NOTICE,
>>>>  		"recover_inode: ino = %x, name = %s, inline = %x",
>>>>  			ino_of_node(page), name, raw->i_inline);
>>>> +	return 0;
>>>>  }
>>>>  
>>>>  static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
>>>> @@ -597,8 +631,11 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
>>>>  		 * In this case, we can lose the latest inode(x).
>>>>  		 * So, call recover_inode for the inode update.
>>>>  		 */
>>>> -		if (IS_INODE(page))
>>>> -			recover_inode(entry->inode, page);
>>>> +		if (IS_INODE(page)) {
>>>> +			err = recover_inode(entry->inode, page);
>>>> +			if (err)
>>>> +				break;
>>>> +		}
>>>>  		if (entry->last_dentry == blkaddr) {
>>>>  			err = recover_dentry(entry->inode, page, dir_list);
>>>>  			if (err) {
>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>> index 945468968d4e..3a46c9b81188 100644
>>>> --- a/fs/f2fs/super.c
>>>> +++ b/fs/f2fs/super.c
>>>> @@ -1689,6 +1689,13 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode)
>>>>  
>>>>  static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
>>>>  {
>>>> +
>>>> +	if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
>>>> +		f2fs_msg(sbi->sb, KERN_ERR,
>>>> +			"quota sysfile may be corrupted, skip loading it");
>>>> +		return 0;
>>>> +	}
>>>> +
>>>>  	return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type],
>>>>  					F2FS_OPTION(sbi).s_jquota_fmt, type);
>>>>  }
>>>> @@ -1759,7 +1766,14 @@ static int f2fs_enable_quotas(struct super_block *sb)
>>>>  		test_opt(F2FS_SB(sb), PRJQUOTA),
>>>>  	};
>>>>  
>>>> -	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
>>>> +	if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) {
>>>> +		f2fs_msg(sb, KERN_ERR,
>>>> +			"quota file may be corrupted, skip loading it");
>>>> +		return 0;
>>>> +	}
>>>> +
>>>> +	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
>>>> +
>>>>  	for (type = 0; type < MAXQUOTAS; type++) {
>>>>  		qf_inum = f2fs_qf_ino(sb, type);
>>>>  		if (qf_inum) {
>>>> @@ -1773,6 +1787,8 @@ static int f2fs_enable_quotas(struct super_block *sb)
>>>>  					"fsck to fix.", type, err);
>>>>  				for (type--; type >= 0; type--)
>>>>  					dquot_quota_off(sb, type);
>>>> +				set_sbi_flag(F2FS_SB(sb),
>>>> +						SBI_QUOTA_NEED_REPAIR);
>>>>  				return err;
>>>>  			}
>>>>  		}
>>>> @@ -1780,35 +1796,51 @@ static int f2fs_enable_quotas(struct super_block *sb)
>>>>  	return 0;
>>>>  }
>>>>  
>>>> -static int f2fs_quota_sync(struct super_block *sb, int type)
>>>> +int f2fs_quota_sync(struct super_block *sb, int type)
>>>>  {
>>>> +	struct f2fs_sb_info *sbi = F2FS_SB(sb);
>>>>  	struct quota_info *dqopt = sb_dqopt(sb);
>>>>  	int cnt;
>>>>  	int ret;
>>>>  
>>>>  	ret = dquot_writeback_dquots(sb, type);
>>>>  	if (ret)
>>>> -		return ret;
>>>> +		goto out;
>>>>  
>>>>  	/*
>>>>  	 * Now when everything is written we can discard the pagecache so
>>>>  	 * that userspace sees the changes.
>>>>  	 */
>>>>  	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
>>>> +		struct address_space *mapping;
>>>> +
>>>>  		if (type != -1 && cnt != type)
>>>>  			continue;
>>>>  		if (!sb_has_quota_active(sb, cnt))
>>>>  			continue;
>>>>  
>>>> -		ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping);
>>>> +		mapping = dqopt->files[cnt]->i_mapping;
>>>> +
>>>> +		ret = filemap_fdatawrite(mapping);
>>>> +		if (ret)
>>>> +			goto out;
>>>> +
>>>> +		/* if we are using journalled quota */
>>>> +		if (is_journalled_quota(sbi))
>>>> +			continue;
>>>> +
>>>> +		ret = filemap_fdatawait(mapping);
>>>>  		if (ret)
>>>> -			return ret;
>>>> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>>>>  
>>>>  		inode_lock(dqopt->files[cnt]);
>>>>  		truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
>>>>  		inode_unlock(dqopt->files[cnt]);
>>>>  	}
>>>> -	return 0;
>>>> +out:
>>>> +	if (ret)
>>>> +		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>>>> +	return ret;
>>>>  }
>>>>  
>>>>  static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
>>>> @@ -1877,11 +1909,67 @@ void f2fs_quota_off_umount(struct super_block *sb)
>>>>  				"Fail to turn off disk quota "
>>>>  				"(type: %d, err: %d, ret:%d), Please "
>>>>  				"run fsck to fix it.", type, err, ret);
>>>> -			set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK);
>>>> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>>>>  		}
>>>>  	}
>>>>  }
>>>>  
>>>> +static int f2fs_dquot_commit(struct dquot *dquot)
>>>> +{
>>>> +	int ret;
>>>> +
>>>> +	ret = dquot_commit(dquot);
>>>> +	if (ret < 0)
>>>> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
>>>> +	return ret;
>>>> +}
>>>> +
>>>> +static int f2fs_dquot_acquire(struct dquot *dquot)
>>>> +{
>>>> +	int ret;
>>>> +
>>>> +	ret = dquot_acquire(dquot);
>>>> +	if (ret < 0)
>>>> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
>>>> +
>>>> +	return ret;
>>>> +}
>>>> +
>>>> +static int f2fs_dquot_release(struct dquot *dquot)
>>>> +{
>>>> +	int ret;
>>>> +
>>>> +	ret = dquot_release(dquot);
>>>> +	if (ret < 0)
>>>> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
>>>> +	return ret;
>>>> +}
>>>> +
>>>> +static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
>>>> +{
>>>> +	struct super_block *sb = dquot->dq_sb;
>>>> +	struct f2fs_sb_info *sbi = F2FS_SB(sb);
>>>> +	int ret;
>>>> +
>>>> +	ret = dquot_mark_dquot_dirty(dquot);
>>>> +
>>>> +	/* if we are using journalled quota */
>>>> +	if (is_journalled_quota(sbi))
>>>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
>>>> +
>>>> +	return ret;
>>>> +}
>>>> +
>>>> +static int f2fs_dquot_commit_info(struct super_block *sb, int type)
>>>> +{
>>>> +	int ret;
>>>> +
>>>> +	ret = dquot_commit_info(sb, type);
>>>> +	if (ret < 0)
>>>> +		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>>>> +	return ret;
>>>> +}
>>>> +
>>>>  static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
>>>>  {
>>>>  	*projid = F2FS_I(inode)->i_projid;
>>>> @@ -1890,11 +1978,11 @@ static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
>>>>  
>>>>  static const struct dquot_operations f2fs_quota_operations = {
>>>>  	.get_reserved_space = f2fs_get_reserved_space,
>>>> -	.write_dquot	= dquot_commit,
>>>> -	.acquire_dquot	= dquot_acquire,
>>>> -	.release_dquot	= dquot_release,
>>>> -	.mark_dirty	= dquot_mark_dquot_dirty,
>>>> -	.write_info	= dquot_commit_info,
>>>> +	.write_dquot	= f2fs_dquot_commit,
>>>> +	.acquire_dquot	= f2fs_dquot_acquire,
>>>> +	.release_dquot	= f2fs_dquot_release,
>>>> +	.mark_dirty	= f2fs_dquot_mark_dquot_dirty,
>>>> +	.write_info	= f2fs_dquot_commit_info,
>>>>  	.alloc_dquot	= dquot_alloc,
>>>>  	.destroy_dquot	= dquot_destroy,
>>>>  	.get_projid	= f2fs_get_projid,
>>>> @@ -1912,6 +2000,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = {
>>>>  	.get_nextdqblk	= dquot_get_next_dqblk,
>>>>  };
>>>>  #else
>>>> +int f2fs_quota_sync(struct super_block *sb, int type)
>>>> +{
>>>> +	return 0;
>>>> +}
>>>> +
>>>>  void f2fs_quota_off_umount(struct super_block *sb)
>>>>  {
>>>>  }
>>>> @@ -2951,6 +3044,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>  		goto free_meta_inode;
>>>>  	}
>>>>  
>>>> +	if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG))
>>>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
>>>> +
>>>>  	/* Initialize device list */
>>>>  	err = f2fs_scan_devices(sbi);
>>>>  	if (err) {
>>>> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
>>>> index 1d4b196291d6..aaca9f5d5f5c 100644
>>>> --- a/include/linux/f2fs_fs.h
>>>> +++ b/include/linux/f2fs_fs.h
>>>> @@ -115,6 +115,7 @@ struct f2fs_super_block {
>>>>  /*
>>>>   * For checkpoint
>>>>   */
>>>> +#define CP_QUOTA_NEED_FSCK_FLAG		0x00000800
>>>>  #define CP_LARGE_NAT_BITMAP_FLAG	0x00000400
>>>>  #define CP_NOCRC_RECOVERY_FLAG	0x00000200
>>>>  #define CP_TRIMMED_FLAG		0x00000100
>>>> -- 
>>>> 2.18.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v11] f2fs: guarantee journalled quota data by checkpoint
  2018-10-01  1:39       ` Chao Yu
@ 2018-10-01  1:49           ` Jaegeuk Kim
  0 siblings, 0 replies; 18+ messages in thread
From: Jaegeuk Kim @ 2018-10-01  1:49 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-f2fs-devel, linux-kernel, Chao Yu, Weichao Guo

On 10/01, Chao Yu wrote:
> On 2018-10-1 9:29, Jaegeuk Kim wrote:
> > On 10/01, Chao Yu wrote:
> >> Hi Jaegeuk,
> >>
> >> On 2018-10-1 8:06, Jaegeuk Kim wrote:
> >>> Hi Chao,
> >>>
> >>> This fails on fsstress with godown, without fault injection. Could you please
> >>> test a bit? I assumed that with this patch there should be no fsck failure
> >>> when a valid checkpoint has no flag set.
> >>
> >> Okay, let me reproduce with that case.
> >>
> >>>
> >>> BTW, I doubt that f2fs_lock_all covers the entire quota modification. What
> >>> about prepare_write_begin() -> f2fs_get_block() ... -> inc_valid_block_count()?
> >>
> >> If quota data is changed in the above path, we will detect that with the
> >> condition below:
> >>
> >> block_operations()
> >>
> >> 	down_write(&sbi->node_change);
> >>
> >> 	if (__need_flush_quota(sbi)) {
> >> 		up_write(&sbi->node_change);
> >> 		f2fs_unlock_all(sbi);
> >> 		goto retry_flush_quotas;
> >> 	}
> >>
> >> So there is no problem?
> > 
> > We may need to check whether quota is dirty, since we have no way to detect
> > that through f2fs structures?
> 
> The condition below can check that.
> 
> static bool __need_flush_quota(struct f2fs_sb_info *sbi)
> {
> ...
> 	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
> 		return true;
> 	if (get_pages(sbi, F2FS_DIRTY_QDATA))
> 		return true;
> ...
> }
> 
> static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
> {
> ...
> 	ret = dquot_mark_dquot_dirty(dquot);
> 
> 	/* if we are using journalled quota */
> 	if (is_journalled_quota(sbi))
> 		set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
> ...
> }

Okay, then, could you please run the above stress test to reproduce this?
Thanks,

> 
> Thanks,
> 
> > 
> >>
> >> Thanks,
> >>
> >>>
> >>> On 09/20, Chao Yu wrote:
> >>>> From: Chao Yu <yuchao0@huawei.com>
> >>>>
> >>>> For journalled quota mode, let checkpoint flush dquot dirty data
> >>>> and quota file data to guarantee persistence of all quota sysfiles in
> >>>> the last checkpoint; this way, we can avoid corrupting quota sysfiles
> >>>> when encountering SPO.
> >>>>
> >>>> The implementation is as below:
> >>>>
> >>>> 1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there are
> >>>> cached dquot metadata changes in quota subsystem, and later checkpoint
> >>>> should:
> >>>>  a) flush dquot metadata into quota file.
> >>>>  b) flush quota file to storage to keep file usage consistent.
> >>>>
> >>>> 2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
> >>>> operation failed due to -EIO or -ENOSPC, so later,
> >>>>  a) checkpoint will skip syncing dquot metadata.
> >>>>  b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
> >>>>     hint for fsck repairing.
> >>>>
> >>>> 3. add a global state SBI_QUOTA_SKIP_FLUSH: in checkpoint, if quota
> >>>> data updating is very heavy, it may cause hungtask in block_operations().
> >>>> To avoid this, if our retry count exceeds the threshold, let's just skip
> >>>> flushing and retry in the next checkpoint.
> >>>>
> >>>> Signed-off-by: Weichao Guo <guoweichao@huawei.com>
> >>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> >>>> ---
> >>>> v11:
> >>>> - transfer quota data if fsynced inode's i_{u,g}id changed during
> >>>> recovery.
> >>>>  fs/f2fs/checkpoint.c    |  56 +++++++++++++++++--
> >>>>  fs/f2fs/data.c          |  18 ++++--
> >>>>  fs/f2fs/f2fs.h          |  50 ++++++++++++++---
> >>>>  fs/f2fs/file.c          |  31 ++++++++---
> >>>>  fs/f2fs/inline.c        |   4 +-
> >>>>  fs/f2fs/inode.c         |  11 +++-
> >>>>  fs/f2fs/namei.c         |   4 --
> >>>>  fs/f2fs/recovery.c      |  43 +++++++++++++-
> >>>>  fs/f2fs/super.c         | 120 ++++++++++++++++++++++++++++++++++++----
> >>>>  include/linux/f2fs_fs.h |   1 +
> >>>>  10 files changed, 289 insertions(+), 49 deletions(-)
> >>>>
> >>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> >>>> index d312d2829d5a..d624d7983197 100644
> >>>> --- a/fs/f2fs/checkpoint.c
> >>>> +++ b/fs/f2fs/checkpoint.c
> >>>> @@ -1083,6 +1083,21 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi)
> >>>>  	ckpt->next_free_nid = cpu_to_le32(last_nid);
> >>>>  }
> >>>>  
> >>>> +static bool __need_flush_quota(struct f2fs_sb_info *sbi)
> >>>> +{
> >>>> +	if (!is_journalled_quota(sbi))
> >>>> +		return false;
> >>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
> >>>> +		return false;
> >>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
> >>>> +		return false;
> >>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
> >>>> +		return true;
> >>>> +	if (get_pages(sbi, F2FS_DIRTY_QDATA))
> >>>> +		return true;
> >>>> +	return false;
> >>>> +}
> >>>> +
> >>>>  /*
> >>>>   * Freeze all the FS-operations for checkpoint.
> >>>>   */
> >>>> @@ -1094,12 +1109,30 @@ static int block_operations(struct f2fs_sb_info *sbi)
> >>>>  		.for_reclaim = 0,
> >>>>  	};
> >>>>  	struct blk_plug plug;
> >>>> -	int err = 0;
> >>>> +	int err = 0, cnt = 0;
> >>>>  
> >>>>  	blk_start_plug(&plug);
> >>>>  
> >>>> -retry_flush_dents:
> >>>> +retry_flush_quotas:
> >>>> +	if (__need_flush_quota(sbi)) {
> >>>> +		if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
> >>>> +			set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
> >>>> +			f2fs_lock_all(sbi);
> >>>> +			goto retry_flush_dents;
> >>>> +		}
> >>>> +		clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
> >>>> +
> >>>> +		f2fs_quota_sync(sbi->sb, -1);
> >>>> +	}
> >>>> +
> >>>>  	f2fs_lock_all(sbi);
> >>>> +	if (__need_flush_quota(sbi)) {
> >>>> +		f2fs_unlock_all(sbi);
> >>>> +		cond_resched();
> >>>> +		goto retry_flush_quotas;
> >>>> +	}
> >>>> +
> >>>> +retry_flush_dents:
> >>>>  	/* write all the dirty dentry pages */
> >>>>  	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
> >>>>  		f2fs_unlock_all(sbi);
> >>>> @@ -1107,7 +1140,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
> >>>>  		if (err)
> >>>>  			goto out;
> >>>>  		cond_resched();
> >>>> -		goto retry_flush_dents;
> >>>> +		goto retry_flush_quotas;
> >>>>  	}
> >>>>  
> >>>>  	/*
> >>>> @@ -1116,6 +1149,12 @@ static int block_operations(struct f2fs_sb_info *sbi)
> >>>>  	 */
> >>>>  	down_write(&sbi->node_change);
> >>>>  
> >>>> +	if (__need_flush_quota(sbi)) {
> >>>> +		up_write(&sbi->node_change);
> >>>> +		f2fs_unlock_all(sbi);
> >>>> +		goto retry_flush_quotas;
> >>>> +	}
> >>>> +
> >>>>  	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
> >>>>  		up_write(&sbi->node_change);
> >>>>  		f2fs_unlock_all(sbi);
> >>>> @@ -1123,7 +1162,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
> >>>>  		if (err)
> >>>>  			goto out;
> >>>>  		cond_resched();
> >>>> -		goto retry_flush_dents;
> >>>> +		goto retry_flush_quotas;
> >>>>  	}
> >>>>  
> >>>>  retry_flush_nodes:
> >>>> @@ -1214,6 +1253,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >>>>  	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
> >>>>  		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
> >>>>  
> >>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
> >>>> +		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
> >>>> +	else
> >>>> +		__clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
> >>>> +
> >>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
> >>>> +		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
> >>>> +
> >>>>  	/* set this flag to activate crc|cp_ver for recovery */
> >>>>  	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
> >>>>  	__clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
> >>>> @@ -1421,6 +1468,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >>>>  
> >>>>  	clear_sbi_flag(sbi, SBI_IS_DIRTY);
> >>>>  	clear_sbi_flag(sbi, SBI_NEED_CP);
> >>>> +	clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
> >>>>  	__set_cp_next_pack(sbi);
> >>>>  
> >>>>  	/*
> >>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> >>>> index 57c0823d22e0..b96f8588d565 100644
> >>>> --- a/fs/f2fs/data.c
> >>>> +++ b/fs/f2fs/data.c
> >>>> @@ -46,7 +46,7 @@ static bool __is_cp_guaranteed(struct page *page)
> >>>>  			inode->i_ino ==  F2FS_NODE_INO(sbi) ||
> >>>>  			S_ISDIR(inode->i_mode) ||
> >>>>  			(S_ISREG(inode->i_mode) &&
> >>>> -			is_inode_flag_set(inode, FI_ATOMIC_FILE)) ||
> >>>> +			(f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
> >>>>  			is_cold_data(page))
> >>>>  		return true;
> >>>>  	return false;
> >>>> @@ -975,7 +975,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
> >>>>  	return err;
> >>>>  }
> >>>>  
> >>>> -static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
> >>>> +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
> >>>>  {
> >>>>  	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
> >>>>  		if (lock)
> >>>> @@ -1716,6 +1716,8 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
> >>>>  		return true;
> >>>>  	if (S_ISDIR(inode->i_mode))
> >>>>  		return true;
> >>>> +	if (IS_NOQUOTA(inode))
> >>>> +		return true;
> >>>>  	if (f2fs_is_atomic_file(inode))
> >>>>  		return true;
> >>>>  	if (fio) {
> >>>> @@ -1960,7 +1962,7 @@ static int __write_data_page(struct page *page, bool *submitted,
> >>>>  	}
> >>>>  
> >>>>  	unlock_page(page);
> >>>> -	if (!S_ISDIR(inode->i_mode))
> >>>> +	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode))
> >>>>  		f2fs_balance_fs(sbi, need_balance_fs);
> >>>>  
> >>>>  	if (unlikely(f2fs_cp_error(sbi))) {
> >>>> @@ -2151,6 +2153,8 @@ static inline bool __should_serialize_io(struct inode *inode,
> >>>>  {
> >>>>  	if (!S_ISREG(inode->i_mode))
> >>>>  		return false;
> >>>> +	if (IS_NOQUOTA(inode))
> >>>> +		return false;
> >>>>  	if (wbc->sync_mode != WB_SYNC_ALL)
> >>>>  		return true;
> >>>>  	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
> >>>> @@ -2180,7 +2184,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> >>>>  	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
> >>>>  		goto skip_write;
> >>>>  
> >>>> -	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
> >>>> +	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
> >>>> +			wbc->sync_mode == WB_SYNC_NONE &&
> >>>>  			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
> >>>>  			f2fs_available_free_memory(sbi, DIRTY_DENTS))
> >>>>  		goto skip_write;
> >>>> @@ -2245,7 +2250,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
> >>>>  		down_write(&F2FS_I(inode)->i_mmap_sem);
> >>>>  
> >>>>  		truncate_pagecache(inode, i_size);
> >>>> -		f2fs_truncate_blocks(inode, i_size, true);
> >>>> +		f2fs_truncate_blocks(inode, i_size, true, true);
> >>>>  
> >>>>  		up_write(&F2FS_I(inode)->i_mmap_sem);
> >>>>  		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> >>>> @@ -2380,7 +2385,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
> >>>>  	if (err)
> >>>>  		goto fail;
> >>>>  
> >>>> -	if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
> >>>> +	if (need_balance && !IS_NOQUOTA(inode) &&
> >>>> +			has_not_enough_free_secs(sbi, 0, 0)) {
> >>>>  		unlock_page(page);
> >>>>  		f2fs_balance_fs(sbi, true);
> >>>>  		lock_page(page);
> >>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>>> index 917b2ca76aac..b5e400be73e0 100644
> >>>> --- a/fs/f2fs/f2fs.h
> >>>> +++ b/fs/f2fs/f2fs.h
> >>>> @@ -525,6 +525,9 @@ enum {
> >>>>  
> >>>>  #define DEFAULT_RETRY_IO_COUNT	8	/* maximum retry read IO count */
> >>>>  
> >>>> +/* maximum retry quota flush count */
> >>>> +#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT		8
> >>>> +
> >>>>  #define F2FS_LINK_MAX	0xffffffff	/* maximum link count per file */
> >>>>  
> >>>>  #define MAX_DIR_RA_PAGES	4	/* maximum ra pages of dir */
> >>>> @@ -1088,6 +1091,9 @@ enum {
> >>>>  	SBI_NEED_CP,				/* need to checkpoint */
> >>>>  	SBI_IS_SHUTDOWN,			/* shutdown by ioctl */
> >>>>  	SBI_IS_RECOVERED,			/* recovered orphan/data */
> >>>> +	SBI_QUOTA_NEED_FLUSH,			/* need to flush quota info in CP */
> >>>> +	SBI_QUOTA_SKIP_FLUSH,			/* skip flushing quota in current CP */
> >>>> +	SBI_QUOTA_NEED_REPAIR,			/* quota file may be corrupted */
> >>>>  };
> >>>>  
> >>>>  enum {
> >>>> @@ -1891,12 +1897,18 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
> >>>>  {
> >>>>  	block_t	valid_block_count;
> >>>>  	unsigned int valid_node_count;
> >>>> -	bool quota = inode && !is_inode;
> >>>> +	int err;
> >>>>  
> >>>> -	if (quota) {
> >>>> -		int ret = dquot_reserve_block(inode, 1);
> >>>> -		if (ret)
> >>>> -			return ret;
> >>>> +	if (is_inode) {
> >>>> +		if (inode) {
> >>>> +			err = dquot_alloc_inode(inode);
> >>>> +			if (err)
> >>>> +				return err;
> >>>> +		}
> >>>> +	} else {
> >>>> +		err = dquot_reserve_block(inode, 1);
> >>>> +		if (err)
> >>>> +			return err;
> >>>>  	}
> >>>>  
> >>>>  	if (time_to_inject(sbi, FAULT_BLOCK)) {
> >>>> @@ -1938,8 +1950,12 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
> >>>>  	return 0;
> >>>>  
> >>>>  enospc:
> >>>> -	if (quota)
> >>>> +	if (is_inode) {
> >>>> +		if (inode)
> >>>> +			dquot_free_inode(inode);
> >>>> +	} else {
> >>>>  		dquot_release_reservation_block(inode, 1);
> >>>> +	}
> >>>>  	return -ENOSPC;
> >>>>  }
> >>>>  
> >>>> @@ -1960,7 +1976,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
> >>>>  
> >>>>  	spin_unlock(&sbi->stat_lock);
> >>>>  
> >>>> -	if (!is_inode)
> >>>> +	if (is_inode)
> >>>> +		dquot_free_inode(inode);
> >>>> +	else
> >>>>  		f2fs_i_blocks_write(inode, 1, false, true);
> >>>>  }
> >>>>  
> >>>> @@ -2739,7 +2757,8 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
> >>>>   */
> >>>>  int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
> >>>>  void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
> >>>> -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
> >>>> +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
> >>>> +							bool buf_write);
> >>>>  int f2fs_truncate(struct inode *inode);
> >>>>  int f2fs_getattr(const struct path *path, struct kstat *stat,
> >>>>  			u32 request_mask, unsigned int flags);
> >>>> @@ -2827,6 +2846,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
> >>>>  int f2fs_inode_dirtied(struct inode *inode, bool sync);
> >>>>  void f2fs_inode_synced(struct inode *inode);
> >>>>  int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
> >>>> +int f2fs_quota_sync(struct super_block *sb, int type);
> >>>>  void f2fs_quota_off_umount(struct super_block *sb);
> >>>>  int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
> >>>>  int f2fs_sync_fs(struct super_block *sb, int sync);
> >>>> @@ -3025,6 +3045,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
> >>>>  struct page *f2fs_get_new_data_page(struct inode *inode,
> >>>>  			struct page *ipage, pgoff_t index, bool new_i_size);
> >>>>  int f2fs_do_write_data_page(struct f2fs_io_info *fio);
> >>>> +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
> >>>>  int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
> >>>>  			int create, int flag);
> >>>>  int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
> >>>> @@ -3456,3 +3477,16 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
> >>>>  #endif
> >>>>  
> >>>>  #endif
> >>>> +
> >>>> +static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
> >>>> +{
> >>>> +#ifdef CONFIG_QUOTA
> >>>> +	if (f2fs_sb_has_quota_ino(sbi->sb))
> >>>> +		return true;
> >>>> +	if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
> >>>> +		F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
> >>>> +		F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
> >>>> +		return true;
> >>>> +#endif
> >>>> +	return false;
> >>>> +}
> >>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> >>>> index 357422a4c319..a75f3e145bf1 100644
> >>>> --- a/fs/f2fs/file.c
> >>>> +++ b/fs/f2fs/file.c
> >>>> @@ -586,7 +586,8 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
> >>>>  	return 0;
> >>>>  }
> >>>>  
> >>>> -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
> >>>> +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
> >>>> +							bool buf_write)
> >>>>  {
> >>>>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> >>>>  	struct dnode_of_data dn;
> >>>> @@ -594,6 +595,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
> >>>>  	int count = 0, err = 0;
> >>>>  	struct page *ipage;
> >>>>  	bool truncate_page = false;
> >>>> +	int flag = buf_write ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO;
> >>>>  
> >>>>  	trace_f2fs_truncate_blocks_enter(inode, from);
> >>>>  
> >>>> @@ -603,7 +605,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
> >>>>  		goto free_partial;
> >>>>  
> >>>>  	if (lock)
> >>>> -		f2fs_lock_op(sbi);
> >>>> +		__do_map_lock(sbi, flag, true);
> >>>>  
> >>>>  	ipage = f2fs_get_node_page(sbi, inode->i_ino);
> >>>>  	if (IS_ERR(ipage)) {
> >>>> @@ -641,7 +643,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
> >>>>  	err = f2fs_truncate_inode_blocks(inode, free_from);
> >>>>  out:
> >>>>  	if (lock)
> >>>> -		f2fs_unlock_op(sbi);
> >>>> +		__do_map_lock(sbi, flag, false);
> >>>>  free_partial:
> >>>>  	/* lastly zero out the first data page */
> >>>>  	if (!err)
> >>>> @@ -676,7 +678,7 @@ int f2fs_truncate(struct inode *inode)
> >>>>  			return err;
> >>>>  	}
> >>>>  
> >>>> -	err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
> >>>> +	err = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
> >>>>  	if (err)
> >>>>  		return err;
> >>>>  
> >>>> @@ -785,9 +787,24 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
> >>>>  		!uid_eq(attr->ia_uid, inode->i_uid)) ||
> >>>>  		(attr->ia_valid & ATTR_GID &&
> >>>>  		!gid_eq(attr->ia_gid, inode->i_gid))) {
> >>>> +		f2fs_lock_op(F2FS_I_SB(inode));
> >>>>  		err = dquot_transfer(inode, attr);
> >>>> -		if (err)
> >>>> +		if (err) {
> >>>> +			set_sbi_flag(F2FS_I_SB(inode),
> >>>> +					SBI_QUOTA_NEED_REPAIR);
> >>>> +			f2fs_unlock_op(F2FS_I_SB(inode));
> >>>>  			return err;
> >>>> +		}
> >>>> +		/*
> >>>> +		 * update uid/gid under lock_op(), so that dquot and inode can
> >>>> +		 * be updated atomically.
> >>>> +		 */
> >>>> +		if (attr->ia_valid & ATTR_UID)
> >>>> +			inode->i_uid = attr->ia_uid;
> >>>> +		if (attr->ia_valid & ATTR_GID)
> >>>> +			inode->i_gid = attr->ia_gid;
> >>>> +		f2fs_mark_inode_dirty_sync(inode, true);
> >>>> +		f2fs_unlock_op(F2FS_I_SB(inode));
> >>>>  	}
> >>>>  
> >>>>  	if (attr->ia_valid & ATTR_SIZE) {
> >>>> @@ -1242,7 +1259,7 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
> >>>>  	new_size = i_size_read(inode) - len;
> >>>>  	truncate_pagecache(inode, new_size);
> >>>>  
> >>>> -	ret = f2fs_truncate_blocks(inode, new_size, true);
> >>>> +	ret = f2fs_truncate_blocks(inode, new_size, true, false);
> >>>>  	up_write(&F2FS_I(inode)->i_mmap_sem);
> >>>>  	if (!ret)
> >>>>  		f2fs_i_size_write(inode, new_size);
> >>>> @@ -1427,7 +1444,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
> >>>>  	f2fs_balance_fs(sbi, true);
> >>>>  
> >>>>  	down_write(&F2FS_I(inode)->i_mmap_sem);
> >>>> -	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
> >>>> +	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
> >>>>  	up_write(&F2FS_I(inode)->i_mmap_sem);
> >>>>  	if (ret)
> >>>>  		return ret;
> >>>> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
> >>>> index 425d740f87fd..cb31a719b048 100644
> >>>> --- a/fs/f2fs/inline.c
> >>>> +++ b/fs/f2fs/inline.c
> >>>> @@ -298,7 +298,7 @@ bool f2fs_recover_inline_data(struct inode *inode, struct page *npage)
> >>>>  		clear_inode_flag(inode, FI_INLINE_DATA);
> >>>>  		f2fs_put_page(ipage, 1);
> >>>>  	} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
> >>>> -		if (f2fs_truncate_blocks(inode, 0, false))
> >>>> +		if (f2fs_truncate_blocks(inode, 0, false, false))
> >>>>  			return false;
> >>>>  		goto process_inline;
> >>>>  	}
> >>>> @@ -470,7 +470,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
> >>>>  	return 0;
> >>>>  punch_dentry_pages:
> >>>>  	truncate_inode_pages(&dir->i_data, 0);
> >>>> -	f2fs_truncate_blocks(dir, 0, false);
> >>>> +	f2fs_truncate_blocks(dir, 0, false, false);
> >>>>  	f2fs_remove_dirty_inode(dir);
> >>>>  	return err;
> >>>>  }
> >>>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> >>>> index 86e7333d60c1..3c278e63d1a3 100644
> >>>> --- a/fs/f2fs/inode.c
> >>>> +++ b/fs/f2fs/inode.c
> >>>> @@ -645,7 +645,11 @@ void f2fs_evict_inode(struct inode *inode)
> >>>>  	if (inode->i_nlink || is_bad_inode(inode))
> >>>>  		goto no_delete;
> >>>>  
> >>>> -	dquot_initialize(inode);
> >>>> +	err = dquot_initialize(inode);
> >>>> +	if (err) {
> >>>> +		err = 0;
> >>>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
> >>>> +	}
> >>>>  
> >>>>  	f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
> >>>>  	f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
> >>>> @@ -677,9 +681,10 @@ void f2fs_evict_inode(struct inode *inode)
> >>>>  		goto retry;
> >>>>  	}
> >>>>  
> >>>> -	if (err)
> >>>> +	if (err) {
> >>>>  		f2fs_update_inode_page(inode);
> >>>> -	dquot_free_inode(inode);
> >>>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
> >>>> +	}
> >>>>  	sb_end_intwrite(inode->i_sb);
> >>>>  no_delete:
> >>>>  	dquot_drop(inode);
> >>>> diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
> >>>> index 9ad451ac2cec..b65491a63115 100644
> >>>> --- a/fs/f2fs/namei.c
> >>>> +++ b/fs/f2fs/namei.c
> >>>> @@ -71,10 +71,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
> >>>>  	if (err)
> >>>>  		goto fail_drop;
> >>>>  
> >>>> -	err = dquot_alloc_inode(inode);
> >>>> -	if (err)
> >>>> -		goto fail_drop;
> >>>> -
> >>>>  	set_inode_flag(inode, FI_NEW_INODE);
> >>>>  
> >>>>  	/* If the directory encrypted, then we should encrypt the inode. */
> >>>> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> >>>> index 41f2c0fe6d8e..70f05650191e 100644
> >>>> --- a/fs/f2fs/recovery.c
> >>>> +++ b/fs/f2fs/recovery.c
> >>>> @@ -191,6 +191,33 @@ static int recover_dentry(struct inode *inode, struct page *ipage,
> >>>>  	return err;
> >>>>  }
> >>>>  
> >>>> +static int recover_quota_data(struct inode *inode, struct page *page)
> >>>> +{
> >>>> +	struct f2fs_inode *raw = F2FS_INODE(page);
> >>>> +	struct iattr attr;
> >>>> +	uid_t i_uid = le32_to_cpu(raw->i_uid);
> >>>> +	gid_t i_gid = le32_to_cpu(raw->i_gid);
> >>>> +	int err;
> >>>> +
> >>>> +	memset(&attr, 0, sizeof(attr));
> >>>> +
> >>>> +	attr.ia_uid = make_kuid(inode->i_sb->s_user_ns, i_uid);
> >>>> +	attr.ia_gid = make_kgid(inode->i_sb->s_user_ns, i_gid);
> >>>> +
> >>>> +	if (!uid_eq(attr.ia_uid, inode->i_uid))
> >>>> +		attr.ia_valid |= ATTR_UID;
> >>>> +	if (!gid_eq(attr.ia_gid, inode->i_gid))
> >>>> +		attr.ia_valid |= ATTR_GID;
> >>>> +
> >>>> +	if (!attr.ia_valid)
> >>>> +		return 0;
> >>>> +
> >>>> +	err = dquot_transfer(inode, &attr);
> >>>> +	if (err)
> >>>> +		set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR);
> >>>> +	return err;
> >>>> +}
> >>>> +
> >>>>  static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
> >>>>  {
> >>>>  	if (ri->i_inline & F2FS_PIN_FILE)
> >>>> @@ -203,12 +230,18 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
> >>>>  		clear_inode_flag(inode, FI_DATA_EXIST);
> >>>>  }
> >>>>  
> >>>> -static void recover_inode(struct inode *inode, struct page *page)
> >>>> +static int recover_inode(struct inode *inode, struct page *page)
> >>>>  {
> >>>>  	struct f2fs_inode *raw = F2FS_INODE(page);
> >>>>  	char *name;
> >>>> +	int err;
> >>>>  
> >>>>  	inode->i_mode = le16_to_cpu(raw->i_mode);
> >>>> +
> >>>> +	err = recover_quota_data(inode, page);
> >>>> +	if (err)
> >>>> +		return err;
> >>>> +
> >>>>  	i_uid_write(inode, le32_to_cpu(raw->i_uid));
> >>>>  	i_gid_write(inode, le32_to_cpu(raw->i_gid));
> >>>>  	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
> >>>> @@ -231,6 +264,7 @@ static void recover_inode(struct inode *inode, struct page *page)
> >>>>  	f2fs_msg(inode->i_sb, KERN_NOTICE,
> >>>>  		"recover_inode: ino = %x, name = %s, inline = %x",
> >>>>  			ino_of_node(page), name, raw->i_inline);
> >>>> +	return 0;
> >>>>  }
> >>>>  
> >>>>  static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
> >>>> @@ -597,8 +631,11 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
> >>>>  		 * In this case, we can lose the latest inode(x).
> >>>>  		 * So, call recover_inode for the inode update.
> >>>>  		 */
> >>>> -		if (IS_INODE(page))
> >>>> -			recover_inode(entry->inode, page);
> >>>> +		if (IS_INODE(page)) {
> >>>> +			err = recover_inode(entry->inode, page);
> >>>> +			if (err)
> >>>> +				break;
> >>>> +		}
> >>>>  		if (entry->last_dentry == blkaddr) {
> >>>>  			err = recover_dentry(entry->inode, page, dir_list);
> >>>>  			if (err) {
> >>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >>>> index 945468968d4e..3a46c9b81188 100644
> >>>> --- a/fs/f2fs/super.c
> >>>> +++ b/fs/f2fs/super.c
> >>>> @@ -1689,6 +1689,13 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode)
> >>>>  
> >>>>  static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
> >>>>  {
> >>>> +
> >>>> +	if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
> >>>> +		f2fs_msg(sbi->sb, KERN_ERR,
> >>>> +			"quota sysfile may be corrupted, skip loading it");
> >>>> +		return 0;
> >>>> +	}
> >>>> +
> >>>>  	return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type],
> >>>>  					F2FS_OPTION(sbi).s_jquota_fmt, type);
> >>>>  }
> >>>> @@ -1759,7 +1766,14 @@ static int f2fs_enable_quotas(struct super_block *sb)
> >>>>  		test_opt(F2FS_SB(sb), PRJQUOTA),
> >>>>  	};
> >>>>  
> >>>> -	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
> >>>> +	if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) {
> >>>> +		f2fs_msg(sb, KERN_ERR,
> >>>> +			"quota file may be corrupted, skip loading it");
> >>>> +		return 0;
> >>>> +	}
> >>>> +
> >>>> +	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
> >>>> +
> >>>>  	for (type = 0; type < MAXQUOTAS; type++) {
> >>>>  		qf_inum = f2fs_qf_ino(sb, type);
> >>>>  		if (qf_inum) {
> >>>> @@ -1773,6 +1787,8 @@ static int f2fs_enable_quotas(struct super_block *sb)
> >>>>  					"fsck to fix.", type, err);
> >>>>  				for (type--; type >= 0; type--)
> >>>>  					dquot_quota_off(sb, type);
> >>>> +				set_sbi_flag(F2FS_SB(sb),
> >>>> +						SBI_QUOTA_NEED_REPAIR);
> >>>>  				return err;
> >>>>  			}
> >>>>  		}
> >>>> @@ -1780,35 +1796,51 @@ static int f2fs_enable_quotas(struct super_block *sb)
> >>>>  	return 0;
> >>>>  }
> >>>>  
> >>>> -static int f2fs_quota_sync(struct super_block *sb, int type)
> >>>> +int f2fs_quota_sync(struct super_block *sb, int type)
> >>>>  {
> >>>> +	struct f2fs_sb_info *sbi = F2FS_SB(sb);
> >>>>  	struct quota_info *dqopt = sb_dqopt(sb);
> >>>>  	int cnt;
> >>>>  	int ret;
> >>>>  
> >>>>  	ret = dquot_writeback_dquots(sb, type);
> >>>>  	if (ret)
> >>>> -		return ret;
> >>>> +		goto out;
> >>>>  
> >>>>  	/*
> >>>>  	 * Now when everything is written we can discard the pagecache so
> >>>>  	 * that userspace sees the changes.
> >>>>  	 */
> >>>>  	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> >>>> +		struct address_space *mapping;
> >>>> +
> >>>>  		if (type != -1 && cnt != type)
> >>>>  			continue;
> >>>>  		if (!sb_has_quota_active(sb, cnt))
> >>>>  			continue;
> >>>>  
> >>>> -		ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping);
> >>>> +		mapping = dqopt->files[cnt]->i_mapping;
> >>>> +
> >>>> +		ret = filemap_fdatawrite(mapping);
> >>>> +		if (ret)
> >>>> +			goto out;
> >>>> +
> >>>> +		/* if we are using journalled quota */
> >>>> +		if (is_journalled_quota(sbi))
> >>>> +			continue;
> >>>> +
> >>>> +		ret = filemap_fdatawait(mapping);
> >>>>  		if (ret)
> >>>> -			return ret;
> >>>> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
> >>>>  
> >>>>  		inode_lock(dqopt->files[cnt]);
> >>>>  		truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
> >>>>  		inode_unlock(dqopt->files[cnt]);
> >>>>  	}
> >>>> -	return 0;
> >>>> +out:
> >>>> +	if (ret)
> >>>> +		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
> >>>> +	return ret;
> >>>>  }
> >>>>  
> >>>>  static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
> >>>> @@ -1877,11 +1909,67 @@ void f2fs_quota_off_umount(struct super_block *sb)
> >>>>  				"Fail to turn off disk quota "
> >>>>  				"(type: %d, err: %d, ret:%d), Please "
> >>>>  				"run fsck to fix it.", type, err, ret);
> >>>> -			set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK);
> >>>> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
> >>>>  		}
> >>>>  	}
> >>>>  }
> >>>>  
> >>>> +static int f2fs_dquot_commit(struct dquot *dquot)
> >>>> +{
> >>>> +	int ret;
> >>>> +
> >>>> +	ret = dquot_commit(dquot);
> >>>> +	if (ret < 0)
> >>>> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
> >>>> +	return ret;
> >>>> +}
> >>>> +
> >>>> +static int f2fs_dquot_acquire(struct dquot *dquot)
> >>>> +{
> >>>> +	int ret;
> >>>> +
> >>>> +	ret = dquot_acquire(dquot);
> >>>> +	if (ret < 0)
> >>>> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
> >>>> +
> >>>> +	return ret;
> >>>> +}
> >>>> +
> >>>> +static int f2fs_dquot_release(struct dquot *dquot)
> >>>> +{
> >>>> +	int ret;
> >>>> +
> >>>> +	ret = dquot_release(dquot);
> >>>> +	if (ret < 0)
> >>>> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
> >>>> +	return ret;
> >>>> +}
> >>>> +
> >>>> +static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
> >>>> +{
> >>>> +	struct super_block *sb = dquot->dq_sb;
> >>>> +	struct f2fs_sb_info *sbi = F2FS_SB(sb);
> >>>> +	int ret;
> >>>> +
> >>>> +	ret = dquot_mark_dquot_dirty(dquot);
> >>>> +
> >>>> +	/* if we are using journalled quota */
> >>>> +	if (is_journalled_quota(sbi))
> >>>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
> >>>> +
> >>>> +	return ret;
> >>>> +}
> >>>> +
> >>>> +static int f2fs_dquot_commit_info(struct super_block *sb, int type)
> >>>> +{
> >>>> +	int ret;
> >>>> +
> >>>> +	ret = dquot_commit_info(sb, type);
> >>>> +	if (ret < 0)
> >>>> +		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
> >>>> +	return ret;
> >>>> +}
> >>>> +
> >>>>  static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
> >>>>  {
> >>>>  	*projid = F2FS_I(inode)->i_projid;
> >>>> @@ -1890,11 +1978,11 @@ static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
> >>>>  
> >>>>  static const struct dquot_operations f2fs_quota_operations = {
> >>>>  	.get_reserved_space = f2fs_get_reserved_space,
> >>>> -	.write_dquot	= dquot_commit,
> >>>> -	.acquire_dquot	= dquot_acquire,
> >>>> -	.release_dquot	= dquot_release,
> >>>> -	.mark_dirty	= dquot_mark_dquot_dirty,
> >>>> -	.write_info	= dquot_commit_info,
> >>>> +	.write_dquot	= f2fs_dquot_commit,
> >>>> +	.acquire_dquot	= f2fs_dquot_acquire,
> >>>> +	.release_dquot	= f2fs_dquot_release,
> >>>> +	.mark_dirty	= f2fs_dquot_mark_dquot_dirty,
> >>>> +	.write_info	= f2fs_dquot_commit_info,
> >>>>  	.alloc_dquot	= dquot_alloc,
> >>>>  	.destroy_dquot	= dquot_destroy,
> >>>>  	.get_projid	= f2fs_get_projid,
> >>>> @@ -1912,6 +2000,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = {
> >>>>  	.get_nextdqblk	= dquot_get_next_dqblk,
> >>>>  };
> >>>>  #else
> >>>> +int f2fs_quota_sync(struct super_block *sb, int type)
> >>>> +{
> >>>> +	return 0;
> >>>> +}
> >>>> +
> >>>>  void f2fs_quota_off_umount(struct super_block *sb)
> >>>>  {
> >>>>  }
> >>>> @@ -2951,6 +3044,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> >>>>  		goto free_meta_inode;
> >>>>  	}
> >>>>  
> >>>> +	if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG))
> >>>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
> >>>> +
> >>>>  	/* Initialize device list */
> >>>>  	err = f2fs_scan_devices(sbi);
> >>>>  	if (err) {
> >>>> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
> >>>> index 1d4b196291d6..aaca9f5d5f5c 100644
> >>>> --- a/include/linux/f2fs_fs.h
> >>>> +++ b/include/linux/f2fs_fs.h
> >>>> @@ -115,6 +115,7 @@ struct f2fs_super_block {
> >>>>  /*
> >>>>   * For checkpoint
> >>>>   */
> >>>> +#define CP_QUOTA_NEED_FSCK_FLAG		0x00000800
> >>>>  #define CP_LARGE_NAT_BITMAP_FLAG	0x00000400
> >>>>  #define CP_NOCRC_RECOVERY_FLAG	0x00000200
> >>>>  #define CP_TRIMMED_FLAG		0x00000100
> >>>> -- 
> >>>> 2.18.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v11] f2fs: guarantee journalled quota data by checkpoint
@ 2018-10-01  1:49           ` Jaegeuk Kim
  0 siblings, 0 replies; 18+ messages in thread
From: Jaegeuk Kim @ 2018-10-01  1:49 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On 10/01, Chao Yu wrote:
> On 2018-10-1 9:29, Jaegeuk Kim wrote:
> > On 10/01, Chao Yu wrote:
> >> Hi Jaegeuk,
> >>
> >> On 2018-10-1 8:06, Jaegeuk Kim wrote:
> >>> Hi Chao,
> >>>
> >>> This fails on fsstress with godown without fault injection. Could you please
> >>> test a bit? I assumed that this patch should give no fsck failure along with
> >>> valid checkpoint having no flag.
> >>
> >> Okay, let me reproduce with that case.
> >>
> >>>
> >>> BTW, I'm in doubt that f2fs_lock_all covers entire quota modification. What
> >>> about prepare_write_begin() -> f2fs_get_block() ... -> inc_valid_block_count()?
> >>
> >> If quota data changed in above path, we will detect that in below condition:
> >>
> >> block_operations()
> >>
> >> 	down_write(&sbi->node_change);
> >>
> >> 	if (__need_flush_quota(sbi)) {
> >> 		up_write(&sbi->node_change);
> >> 		f2fs_unlock_all(sbi);
> >> 		goto retry_flush_quotas;
> >> 	}
> >>
> >> So there is no problem?
> > 
> > We may need to check quota is dirty, since we have no way to detect by
> > f2fs structures?
> 
> Below condition can check that.
> 
> static bool __need_flush_quota(struct f2fs_sb_info *sbi)
> {
> ...
> 	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
> 		return true;
> 	if (get_pages(sbi, F2FS_DIRTY_QDATA))
> 		return true;
> ...
> }
> 
> static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
> {
> ...
> 	ret = dquot_mark_dquot_dirty(dquot);
> 
> 	/* if we are using journalled quota */
> 	if (is_journalled_quota(sbi))
> 		set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
> ...
> }

Okay, then, could you please run the above stress test to reproduce this?
Thanks,

> 
> Thanks,
> 
> > 
> >>
> >> Thanks,
> >>
> >>>
> >>> On 09/20, Chao Yu wrote:
> >>>> From: Chao Yu <yuchao0@huawei.com>
> >>>>
> >>>> For journalled quota mode, let checkpoint flush dquot dirty data
> >>>> and quota file data to guarantee persistence of all quota sysfiles in
> >>>> the last checkpoint; this way, we can avoid corrupting quota sysfiles
> >>>> when encountering SPO.
> >>>>
> >>>> The implementation is as below:
> >>>>
> >>>> 1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there are
> >>>> cached dquot metadata changes in quota subsystem, and later checkpoint
> >>>> should:
> >>>>  a) flush dquot metadata into quota file.
> >>>>  b) flush quota file to storage to keep file usage consistent.
> >>>>
> >>>> 2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
> >>>> operation failed due to -EIO or -ENOSPC, so later,
> >>>>  a) checkpoint will skip syncing dquot metadata.
> >>>>  b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
> >>>>     hint for fsck repairing.
> >>>>
> >>>> 3. add a global state SBI_QUOTA_SKIP_FLUSH: in checkpoint, if quota
> >>>> data updating is very heavy, it may cause a hung task in block_operations().
> >>>> To avoid this, if our retry count exceeds the threshold, let's just skip
> >>>> flushing and retry in the next checkpoint.
> >>>>
> >>>> Signed-off-by: Weichao Guo <guoweichao@huawei.com>
> >>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> >>>> ---
> >>>> v11:
> >>>> - transfer quota data if fsynced inode's i_{u,g}id changed during
> >>>> recovery.
> >>>>  fs/f2fs/checkpoint.c    |  56 +++++++++++++++++--
> >>>>  fs/f2fs/data.c          |  18 ++++--
> >>>>  fs/f2fs/f2fs.h          |  50 ++++++++++++++---
> >>>>  fs/f2fs/file.c          |  31 ++++++++---
> >>>>  fs/f2fs/inline.c        |   4 +-
> >>>>  fs/f2fs/inode.c         |  11 +++-
> >>>>  fs/f2fs/namei.c         |   4 --
> >>>>  fs/f2fs/recovery.c      |  43 +++++++++++++-
> >>>>  fs/f2fs/super.c         | 120 ++++++++++++++++++++++++++++++++++++----
> >>>>  include/linux/f2fs_fs.h |   1 +
> >>>>  10 files changed, 289 insertions(+), 49 deletions(-)
> >>>>
> >>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> >>>> index d312d2829d5a..d624d7983197 100644
> >>>> --- a/fs/f2fs/checkpoint.c
> >>>> +++ b/fs/f2fs/checkpoint.c
> >>>> @@ -1083,6 +1083,21 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi)
> >>>>  	ckpt->next_free_nid = cpu_to_le32(last_nid);
> >>>>  }
> >>>>  
> >>>> +static bool __need_flush_quota(struct f2fs_sb_info *sbi)
> >>>> +{
> >>>> +	if (!is_journalled_quota(sbi))
> >>>> +		return false;
> >>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
> >>>> +		return false;
> >>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
> >>>> +		return false;
> >>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
> >>>> +		return true;
> >>>> +	if (get_pages(sbi, F2FS_DIRTY_QDATA))
> >>>> +		return true;
> >>>> +	return false;
> >>>> +}
> >>>> +
> >>>>  /*
> >>>>   * Freeze all the FS-operations for checkpoint.
> >>>>   */
> >>>> @@ -1094,12 +1109,30 @@ static int block_operations(struct f2fs_sb_info *sbi)
> >>>>  		.for_reclaim = 0,
> >>>>  	};
> >>>>  	struct blk_plug plug;
> >>>> -	int err = 0;
> >>>> +	int err = 0, cnt = 0;
> >>>>  
> >>>>  	blk_start_plug(&plug);
> >>>>  
> >>>> -retry_flush_dents:
> >>>> +retry_flush_quotas:
> >>>> +	if (__need_flush_quota(sbi)) {
> >>>> +		if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
> >>>> +			set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
> >>>> +			f2fs_lock_all(sbi);
> >>>> +			goto retry_flush_dents;
> >>>> +		}
> >>>> +		clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
> >>>> +
> >>>> +		f2fs_quota_sync(sbi->sb, -1);
> >>>> +	}
> >>>> +
> >>>>  	f2fs_lock_all(sbi);
> >>>> +	if (__need_flush_quota(sbi)) {
> >>>> +		f2fs_unlock_all(sbi);
> >>>> +		cond_resched();
> >>>> +		goto retry_flush_quotas;
> >>>> +	}
> >>>> +
> >>>> +retry_flush_dents:
> >>>>  	/* write all the dirty dentry pages */
> >>>>  	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
> >>>>  		f2fs_unlock_all(sbi);
> >>>> @@ -1107,7 +1140,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
> >>>>  		if (err)
> >>>>  			goto out;
> >>>>  		cond_resched();
> >>>> -		goto retry_flush_dents;
> >>>> +		goto retry_flush_quotas;
> >>>>  	}
> >>>>  
> >>>>  	/*
> >>>> @@ -1116,6 +1149,12 @@ static int block_operations(struct f2fs_sb_info *sbi)
> >>>>  	 */
> >>>>  	down_write(&sbi->node_change);
> >>>>  
> >>>> +	if (__need_flush_quota(sbi)) {
> >>>> +		up_write(&sbi->node_change);
> >>>> +		f2fs_unlock_all(sbi);
> >>>> +		goto retry_flush_quotas;
> >>>> +	}
> >>>> +
> >>>>  	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
> >>>>  		up_write(&sbi->node_change);
> >>>>  		f2fs_unlock_all(sbi);
> >>>> @@ -1123,7 +1162,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
> >>>>  		if (err)
> >>>>  			goto out;
> >>>>  		cond_resched();
> >>>> -		goto retry_flush_dents;
> >>>> +		goto retry_flush_quotas;
> >>>>  	}
> >>>>  
> >>>>  retry_flush_nodes:
> >>>> @@ -1214,6 +1253,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >>>>  	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
> >>>>  		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
> >>>>  
> >>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
> >>>> +		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
> >>>> +	else
> >>>> +		__clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
> >>>> +
> >>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
> >>>> +		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
> >>>> +
> >>>>  	/* set this flag to activate crc|cp_ver for recovery */
> >>>>  	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
> >>>>  	__clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
> >>>> @@ -1421,6 +1468,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >>>>  
> >>>>  	clear_sbi_flag(sbi, SBI_IS_DIRTY);
> >>>>  	clear_sbi_flag(sbi, SBI_NEED_CP);
> >>>> +	clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
> >>>>  	__set_cp_next_pack(sbi);
> >>>>  
> >>>>  	/*
> >>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> >>>> index 57c0823d22e0..b96f8588d565 100644
> >>>> --- a/fs/f2fs/data.c
> >>>> +++ b/fs/f2fs/data.c
> >>>> @@ -46,7 +46,7 @@ static bool __is_cp_guaranteed(struct page *page)
> >>>>  			inode->i_ino ==  F2FS_NODE_INO(sbi) ||
> >>>>  			S_ISDIR(inode->i_mode) ||
> >>>>  			(S_ISREG(inode->i_mode) &&
> >>>> -			is_inode_flag_set(inode, FI_ATOMIC_FILE)) ||
> >>>> +			(f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
> >>>>  			is_cold_data(page))
> >>>>  		return true;
> >>>>  	return false;
> >>>> @@ -975,7 +975,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
> >>>>  	return err;
> >>>>  }
> >>>>  
> >>>> -static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
> >>>> +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
> >>>>  {
> >>>>  	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
> >>>>  		if (lock)
> >>>> @@ -1716,6 +1716,8 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
> >>>>  		return true;
> >>>>  	if (S_ISDIR(inode->i_mode))
> >>>>  		return true;
> >>>> +	if (IS_NOQUOTA(inode))
> >>>> +		return true;
> >>>>  	if (f2fs_is_atomic_file(inode))
> >>>>  		return true;
> >>>>  	if (fio) {
> >>>> @@ -1960,7 +1962,7 @@ static int __write_data_page(struct page *page, bool *submitted,
> >>>>  	}
> >>>>  
> >>>>  	unlock_page(page);
> >>>> -	if (!S_ISDIR(inode->i_mode))
> >>>> +	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode))
> >>>>  		f2fs_balance_fs(sbi, need_balance_fs);
> >>>>  
> >>>>  	if (unlikely(f2fs_cp_error(sbi))) {
> >>>> @@ -2151,6 +2153,8 @@ static inline bool __should_serialize_io(struct inode *inode,
> >>>>  {
> >>>>  	if (!S_ISREG(inode->i_mode))
> >>>>  		return false;
> >>>> +	if (IS_NOQUOTA(inode))
> >>>> +		return false;
> >>>>  	if (wbc->sync_mode != WB_SYNC_ALL)
> >>>>  		return true;
> >>>>  	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
> >>>> @@ -2180,7 +2184,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> >>>>  	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
> >>>>  		goto skip_write;
> >>>>  
> >>>> -	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
> >>>> +	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
> >>>> +			wbc->sync_mode == WB_SYNC_NONE &&
> >>>>  			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
> >>>>  			f2fs_available_free_memory(sbi, DIRTY_DENTS))
> >>>>  		goto skip_write;
> >>>> @@ -2245,7 +2250,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
> >>>>  		down_write(&F2FS_I(inode)->i_mmap_sem);
> >>>>  
> >>>>  		truncate_pagecache(inode, i_size);
> >>>> -		f2fs_truncate_blocks(inode, i_size, true);
> >>>> +		f2fs_truncate_blocks(inode, i_size, true, true);
> >>>>  
> >>>>  		up_write(&F2FS_I(inode)->i_mmap_sem);
> >>>>  		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> >>>> @@ -2380,7 +2385,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
> >>>>  	if (err)
> >>>>  		goto fail;
> >>>>  
> >>>> -	if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
> >>>> +	if (need_balance && !IS_NOQUOTA(inode) &&
> >>>> +			has_not_enough_free_secs(sbi, 0, 0)) {
> >>>>  		unlock_page(page);
> >>>>  		f2fs_balance_fs(sbi, true);
> >>>>  		lock_page(page);
> >>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>>> index 917b2ca76aac..b5e400be73e0 100644
> >>>> --- a/fs/f2fs/f2fs.h
> >>>> +++ b/fs/f2fs/f2fs.h
> >>>> @@ -525,6 +525,9 @@ enum {
> >>>>  
> >>>>  #define DEFAULT_RETRY_IO_COUNT	8	/* maximum retry read IO count */
> >>>>  
> >>>> +/* maximum retry quota flush count */
> >>>> +#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT		8
> >>>> +
> >>>>  #define F2FS_LINK_MAX	0xffffffff	/* maximum link count per file */
> >>>>  
> >>>>  #define MAX_DIR_RA_PAGES	4	/* maximum ra pages of dir */
> >>>> @@ -1088,6 +1091,9 @@ enum {
> >>>>  	SBI_NEED_CP,				/* need to checkpoint */
> >>>>  	SBI_IS_SHUTDOWN,			/* shutdown by ioctl */
> >>>>  	SBI_IS_RECOVERED,			/* recovered orphan/data */
> >>>> +	SBI_QUOTA_NEED_FLUSH,			/* need to flush quota info in CP */
> >>>> +	SBI_QUOTA_SKIP_FLUSH,			/* skip flushing quota in current CP */
> >>>> +	SBI_QUOTA_NEED_REPAIR,			/* quota file may be corrupted */
> >>>>  };
> >>>>  
> >>>>  enum {
> >>>> @@ -1891,12 +1897,18 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
> >>>>  {
> >>>>  	block_t	valid_block_count;
> >>>>  	unsigned int valid_node_count;
> >>>> -	bool quota = inode && !is_inode;
> >>>> +	int err;
> >>>>  
> >>>> -	if (quota) {
> >>>> -		int ret = dquot_reserve_block(inode, 1);
> >>>> -		if (ret)
> >>>> -			return ret;
> >>>> +	if (is_inode) {
> >>>> +		if (inode) {
> >>>> +			err = dquot_alloc_inode(inode);
> >>>> +			if (err)
> >>>> +				return err;
> >>>> +		}
> >>>> +	} else {
> >>>> +		err = dquot_reserve_block(inode, 1);
> >>>> +		if (err)
> >>>> +			return err;
> >>>>  	}
> >>>>  
> >>>>  	if (time_to_inject(sbi, FAULT_BLOCK)) {
> >>>> @@ -1938,8 +1950,12 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
> >>>>  	return 0;
> >>>>  
> >>>>  enospc:
> >>>> -	if (quota)
> >>>> +	if (is_inode) {
> >>>> +		if (inode)
> >>>> +			dquot_free_inode(inode);
> >>>> +	} else {
> >>>>  		dquot_release_reservation_block(inode, 1);
> >>>> +	}
> >>>>  	return -ENOSPC;
> >>>>  }
> >>>>  
> >>>> @@ -1960,7 +1976,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
> >>>>  
> >>>>  	spin_unlock(&sbi->stat_lock);
> >>>>  
> >>>> -	if (!is_inode)
> >>>> +	if (is_inode)
> >>>> +		dquot_free_inode(inode);
> >>>> +	else
> >>>>  		f2fs_i_blocks_write(inode, 1, false, true);
> >>>>  }
> >>>>  
> >>>> @@ -2739,7 +2757,8 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
> >>>>   */
> >>>>  int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
> >>>>  void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
> >>>> -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
> >>>> +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
> >>>> +							bool buf_write);
> >>>>  int f2fs_truncate(struct inode *inode);
> >>>>  int f2fs_getattr(const struct path *path, struct kstat *stat,
> >>>>  			u32 request_mask, unsigned int flags);
> >>>> @@ -2827,6 +2846,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
> >>>>  int f2fs_inode_dirtied(struct inode *inode, bool sync);
> >>>>  void f2fs_inode_synced(struct inode *inode);
> >>>>  int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
> >>>> +int f2fs_quota_sync(struct super_block *sb, int type);
> >>>>  void f2fs_quota_off_umount(struct super_block *sb);
> >>>>  int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
> >>>>  int f2fs_sync_fs(struct super_block *sb, int sync);
> >>>> @@ -3025,6 +3045,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
> >>>>  struct page *f2fs_get_new_data_page(struct inode *inode,
> >>>>  			struct page *ipage, pgoff_t index, bool new_i_size);
> >>>>  int f2fs_do_write_data_page(struct f2fs_io_info *fio);
> >>>> +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
> >>>>  int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
> >>>>  			int create, int flag);
> >>>>  int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
> >>>> @@ -3456,3 +3477,16 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
> >>>>  #endif
> >>>>  
> >>>>  #endif
> >>>> +
> >>>> +static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
> >>>> +{
> >>>> +#ifdef CONFIG_QUOTA
> >>>> +	if (f2fs_sb_has_quota_ino(sbi->sb))
> >>>> +		return true;
> >>>> +	if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
> >>>> +		F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
> >>>> +		F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
> >>>> +		return true;
> >>>> +#endif
> >>>> +	return false;
> >>>> +}
> >>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> >>>> index 357422a4c319..a75f3e145bf1 100644
> >>>> --- a/fs/f2fs/file.c
> >>>> +++ b/fs/f2fs/file.c
> >>>> @@ -586,7 +586,8 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
> >>>>  	return 0;
> >>>>  }
> >>>>  
> >>>> -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
> >>>> +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
> >>>> +							bool buf_write)
> >>>>  {
> >>>>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> >>>>  	struct dnode_of_data dn;
> >>>> @@ -594,6 +595,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
> >>>>  	int count = 0, err = 0;
> >>>>  	struct page *ipage;
> >>>>  	bool truncate_page = false;
> >>>> +	int flag = buf_write ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO;
> >>>>  
> >>>>  	trace_f2fs_truncate_blocks_enter(inode, from);
> >>>>  
> >>>> @@ -603,7 +605,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
> >>>>  		goto free_partial;
> >>>>  
> >>>>  	if (lock)
> >>>> -		f2fs_lock_op(sbi);
> >>>> +		__do_map_lock(sbi, flag, true);
> >>>>  
> >>>>  	ipage = f2fs_get_node_page(sbi, inode->i_ino);
> >>>>  	if (IS_ERR(ipage)) {
> >>>> @@ -641,7 +643,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
> >>>>  	err = f2fs_truncate_inode_blocks(inode, free_from);
> >>>>  out:
> >>>>  	if (lock)
> >>>> -		f2fs_unlock_op(sbi);
> >>>> +		__do_map_lock(sbi, flag, false);
> >>>>  free_partial:
> >>>>  	/* lastly zero out the first data page */
> >>>>  	if (!err)
> >>>> @@ -676,7 +678,7 @@ int f2fs_truncate(struct inode *inode)
> >>>>  			return err;
> >>>>  	}
> >>>>  
> >>>> -	err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
> >>>> +	err = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
> >>>>  	if (err)
> >>>>  		return err;
> >>>>  
> >>>> @@ -785,9 +787,24 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
> >>>>  		!uid_eq(attr->ia_uid, inode->i_uid)) ||
> >>>>  		(attr->ia_valid & ATTR_GID &&
> >>>>  		!gid_eq(attr->ia_gid, inode->i_gid))) {
> >>>> +		f2fs_lock_op(F2FS_I_SB(inode));
> >>>>  		err = dquot_transfer(inode, attr);
> >>>> -		if (err)
> >>>> +		if (err) {
> >>>> +			set_sbi_flag(F2FS_I_SB(inode),
> >>>> +					SBI_QUOTA_NEED_REPAIR);
> >>>> +			f2fs_unlock_op(F2FS_I_SB(inode));
> >>>>  			return err;
> >>>> +		}
> >>>> +		/*
> >>>> +		 * update uid/gid under lock_op(), so that dquot and inode can
> >>>> +		 * be updated atomically.
> >>>> +		 */
> >>>> +		if (attr->ia_valid & ATTR_UID)
> >>>> +			inode->i_uid = attr->ia_uid;
> >>>> +		if (attr->ia_valid & ATTR_GID)
> >>>> +			inode->i_gid = attr->ia_gid;
> >>>> +		f2fs_mark_inode_dirty_sync(inode, true);
> >>>> +		f2fs_unlock_op(F2FS_I_SB(inode));
> >>>>  	}
> >>>>  
> >>>>  	if (attr->ia_valid & ATTR_SIZE) {
> >>>> @@ -1242,7 +1259,7 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
> >>>>  	new_size = i_size_read(inode) - len;
> >>>>  	truncate_pagecache(inode, new_size);
> >>>>  
> >>>> -	ret = f2fs_truncate_blocks(inode, new_size, true);
> >>>> +	ret = f2fs_truncate_blocks(inode, new_size, true, false);
> >>>>  	up_write(&F2FS_I(inode)->i_mmap_sem);
> >>>>  	if (!ret)
> >>>>  		f2fs_i_size_write(inode, new_size);
> >>>> @@ -1427,7 +1444,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
> >>>>  	f2fs_balance_fs(sbi, true);
> >>>>  
> >>>>  	down_write(&F2FS_I(inode)->i_mmap_sem);
> >>>> -	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
> >>>> +	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
> >>>>  	up_write(&F2FS_I(inode)->i_mmap_sem);
> >>>>  	if (ret)
> >>>>  		return ret;
> >>>> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
> >>>> index 425d740f87fd..cb31a719b048 100644
> >>>> --- a/fs/f2fs/inline.c
> >>>> +++ b/fs/f2fs/inline.c
> >>>> @@ -298,7 +298,7 @@ bool f2fs_recover_inline_data(struct inode *inode, struct page *npage)
> >>>>  		clear_inode_flag(inode, FI_INLINE_DATA);
> >>>>  		f2fs_put_page(ipage, 1);
> >>>>  	} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
> >>>> -		if (f2fs_truncate_blocks(inode, 0, false))
> >>>> +		if (f2fs_truncate_blocks(inode, 0, false, false))
> >>>>  			return false;
> >>>>  		goto process_inline;
> >>>>  	}
> >>>> @@ -470,7 +470,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
> >>>>  	return 0;
> >>>>  punch_dentry_pages:
> >>>>  	truncate_inode_pages(&dir->i_data, 0);
> >>>> -	f2fs_truncate_blocks(dir, 0, false);
> >>>> +	f2fs_truncate_blocks(dir, 0, false, false);
> >>>>  	f2fs_remove_dirty_inode(dir);
> >>>>  	return err;
> >>>>  }
> >>>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> >>>> index 86e7333d60c1..3c278e63d1a3 100644
> >>>> --- a/fs/f2fs/inode.c
> >>>> +++ b/fs/f2fs/inode.c
> >>>> @@ -645,7 +645,11 @@ void f2fs_evict_inode(struct inode *inode)
> >>>>  	if (inode->i_nlink || is_bad_inode(inode))
> >>>>  		goto no_delete;
> >>>>  
> >>>> -	dquot_initialize(inode);
> >>>> +	err = dquot_initialize(inode);
> >>>> +	if (err) {
> >>>> +		err = 0;
> >>>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
> >>>> +	}
> >>>>  
> >>>>  	f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
> >>>>  	f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
> >>>> @@ -677,9 +681,10 @@ void f2fs_evict_inode(struct inode *inode)
> >>>>  		goto retry;
> >>>>  	}
> >>>>  
> >>>> -	if (err)
> >>>> +	if (err) {
> >>>>  		f2fs_update_inode_page(inode);
> >>>> -	dquot_free_inode(inode);
> >>>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
> >>>> +	}
> >>>>  	sb_end_intwrite(inode->i_sb);
> >>>>  no_delete:
> >>>>  	dquot_drop(inode);
> >>>> diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
> >>>> index 9ad451ac2cec..b65491a63115 100644
> >>>> --- a/fs/f2fs/namei.c
> >>>> +++ b/fs/f2fs/namei.c
> >>>> @@ -71,10 +71,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
> >>>>  	if (err)
> >>>>  		goto fail_drop;
> >>>>  
> >>>> -	err = dquot_alloc_inode(inode);
> >>>> -	if (err)
> >>>> -		goto fail_drop;
> >>>> -
> >>>>  	set_inode_flag(inode, FI_NEW_INODE);
> >>>>  
> >>>>  	/* If the directory encrypted, then we should encrypt the inode. */
> >>>> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> >>>> index 41f2c0fe6d8e..70f05650191e 100644
> >>>> --- a/fs/f2fs/recovery.c
> >>>> +++ b/fs/f2fs/recovery.c
> >>>> @@ -191,6 +191,33 @@ static int recover_dentry(struct inode *inode, struct page *ipage,
> >>>>  	return err;
> >>>>  }
> >>>>  
> >>>> +static int recover_quota_data(struct inode *inode, struct page *page)
> >>>> +{
> >>>> +	struct f2fs_inode *raw = F2FS_INODE(page);
> >>>> +	struct iattr attr;
> >>>> +	uid_t i_uid = le32_to_cpu(raw->i_uid);
> >>>> +	gid_t i_gid = le32_to_cpu(raw->i_gid);
> >>>> +	int err;
> >>>> +
> >>>> +	memset(&attr, 0, sizeof(attr));
> >>>> +
> >>>> +	attr.ia_uid = make_kuid(inode->i_sb->s_user_ns, i_uid);
> >>>> +	attr.ia_gid = make_kgid(inode->i_sb->s_user_ns, i_gid);
> >>>> +
> >>>> +	if (!uid_eq(attr.ia_uid, inode->i_uid))
> >>>> +		attr.ia_valid |= ATTR_UID;
> >>>> +	if (!gid_eq(attr.ia_gid, inode->i_gid))
> >>>> +		attr.ia_valid |= ATTR_GID;
> >>>> +
> >>>> +	if (!attr.ia_valid)
> >>>> +		return 0;
> >>>> +
> >>>> +	err = dquot_transfer(inode, &attr);
> >>>> +	if (err)
> >>>> +		set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR);
> >>>> +	return err;
> >>>> +}
> >>>> +
> >>>>  static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
> >>>>  {
> >>>>  	if (ri->i_inline & F2FS_PIN_FILE)
> >>>> @@ -203,12 +230,18 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
> >>>>  		clear_inode_flag(inode, FI_DATA_EXIST);
> >>>>  }
> >>>>  
> >>>> -static void recover_inode(struct inode *inode, struct page *page)
> >>>> +static int recover_inode(struct inode *inode, struct page *page)
> >>>>  {
> >>>>  	struct f2fs_inode *raw = F2FS_INODE(page);
> >>>>  	char *name;
> >>>> +	int err;
> >>>>  
> >>>>  	inode->i_mode = le16_to_cpu(raw->i_mode);
> >>>> +
> >>>> +	err = recover_quota_data(inode, page);
> >>>> +	if (err)
> >>>> +		return err;
> >>>> +
> >>>>  	i_uid_write(inode, le32_to_cpu(raw->i_uid));
> >>>>  	i_gid_write(inode, le32_to_cpu(raw->i_gid));
> >>>>  	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
> >>>> @@ -231,6 +264,7 @@ static void recover_inode(struct inode *inode, struct page *page)
> >>>>  	f2fs_msg(inode->i_sb, KERN_NOTICE,
> >>>>  		"recover_inode: ino = %x, name = %s, inline = %x",
> >>>>  			ino_of_node(page), name, raw->i_inline);
> >>>> +	return 0;
> >>>>  }
> >>>>  
> >>>>  static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
> >>>> @@ -597,8 +631,11 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
> >>>>  		 * In this case, we can lose the latest inode(x).
> >>>>  		 * So, call recover_inode for the inode update.
> >>>>  		 */
> >>>> -		if (IS_INODE(page))
> >>>> -			recover_inode(entry->inode, page);
> >>>> +		if (IS_INODE(page)) {
> >>>> +			err = recover_inode(entry->inode, page);
> >>>> +			if (err)
> >>>> +				break;
> >>>> +		}
> >>>>  		if (entry->last_dentry == blkaddr) {
> >>>>  			err = recover_dentry(entry->inode, page, dir_list);
> >>>>  			if (err) {
> >>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >>>> index 945468968d4e..3a46c9b81188 100644
> >>>> --- a/fs/f2fs/super.c
> >>>> +++ b/fs/f2fs/super.c
> >>>> @@ -1689,6 +1689,13 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode)
> >>>>  
> >>>>  static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
> >>>>  {
> >>>> +
> >>>> +	if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
> >>>> +		f2fs_msg(sbi->sb, KERN_ERR,
> >>>> +			"quota sysfile may be corrupted, skip loading it");
> >>>> +		return 0;
> >>>> +	}
> >>>> +
> >>>>  	return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type],
> >>>>  					F2FS_OPTION(sbi).s_jquota_fmt, type);
> >>>>  }
> >>>> @@ -1759,7 +1766,14 @@ static int f2fs_enable_quotas(struct super_block *sb)
> >>>>  		test_opt(F2FS_SB(sb), PRJQUOTA),
> >>>>  	};
> >>>>  
> >>>> -	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
> >>>> +	if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) {
> >>>> +		f2fs_msg(sb, KERN_ERR,
> >>>> +			"quota file may be corrupted, skip loading it");
> >>>> +		return 0;
> >>>> +	}
> >>>> +
> >>>> +	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
> >>>> +
> >>>>  	for (type = 0; type < MAXQUOTAS; type++) {
> >>>>  		qf_inum = f2fs_qf_ino(sb, type);
> >>>>  		if (qf_inum) {
> >>>> @@ -1773,6 +1787,8 @@ static int f2fs_enable_quotas(struct super_block *sb)
> >>>>  					"fsck to fix.", type, err);
> >>>>  				for (type--; type >= 0; type--)
> >>>>  					dquot_quota_off(sb, type);
> >>>> +				set_sbi_flag(F2FS_SB(sb),
> >>>> +						SBI_QUOTA_NEED_REPAIR);
> >>>>  				return err;
> >>>>  			}
> >>>>  		}
> >>>> @@ -1780,35 +1796,51 @@ static int f2fs_enable_quotas(struct super_block *sb)
> >>>>  	return 0;
> >>>>  }
> >>>>  
> >>>> -static int f2fs_quota_sync(struct super_block *sb, int type)
> >>>> +int f2fs_quota_sync(struct super_block *sb, int type)
> >>>>  {
> >>>> +	struct f2fs_sb_info *sbi = F2FS_SB(sb);
> >>>>  	struct quota_info *dqopt = sb_dqopt(sb);
> >>>>  	int cnt;
> >>>>  	int ret;
> >>>>  
> >>>>  	ret = dquot_writeback_dquots(sb, type);
> >>>>  	if (ret)
> >>>> -		return ret;
> >>>> +		goto out;
> >>>>  
> >>>>  	/*
> >>>>  	 * Now when everything is written we can discard the pagecache so
> >>>>  	 * that userspace sees the changes.
> >>>>  	 */
> >>>>  	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> >>>> +		struct address_space *mapping;
> >>>> +
> >>>>  		if (type != -1 && cnt != type)
> >>>>  			continue;
> >>>>  		if (!sb_has_quota_active(sb, cnt))
> >>>>  			continue;
> >>>>  
> >>>> -		ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping);
> >>>> +		mapping = dqopt->files[cnt]->i_mapping;
> >>>> +
> >>>> +		ret = filemap_fdatawrite(mapping);
> >>>> +		if (ret)
> >>>> +			goto out;
> >>>> +
> >>>> +		/* if we are using journalled quota */
> >>>> +		if (is_journalled_quota(sbi))
> >>>> +			continue;
> >>>> +
> >>>> +		ret = filemap_fdatawait(mapping);
> >>>>  		if (ret)
> >>>> -			return ret;
> >>>> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
> >>>>  
> >>>>  		inode_lock(dqopt->files[cnt]);
> >>>>  		truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
> >>>>  		inode_unlock(dqopt->files[cnt]);
> >>>>  	}
> >>>> -	return 0;
> >>>> +out:
> >>>> +	if (ret)
> >>>> +		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
> >>>> +	return ret;
> >>>>  }
> >>>>  
> >>>>  static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
> >>>> @@ -1877,11 +1909,67 @@ void f2fs_quota_off_umount(struct super_block *sb)
> >>>>  				"Fail to turn off disk quota "
> >>>>  				"(type: %d, err: %d, ret:%d), Please "
> >>>>  				"run fsck to fix it.", type, err, ret);
> >>>> -			set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK);
> >>>> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
> >>>>  		}
> >>>>  	}
> >>>>  }
> >>>>  
> >>>> +static int f2fs_dquot_commit(struct dquot *dquot)
> >>>> +{
> >>>> +	int ret;
> >>>> +
> >>>> +	ret = dquot_commit(dquot);
> >>>> +	if (ret < 0)
> >>>> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
> >>>> +	return ret;
> >>>> +}
> >>>> +
> >>>> +static int f2fs_dquot_acquire(struct dquot *dquot)
> >>>> +{
> >>>> +	int ret;
> >>>> +
> >>>> +	ret = dquot_acquire(dquot);
> >>>> +	if (ret < 0)
> >>>> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
> >>>> +
> >>>> +	return ret;
> >>>> +}
> >>>> +
> >>>> +static int f2fs_dquot_release(struct dquot *dquot)
> >>>> +{
> >>>> +	int ret;
> >>>> +
> >>>> +	ret = dquot_release(dquot);
> >>>> +	if (ret < 0)
> >>>> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
> >>>> +	return ret;
> >>>> +}
> >>>> +
> >>>> +static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
> >>>> +{
> >>>> +	struct super_block *sb = dquot->dq_sb;
> >>>> +	struct f2fs_sb_info *sbi = F2FS_SB(sb);
> >>>> +	int ret;
> >>>> +
> >>>> +	ret = dquot_mark_dquot_dirty(dquot);
> >>>> +
> >>>> +	/* if we are using journalled quota */
> >>>> +	if (is_journalled_quota(sbi))
> >>>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
> >>>> +
> >>>> +	return ret;
> >>>> +}
> >>>> +
> >>>> +static int f2fs_dquot_commit_info(struct super_block *sb, int type)
> >>>> +{
> >>>> +	int ret;
> >>>> +
> >>>> +	ret = dquot_commit_info(sb, type);
> >>>> +	if (ret < 0)
> >>>> +		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
> >>>> +	return ret;
> >>>> +}
> >>>> +
> >>>>  static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
> >>>>  {
> >>>>  	*projid = F2FS_I(inode)->i_projid;
> >>>> @@ -1890,11 +1978,11 @@ static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
> >>>>  
> >>>>  static const struct dquot_operations f2fs_quota_operations = {
> >>>>  	.get_reserved_space = f2fs_get_reserved_space,
> >>>> -	.write_dquot	= dquot_commit,
> >>>> -	.acquire_dquot	= dquot_acquire,
> >>>> -	.release_dquot	= dquot_release,
> >>>> -	.mark_dirty	= dquot_mark_dquot_dirty,
> >>>> -	.write_info	= dquot_commit_info,
> >>>> +	.write_dquot	= f2fs_dquot_commit,
> >>>> +	.acquire_dquot	= f2fs_dquot_acquire,
> >>>> +	.release_dquot	= f2fs_dquot_release,
> >>>> +	.mark_dirty	= f2fs_dquot_mark_dquot_dirty,
> >>>> +	.write_info	= f2fs_dquot_commit_info,
> >>>>  	.alloc_dquot	= dquot_alloc,
> >>>>  	.destroy_dquot	= dquot_destroy,
> >>>>  	.get_projid	= f2fs_get_projid,
> >>>> @@ -1912,6 +2000,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = {
> >>>>  	.get_nextdqblk	= dquot_get_next_dqblk,
> >>>>  };
> >>>>  #else
> >>>> +int f2fs_quota_sync(struct super_block *sb, int type)
> >>>> +{
> >>>> +	return 0;
> >>>> +}
> >>>> +
> >>>>  void f2fs_quota_off_umount(struct super_block *sb)
> >>>>  {
> >>>>  }
> >>>> @@ -2951,6 +3044,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> >>>>  		goto free_meta_inode;
> >>>>  	}
> >>>>  
> >>>> +	if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG))
> >>>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
> >>>> +
> >>>>  	/* Initialize device list */
> >>>>  	err = f2fs_scan_devices(sbi);
> >>>>  	if (err) {
> >>>> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
> >>>> index 1d4b196291d6..aaca9f5d5f5c 100644
> >>>> --- a/include/linux/f2fs_fs.h
> >>>> +++ b/include/linux/f2fs_fs.h
> >>>> @@ -115,6 +115,7 @@ struct f2fs_super_block {
> >>>>  /*
> >>>>   * For checkpoint
> >>>>   */
> >>>> +#define CP_QUOTA_NEED_FSCK_FLAG		0x00000800
> >>>>  #define CP_LARGE_NAT_BITMAP_FLAG	0x00000400
> >>>>  #define CP_NOCRC_RECOVERY_FLAG	0x00000200
> >>>>  #define CP_TRIMMED_FLAG		0x00000100
> >>>> -- 
> >>>> 2.18.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v11] f2fs: guarantee journalled quota data by checkpoint
  2018-10-01  1:49           ` Jaegeuk Kim
@ 2018-10-01  2:04             ` Chao Yu
  -1 siblings, 0 replies; 18+ messages in thread
From: Chao Yu @ 2018-10-01  2:04 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-f2fs-devel, linux-kernel, Chao Yu, Weichao Guo

On 2018-10-1 9:49, Jaegeuk Kim wrote:
> On 10/01, Chao Yu wrote:
>> On 2018-10-1 9:29, Jaegeuk Kim wrote:
>>> On 10/01, Chao Yu wrote:
>>>> Hi Jaegeuk,
>>>>
>>>> On 2018-10-1 8:06, Jaegeuk Kim wrote:
>>>>> Hi Chao,
>>>>>
>>>>> This fails on fsstress with godown without fault injection. Could you please
>>>>> test a bit? I assumed that this patch should give no fsck failure along with
>>>>> valid checkpoint having no flag.
>>>>
>>>> Okay, let me reproduce with that case.
>>>>
>>>>>
>>>>> BTW, I'm in doubt that f2fs_lock_all covers entire quota modification. What
>>>>> about prepare_write_begin() -> f2fs_get_block() ... -> inc_valid_block_count()?
>>>>
>>>> If quota data changed in above path, we will detect that in below condition:
>>>>
>>>> block_operations()
>>>>
>>>> 	down_write(&sbi->node_change);
>>>>
>>>> 	if (__need_flush_quota(sbi)) {
>>>> 		up_write(&sbi->node_change);
>>>> 		f2fs_unlock_all(sbi);
>>>> 		goto retry_flush_quotas;
>>>> 	}
>>>>
>>>> So there is no problem?
>>>
>>> We may need to check quota is dirty, since we have no way to detect by
>>> f2fs structures?
>>
>> Below condition can check that.
>>
>> static bool __need_flush_quota(struct f2fs_sb_info *sbi)
>> {
>> ...
>> 	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
>> 		return true;
>> 	if (get_pages(sbi, F2FS_DIRTY_QDATA))
>> 		return true;
>> ...
>> }
>>
>> static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
>> {
>> ...
>> 	ret = dquot_mark_dquot_dirty(dquot);
>>
>> 	/* if we are using journalled quota */
>> 	if (is_journalled_quota(sbi))
>> 		set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
>> ...
>> }
> 
> Okay, then, could you please run the above stress test to reproduce this?

Sure, let me try this case and fix it.

Could you check the other patches on the mailing list, and test them instead?

Thanks,

> Thanks,
> 
>>
>> Thanks,
>>
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>> On 09/20, Chao Yu wrote:
>>>>>> From: Chao Yu <yuchao0@huawei.com>
>>>>>>
>>>>>> For journalled quota mode, let checkpoint flush dquot dirty data
>>>>>> and quota file data to guarantee persistence of all quota sysfiles in
>>>>>> the last checkpoint; this way, we can avoid corrupting quota sysfiles
>>>>>> when encountering SPO.
>>>>>>
>>>>>> The implementation is as below:
>>>>>>
>>>>>> 1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there are
>>>>>> cached dquot metadata changes in the quota subsystem, and a later
>>>>>> checkpoint should:
>>>>>>  a) flush dquot metadata into quota file.
>>>>>>  b) flush quota file to storage to keep file usage consistent.
>>>>>>
>>>>>> 2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
>>>>>> operation failed due to -EIO or -ENOSPC, so later,
>>>>>>  a) checkpoint will skip syncing dquot metadata.
>>>>>>  b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
>>>>>>     hint for fsck repairing.
>>>>>>
>>>>>> 3. add a global state SBI_QUOTA_SKIP_FLUSH: in checkpoint, if quota
>>>>>> data updating is very heavy, it may cause a hung task in
>>>>>> block_operations(). To avoid this, if our retry count exceeds the
>>>>>> threshold, let's just skip flushing and retry in the next checkpoint.
>>>>>>
>>>>>> Signed-off-by: Weichao Guo <guoweichao@huawei.com>
>>>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>>>>> ---
>>>>>> v11:
>>>>>> - transfer quota data if fsynced inode's i_{u,g}id changed during
>>>>>> recovery.
>>>>>>  fs/f2fs/checkpoint.c    |  56 +++++++++++++++++--
>>>>>>  fs/f2fs/data.c          |  18 ++++--
>>>>>>  fs/f2fs/f2fs.h          |  50 ++++++++++++++---
>>>>>>  fs/f2fs/file.c          |  31 ++++++++---
>>>>>>  fs/f2fs/inline.c        |   4 +-
>>>>>>  fs/f2fs/inode.c         |  11 +++-
>>>>>>  fs/f2fs/namei.c         |   4 --
>>>>>>  fs/f2fs/recovery.c      |  43 +++++++++++++-
>>>>>>  fs/f2fs/super.c         | 120 ++++++++++++++++++++++++++++++++++++----
>>>>>>  include/linux/f2fs_fs.h |   1 +
>>>>>>  10 files changed, 289 insertions(+), 49 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>>>>> index d312d2829d5a..d624d7983197 100644
>>>>>> --- a/fs/f2fs/checkpoint.c
>>>>>> +++ b/fs/f2fs/checkpoint.c
>>>>>> @@ -1083,6 +1083,21 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi)
>>>>>>  	ckpt->next_free_nid = cpu_to_le32(last_nid);
>>>>>>  }
>>>>>>  
>>>>>> +static bool __need_flush_quota(struct f2fs_sb_info *sbi)
>>>>>> +{
>>>>>> +	if (!is_journalled_quota(sbi))
>>>>>> +		return false;
>>>>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
>>>>>> +		return false;
>>>>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
>>>>>> +		return false;
>>>>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
>>>>>> +		return true;
>>>>>> +	if (get_pages(sbi, F2FS_DIRTY_QDATA))
>>>>>> +		return true;
>>>>>> +	return false;
>>>>>> +}
>>>>>> +
>>>>>>  /*
>>>>>>   * Freeze all the FS-operations for checkpoint.
>>>>>>   */
>>>>>> @@ -1094,12 +1109,30 @@ static int block_operations(struct f2fs_sb_info *sbi)
>>>>>>  		.for_reclaim = 0,
>>>>>>  	};
>>>>>>  	struct blk_plug plug;
>>>>>> -	int err = 0;
>>>>>> +	int err = 0, cnt = 0;
>>>>>>  
>>>>>>  	blk_start_plug(&plug);
>>>>>>  
>>>>>> -retry_flush_dents:
>>>>>> +retry_flush_quotas:
>>>>>> +	if (__need_flush_quota(sbi)) {
>>>>>> +		if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
>>>>>> +			set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
>>>>>> +			f2fs_lock_all(sbi);
>>>>>> +			goto retry_flush_dents;
>>>>>> +		}
>>>>>> +		clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
>>>>>> +
>>>>>> +		f2fs_quota_sync(sbi->sb, -1);
>>>>>> +	}
>>>>>> +
>>>>>>  	f2fs_lock_all(sbi);
>>>>>> +	if (__need_flush_quota(sbi)) {
>>>>>> +		f2fs_unlock_all(sbi);
>>>>>> +		cond_resched();
>>>>>> +		goto retry_flush_quotas;
>>>>>> +	}
>>>>>> +
>>>>>> +retry_flush_dents:
>>>>>>  	/* write all the dirty dentry pages */
>>>>>>  	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
>>>>>>  		f2fs_unlock_all(sbi);
>>>>>> @@ -1107,7 +1140,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
>>>>>>  		if (err)
>>>>>>  			goto out;
>>>>>>  		cond_resched();
>>>>>> -		goto retry_flush_dents;
>>>>>> +		goto retry_flush_quotas;
>>>>>>  	}
>>>>>>  
>>>>>>  	/*
>>>>>> @@ -1116,6 +1149,12 @@ static int block_operations(struct f2fs_sb_info *sbi)
>>>>>>  	 */
>>>>>>  	down_write(&sbi->node_change);
>>>>>>  
>>>>>> +	if (__need_flush_quota(sbi)) {
>>>>>> +		up_write(&sbi->node_change);
>>>>>> +		f2fs_unlock_all(sbi);
>>>>>> +		goto retry_flush_quotas;
>>>>>> +	}
>>>>>> +
>>>>>>  	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
>>>>>>  		up_write(&sbi->node_change);
>>>>>>  		f2fs_unlock_all(sbi);
>>>>>> @@ -1123,7 +1162,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
>>>>>>  		if (err)
>>>>>>  			goto out;
>>>>>>  		cond_resched();
>>>>>> -		goto retry_flush_dents;
>>>>>> +		goto retry_flush_quotas;
>>>>>>  	}
>>>>>>  
>>>>>>  retry_flush_nodes:
>>>>>> @@ -1214,6 +1253,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>>>>  	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
>>>>>>  		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
>>>>>>  
>>>>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
>>>>>> +		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
>>>>>> +	else
>>>>>> +		__clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
>>>>>> +
>>>>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
>>>>>> +		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
>>>>>> +
>>>>>>  	/* set this flag to activate crc|cp_ver for recovery */
>>>>>>  	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
>>>>>>  	__clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
>>>>>> @@ -1421,6 +1468,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>>>>  
>>>>>>  	clear_sbi_flag(sbi, SBI_IS_DIRTY);
>>>>>>  	clear_sbi_flag(sbi, SBI_NEED_CP);
>>>>>> +	clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
>>>>>>  	__set_cp_next_pack(sbi);
>>>>>>  
>>>>>>  	/*
>>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>>> index 57c0823d22e0..b96f8588d565 100644
>>>>>> --- a/fs/f2fs/data.c
>>>>>> +++ b/fs/f2fs/data.c
>>>>>> @@ -46,7 +46,7 @@ static bool __is_cp_guaranteed(struct page *page)
>>>>>>  			inode->i_ino ==  F2FS_NODE_INO(sbi) ||
>>>>>>  			S_ISDIR(inode->i_mode) ||
>>>>>>  			(S_ISREG(inode->i_mode) &&
>>>>>> -			is_inode_flag_set(inode, FI_ATOMIC_FILE)) ||
>>>>>> +			(f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
>>>>>>  			is_cold_data(page))
>>>>>>  		return true;
>>>>>>  	return false;
>>>>>> @@ -975,7 +975,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
>>>>>>  	return err;
>>>>>>  }
>>>>>>  
>>>>>> -static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
>>>>>> +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
>>>>>>  {
>>>>>>  	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
>>>>>>  		if (lock)
>>>>>> @@ -1716,6 +1716,8 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
>>>>>>  		return true;
>>>>>>  	if (S_ISDIR(inode->i_mode))
>>>>>>  		return true;
>>>>>> +	if (IS_NOQUOTA(inode))
>>>>>> +		return true;
>>>>>>  	if (f2fs_is_atomic_file(inode))
>>>>>>  		return true;
>>>>>>  	if (fio) {
>>>>>> @@ -1960,7 +1962,7 @@ static int __write_data_page(struct page *page, bool *submitted,
>>>>>>  	}
>>>>>>  
>>>>>>  	unlock_page(page);
>>>>>> -	if (!S_ISDIR(inode->i_mode))
>>>>>> +	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode))
>>>>>>  		f2fs_balance_fs(sbi, need_balance_fs);
>>>>>>  
>>>>>>  	if (unlikely(f2fs_cp_error(sbi))) {
>>>>>> @@ -2151,6 +2153,8 @@ static inline bool __should_serialize_io(struct inode *inode,
>>>>>>  {
>>>>>>  	if (!S_ISREG(inode->i_mode))
>>>>>>  		return false;
>>>>>> +	if (IS_NOQUOTA(inode))
>>>>>> +		return false;
>>>>>>  	if (wbc->sync_mode != WB_SYNC_ALL)
>>>>>>  		return true;
>>>>>>  	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
>>>>>> @@ -2180,7 +2184,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>>>>  	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
>>>>>>  		goto skip_write;
>>>>>>  
>>>>>> -	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
>>>>>> +	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
>>>>>> +			wbc->sync_mode == WB_SYNC_NONE &&
>>>>>>  			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
>>>>>>  			f2fs_available_free_memory(sbi, DIRTY_DENTS))
>>>>>>  		goto skip_write;
>>>>>> @@ -2245,7 +2250,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
>>>>>>  		down_write(&F2FS_I(inode)->i_mmap_sem);
>>>>>>  
>>>>>>  		truncate_pagecache(inode, i_size);
>>>>>> -		f2fs_truncate_blocks(inode, i_size, true);
>>>>>> +		f2fs_truncate_blocks(inode, i_size, true, true);
>>>>>>  
>>>>>>  		up_write(&F2FS_I(inode)->i_mmap_sem);
>>>>>>  		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
>>>>>> @@ -2380,7 +2385,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
>>>>>>  	if (err)
>>>>>>  		goto fail;
>>>>>>  
>>>>>> -	if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
>>>>>> +	if (need_balance && !IS_NOQUOTA(inode) &&
>>>>>> +			has_not_enough_free_secs(sbi, 0, 0)) {
>>>>>>  		unlock_page(page);
>>>>>>  		f2fs_balance_fs(sbi, true);
>>>>>>  		lock_page(page);
>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>> index 917b2ca76aac..b5e400be73e0 100644
>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>> @@ -525,6 +525,9 @@ enum {
>>>>>>  
>>>>>>  #define DEFAULT_RETRY_IO_COUNT	8	/* maximum retry read IO count */
>>>>>>  
>>>>>> +/* maximum retry quota flush count */
>>>>>> +#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT		8
>>>>>> +
>>>>>>  #define F2FS_LINK_MAX	0xffffffff	/* maximum link count per file */
>>>>>>  
>>>>>>  #define MAX_DIR_RA_PAGES	4	/* maximum ra pages of dir */
>>>>>> @@ -1088,6 +1091,9 @@ enum {
>>>>>>  	SBI_NEED_CP,				/* need to checkpoint */
>>>>>>  	SBI_IS_SHUTDOWN,			/* shutdown by ioctl */
>>>>>>  	SBI_IS_RECOVERED,			/* recovered orphan/data */
>>>>>> +	SBI_QUOTA_NEED_FLUSH,			/* need to flush quota info in CP */
>>>>>> +	SBI_QUOTA_SKIP_FLUSH,			/* skip flushing quota in current CP */
>>>>>> +	SBI_QUOTA_NEED_REPAIR,			/* quota file may be corrupted */
>>>>>>  };
>>>>>>  
>>>>>>  enum {
>>>>>> @@ -1891,12 +1897,18 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
>>>>>>  {
>>>>>>  	block_t	valid_block_count;
>>>>>>  	unsigned int valid_node_count;
>>>>>> -	bool quota = inode && !is_inode;
>>>>>> +	int err;
>>>>>>  
>>>>>> -	if (quota) {
>>>>>> -		int ret = dquot_reserve_block(inode, 1);
>>>>>> -		if (ret)
>>>>>> -			return ret;
>>>>>> +	if (is_inode) {
>>>>>> +		if (inode) {
>>>>>> +			err = dquot_alloc_inode(inode);
>>>>>> +			if (err)
>>>>>> +				return err;
>>>>>> +		}
>>>>>> +	} else {
>>>>>> +		err = dquot_reserve_block(inode, 1);
>>>>>> +		if (err)
>>>>>> +			return err;
>>>>>>  	}
>>>>>>  
>>>>>>  	if (time_to_inject(sbi, FAULT_BLOCK)) {
>>>>>> @@ -1938,8 +1950,12 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
>>>>>>  	return 0;
>>>>>>  
>>>>>>  enospc:
>>>>>> -	if (quota)
>>>>>> +	if (is_inode) {
>>>>>> +		if (inode)
>>>>>> +			dquot_free_inode(inode);
>>>>>> +	} else {
>>>>>>  		dquot_release_reservation_block(inode, 1);
>>>>>> +	}
>>>>>>  	return -ENOSPC;
>>>>>>  }
>>>>>>  
>>>>>> @@ -1960,7 +1976,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
>>>>>>  
>>>>>>  	spin_unlock(&sbi->stat_lock);
>>>>>>  
>>>>>> -	if (!is_inode)
>>>>>> +	if (is_inode)
>>>>>> +		dquot_free_inode(inode);
>>>>>> +	else
>>>>>>  		f2fs_i_blocks_write(inode, 1, false, true);
>>>>>>  }
>>>>>>  
>>>>>> @@ -2739,7 +2757,8 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
>>>>>>   */
>>>>>>  int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
>>>>>>  void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
>>>>>> -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
>>>>>> +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
>>>>>> +							bool buf_write);
>>>>>>  int f2fs_truncate(struct inode *inode);
>>>>>>  int f2fs_getattr(const struct path *path, struct kstat *stat,
>>>>>>  			u32 request_mask, unsigned int flags);
>>>>>> @@ -2827,6 +2846,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
>>>>>>  int f2fs_inode_dirtied(struct inode *inode, bool sync);
>>>>>>  void f2fs_inode_synced(struct inode *inode);
>>>>>>  int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
>>>>>> +int f2fs_quota_sync(struct super_block *sb, int type);
>>>>>>  void f2fs_quota_off_umount(struct super_block *sb);
>>>>>>  int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
>>>>>>  int f2fs_sync_fs(struct super_block *sb, int sync);
>>>>>> @@ -3025,6 +3045,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
>>>>>>  struct page *f2fs_get_new_data_page(struct inode *inode,
>>>>>>  			struct page *ipage, pgoff_t index, bool new_i_size);
>>>>>>  int f2fs_do_write_data_page(struct f2fs_io_info *fio);
>>>>>> +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
>>>>>>  int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
>>>>>>  			int create, int flag);
>>>>>>  int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
>>>>>> @@ -3456,3 +3477,16 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
>>>>>>  #endif
>>>>>>  
>>>>>>  #endif
>>>>>> +
>>>>>> +static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
>>>>>> +{
>>>>>> +#ifdef CONFIG_QUOTA
>>>>>> +	if (f2fs_sb_has_quota_ino(sbi->sb))
>>>>>> +		return true;
>>>>>> +	if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
>>>>>> +		F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
>>>>>> +		F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
>>>>>> +		return true;
>>>>>> +#endif
>>>>>> +	return false;
>>>>>> +}
>>>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>>>> index 357422a4c319..a75f3e145bf1 100644
>>>>>> --- a/fs/f2fs/file.c
>>>>>> +++ b/fs/f2fs/file.c
>>>>>> @@ -586,7 +586,8 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
>>>>>>  	return 0;
>>>>>>  }
>>>>>>  
>>>>>> -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
>>>>>> +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
>>>>>> +							bool buf_write)
>>>>>>  {
>>>>>>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>>>>>  	struct dnode_of_data dn;
>>>>>> @@ -594,6 +595,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
>>>>>>  	int count = 0, err = 0;
>>>>>>  	struct page *ipage;
>>>>>>  	bool truncate_page = false;
>>>>>> +	int flag = buf_write ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO;
>>>>>>  
>>>>>>  	trace_f2fs_truncate_blocks_enter(inode, from);
>>>>>>  
>>>>>> @@ -603,7 +605,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
>>>>>>  		goto free_partial;
>>>>>>  
>>>>>>  	if (lock)
>>>>>> -		f2fs_lock_op(sbi);
>>>>>> +		__do_map_lock(sbi, flag, true);
>>>>>>  
>>>>>>  	ipage = f2fs_get_node_page(sbi, inode->i_ino);
>>>>>>  	if (IS_ERR(ipage)) {
>>>>>> @@ -641,7 +643,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
>>>>>>  	err = f2fs_truncate_inode_blocks(inode, free_from);
>>>>>>  out:
>>>>>>  	if (lock)
>>>>>> -		f2fs_unlock_op(sbi);
>>>>>> +		__do_map_lock(sbi, flag, false);
>>>>>>  free_partial:
>>>>>>  	/* lastly zero out the first data page */
>>>>>>  	if (!err)
>>>>>> @@ -676,7 +678,7 @@ int f2fs_truncate(struct inode *inode)
>>>>>>  			return err;
>>>>>>  	}
>>>>>>  
>>>>>> -	err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
>>>>>> +	err = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
>>>>>>  	if (err)
>>>>>>  		return err;
>>>>>>  
>>>>>> @@ -785,9 +787,24 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
>>>>>>  		!uid_eq(attr->ia_uid, inode->i_uid)) ||
>>>>>>  		(attr->ia_valid & ATTR_GID &&
>>>>>>  		!gid_eq(attr->ia_gid, inode->i_gid))) {
>>>>>> +		f2fs_lock_op(F2FS_I_SB(inode));
>>>>>>  		err = dquot_transfer(inode, attr);
>>>>>> -		if (err)
>>>>>> +		if (err) {
>>>>>> +			set_sbi_flag(F2FS_I_SB(inode),
>>>>>> +					SBI_QUOTA_NEED_REPAIR);
>>>>>> +			f2fs_unlock_op(F2FS_I_SB(inode));
>>>>>>  			return err;
>>>>>> +		}
>>>>>> +		/*
>>>>>> +		 * update uid/gid under lock_op(), so that dquot and inode can
>>>>>> +		 * be updated atomically.
>>>>>> +		 */
>>>>>> +		if (attr->ia_valid & ATTR_UID)
>>>>>> +			inode->i_uid = attr->ia_uid;
>>>>>> +		if (attr->ia_valid & ATTR_GID)
>>>>>> +			inode->i_gid = attr->ia_gid;
>>>>>> +		f2fs_mark_inode_dirty_sync(inode, true);
>>>>>> +		f2fs_unlock_op(F2FS_I_SB(inode));
>>>>>>  	}
>>>>>>  
>>>>>>  	if (attr->ia_valid & ATTR_SIZE) {
>>>>>> @@ -1242,7 +1259,7 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
>>>>>>  	new_size = i_size_read(inode) - len;
>>>>>>  	truncate_pagecache(inode, new_size);
>>>>>>  
>>>>>> -	ret = f2fs_truncate_blocks(inode, new_size, true);
>>>>>> +	ret = f2fs_truncate_blocks(inode, new_size, true, false);
>>>>>>  	up_write(&F2FS_I(inode)->i_mmap_sem);
>>>>>>  	if (!ret)
>>>>>>  		f2fs_i_size_write(inode, new_size);
>>>>>> @@ -1427,7 +1444,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
>>>>>>  	f2fs_balance_fs(sbi, true);
>>>>>>  
>>>>>>  	down_write(&F2FS_I(inode)->i_mmap_sem);
>>>>>> -	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
>>>>>> +	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
>>>>>>  	up_write(&F2FS_I(inode)->i_mmap_sem);
>>>>>>  	if (ret)
>>>>>>  		return ret;
>>>>>> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
>>>>>> index 425d740f87fd..cb31a719b048 100644
>>>>>> --- a/fs/f2fs/inline.c
>>>>>> +++ b/fs/f2fs/inline.c
>>>>>> @@ -298,7 +298,7 @@ bool f2fs_recover_inline_data(struct inode *inode, struct page *npage)
>>>>>>  		clear_inode_flag(inode, FI_INLINE_DATA);
>>>>>>  		f2fs_put_page(ipage, 1);
>>>>>>  	} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
>>>>>> -		if (f2fs_truncate_blocks(inode, 0, false))
>>>>>> +		if (f2fs_truncate_blocks(inode, 0, false, false))
>>>>>>  			return false;
>>>>>>  		goto process_inline;
>>>>>>  	}
>>>>>> @@ -470,7 +470,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
>>>>>>  	return 0;
>>>>>>  punch_dentry_pages:
>>>>>>  	truncate_inode_pages(&dir->i_data, 0);
>>>>>> -	f2fs_truncate_blocks(dir, 0, false);
>>>>>> +	f2fs_truncate_blocks(dir, 0, false, false);
>>>>>>  	f2fs_remove_dirty_inode(dir);
>>>>>>  	return err;
>>>>>>  }
>>>>>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
>>>>>> index 86e7333d60c1..3c278e63d1a3 100644
>>>>>> --- a/fs/f2fs/inode.c
>>>>>> +++ b/fs/f2fs/inode.c
>>>>>> @@ -645,7 +645,11 @@ void f2fs_evict_inode(struct inode *inode)
>>>>>>  	if (inode->i_nlink || is_bad_inode(inode))
>>>>>>  		goto no_delete;
>>>>>>  
>>>>>> -	dquot_initialize(inode);
>>>>>> +	err = dquot_initialize(inode);
>>>>>> +	if (err) {
>>>>>> +		err = 0;
>>>>>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
>>>>>> +	}
>>>>>>  
>>>>>>  	f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
>>>>>>  	f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
>>>>>> @@ -677,9 +681,10 @@ void f2fs_evict_inode(struct inode *inode)
>>>>>>  		goto retry;
>>>>>>  	}
>>>>>>  
>>>>>> -	if (err)
>>>>>> +	if (err) {
>>>>>>  		f2fs_update_inode_page(inode);
>>>>>> -	dquot_free_inode(inode);
>>>>>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
>>>>>> +	}
>>>>>>  	sb_end_intwrite(inode->i_sb);
>>>>>>  no_delete:
>>>>>>  	dquot_drop(inode);
>>>>>> diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
>>>>>> index 9ad451ac2cec..b65491a63115 100644
>>>>>> --- a/fs/f2fs/namei.c
>>>>>> +++ b/fs/f2fs/namei.c
>>>>>> @@ -71,10 +71,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
>>>>>>  	if (err)
>>>>>>  		goto fail_drop;
>>>>>>  
>>>>>> -	err = dquot_alloc_inode(inode);
>>>>>> -	if (err)
>>>>>> -		goto fail_drop;
>>>>>> -
>>>>>>  	set_inode_flag(inode, FI_NEW_INODE);
>>>>>>  
>>>>>>  	/* If the directory encrypted, then we should encrypt the inode. */
>>>>>> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
>>>>>> index 41f2c0fe6d8e..70f05650191e 100644
>>>>>> --- a/fs/f2fs/recovery.c
>>>>>> +++ b/fs/f2fs/recovery.c
>>>>>> @@ -191,6 +191,33 @@ static int recover_dentry(struct inode *inode, struct page *ipage,
>>>>>>  	return err;
>>>>>>  }
>>>>>>  
>>>>>> +static int recover_quota_data(struct inode *inode, struct page *page)
>>>>>> +{
>>>>>> +	struct f2fs_inode *raw = F2FS_INODE(page);
>>>>>> +	struct iattr attr;
>>>>>> +	uid_t i_uid = le32_to_cpu(raw->i_uid);
>>>>>> +	gid_t i_gid = le32_to_cpu(raw->i_gid);
>>>>>> +	int err;
>>>>>> +
>>>>>> +	memset(&attr, 0, sizeof(attr));
>>>>>> +
>>>>>> +	attr.ia_uid = make_kuid(inode->i_sb->s_user_ns, i_uid);
>>>>>> +	attr.ia_gid = make_kgid(inode->i_sb->s_user_ns, i_gid);
>>>>>> +
>>>>>> +	if (!uid_eq(attr.ia_uid, inode->i_uid))
>>>>>> +		attr.ia_valid |= ATTR_UID;
>>>>>> +	if (!gid_eq(attr.ia_gid, inode->i_gid))
>>>>>> +		attr.ia_valid |= ATTR_GID;
>>>>>> +
>>>>>> +	if (!attr.ia_valid)
>>>>>> +		return 0;
>>>>>> +
>>>>>> +	err = dquot_transfer(inode, &attr);
>>>>>> +	if (err)
>>>>>> +		set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR);
>>>>>> +	return err;
>>>>>> +}
>>>>>> +
>>>>>>  static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
>>>>>>  {
>>>>>>  	if (ri->i_inline & F2FS_PIN_FILE)
>>>>>> @@ -203,12 +230,18 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
>>>>>>  		clear_inode_flag(inode, FI_DATA_EXIST);
>>>>>>  }
>>>>>>  
>>>>>> -static void recover_inode(struct inode *inode, struct page *page)
>>>>>> +static int recover_inode(struct inode *inode, struct page *page)
>>>>>>  {
>>>>>>  	struct f2fs_inode *raw = F2FS_INODE(page);
>>>>>>  	char *name;
>>>>>> +	int err;
>>>>>>  
>>>>>>  	inode->i_mode = le16_to_cpu(raw->i_mode);
>>>>>> +
>>>>>> +	err = recover_quota_data(inode, page);
>>>>>> +	if (err)
>>>>>> +		return err;
>>>>>> +
>>>>>>  	i_uid_write(inode, le32_to_cpu(raw->i_uid));
>>>>>>  	i_gid_write(inode, le32_to_cpu(raw->i_gid));
>>>>>>  	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
>>>>>> @@ -231,6 +264,7 @@ static void recover_inode(struct inode *inode, struct page *page)
>>>>>>  	f2fs_msg(inode->i_sb, KERN_NOTICE,
>>>>>>  		"recover_inode: ino = %x, name = %s, inline = %x",
>>>>>>  			ino_of_node(page), name, raw->i_inline);
>>>>>> +	return 0;
>>>>>>  }
>>>>>>  
>>>>>>  static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
>>>>>> @@ -597,8 +631,11 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
>>>>>>  		 * In this case, we can lose the latest inode(x).
>>>>>>  		 * So, call recover_inode for the inode update.
>>>>>>  		 */
>>>>>> -		if (IS_INODE(page))
>>>>>> -			recover_inode(entry->inode, page);
>>>>>> +		if (IS_INODE(page)) {
>>>>>> +			err = recover_inode(entry->inode, page);
>>>>>> +			if (err)
>>>>>> +				break;
>>>>>> +		}
>>>>>>  		if (entry->last_dentry == blkaddr) {
>>>>>>  			err = recover_dentry(entry->inode, page, dir_list);
>>>>>>  			if (err) {
>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>> index 945468968d4e..3a46c9b81188 100644
>>>>>> --- a/fs/f2fs/super.c
>>>>>> +++ b/fs/f2fs/super.c
>>>>>> @@ -1689,6 +1689,13 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode)
>>>>>>  
>>>>>>  static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
>>>>>>  {
>>>>>> +
>>>>>> +	if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
>>>>>> +		f2fs_msg(sbi->sb, KERN_ERR,
>>>>>> +			"quota sysfile may be corrupted, skip loading it");
>>>>>> +		return 0;
>>>>>> +	}
>>>>>> +
>>>>>>  	return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type],
>>>>>>  					F2FS_OPTION(sbi).s_jquota_fmt, type);
>>>>>>  }
>>>>>> @@ -1759,7 +1766,14 @@ static int f2fs_enable_quotas(struct super_block *sb)
>>>>>>  		test_opt(F2FS_SB(sb), PRJQUOTA),
>>>>>>  	};
>>>>>>  
>>>>>> -	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
>>>>>> +	if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) {
>>>>>> +		f2fs_msg(sb, KERN_ERR,
>>>>>> +			"quota file may be corrupted, skip loading it");
>>>>>> +		return 0;
>>>>>> +	}
>>>>>> +
>>>>>> +	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
>>>>>> +
>>>>>>  	for (type = 0; type < MAXQUOTAS; type++) {
>>>>>>  		qf_inum = f2fs_qf_ino(sb, type);
>>>>>>  		if (qf_inum) {
>>>>>> @@ -1773,6 +1787,8 @@ static int f2fs_enable_quotas(struct super_block *sb)
>>>>>>  					"fsck to fix.", type, err);
>>>>>>  				for (type--; type >= 0; type--)
>>>>>>  					dquot_quota_off(sb, type);
>>>>>> +				set_sbi_flag(F2FS_SB(sb),
>>>>>> +						SBI_QUOTA_NEED_REPAIR);
>>>>>>  				return err;
>>>>>>  			}
>>>>>>  		}
>>>>>> @@ -1780,35 +1796,51 @@ static int f2fs_enable_quotas(struct super_block *sb)
>>>>>>  	return 0;
>>>>>>  }
>>>>>>  
>>>>>> -static int f2fs_quota_sync(struct super_block *sb, int type)
>>>>>> +int f2fs_quota_sync(struct super_block *sb, int type)
>>>>>>  {
>>>>>> +	struct f2fs_sb_info *sbi = F2FS_SB(sb);
>>>>>>  	struct quota_info *dqopt = sb_dqopt(sb);
>>>>>>  	int cnt;
>>>>>>  	int ret;
>>>>>>  
>>>>>>  	ret = dquot_writeback_dquots(sb, type);
>>>>>>  	if (ret)
>>>>>> -		return ret;
>>>>>> +		goto out;
>>>>>>  
>>>>>>  	/*
>>>>>>  	 * Now when everything is written we can discard the pagecache so
>>>>>>  	 * that userspace sees the changes.
>>>>>>  	 */
>>>>>>  	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
>>>>>> +		struct address_space *mapping;
>>>>>> +
>>>>>>  		if (type != -1 && cnt != type)
>>>>>>  			continue;
>>>>>>  		if (!sb_has_quota_active(sb, cnt))
>>>>>>  			continue;
>>>>>>  
>>>>>> -		ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping);
>>>>>> +		mapping = dqopt->files[cnt]->i_mapping;
>>>>>> +
>>>>>> +		ret = filemap_fdatawrite(mapping);
>>>>>> +		if (ret)
>>>>>> +			goto out;
>>>>>> +
>>>>>> +		/* if we are using journalled quota */
>>>>>> +		if (is_journalled_quota(sbi))
>>>>>> +			continue;
>>>>>> +
>>>>>> +		ret = filemap_fdatawait(mapping);
>>>>>>  		if (ret)
>>>>>> -			return ret;
>>>>>> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>>>>>>  
>>>>>>  		inode_lock(dqopt->files[cnt]);
>>>>>>  		truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
>>>>>>  		inode_unlock(dqopt->files[cnt]);
>>>>>>  	}
>>>>>> -	return 0;
>>>>>> +out:
>>>>>> +	if (ret)
>>>>>> +		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>>>>>> +	return ret;
>>>>>>  }
>>>>>>  
>>>>>>  static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
>>>>>> @@ -1877,11 +1909,67 @@ void f2fs_quota_off_umount(struct super_block *sb)
>>>>>>  				"Fail to turn off disk quota "
>>>>>>  				"(type: %d, err: %d, ret:%d), Please "
>>>>>>  				"run fsck to fix it.", type, err, ret);
>>>>>> -			set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK);
>>>>>> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>>>>>>  		}
>>>>>>  	}
>>>>>>  }
>>>>>>  
>>>>>> +static int f2fs_dquot_commit(struct dquot *dquot)
>>>>>> +{
>>>>>> +	int ret;
>>>>>> +
>>>>>> +	ret = dquot_commit(dquot);
>>>>>> +	if (ret < 0)
>>>>>> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
>>>>>> +	return ret;
>>>>>> +}
>>>>>> +
>>>>>> +static int f2fs_dquot_acquire(struct dquot *dquot)
>>>>>> +{
>>>>>> +	int ret;
>>>>>> +
>>>>>> +	ret = dquot_acquire(dquot);
>>>>>> +	if (ret < 0)
>>>>>> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
>>>>>> +
>>>>>> +	return ret;
>>>>>> +}
>>>>>> +
>>>>>> +static int f2fs_dquot_release(struct dquot *dquot)
>>>>>> +{
>>>>>> +	int ret;
>>>>>> +
>>>>>> +	ret = dquot_release(dquot);
>>>>>> +	if (ret < 0)
>>>>>> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
>>>>>> +	return ret;
>>>>>> +}
>>>>>> +
>>>>>> +static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
>>>>>> +{
>>>>>> +	struct super_block *sb = dquot->dq_sb;
>>>>>> +	struct f2fs_sb_info *sbi = F2FS_SB(sb);
>>>>>> +	int ret;
>>>>>> +
>>>>>> +	ret = dquot_mark_dquot_dirty(dquot);
>>>>>> +
>>>>>> +	/* if we are using journalled quota */
>>>>>> +	if (is_journalled_quota(sbi))
>>>>>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
>>>>>> +
>>>>>> +	return ret;
>>>>>> +}
>>>>>> +
>>>>>> +static int f2fs_dquot_commit_info(struct super_block *sb, int type)
>>>>>> +{
>>>>>> +	int ret;
>>>>>> +
>>>>>> +	ret = dquot_commit_info(sb, type);
>>>>>> +	if (ret < 0)
>>>>>> +		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>>>>>> +	return ret;
>>>>>> +}
>>>>>> +
>>>>>>  static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
>>>>>>  {
>>>>>>  	*projid = F2FS_I(inode)->i_projid;
>>>>>> @@ -1890,11 +1978,11 @@ static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
>>>>>>  
>>>>>>  static const struct dquot_operations f2fs_quota_operations = {
>>>>>>  	.get_reserved_space = f2fs_get_reserved_space,
>>>>>> -	.write_dquot	= dquot_commit,
>>>>>> -	.acquire_dquot	= dquot_acquire,
>>>>>> -	.release_dquot	= dquot_release,
>>>>>> -	.mark_dirty	= dquot_mark_dquot_dirty,
>>>>>> -	.write_info	= dquot_commit_info,
>>>>>> +	.write_dquot	= f2fs_dquot_commit,
>>>>>> +	.acquire_dquot	= f2fs_dquot_acquire,
>>>>>> +	.release_dquot	= f2fs_dquot_release,
>>>>>> +	.mark_dirty	= f2fs_dquot_mark_dquot_dirty,
>>>>>> +	.write_info	= f2fs_dquot_commit_info,
>>>>>>  	.alloc_dquot	= dquot_alloc,
>>>>>>  	.destroy_dquot	= dquot_destroy,
>>>>>>  	.get_projid	= f2fs_get_projid,
>>>>>> @@ -1912,6 +2000,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = {
>>>>>>  	.get_nextdqblk	= dquot_get_next_dqblk,
>>>>>>  };
>>>>>>  #else
>>>>>> +int f2fs_quota_sync(struct super_block *sb, int type)
>>>>>> +{
>>>>>> +	return 0;
>>>>>> +}
>>>>>> +
>>>>>>  void f2fs_quota_off_umount(struct super_block *sb)
>>>>>>  {
>>>>>>  }
>>>>>> @@ -2951,6 +3044,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>>>  		goto free_meta_inode;
>>>>>>  	}
>>>>>>  
>>>>>> +	if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG))
>>>>>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
>>>>>> +
>>>>>>  	/* Initialize device list */
>>>>>>  	err = f2fs_scan_devices(sbi);
>>>>>>  	if (err) {
>>>>>> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
>>>>>> index 1d4b196291d6..aaca9f5d5f5c 100644
>>>>>> --- a/include/linux/f2fs_fs.h
>>>>>> +++ b/include/linux/f2fs_fs.h
>>>>>> @@ -115,6 +115,7 @@ struct f2fs_super_block {
>>>>>>  /*
>>>>>>   * For checkpoint
>>>>>>   */
>>>>>> +#define CP_QUOTA_NEED_FSCK_FLAG		0x00000800
>>>>>>  #define CP_LARGE_NAT_BITMAP_FLAG	0x00000400
>>>>>>  #define CP_NOCRC_RECOVERY_FLAG	0x00000200
>>>>>>  #define CP_TRIMMED_FLAG		0x00000100
>>>>>> -- 
>>>>>> 2.18.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v11] f2fs: guarantee journalled quota data by checkpoint
@ 2018-10-01  2:04             ` Chao Yu
  0 siblings, 0 replies; 18+ messages in thread
From: Chao Yu @ 2018-10-01  2:04 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On 2018-10-1 9:49, Jaegeuk Kim wrote:
> On 10/01, Chao Yu wrote:
>> On 2018-10-1 9:29, Jaegeuk Kim wrote:
>>> On 10/01, Chao Yu wrote:
>>>> Hi Jaegeuk,
>>>>
>>>> On 2018-10-1 8:06, Jaegeuk Kim wrote:
>>>>> Hi Chao,
>>>>>
>>>>> This fails on fsstress with godown without fault injection. Could you please
>>>>> test a bit? I assumed that this patch should give no fsck failure along with
>>>>> valid checkpoint having no flag.
>>>>
>>>> Okay, let me reproduce with that case.
>>>>
>>>>>
>>>>> BTW, I'm in doubt that f2fs_lock_all covers entire quota modification. What
>>>>> about prepare_write_begin() -> f2fs_get_block() ... -> inc_valid_block_count()?
>>>>
>>>> If quota data changed in above path, we will detect that in below condition:
>>>>
>>>> block_operation()
>>>>
>>>> 	down_write(&sbi->node_change);
>>>>
>>>> 	if (__need_flush_quota(sbi)) {
>>>> 		up_write(&sbi->node_change);
>>>> 		f2fs_unlock_all(sbi);
>>>> 		goto retry_flush_quotas;
>>>> 	}
>>>>
>>>> So there is no problem?
>>>
>>> We may need to check quota is dirty, since we have no way to detect by
>>> f2fs structures?
>>
>> Below condition can check that.
>>
>> static bool __need_flush_quota(struct f2fs_sb_info *sbi)
>> {
>> ...
>> 	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
>> 		return true;
>> 	if (get_pages(sbi, F2FS_DIRTY_QDATA))
>> 		return true;
>> ...
>> }
>>
>> static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
>> {
>> ...
>> 	ret = dquot_mark_dquot_dirty(dquot);
>>
>> 	/* if we are using journalled quota */
>> 	if (is_journalled_quota(sbi))
>> 		set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
>> ...
>> }
> 
> Okay, then, could you please run the above stress test to reproduce this?

Sure, let me try this case and fix it.

Could you check the other patches on the mailing list, and test them instead?

Thanks,

> Thanks,
> 
>>
>> Thanks,
>>
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>> On 09/20, Chao Yu wrote:
>>>>>> From: Chao Yu <yuchao0@huawei.com>
>>>>>>
>>>>>> For journalled quota mode, let checkpoint to flush dquot dirty data
>>>>>> and quota file data to guarntee persistence of all quota sysfile in
>>>>>> last checkpoint, by this way, we can avoid corrupting quota sysfile
>>>>>> when encountering SPO.
>>>>>>
>>>>>> The implementation is as below:
>>>>>>
>>>>>> 1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
>>>>>> cached dquot metadata changes in quota subsystem, and later checkpoint
>>>>>> should:
>>>>>>  a) flush dquot metadata into quota file.
>>>>>>  b) flush quota file to storage to keep file usage be consistent.
>>>>>>
>>>>>> 2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
>>>>>> operation failed due to -EIO or -ENOSPC, so later,
>>>>>>  a) checkpoint will skip syncing dquot metadata.
>>>>>>  b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
>>>>>>     hint for fsck repairing.
>>>>>>
>>>>>> 3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
>>>>>> data updating is very heavy, it may cause hungtask in block_operation().
>>>>>> To avoid this, if our retry time exceed threshold, let's just skip
>>>>>> flushing and retry in next checkpoint().
>>>>>>
>>>>>> Signed-off-by: Weichao Guo <guoweichao@huawei.com>
>>>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>>>>> ---
>>>>>> v11:
>>>>>> - transfer quota data if fsynced inode's i_{u,g}id changed during
>>>>>> recovery.
>>>>>>  fs/f2fs/checkpoint.c    |  56 +++++++++++++++++--
>>>>>>  fs/f2fs/data.c          |  18 ++++--
>>>>>>  fs/f2fs/f2fs.h          |  50 ++++++++++++++---
>>>>>>  fs/f2fs/file.c          |  31 ++++++++---
>>>>>>  fs/f2fs/inline.c        |   4 +-
>>>>>>  fs/f2fs/inode.c         |  11 +++-
>>>>>>  fs/f2fs/namei.c         |   4 --
>>>>>>  fs/f2fs/recovery.c      |  43 +++++++++++++-
>>>>>>  fs/f2fs/super.c         | 120 ++++++++++++++++++++++++++++++++++++----
>>>>>>  include/linux/f2fs_fs.h |   1 +
>>>>>>  10 files changed, 289 insertions(+), 49 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>>>>> index d312d2829d5a..d624d7983197 100644
>>>>>> --- a/fs/f2fs/checkpoint.c
>>>>>> +++ b/fs/f2fs/checkpoint.c
>>>>>> @@ -1083,6 +1083,21 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi)
>>>>>>  	ckpt->next_free_nid = cpu_to_le32(last_nid);
>>>>>>  }
>>>>>>  
>>>>>> +static bool __need_flush_quota(struct f2fs_sb_info *sbi)
>>>>>> +{
>>>>>> +	if (!is_journalled_quota(sbi))
>>>>>> +		return false;
>>>>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
>>>>>> +		return false;
>>>>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
>>>>>> +		return false;
>>>>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
>>>>>> +		return true;
>>>>>> +	if (get_pages(sbi, F2FS_DIRTY_QDATA))
>>>>>> +		return true;
>>>>>> +	return false;
>>>>>> +}
>>>>>> +
>>>>>>  /*
>>>>>>   * Freeze all the FS-operations for checkpoint.
>>>>>>   */
>>>>>> @@ -1094,12 +1109,30 @@ static int block_operations(struct f2fs_sb_info *sbi)
>>>>>>  		.for_reclaim = 0,
>>>>>>  	};
>>>>>>  	struct blk_plug plug;
>>>>>> -	int err = 0;
>>>>>> +	int err = 0, cnt = 0;
>>>>>>  
>>>>>>  	blk_start_plug(&plug);
>>>>>>  
>>>>>> -retry_flush_dents:
>>>>>> +retry_flush_quotas:
>>>>>> +	if (__need_flush_quota(sbi)) {
>>>>>> +		if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
>>>>>> +			set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
>>>>>> +			f2fs_lock_all(sbi);
>>>>>> +			goto retry_flush_dents;
>>>>>> +		}
>>>>>> +		clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
>>>>>> +
>>>>>> +		f2fs_quota_sync(sbi->sb, -1);
>>>>>> +	}
>>>>>> +
>>>>>>  	f2fs_lock_all(sbi);
>>>>>> +	if (__need_flush_quota(sbi)) {
>>>>>> +		f2fs_unlock_all(sbi);
>>>>>> +		cond_resched();
>>>>>> +		goto retry_flush_quotas;
>>>>>> +	}
>>>>>> +
>>>>>> +retry_flush_dents:
>>>>>>  	/* write all the dirty dentry pages */
>>>>>>  	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
>>>>>>  		f2fs_unlock_all(sbi);
>>>>>> @@ -1107,7 +1140,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
>>>>>>  		if (err)
>>>>>>  			goto out;
>>>>>>  		cond_resched();
>>>>>> -		goto retry_flush_dents;
>>>>>> +		goto retry_flush_quotas;
>>>>>>  	}
>>>>>>  
>>>>>>  	/*
>>>>>> @@ -1116,6 +1149,12 @@ static int block_operations(struct f2fs_sb_info *sbi)
>>>>>>  	 */
>>>>>>  	down_write(&sbi->node_change);
>>>>>>  
>>>>>> +	if (__need_flush_quota(sbi)) {
>>>>>> +		up_write(&sbi->node_change);
>>>>>> +		f2fs_unlock_all(sbi);
>>>>>> +		goto retry_flush_quotas;
>>>>>> +	}
>>>>>> +
>>>>>>  	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
>>>>>>  		up_write(&sbi->node_change);
>>>>>>  		f2fs_unlock_all(sbi);
>>>>>> @@ -1123,7 +1162,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
>>>>>>  		if (err)
>>>>>>  			goto out;
>>>>>>  		cond_resched();
>>>>>> -		goto retry_flush_dents;
>>>>>> +		goto retry_flush_quotas;
>>>>>>  	}
>>>>>>  
>>>>>>  retry_flush_nodes:
>>>>>> @@ -1214,6 +1253,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>>>>  	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
>>>>>>  		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
>>>>>>  
>>>>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
>>>>>> +		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
>>>>>> +	else
>>>>>> +		__clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
>>>>>> +
>>>>>> +	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
>>>>>> +		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
>>>>>> +
>>>>>>  	/* set this flag to activate crc|cp_ver for recovery */
>>>>>>  	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
>>>>>>  	__clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
>>>>>> @@ -1421,6 +1468,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>>>>  
>>>>>>  	clear_sbi_flag(sbi, SBI_IS_DIRTY);
>>>>>>  	clear_sbi_flag(sbi, SBI_NEED_CP);
>>>>>> +	clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
>>>>>>  	__set_cp_next_pack(sbi);
>>>>>>  
>>>>>>  	/*
>>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>>> index 57c0823d22e0..b96f8588d565 100644
>>>>>> --- a/fs/f2fs/data.c
>>>>>> +++ b/fs/f2fs/data.c
>>>>>> @@ -46,7 +46,7 @@ static bool __is_cp_guaranteed(struct page *page)
>>>>>>  			inode->i_ino ==  F2FS_NODE_INO(sbi) ||
>>>>>>  			S_ISDIR(inode->i_mode) ||
>>>>>>  			(S_ISREG(inode->i_mode) &&
>>>>>> -			is_inode_flag_set(inode, FI_ATOMIC_FILE)) ||
>>>>>> +			(f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
>>>>>>  			is_cold_data(page))
>>>>>>  		return true;
>>>>>>  	return false;
>>>>>> @@ -975,7 +975,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
>>>>>>  	return err;
>>>>>>  }
>>>>>>  
>>>>>> -static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
>>>>>> +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
>>>>>>  {
>>>>>>  	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
>>>>>>  		if (lock)
>>>>>> @@ -1716,6 +1716,8 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
>>>>>>  		return true;
>>>>>>  	if (S_ISDIR(inode->i_mode))
>>>>>>  		return true;
>>>>>> +	if (IS_NOQUOTA(inode))
>>>>>> +		return true;
>>>>>>  	if (f2fs_is_atomic_file(inode))
>>>>>>  		return true;
>>>>>>  	if (fio) {
>>>>>> @@ -1960,7 +1962,7 @@ static int __write_data_page(struct page *page, bool *submitted,
>>>>>>  	}
>>>>>>  
>>>>>>  	unlock_page(page);
>>>>>> -	if (!S_ISDIR(inode->i_mode))
>>>>>> +	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode))
>>>>>>  		f2fs_balance_fs(sbi, need_balance_fs);
>>>>>>  
>>>>>>  	if (unlikely(f2fs_cp_error(sbi))) {
>>>>>> @@ -2151,6 +2153,8 @@ static inline bool __should_serialize_io(struct inode *inode,
>>>>>>  {
>>>>>>  	if (!S_ISREG(inode->i_mode))
>>>>>>  		return false;
>>>>>> +	if (IS_NOQUOTA(inode))
>>>>>> +		return false;
>>>>>>  	if (wbc->sync_mode != WB_SYNC_ALL)
>>>>>>  		return true;
>>>>>>  	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
>>>>>> @@ -2180,7 +2184,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>>>>  	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
>>>>>>  		goto skip_write;
>>>>>>  
>>>>>> -	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
>>>>>> +	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
>>>>>> +			wbc->sync_mode == WB_SYNC_NONE &&
>>>>>>  			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
>>>>>>  			f2fs_available_free_memory(sbi, DIRTY_DENTS))
>>>>>>  		goto skip_write;
>>>>>> @@ -2245,7 +2250,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
>>>>>>  		down_write(&F2FS_I(inode)->i_mmap_sem);
>>>>>>  
>>>>>>  		truncate_pagecache(inode, i_size);
>>>>>> -		f2fs_truncate_blocks(inode, i_size, true);
>>>>>> +		f2fs_truncate_blocks(inode, i_size, true, true);
>>>>>>  
>>>>>>  		up_write(&F2FS_I(inode)->i_mmap_sem);
>>>>>>  		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
>>>>>> @@ -2380,7 +2385,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
>>>>>>  	if (err)
>>>>>>  		goto fail;
>>>>>>  
>>>>>> -	if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
>>>>>> +	if (need_balance && !IS_NOQUOTA(inode) &&
>>>>>> +			has_not_enough_free_secs(sbi, 0, 0)) {
>>>>>>  		unlock_page(page);
>>>>>>  		f2fs_balance_fs(sbi, true);
>>>>>>  		lock_page(page);
>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>> index 917b2ca76aac..b5e400be73e0 100644
>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>> @@ -525,6 +525,9 @@ enum {
>>>>>>  
>>>>>>  #define DEFAULT_RETRY_IO_COUNT	8	/* maximum retry read IO count */
>>>>>>  
>>>>>> +/* maximum retry quota flush count */
>>>>>> +#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT		8
>>>>>> +
>>>>>>  #define F2FS_LINK_MAX	0xffffffff	/* maximum link count per file */
>>>>>>  
>>>>>>  #define MAX_DIR_RA_PAGES	4	/* maximum ra pages of dir */
>>>>>> @@ -1088,6 +1091,9 @@ enum {
>>>>>>  	SBI_NEED_CP,				/* need to checkpoint */
>>>>>>  	SBI_IS_SHUTDOWN,			/* shutdown by ioctl */
>>>>>>  	SBI_IS_RECOVERED,			/* recovered orphan/data */
>>>>>> +	SBI_QUOTA_NEED_FLUSH,			/* need to flush quota info in CP */
>>>>>> +	SBI_QUOTA_SKIP_FLUSH,			/* skip flushing quota in current CP */
>>>>>> +	SBI_QUOTA_NEED_REPAIR,			/* quota file may be corrupted */
>>>>>>  };
>>>>>>  
>>>>>>  enum {
>>>>>> @@ -1891,12 +1897,18 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
>>>>>>  {
>>>>>>  	block_t	valid_block_count;
>>>>>>  	unsigned int valid_node_count;
>>>>>> -	bool quota = inode && !is_inode;
>>>>>> +	int err;
>>>>>>  
>>>>>> -	if (quota) {
>>>>>> -		int ret = dquot_reserve_block(inode, 1);
>>>>>> -		if (ret)
>>>>>> -			return ret;
>>>>>> +	if (is_inode) {
>>>>>> +		if (inode) {
>>>>>> +			err = dquot_alloc_inode(inode);
>>>>>> +			if (err)
>>>>>> +				return err;
>>>>>> +		}
>>>>>> +	} else {
>>>>>> +		err = dquot_reserve_block(inode, 1);
>>>>>> +		if (err)
>>>>>> +			return err;
>>>>>>  	}
>>>>>>  
>>>>>>  	if (time_to_inject(sbi, FAULT_BLOCK)) {
>>>>>> @@ -1938,8 +1950,12 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
>>>>>>  	return 0;
>>>>>>  
>>>>>>  enospc:
>>>>>> -	if (quota)
>>>>>> +	if (is_inode) {
>>>>>> +		if (inode)
>>>>>> +			dquot_free_inode(inode);
>>>>>> +	} else {
>>>>>>  		dquot_release_reservation_block(inode, 1);
>>>>>> +	}
>>>>>>  	return -ENOSPC;
>>>>>>  }
>>>>>>  
>>>>>> @@ -1960,7 +1976,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
>>>>>>  
>>>>>>  	spin_unlock(&sbi->stat_lock);
>>>>>>  
>>>>>> -	if (!is_inode)
>>>>>> +	if (is_inode)
>>>>>> +		dquot_free_inode(inode);
>>>>>> +	else
>>>>>>  		f2fs_i_blocks_write(inode, 1, false, true);
>>>>>>  }
>>>>>>  
>>>>>> @@ -2739,7 +2757,8 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
>>>>>>   */
>>>>>>  int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
>>>>>>  void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
>>>>>> -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
>>>>>> +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
>>>>>> +							bool buf_write);
>>>>>>  int f2fs_truncate(struct inode *inode);
>>>>>>  int f2fs_getattr(const struct path *path, struct kstat *stat,
>>>>>>  			u32 request_mask, unsigned int flags);
>>>>>> @@ -2827,6 +2846,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
>>>>>>  int f2fs_inode_dirtied(struct inode *inode, bool sync);
>>>>>>  void f2fs_inode_synced(struct inode *inode);
>>>>>>  int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
>>>>>> +int f2fs_quota_sync(struct super_block *sb, int type);
>>>>>>  void f2fs_quota_off_umount(struct super_block *sb);
>>>>>>  int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
>>>>>>  int f2fs_sync_fs(struct super_block *sb, int sync);
>>>>>> @@ -3025,6 +3045,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
>>>>>>  struct page *f2fs_get_new_data_page(struct inode *inode,
>>>>>>  			struct page *ipage, pgoff_t index, bool new_i_size);
>>>>>>  int f2fs_do_write_data_page(struct f2fs_io_info *fio);
>>>>>> +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
>>>>>>  int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
>>>>>>  			int create, int flag);
>>>>>>  int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
>>>>>> @@ -3456,3 +3477,16 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
>>>>>>  #endif
>>>>>>  
>>>>>>  #endif
>>>>>> +
>>>>>> +static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
>>>>>> +{
>>>>>> +#ifdef CONFIG_QUOTA
>>>>>> +	if (f2fs_sb_has_quota_ino(sbi->sb))
>>>>>> +		return true;
>>>>>> +	if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
>>>>>> +		F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
>>>>>> +		F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
>>>>>> +		return true;
>>>>>> +#endif
>>>>>> +	return false;
>>>>>> +}
>>>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>>>> index 357422a4c319..a75f3e145bf1 100644
>>>>>> --- a/fs/f2fs/file.c
>>>>>> +++ b/fs/f2fs/file.c
>>>>>> @@ -586,7 +586,8 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
>>>>>>  	return 0;
>>>>>>  }
>>>>>>  
>>>>>> -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
>>>>>> +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
>>>>>> +							bool buf_write)
>>>>>>  {
>>>>>>  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>>>>>  	struct dnode_of_data dn;
>>>>>> @@ -594,6 +595,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
>>>>>>  	int count = 0, err = 0;
>>>>>>  	struct page *ipage;
>>>>>>  	bool truncate_page = false;
>>>>>> +	int flag = buf_write ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO;
>>>>>>  
>>>>>>  	trace_f2fs_truncate_blocks_enter(inode, from);
>>>>>>  
>>>>>> @@ -603,7 +605,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
>>>>>>  		goto free_partial;
>>>>>>  
>>>>>>  	if (lock)
>>>>>> -		f2fs_lock_op(sbi);
>>>>>> +		__do_map_lock(sbi, flag, true);
>>>>>>  
>>>>>>  	ipage = f2fs_get_node_page(sbi, inode->i_ino);
>>>>>>  	if (IS_ERR(ipage)) {
>>>>>> @@ -641,7 +643,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
>>>>>>  	err = f2fs_truncate_inode_blocks(inode, free_from);
>>>>>>  out:
>>>>>>  	if (lock)
>>>>>> -		f2fs_unlock_op(sbi);
>>>>>> +		__do_map_lock(sbi, flag, false);
>>>>>>  free_partial:
>>>>>>  	/* lastly zero out the first data page */
>>>>>>  	if (!err)
>>>>>> @@ -676,7 +678,7 @@ int f2fs_truncate(struct inode *inode)
>>>>>>  			return err;
>>>>>>  	}
>>>>>>  
>>>>>> -	err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
>>>>>> +	err = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
>>>>>>  	if (err)
>>>>>>  		return err;
>>>>>>  
>>>>>> @@ -785,9 +787,24 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
>>>>>>  		!uid_eq(attr->ia_uid, inode->i_uid)) ||
>>>>>>  		(attr->ia_valid & ATTR_GID &&
>>>>>>  		!gid_eq(attr->ia_gid, inode->i_gid))) {
>>>>>> +		f2fs_lock_op(F2FS_I_SB(inode));
>>>>>>  		err = dquot_transfer(inode, attr);
>>>>>> -		if (err)
>>>>>> +		if (err) {
>>>>>> +			set_sbi_flag(F2FS_I_SB(inode),
>>>>>> +					SBI_QUOTA_NEED_REPAIR);
>>>>>> +			f2fs_unlock_op(F2FS_I_SB(inode));
>>>>>>  			return err;
>>>>>> +		}
>>>>>> +		/*
>>>>>> +		 * update uid/gid under lock_op(), so that dquot and inode can
>>>>>> +		 * be updated atomically.
>>>>>> +		 */
>>>>>> +		if (attr->ia_valid & ATTR_UID)
>>>>>> +			inode->i_uid = attr->ia_uid;
>>>>>> +		if (attr->ia_valid & ATTR_GID)
>>>>>> +			inode->i_gid = attr->ia_gid;
>>>>>> +		f2fs_mark_inode_dirty_sync(inode, true);
>>>>>> +		f2fs_unlock_op(F2FS_I_SB(inode));
>>>>>>  	}
>>>>>>  
>>>>>>  	if (attr->ia_valid & ATTR_SIZE) {
>>>>>> @@ -1242,7 +1259,7 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
>>>>>>  	new_size = i_size_read(inode) - len;
>>>>>>  	truncate_pagecache(inode, new_size);
>>>>>>  
>>>>>> -	ret = f2fs_truncate_blocks(inode, new_size, true);
>>>>>> +	ret = f2fs_truncate_blocks(inode, new_size, true, false);
>>>>>>  	up_write(&F2FS_I(inode)->i_mmap_sem);
>>>>>>  	if (!ret)
>>>>>>  		f2fs_i_size_write(inode, new_size);
>>>>>> @@ -1427,7 +1444,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
>>>>>>  	f2fs_balance_fs(sbi, true);
>>>>>>  
>>>>>>  	down_write(&F2FS_I(inode)->i_mmap_sem);
>>>>>> -	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
>>>>>> +	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
>>>>>>  	up_write(&F2FS_I(inode)->i_mmap_sem);
>>>>>>  	if (ret)
>>>>>>  		return ret;
>>>>>> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
>>>>>> index 425d740f87fd..cb31a719b048 100644
>>>>>> --- a/fs/f2fs/inline.c
>>>>>> +++ b/fs/f2fs/inline.c
>>>>>> @@ -298,7 +298,7 @@ bool f2fs_recover_inline_data(struct inode *inode, struct page *npage)
>>>>>>  		clear_inode_flag(inode, FI_INLINE_DATA);
>>>>>>  		f2fs_put_page(ipage, 1);
>>>>>>  	} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
>>>>>> -		if (f2fs_truncate_blocks(inode, 0, false))
>>>>>> +		if (f2fs_truncate_blocks(inode, 0, false, false))
>>>>>>  			return false;
>>>>>>  		goto process_inline;
>>>>>>  	}
>>>>>> @@ -470,7 +470,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
>>>>>>  	return 0;
>>>>>>  punch_dentry_pages:
>>>>>>  	truncate_inode_pages(&dir->i_data, 0);
>>>>>> -	f2fs_truncate_blocks(dir, 0, false);
>>>>>> +	f2fs_truncate_blocks(dir, 0, false, false);
>>>>>>  	f2fs_remove_dirty_inode(dir);
>>>>>>  	return err;
>>>>>>  }
>>>>>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
>>>>>> index 86e7333d60c1..3c278e63d1a3 100644
>>>>>> --- a/fs/f2fs/inode.c
>>>>>> +++ b/fs/f2fs/inode.c
>>>>>> @@ -645,7 +645,11 @@ void f2fs_evict_inode(struct inode *inode)
>>>>>>  	if (inode->i_nlink || is_bad_inode(inode))
>>>>>>  		goto no_delete;
>>>>>>  
>>>>>> -	dquot_initialize(inode);
>>>>>> +	err = dquot_initialize(inode);
>>>>>> +	if (err) {
>>>>>> +		err = 0;
>>>>>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
>>>>>> +	}
>>>>>>  
>>>>>>  	f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
>>>>>>  	f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
>>>>>> @@ -677,9 +681,10 @@ void f2fs_evict_inode(struct inode *inode)
>>>>>>  		goto retry;
>>>>>>  	}
>>>>>>  
>>>>>> -	if (err)
>>>>>> +	if (err) {
>>>>>>  		f2fs_update_inode_page(inode);
>>>>>> -	dquot_free_inode(inode);
>>>>>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
>>>>>> +	}
>>>>>>  	sb_end_intwrite(inode->i_sb);
>>>>>>  no_delete:
>>>>>>  	dquot_drop(inode);
>>>>>> diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
>>>>>> index 9ad451ac2cec..b65491a63115 100644
>>>>>> --- a/fs/f2fs/namei.c
>>>>>> +++ b/fs/f2fs/namei.c
>>>>>> @@ -71,10 +71,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
>>>>>>  	if (err)
>>>>>>  		goto fail_drop;
>>>>>>  
>>>>>> -	err = dquot_alloc_inode(inode);
>>>>>> -	if (err)
>>>>>> -		goto fail_drop;
>>>>>> -
>>>>>>  	set_inode_flag(inode, FI_NEW_INODE);
>>>>>>  
>>>>>>  	/* If the directory encrypted, then we should encrypt the inode. */
>>>>>> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
>>>>>> index 41f2c0fe6d8e..70f05650191e 100644
>>>>>> --- a/fs/f2fs/recovery.c
>>>>>> +++ b/fs/f2fs/recovery.c
>>>>>> @@ -191,6 +191,33 @@ static int recover_dentry(struct inode *inode, struct page *ipage,
>>>>>>  	return err;
>>>>>>  }
>>>>>>  
>>>>>> +static int recover_quota_data(struct inode *inode, struct page *page)
>>>>>> +{
>>>>>> +	struct f2fs_inode *raw = F2FS_INODE(page);
>>>>>> +	struct iattr attr;
>>>>>> +	uid_t i_uid = le32_to_cpu(raw->i_uid);
>>>>>> +	gid_t i_gid = le32_to_cpu(raw->i_gid);
>>>>>> +	int err;
>>>>>> +
>>>>>> +	memset(&attr, 0, sizeof(attr));
>>>>>> +
>>>>>> +	attr.ia_uid = make_kuid(inode->i_sb->s_user_ns, i_uid);
>>>>>> +	attr.ia_gid = make_kgid(inode->i_sb->s_user_ns, i_gid);
>>>>>> +
>>>>>> +	if (!uid_eq(attr.ia_uid, inode->i_uid))
>>>>>> +		attr.ia_valid |= ATTR_UID;
>>>>>> +	if (!gid_eq(attr.ia_gid, inode->i_gid))
>>>>>> +		attr.ia_valid |= ATTR_GID;
>>>>>> +
>>>>>> +	if (!attr.ia_valid)
>>>>>> +		return 0;
>>>>>> +
>>>>>> +	err = dquot_transfer(inode, &attr);
>>>>>> +	if (err)
>>>>>> +		set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR);
>>>>>> +	return err;
>>>>>> +}
>>>>>> +
>>>>>>  static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
>>>>>>  {
>>>>>>  	if (ri->i_inline & F2FS_PIN_FILE)
>>>>>> @@ -203,12 +230,18 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
>>>>>>  		clear_inode_flag(inode, FI_DATA_EXIST);
>>>>>>  }
>>>>>>  
>>>>>> -static void recover_inode(struct inode *inode, struct page *page)
>>>>>> +static int recover_inode(struct inode *inode, struct page *page)
>>>>>>  {
>>>>>>  	struct f2fs_inode *raw = F2FS_INODE(page);
>>>>>>  	char *name;
>>>>>> +	int err;
>>>>>>  
>>>>>>  	inode->i_mode = le16_to_cpu(raw->i_mode);
>>>>>> +
>>>>>> +	err = recover_quota_data(inode, page);
>>>>>> +	if (err)
>>>>>> +		return err;
>>>>>> +
>>>>>>  	i_uid_write(inode, le32_to_cpu(raw->i_uid));
>>>>>>  	i_gid_write(inode, le32_to_cpu(raw->i_gid));
>>>>>>  	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
>>>>>> @@ -231,6 +264,7 @@ static void recover_inode(struct inode *inode, struct page *page)
>>>>>>  	f2fs_msg(inode->i_sb, KERN_NOTICE,
>>>>>>  		"recover_inode: ino = %x, name = %s, inline = %x",
>>>>>>  			ino_of_node(page), name, raw->i_inline);
>>>>>> +	return 0;
>>>>>>  }
>>>>>>  
>>>>>>  static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
>>>>>> @@ -597,8 +631,11 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
>>>>>>  		 * In this case, we can lose the latest inode(x).
>>>>>>  		 * So, call recover_inode for the inode update.
>>>>>>  		 */
>>>>>> -		if (IS_INODE(page))
>>>>>> -			recover_inode(entry->inode, page);
>>>>>> +		if (IS_INODE(page)) {
>>>>>> +			err = recover_inode(entry->inode, page);
>>>>>> +			if (err)
>>>>>> +				break;
>>>>>> +		}
>>>>>>  		if (entry->last_dentry == blkaddr) {
>>>>>>  			err = recover_dentry(entry->inode, page, dir_list);
>>>>>>  			if (err) {
>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>> index 945468968d4e..3a46c9b81188 100644
>>>>>> --- a/fs/f2fs/super.c
>>>>>> +++ b/fs/f2fs/super.c
>>>>>> @@ -1689,6 +1689,13 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode)
>>>>>>  
>>>>>>  static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
>>>>>>  {
>>>>>> +
>>>>>> +	if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
>>>>>> +		f2fs_msg(sbi->sb, KERN_ERR,
>>>>>> +			"quota sysfile may be corrupted, skip loading it");
>>>>>> +		return 0;
>>>>>> +	}
>>>>>> +
>>>>>>  	return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type],
>>>>>>  					F2FS_OPTION(sbi).s_jquota_fmt, type);
>>>>>>  }
>>>>>> @@ -1759,7 +1766,14 @@ static int f2fs_enable_quotas(struct super_block *sb)
>>>>>>  		test_opt(F2FS_SB(sb), PRJQUOTA),
>>>>>>  	};
>>>>>>  
>>>>>> -	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
>>>>>> +	if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) {
>>>>>> +		f2fs_msg(sb, KERN_ERR,
>>>>>> +			"quota file may be corrupted, skip loading it");
>>>>>> +		return 0;
>>>>>> +	}
>>>>>> +
>>>>>> +	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
>>>>>> +
>>>>>>  	for (type = 0; type < MAXQUOTAS; type++) {
>>>>>>  		qf_inum = f2fs_qf_ino(sb, type);
>>>>>>  		if (qf_inum) {
>>>>>> @@ -1773,6 +1787,8 @@ static int f2fs_enable_quotas(struct super_block *sb)
>>>>>>  					"fsck to fix.", type, err);
>>>>>>  				for (type--; type >= 0; type--)
>>>>>>  					dquot_quota_off(sb, type);
>>>>>> +				set_sbi_flag(F2FS_SB(sb),
>>>>>> +						SBI_QUOTA_NEED_REPAIR);
>>>>>>  				return err;
>>>>>>  			}
>>>>>>  		}
>>>>>> @@ -1780,35 +1796,51 @@ static int f2fs_enable_quotas(struct super_block *sb)
>>>>>>  	return 0;
>>>>>>  }
>>>>>>  
>>>>>> -static int f2fs_quota_sync(struct super_block *sb, int type)
>>>>>> +int f2fs_quota_sync(struct super_block *sb, int type)
>>>>>>  {
>>>>>> +	struct f2fs_sb_info *sbi = F2FS_SB(sb);
>>>>>>  	struct quota_info *dqopt = sb_dqopt(sb);
>>>>>>  	int cnt;
>>>>>>  	int ret;
>>>>>>  
>>>>>>  	ret = dquot_writeback_dquots(sb, type);
>>>>>>  	if (ret)
>>>>>> -		return ret;
>>>>>> +		goto out;
>>>>>>  
>>>>>>  	/*
>>>>>>  	 * Now when everything is written we can discard the pagecache so
>>>>>>  	 * that userspace sees the changes.
>>>>>>  	 */
>>>>>>  	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
>>>>>> +		struct address_space *mapping;
>>>>>> +
>>>>>>  		if (type != -1 && cnt != type)
>>>>>>  			continue;
>>>>>>  		if (!sb_has_quota_active(sb, cnt))
>>>>>>  			continue;
>>>>>>  
>>>>>> -		ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping);
>>>>>> +		mapping = dqopt->files[cnt]->i_mapping;
>>>>>> +
>>>>>> +		ret = filemap_fdatawrite(mapping);
>>>>>> +		if (ret)
>>>>>> +			goto out;
>>>>>> +
>>>>>> +		/* if we are using journalled quota */
>>>>>> +		if (is_journalled_quota(sbi))
>>>>>> +			continue;
>>>>>> +
>>>>>> +		ret = filemap_fdatawait(mapping);
>>>>>>  		if (ret)
>>>>>> -			return ret;
>>>>>> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>>>>>>  
>>>>>>  		inode_lock(dqopt->files[cnt]);
>>>>>>  		truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
>>>>>>  		inode_unlock(dqopt->files[cnt]);
>>>>>>  	}
>>>>>> -	return 0;
>>>>>> +out:
>>>>>> +	if (ret)
>>>>>> +		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>>>>>> +	return ret;
>>>>>>  }
>>>>>>  
>>>>>>  static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
>>>>>> @@ -1877,11 +1909,67 @@ void f2fs_quota_off_umount(struct super_block *sb)
>>>>>>  				"Fail to turn off disk quota "
>>>>>>  				"(type: %d, err: %d, ret:%d), Please "
>>>>>>  				"run fsck to fix it.", type, err, ret);
>>>>>> -			set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK);
>>>>>> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>>>>>>  		}
>>>>>>  	}
>>>>>>  }
>>>>>>  
>>>>>> +static int f2fs_dquot_commit(struct dquot *dquot)
>>>>>> +{
>>>>>> +	int ret;
>>>>>> +
>>>>>> +	ret = dquot_commit(dquot);
>>>>>> +	if (ret < 0)
>>>>>> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
>>>>>> +	return ret;
>>>>>> +}
>>>>>> +
>>>>>> +static int f2fs_dquot_acquire(struct dquot *dquot)
>>>>>> +{
>>>>>> +	int ret;
>>>>>> +
>>>>>> +	ret = dquot_acquire(dquot);
>>>>>> +	if (ret < 0)
>>>>>> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
>>>>>> +
>>>>>> +	return ret;
>>>>>> +}
>>>>>> +
>>>>>> +static int f2fs_dquot_release(struct dquot *dquot)
>>>>>> +{
>>>>>> +	int ret;
>>>>>> +
>>>>>> +	ret = dquot_release(dquot);
>>>>>> +	if (ret < 0)
>>>>>> +		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
>>>>>> +	return ret;
>>>>>> +}
>>>>>> +
>>>>>> +static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
>>>>>> +{
>>>>>> +	struct super_block *sb = dquot->dq_sb;
>>>>>> +	struct f2fs_sb_info *sbi = F2FS_SB(sb);
>>>>>> +	int ret;
>>>>>> +
>>>>>> +	ret = dquot_mark_dquot_dirty(dquot);
>>>>>> +
>>>>>> +	/* if we are using journalled quota */
>>>>>> +	if (is_journalled_quota(sbi))
>>>>>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
>>>>>> +
>>>>>> +	return ret;
>>>>>> +}
>>>>>> +
>>>>>> +static int f2fs_dquot_commit_info(struct super_block *sb, int type)
>>>>>> +{
>>>>>> +	int ret;
>>>>>> +
>>>>>> +	ret = dquot_commit_info(sb, type);
>>>>>> +	if (ret < 0)
>>>>>> +		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>>>>>> +	return ret;
>>>>>> +}
>>>>>> +
>>>>>>  static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
>>>>>>  {
>>>>>>  	*projid = F2FS_I(inode)->i_projid;
>>>>>> @@ -1890,11 +1978,11 @@ static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
>>>>>>  
>>>>>>  static const struct dquot_operations f2fs_quota_operations = {
>>>>>>  	.get_reserved_space = f2fs_get_reserved_space,
>>>>>> -	.write_dquot	= dquot_commit,
>>>>>> -	.acquire_dquot	= dquot_acquire,
>>>>>> -	.release_dquot	= dquot_release,
>>>>>> -	.mark_dirty	= dquot_mark_dquot_dirty,
>>>>>> -	.write_info	= dquot_commit_info,
>>>>>> +	.write_dquot	= f2fs_dquot_commit,
>>>>>> +	.acquire_dquot	= f2fs_dquot_acquire,
>>>>>> +	.release_dquot	= f2fs_dquot_release,
>>>>>> +	.mark_dirty	= f2fs_dquot_mark_dquot_dirty,
>>>>>> +	.write_info	= f2fs_dquot_commit_info,
>>>>>>  	.alloc_dquot	= dquot_alloc,
>>>>>>  	.destroy_dquot	= dquot_destroy,
>>>>>>  	.get_projid	= f2fs_get_projid,
>>>>>> @@ -1912,6 +2000,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = {
>>>>>>  	.get_nextdqblk	= dquot_get_next_dqblk,
>>>>>>  };
>>>>>>  #else
>>>>>> +int f2fs_quota_sync(struct super_block *sb, int type)
>>>>>> +{
>>>>>> +	return 0;
>>>>>> +}
>>>>>> +
>>>>>>  void f2fs_quota_off_umount(struct super_block *sb)
>>>>>>  {
>>>>>>  }
>>>>>> @@ -2951,6 +3044,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>>>  		goto free_meta_inode;
>>>>>>  	}
>>>>>>  
>>>>>> +	if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG))
>>>>>> +		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
>>>>>> +
>>>>>>  	/* Initialize device list */
>>>>>>  	err = f2fs_scan_devices(sbi);
>>>>>>  	if (err) {
>>>>>> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
>>>>>> index 1d4b196291d6..aaca9f5d5f5c 100644
>>>>>> --- a/include/linux/f2fs_fs.h
>>>>>> +++ b/include/linux/f2fs_fs.h
>>>>>> @@ -115,6 +115,7 @@ struct f2fs_super_block {
>>>>>>  /*
>>>>>>   * For checkpoint
>>>>>>   */
>>>>>> +#define CP_QUOTA_NEED_FSCK_FLAG		0x00000800
>>>>>>  #define CP_LARGE_NAT_BITMAP_FLAG	0x00000400
>>>>>>  #define CP_NOCRC_RECOVERY_FLAG	0x00000200
>>>>>>  #define CP_TRIMMED_FLAG		0x00000100
>>>>>> -- 
>>>>>> 2.18.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v11] f2fs: guarantee journalled quota data by checkpoint
  2018-10-01  2:04             ` Chao Yu
  (?)
@ 2018-10-02 16:45             ` Jaegeuk Kim
  2018-10-17  3:30                 ` Chao Yu
  -1 siblings, 1 reply; 18+ messages in thread
From: Jaegeuk Kim @ 2018-10-02 16:45 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-f2fs-devel, linux-kernel, Chao Yu, Weichao Guo

On 10/01, Chao Yu wrote:
> On 2018-10-1 9:49, Jaegeuk Kim wrote:
> > On 10/01, Chao Yu wrote:
> >> On 2018-10-1 9:29, Jaegeuk Kim wrote:
> >>> On 10/01, Chao Yu wrote:
> >>>> Hi Jaegeuk,
> >>>>
> >>>> On 2018-10-1 8:06, Jaegeuk Kim wrote:
> >>>>> Hi Chao,
> >>>>>
> >>>>> This fails on fsstress with godown without fault injection. Could you please
> >>>>> test a bit? I assumed that this patch should give no fsck failure along with
> >>>>> valid checkpoint having no flag.
> >>>>
> >>>> Okay, let me reproduce with that case.
> >>>>
> >>>>>
> >>>>> BTW, I'm in doubt that f2fs_lock_all covers entire quota modification. What
> >>>>> about prepare_write_begin() -> f2fs_get_block() ... -> inc_valid_block_count()?
> >>>>
> >>>> If quota data changed in above path, we will detect that in below condition:
> >>>>
> >>>> block_operation()
> >>>>
> >>>> 	down_write(&sbi->node_change);
> >>>>
> >>>> 	if (__need_flush_quota(sbi)) {
> >>>> 		up_write(&sbi->node_change);
> >>>> 		f2fs_unlock_all(sbi);
> >>>> 		goto retry_flush_quotas;
> >>>> 	}
> >>>>
> >>>> So there is no problem?
> >>>
> >>> We may need to check quota is dirty, since we have no way to detect by
> >>> f2fs structures?
> >>
> >> Below condition can check that.
> >>
> >> static bool __need_flush_quota(struct f2fs_sb_info *sbi)
> >> {
> >> ...
> >> 	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
> >> 		return true;
> >> 	if (get_pages(sbi, F2FS_DIRTY_QDATA))
> >> 		return true;
> >> ...
> >> }
> >>
> >> static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
> >> {
> >> ...
> >> 	ret = dquot_mark_dquot_dirty(dquot);
> >>
> >> 	/* if we are using journalled quota */
> >> 	if (is_journalled_quota(sbi))
> >> 		set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
> >> ...
> >> }
> > 
> > Okay, then, could you please run the above stress test to reproduce this?
> 
> Sure, let me try this case and fix it.
> 
> Could you check other patches in mailing list, and test them instead?

With the below change, the test result is much better for now.
Let me know, if you have further concern.

---
 fs/f2fs/checkpoint.c | 6 ++++++
 fs/f2fs/super.c      | 4 +++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index a1facfbfc5c7..b111c6201023 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1111,6 +1111,8 @@ static int block_operations(struct f2fs_sb_info *sbi)
 
 retry_flush_quotas:
 	if (__need_flush_quota(sbi)) {
+		int locked;
+
 		if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
 			set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
 			f2fs_lock_all(sbi);
@@ -1118,7 +1120,11 @@ static int block_operations(struct f2fs_sb_info *sbi)
 		}
 		clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
 
+		/* only failed during mount/umount/freeze/quotactl */
+		locked = down_read_trylock(&sbi->sb->s_umount);
 		f2fs_quota_sync(sbi->sb, -1);
+		if (locked)
+			up_read(&sbi->sb->s_umount);
 	}
 
 	f2fs_lock_all(sbi);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index a28c245b1288..b39f60d57120 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1706,6 +1706,7 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
 				congestion_wait(BLK_RW_ASYNC, HZ/50);
 				goto repeat;
 			}
+			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
 			return PTR_ERR(page);
 		}
 
@@ -1717,6 +1718,7 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
 		}
 		if (unlikely(!PageUptodate(page))) {
 			f2fs_put_page(page, 1);
+			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
 			return -EIO;
 		}
 
@@ -1758,6 +1760,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
 				congestion_wait(BLK_RW_ASYNC, HZ/50);
 				goto retry;
 			}
+			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
 			break;
 		}
 
@@ -1794,7 +1797,6 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode)
 
 static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
 {
-
 	if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
 		f2fs_msg(sbi->sb, KERN_ERR,
 			"quota sysfile may be corrupted, skip loading it");
-- 
2.19.0.605.g01d371f741-goog


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v11] f2fs: guarantee journalled quota data by checkpoint
  2018-10-02 16:45             ` Jaegeuk Kim
@ 2018-10-17  3:30                 ` Chao Yu
  0 siblings, 0 replies; 18+ messages in thread
From: Chao Yu @ 2018-10-17  3:30 UTC (permalink / raw)
  To: Jaegeuk Kim, Chao Yu; +Cc: linux-f2fs-devel, linux-kernel, Weichao Guo

Jaegeuk,

Sorry for the long delay, I've been busy with other things.

I'm trying your fixing code on both fsck and kernel with 'run.sh
por_fsstress' case.

And got below output, is that normal in updated fsck? I didn't have time to
look into this.

Info: checkpoint state = 8c6 :  quota_need_fsck nat_bits crc
compacted_summary orphan_inodes sudden-power-off
[fsck_chk_quota_files:1755] Fixing Quota file ([  0] ino [0x4])
[ERROR] quotaio_tree.c:83:write_blk:: Cannot write block (1320):
Inappropriate ioctl for device
[ERROR] quotaio_tree.c:110:get_free_dqblk:: Cannot allocate new quota block
(out of disk space).
[ERROR] quotaio_tree.c:315:dq_insert_tree:: Cannot write quota (id
67368348): Inappropriate ioctl for device
[fsck_chk_quota_files:1755] Fixing Quota file ([  1] ino [0x5])
[ERROR] quotaio_tree.c:83:write_blk:: Cannot write block (1332):
Inappropriate ioctl for device
[ERROR] quotaio_tree.c:110:get_free_dqblk:: Cannot allocate new quota block
(out of disk space).
[ERROR] quotaio_tree.c:315:dq_insert_tree:: Cannot write quota (id
73435216): Inappropriate ioctl for device

Thanks,


On 2018/10/3 0:45, Jaegeuk Kim wrote:
> On 10/01, Chao Yu wrote:
>> On 2018-10-1 9:49, Jaegeuk Kim wrote:
>>> On 10/01, Chao Yu wrote:
>>>> On 2018-10-1 9:29, Jaegeuk Kim wrote:
>>>>> On 10/01, Chao Yu wrote:
>>>>>> Hi Jaegeuk,
>>>>>>
>>>>>> On 2018-10-1 8:06, Jaegeuk Kim wrote:
>>>>>>> Hi Chao,
>>>>>>>
>>>>>>> This fails on fsstress with godown without fault injection. Could you please
>>>>>>> test a bit? I assumed that this patch should give no fsck failure along with
>>>>>>> valid checkpoint having no flag.
>>>>>>
>>>>>> Okay, let me reproduce with that case.
>>>>>>
>>>>>>>
>>>>>>> BTW, I'm in doubt that f2fs_lock_all covers entire quota modification. What
>>>>>>> about prepare_write_begin() -> f2fs_get_block() ... -> inc_valid_block_count()?
>>>>>>
>>>>>> If quota data changed in above path, we will detect that in below condition:
>>>>>>
>>>>>> block_operation()
>>>>>>
>>>>>> 	down_write(&sbi->node_change);
>>>>>>
>>>>>> 	if (__need_flush_quota(sbi)) {
>>>>>> 		up_write(&sbi->node_change);
>>>>>> 		f2fs_unlock_all(sbi);
>>>>>> 		goto retry_flush_quotas;
>>>>>> 	}
>>>>>>
>>>>>> So there is no problem?
>>>>>
>>>>> We may need to check quota is dirty, since we have no way to detect by
>>>>> f2fs structures?
>>>>
>>>> Below condition can check that.
>>>>
>>>> static bool __need_flush_quota(struct f2fs_sb_info *sbi)
>>>> {
>>>> ...
>>>> 	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
>>>> 		return true;
>>>> 	if (get_pages(sbi, F2FS_DIRTY_QDATA))
>>>> 		return true;
>>>> ...
>>>> }
>>>>
>>>> static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
>>>> {
>>>> ...
>>>> 	ret = dquot_mark_dquot_dirty(dquot);
>>>>
>>>> 	/* if we are using journalled quota */
>>>> 	if (is_journalled_quota(sbi))
>>>> 		set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
>>>> ...
>>>> }
>>>
>>> Okay, then, could you please run the above stress test to reproduce this?
>>
>> Sure, let me try this case and fix it.
>>
>> Could you check other patches in mailing list, and test them instead?
> 
> With the below change, the test result is much better for now.
> Let me know, if you have further concern.
> 
> ---
>  fs/f2fs/checkpoint.c | 6 ++++++
>  fs/f2fs/super.c      | 4 +++-
>  2 files changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index a1facfbfc5c7..b111c6201023 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1111,6 +1111,8 @@ static int block_operations(struct f2fs_sb_info *sbi)
>  
>  retry_flush_quotas:
>  	if (__need_flush_quota(sbi)) {
> +		int locked;
> +
>  		if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
>  			set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
>  			f2fs_lock_all(sbi);
> @@ -1118,7 +1120,11 @@ static int block_operations(struct f2fs_sb_info *sbi)
>  		}
>  		clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
>  
> +		/* only failed during mount/umount/freeze/quotactl */
> +		locked = down_read_trylock(&sbi->sb->s_umount);
>  		f2fs_quota_sync(sbi->sb, -1);
> +		if (locked)
> +			up_read(&sbi->sb->s_umount);
>  	}
>  
>  	f2fs_lock_all(sbi);
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index a28c245b1288..b39f60d57120 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1706,6 +1706,7 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
>  				congestion_wait(BLK_RW_ASYNC, HZ/50);
>  				goto repeat;
>  			}
> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>  			return PTR_ERR(page);
>  		}
>  
> @@ -1717,6 +1718,7 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
>  		}
>  		if (unlikely(!PageUptodate(page))) {
>  			f2fs_put_page(page, 1);
> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>  			return -EIO;
>  		}
>  
> @@ -1758,6 +1760,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
>  				congestion_wait(BLK_RW_ASYNC, HZ/50);
>  				goto retry;
>  			}
> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>  			break;
>  		}
>  
> @@ -1794,7 +1797,6 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode)
>  
>  static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
>  {
> -
>  	if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
>  		f2fs_msg(sbi->sb, KERN_ERR,
>  			"quota sysfile may be corrupted, skip loading it");
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v11] f2fs: guarantee journalled quota data by checkpoint
@ 2018-10-17  3:30                 ` Chao Yu
  0 siblings, 0 replies; 18+ messages in thread
From: Chao Yu @ 2018-10-17  3:30 UTC (permalink / raw)
  To: Jaegeuk Kim, Chao Yu; +Cc: linux-f2fs-devel, linux-kernel, Weichao Guo

Jaegeuk,

Sorry for the long delay, I've been busy with other things.

I'm trying your fixing code on both fsck and kernel with 'run.sh
por_fsstress' case.

And got below output, is that normal in updated fsck? I didn't have time to
look into this.

Info: checkpoint state = 8c6 :  quota_need_fsck nat_bits crc
compacted_summary orphan_inodes sudden-power-off
[fsck_chk_quota_files:1755] Fixing Quota file ([  0] ino [0x4])
[ERROR] quotaio_tree.c:83:write_blk:: Cannot write block (1320):
Inappropriate ioctl for device
[ERROR] quotaio_tree.c:110:get_free_dqblk:: Cannot allocate new quota block
(out of disk space).
[ERROR] quotaio_tree.c:315:dq_insert_tree:: Cannot write quota (id
67368348): Inappropriate ioctl for device
[fsck_chk_quota_files:1755] Fixing Quota file ([  1] ino [0x5])
[ERROR] quotaio_tree.c:83:write_blk:: Cannot write block (1332):
Inappropriate ioctl for device
[ERROR] quotaio_tree.c:110:get_free_dqblk:: Cannot allocate new quota block
(out of disk space).
[ERROR] quotaio_tree.c:315:dq_insert_tree:: Cannot write quota (id
73435216): Inappropriate ioctl for device

Thanks,


On 2018/10/3 0:45, Jaegeuk Kim wrote:
> On 10/01, Chao Yu wrote:
>> On 2018-10-1 9:49, Jaegeuk Kim wrote:
>>> On 10/01, Chao Yu wrote:
>>>> On 2018-10-1 9:29, Jaegeuk Kim wrote:
>>>>> On 10/01, Chao Yu wrote:
>>>>>> Hi Jaegeuk,
>>>>>>
>>>>>> On 2018-10-1 8:06, Jaegeuk Kim wrote:
>>>>>>> Hi Chao,
>>>>>>>
>>>>>>> This fails on fsstress with godown without fault injection. Could you please
>>>>>>> test a bit? I assumed that this patch should give no fsck failure along with
>>>>>>> valid checkpoint having no flag.
>>>>>>
>>>>>> Okay, let me reproduce with that case.
>>>>>>
>>>>>>>
>>>>>>> BTW, I'm in doubt that f2fs_lock_all covers entire quota modification. What
>>>>>>> about prepare_write_begin() -> f2fs_get_block() ... -> inc_valid_block_count()?
>>>>>>
>>>>>> If quota data changed in above path, we will detect that in below condition:
>>>>>>
>>>>>> block_operation()
>>>>>>
>>>>>> 	down_write(&sbi->node_change);
>>>>>>
>>>>>> 	if (__need_flush_quota(sbi)) {
>>>>>> 		up_write(&sbi->node_change);
>>>>>> 		f2fs_unlock_all(sbi);
>>>>>> 		goto retry_flush_quotas;
>>>>>> 	}
>>>>>>
>>>>>> So there is no problem?
>>>>>
>>>>> We may need to check quota is dirty, since we have no way to detect by
>>>>> f2fs structures?
>>>>
>>>> Below condition can check that.
>>>>
>>>> static bool __need_flush_quota(struct f2fs_sb_info *sbi)
>>>> {
>>>> ...
>>>> 	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
>>>> 		return true;
>>>> 	if (get_pages(sbi, F2FS_DIRTY_QDATA))
>>>> 		return true;
>>>> ...
>>>> }
>>>>
>>>> static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
>>>> {
>>>> ...
>>>> 	ret = dquot_mark_dquot_dirty(dquot);
>>>>
>>>> 	/* if we are using journalled quota */
>>>> 	if (is_journalled_quota(sbi))
>>>> 		set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
>>>> ...
>>>> }
>>>
>>> Okay, then, could you please run the above stress test to reproduce this?
>>
>> Sure, let me try this case and fix it.
>>
>> Could you check other patches in mailing list, and test them instead?
> 
> With the below change, the test result is much better for now.
> Let me know, if you have further concern.
> 
> ---
>  fs/f2fs/checkpoint.c | 6 ++++++
>  fs/f2fs/super.c      | 4 +++-
>  2 files changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index a1facfbfc5c7..b111c6201023 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1111,6 +1111,8 @@ static int block_operations(struct f2fs_sb_info *sbi)
>  
>  retry_flush_quotas:
>  	if (__need_flush_quota(sbi)) {
> +		int locked;
> +
>  		if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
>  			set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
>  			f2fs_lock_all(sbi);
> @@ -1118,7 +1120,11 @@ static int block_operations(struct f2fs_sb_info *sbi)
>  		}
>  		clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
>  
> +		/* only failed during mount/umount/freeze/quotactl */
> +		locked = down_read_trylock(&sbi->sb->s_umount);
>  		f2fs_quota_sync(sbi->sb, -1);
> +		if (locked)
> +			up_read(&sbi->sb->s_umount);
>  	}
>  
>  	f2fs_lock_all(sbi);
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index a28c245b1288..b39f60d57120 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1706,6 +1706,7 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
>  				congestion_wait(BLK_RW_ASYNC, HZ/50);
>  				goto repeat;
>  			}
> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>  			return PTR_ERR(page);
>  		}
>  
> @@ -1717,6 +1718,7 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
>  		}
>  		if (unlikely(!PageUptodate(page))) {
>  			f2fs_put_page(page, 1);
> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>  			return -EIO;
>  		}
>  
> @@ -1758,6 +1760,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
>  				congestion_wait(BLK_RW_ASYNC, HZ/50);
>  				goto retry;
>  			}
> +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
>  			break;
>  		}
>  
> @@ -1794,7 +1797,6 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode)
>  
>  static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
>  {
> -
>  	if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
>  		f2fs_msg(sbi->sb, KERN_ERR,
>  			"quota sysfile may be corrupted, skip loading it");
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v11] f2fs: guarantee journalled quota data by checkpoint
  2018-10-17  3:30                 ` Chao Yu
  (?)
@ 2018-10-18 19:04                 ` Jaegeuk Kim
  -1 siblings, 0 replies; 18+ messages in thread
From: Jaegeuk Kim @ 2018-10-18 19:04 UTC (permalink / raw)
  To: Chao Yu; +Cc: Chao Yu, linux-f2fs-devel, linux-kernel, Weichao Guo

On 10/17, Chao Yu wrote:
> Jaegeuk,
> 
> Sorry for the long delay, I'm busy on other thing.
> 
> I'm trying your fixing code on both fsck and kernel with 'run.sh
> por_fsstress' case.
> 
> And got below output, is that normal in updated fsck? I didn't have time to
> look into this.

Yeah, I just stopped allocating new blocks to fix quota if there may be
some data to be recovered at the next mount time.

> 
> Info: checkpoint state = 8c6 :  quota_need_fsck nat_bits crc
> compacted_summary orphan_inodes sudden-power-off
> [fsck_chk_quota_files:1755] Fixing Quota file ([  0] ino [0x4])
> [ERROR] quotaio_tree.c:83:write_blk:: Cannot write block (1320):
> Inappropriate ioctl for device
> [ERROR] quotaio_tree.c:110:get_free_dqblk:: Cannot allocate new quota block
> (out of disk space).
> [ERROR] quotaio_tree.c:315:dq_insert_tree:: Cannot write quota (id
> 67368348): Inappropriate ioctl for device
> [fsck_chk_quota_files:1755] Fixing Quota file ([  1] ino [0x5])
> [ERROR] quotaio_tree.c:83:write_blk:: Cannot write block (1332):
> Inappropriate ioctl for device
> [ERROR] quotaio_tree.c:110:get_free_dqblk:: Cannot allocate new quota block
> (out of disk space).
> [ERROR] quotaio_tree.c:315:dq_insert_tree:: Cannot write quota (id
> 73435216): Inappropriate ioctl for device
> 
> Thanks,
> 
> 
> On 2018/10/3 0:45, Jaegeuk Kim wrote:
> > On 10/01, Chao Yu wrote:
> >> On 2018-10-1 9:49, Jaegeuk Kim wrote:
> >>> On 10/01, Chao Yu wrote:
> >>>> On 2018-10-1 9:29, Jaegeuk Kim wrote:
> >>>>> On 10/01, Chao Yu wrote:
> >>>>>> Hi Jaegeuk,
> >>>>>>
> >>>>>> On 2018-10-1 8:06, Jaegeuk Kim wrote:
> >>>>>>> Hi Chao,
> >>>>>>>
> >>>>>>> This fails on fsstress with godown without fault injection. Could you please
> >>>>>>> test a bit? I assumed that this patch should give no fsck failure along with
> >>>>>>> valid checkpoint having no flag.
> >>>>>>
> >>>>>> Okay, let me reproduce with that case.
> >>>>>>
> >>>>>>>
> >>>>>>> BTW, I'm in doubt that f2fs_lock_all covers entire quota modification. What
> >>>>>>> about prepare_write_begin() -> f2fs_get_block() ... -> inc_valid_block_count()?
> >>>>>>
> >>>>>> If quota data changed in above path, we will detect that in below condition:
> >>>>>>
> >>>>>> block_operation()
> >>>>>>
> >>>>>> 	down_write(&sbi->node_change);
> >>>>>>
> >>>>>> 	if (__need_flush_quota(sbi)) {
> >>>>>> 		up_write(&sbi->node_change);
> >>>>>> 		f2fs_unlock_all(sbi);
> >>>>>> 		goto retry_flush_quotas;
> >>>>>> 	}
> >>>>>>
> >>>>>> So there is no problem?
> >>>>>
> >>>>> We may need to check quota is dirty, since we have no way to detect by
> >>>>> f2fs structures?
> >>>>
> >>>> Below condition can check that.
> >>>>
> >>>> static bool __need_flush_quota(struct f2fs_sb_info *sbi)
> >>>> {
> >>>> ...
> >>>> 	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
> >>>> 		return true;
> >>>> 	if (get_pages(sbi, F2FS_DIRTY_QDATA))
> >>>> 		return true;
> >>>> ...
> >>>> }
> >>>>
> >>>> static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
> >>>> {
> >>>> ...
> >>>> 	ret = dquot_mark_dquot_dirty(dquot);
> >>>>
> >>>> 	/* if we are using journalled quota */
> >>>> 	if (is_journalled_quota(sbi))
> >>>> 		set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
> >>>> ...
> >>>> }
> >>>
> >>> Okay, then, could you please run the above stress test to reproduce this?
> >>
> >> Sure, let me try this case and fix it.
> >>
> >> Could you check other patches in mailing list, and test them instead?
> > 
> > With the below change, the test result is much better for now.
> > Let me know, if you have further concern.
> > 
> > ---
> >  fs/f2fs/checkpoint.c | 6 ++++++
> >  fs/f2fs/super.c      | 4 +++-
> >  2 files changed, 9 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > index a1facfbfc5c7..b111c6201023 100644
> > --- a/fs/f2fs/checkpoint.c
> > +++ b/fs/f2fs/checkpoint.c
> > @@ -1111,6 +1111,8 @@ static int block_operations(struct f2fs_sb_info *sbi)
> >  
> >  retry_flush_quotas:
> >  	if (__need_flush_quota(sbi)) {
> > +		int locked;
> > +
> >  		if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
> >  			set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
> >  			f2fs_lock_all(sbi);
> > @@ -1118,7 +1120,11 @@ static int block_operations(struct f2fs_sb_info *sbi)
> >  		}
> >  		clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
> >  
> > +		/* only failed during mount/umount/freeze/quotactl */
> > +		locked = down_read_trylock(&sbi->sb->s_umount);
> >  		f2fs_quota_sync(sbi->sb, -1);
> > +		if (locked)
> > +			up_read(&sbi->sb->s_umount);
> >  	}
> >  
> >  	f2fs_lock_all(sbi);
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index a28c245b1288..b39f60d57120 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -1706,6 +1706,7 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
> >  				congestion_wait(BLK_RW_ASYNC, HZ/50);
> >  				goto repeat;
> >  			}
> > +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
> >  			return PTR_ERR(page);
> >  		}
> >  
> > @@ -1717,6 +1718,7 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
> >  		}
> >  		if (unlikely(!PageUptodate(page))) {
> >  			f2fs_put_page(page, 1);
> > +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
> >  			return -EIO;
> >  		}
> >  
> > @@ -1758,6 +1760,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
> >  				congestion_wait(BLK_RW_ASYNC, HZ/50);
> >  				goto retry;
> >  			}
> > +			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
> >  			break;
> >  		}
> >  
> > @@ -1794,7 +1797,6 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode)
> >  
> >  static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
> >  {
> > -
> >  	if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
> >  		f2fs_msg(sbi->sb, KERN_ERR,
> >  			"quota sysfile may be corrupted, skip loading it");
> > 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v11] f2fs: guarantee journalled quota data by checkpoint
  2018-09-20 12:05 [PATCH v11] f2fs: guarantee journalled quota data by checkpoint Chao Yu
@ 2019-03-06  6:49   ` Chao Yu
  2019-03-06  6:49   ` Chao Yu
  1 sibling, 0 replies; 18+ messages in thread
From: Chao Yu @ 2019-03-06  6:49 UTC (permalink / raw)
  To: Chao Yu, jaegeuk; +Cc: linux-f2fs-devel, linux-kernel, Weichao Guo

Hi Jaegeuk,

On 2018/9/20 20:05, Chao Yu wrote:
> @@ -3456,3 +3477,16 @@
> 
>  extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
> 
> #endif
> #endif
> +
> +static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
> +{
> +#ifdef CONFIG_QUOTA
> + if (f2fs_sb_has_quota_ino(sbi->sb))
> + return true;
> + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
> + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
> + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
> + return true;
> +#endif
> + return false;
> +}

I checked dev branch, and found one patch we didn't send to mailing list,
could you please send it out?

f2fs: fix wrong #endif

I think my quota patch caused this 'wrong #endif' issue. Could you add a
single-line comment noting which #if the last #endif corresponds to, so
that we can avoid repeating the mistake the quota patch made?

Thanks,


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v11] f2fs: guarantee journalled quota data by checkpoint
@ 2019-03-06  6:49   ` Chao Yu
  0 siblings, 0 replies; 18+ messages in thread
From: Chao Yu @ 2019-03-06  6:49 UTC (permalink / raw)
  To: Chao Yu, jaegeuk; +Cc: linux-f2fs-devel, linux-kernel, Weichao Guo

Hi Jaegeuk,

On 2018/9/20 20:05, Chao Yu wrote:
> @@ -3456,3 +3477,16 @@
> 
>  extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
> 
> #endif
> #endif
> +
> +static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
> +{
> +#ifdef CONFIG_QUOTA
> + if (f2fs_sb_has_quota_ino(sbi->sb))
> + return true;
> + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
> + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
> + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
> + return true;
> +#endif
> + return false;
> +}

I checked dev branch, and found one patch we didn't send to mailing list,
could you please send it out?

f2fs: fix wrong #endif

I think my quota patch caused this 'wrong #endif' issue. Could you add a
single-line comment noting which #if the last #endif corresponds to, so
that we can avoid repeating the mistake the quota patch made?

Thanks,

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v11] f2fs: guarantee journalled quota data by checkpoint
  2019-03-06  6:49   ` Chao Yu
  (?)
@ 2019-03-12 18:28   ` Jaegeuk Kim
  2019-03-13  1:17       ` Chao Yu
  -1 siblings, 1 reply; 18+ messages in thread
From: Jaegeuk Kim @ 2019-03-12 18:28 UTC (permalink / raw)
  To: Chao Yu; +Cc: Chao Yu, linux-f2fs-devel, linux-kernel, Weichao Guo

On 03/06, Chao Yu wrote:
> Hi Jaegeuk,
> 
> On 2018/9/20 20:05, Chao Yu wrote:
> > @@ -3456,3 +3477,16 @@
> > 
> >  extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
> > 
> > #endif
> > #endif
> > +
> > +static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
> > +{
> > +#ifdef CONFIG_QUOTA
> > + if (f2fs_sb_has_quota_ino(sbi->sb))
> > + return true;
> > + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
> > + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
> > + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
> > + return true;
> > +#endif
> > + return false;
> > +}
> 
> I checked dev branch, and found one patch we didn't send to mailing list,
> could you please send it out?
> 
> f2fs: fix wrong #endif
> 
> I think my quota patch caused this 'wrong #endif' issue, could you add
> single line comment to notice which #if the last #endif is corresponding
> to? so that we can avoid same mistake as quota patch made.

Ah, I forgot to post the patch. It's quite late but obvious, and thus let me
keep the patch as is in order to avoid messing up the commit history.

Thanks,

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v11] f2fs: guarantee journalled quota data by checkpoint
  2019-03-12 18:28   ` Jaegeuk Kim
@ 2019-03-13  1:17       ` Chao Yu
  0 siblings, 0 replies; 18+ messages in thread
From: Chao Yu @ 2019-03-13  1:17 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: Chao Yu, linux-f2fs-devel, linux-kernel, Weichao Guo

On 2019/3/13 2:28, Jaegeuk Kim wrote:
> On 03/06, Chao Yu wrote:
>> Hi Jaegeuk,
>>
>> On 2018/9/20 20:05, Chao Yu wrote:
>>> @@ -3456,3 +3477,16 @@
>>>
>>>  extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
>>>
>>> #endif
>>> #endif
>>> +
>>> +static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
>>> +{
>>> +#ifdef CONFIG_QUOTA
>>> + if (f2fs_sb_has_quota_ino(sbi->sb))
>>> + return true;
>>> + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
>>> + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
>>> + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
>>> + return true;
>>> +#endif
>>> + return false;
>>> +}
>>
>> I checked dev branch, and found one patch we didn't send to mailing list,
>> could you please send it out?
>>
>> f2fs: fix wrong #endif
>>
>> I think my quota patch caused this 'wrong #endif' issue, could you add
>> single line comment to notice which #if the last #endif is corresponding
>> to? so that we can avoid same mistake as quota patch made.
> 
> Ah, I forgot to post the patch. It's quite late but obvious, and thus let me
> keep the patch as is in order to avoid messing up the commit history.

Okay, let me add one other patch for that. :)

Thanks,

> 
> Thanks,
> 
> .
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v11] f2fs: guarantee journalled quota data by checkpoint
@ 2019-03-13  1:17       ` Chao Yu
  0 siblings, 0 replies; 18+ messages in thread
From: Chao Yu @ 2019-03-13  1:17 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: Chao Yu, linux-f2fs-devel, linux-kernel, Weichao Guo

On 2019/3/13 2:28, Jaegeuk Kim wrote:
> On 03/06, Chao Yu wrote:
>> Hi Jaegeuk,
>>
>> On 2018/9/20 20:05, Chao Yu wrote:
>>> @@ -3456,3 +3477,16 @@
>>>
>>>  extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
>>>
>>> #endif
>>> #endif
>>> +
>>> +static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
>>> +{
>>> +#ifdef CONFIG_QUOTA
>>> + if (f2fs_sb_has_quota_ino(sbi->sb))
>>> + return true;
>>> + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
>>> + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
>>> + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
>>> + return true;
>>> +#endif
>>> + return false;
>>> +}
>>
>> I checked dev branch, and found one patch we didn't send to mailing list,
>> could you please send it out?
>>
>> f2fs: fix wrong #endif
>>
>> I think my quota patch caused this 'wrong #endif' issue, could you add
>> single line comment to notice which #if the last #endif is corresponding
>> to? so that we can avoid same mistake as quota patch made.
> 
> Ah, I forgot to post the patch. It's quite late but obvious, and thus let me
> keep the patch as is in order to avoid messing up the commit history.

Okay, let me add one other patch for that. :)

Thanks,

> 
> Thanks,
> 
> .
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2019-03-13  1:17 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-09-20 12:05 [PATCH v11] f2fs: guarantee journalled quota data by checkpoint Chao Yu
2018-10-01  0:06 ` Jaegeuk Kim
2018-10-01  0:46   ` Chao Yu
2018-10-01  1:29     ` Jaegeuk Kim
2018-10-01  1:39       ` Chao Yu
2018-10-01  1:49         ` Jaegeuk Kim
2018-10-01  1:49           ` Jaegeuk Kim
2018-10-01  2:04           ` Chao Yu
2018-10-01  2:04             ` Chao Yu
2018-10-02 16:45             ` Jaegeuk Kim
2018-10-17  3:30               ` Chao Yu
2018-10-17  3:30                 ` Chao Yu
2018-10-18 19:04                 ` Jaegeuk Kim
2019-03-06  6:49 ` Chao Yu
2019-03-06  6:49   ` Chao Yu
2019-03-12 18:28   ` Jaegeuk Kim
2019-03-13  1:17     ` Chao Yu
2019-03-13  1:17       ` Chao Yu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.