linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] f2fs: introduce a flag to represent each nat entry information
@ 2014-09-18  5:51 Jaegeuk Kim
  2014-09-18  5:51 ` [PATCH 2/3] f2fs: fix conditions to remain recovery information in f2fs_sync_file Jaegeuk Kim
  2014-09-18  5:51 ` [PATCH 3/3] f2fs: fix roll-forward missing scenarios Jaegeuk Kim
  0 siblings, 2 replies; 12+ messages in thread
From: Jaegeuk Kim @ 2014-09-18  5:51 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This patch introduces a flag in the nat entry structure to merge various
information such as checkpointed and fsync_done marks.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/node.c | 13 +++++++------
 fs/f2fs/node.h | 28 ++++++++++++++++++++++++----
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b32eb56..d19d6b1 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -131,7 +131,7 @@ int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
 
 	read_lock(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
-	if (e && !e->checkpointed)
+	if (e && !get_nat_flag(e, IS_CHECKPOINTED))
 		is_cp = 0;
 	read_unlock(&nm_i->nat_tree_lock);
 	return is_cp;
@@ -146,7 +146,7 @@ bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid)
 	read_lock(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
 	if (e)
-		fsync_done = e->fsync_done;
+		fsync_done = get_nat_flag(e, HAS_FSYNC_MARK);
 	read_unlock(&nm_i->nat_tree_lock);
 	return fsync_done;
 }
@@ -159,7 +159,7 @@ void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid)
 	write_lock(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
 	if (e)
-		e->fsync_done = false;
+		set_nat_flag(e, HAS_FSYNC_MARK, false);
 	write_unlock(&nm_i->nat_tree_lock);
 }
 
@@ -176,7 +176,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
 	}
 	memset(new, 0, sizeof(struct nat_entry));
 	nat_set_nid(new, nid);
-	new->checkpointed = true;
+	set_nat_flag(new, IS_CHECKPOINTED, true);
 	list_add_tail(&new->list, &nm_i->nat_entries);
 	nm_i->nat_cnt++;
 	return new;
@@ -249,7 +249,7 @@ retry:
 	/* update fsync_mark if its inode nat entry is still alive */
 	e = __lookup_nat_cache(nm_i, ni->ino);
 	if (e)
-		e->fsync_done = fsync_done;
+		set_nat_flag(e, HAS_FSYNC_MARK, fsync_done);
 	write_unlock(&nm_i->nat_tree_lock);
 }
 
@@ -1349,7 +1349,8 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
 		read_lock(&nm_i->nat_tree_lock);
 		ne = __lookup_nat_cache(nm_i, nid);
 		if (ne &&
-			(!ne->checkpointed || nat_get_blkaddr(ne) != NULL_ADDR))
+			(!get_nat_flag(ne, IS_CHECKPOINTED) ||
+				nat_get_blkaddr(ne) != NULL_ADDR))
 			allocated = true;
 		read_unlock(&nm_i->nat_tree_lock);
 		if (allocated)
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 324917d..3043778 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -39,10 +39,14 @@ struct node_info {
 	unsigned char version;	/* version of the node */
 };
 
+enum {
+	IS_CHECKPOINTED,	/* is it checkpointed before? */
+	HAS_FSYNC_MARK,		/* has the latest node fsync mark? */
+};
+
 struct nat_entry {
 	struct list_head list;	/* for clean or dirty nat list */
-	bool checkpointed;	/* whether it is checkpointed or not */
-	bool fsync_done;	/* whether the latest node has fsync mark */
+	unsigned char flag;	/* for node information bits */
 	struct node_info ni;	/* in-memory node information */
 };
 
@@ -57,16 +61,32 @@ struct nat_entry {
 
 #define __set_nat_cache_dirty(nm_i, ne)					\
 	do {								\
-		ne->checkpointed = false;				\
+		set_nat_flag(ne, IS_CHECKPOINTED, false);		\
 		list_move_tail(&ne->list, &nm_i->dirty_nat_entries);	\
 	} while (0)
 #define __clear_nat_cache_dirty(nm_i, ne)				\
 	do {								\
-		ne->checkpointed = true;				\
+		set_nat_flag(ne, IS_CHECKPOINTED, true);		\
 		list_move_tail(&ne->list, &nm_i->nat_entries);		\
 	} while (0)
 #define inc_node_version(version)	(++version)
 
+static inline void set_nat_flag(struct nat_entry *ne,
+				unsigned int type, bool set)
+{
+	unsigned char mask = 0x01 << type;
+	if (set)
+		ne->flag |= mask;
+	else
+		ne->flag &= ~mask;
+}
+
+static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type)
+{
+	unsigned char mask = 0x01 << type;
+	return ne->flag & mask;
+}
+
 static inline void node_info_from_raw_nat(struct node_info *ni,
 						struct f2fs_nat_entry *raw_ne)
 {
-- 
1.8.5.2 (Apple Git-48)


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/3] f2fs: fix conditions to remain recovery information in f2fs_sync_file
  2014-09-18  5:51 [PATCH 1/3] f2fs: introduce a flag to represent each nat entry information Jaegeuk Kim
@ 2014-09-18  5:51 ` Jaegeuk Kim
  2014-09-22  7:24   ` [f2fs-dev] " Chao Yu
  2014-09-18  5:51 ` [PATCH 3/3] f2fs: fix roll-forward missing scenarios Jaegeuk Kim
  1 sibling, 1 reply; 12+ messages in thread
From: Jaegeuk Kim @ 2014-09-18  5:51 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim, Huang Ying

This patch revisited whole the recovery information during the f2fs_sync_file.

In this patch, there are three information to make a decision.

a) IS_CHECKPOINTED,	/* is it checkpointed before? */
b) HAS_FSYNCED_INODE,	/* is the inode fsynced before? */
c) HAS_LAST_FSYNC,	/* has the latest node fsync mark? */

And, the scenarios for our rule are based on:

[Term] F: fsync_mark, D: dentry_mark

1. inode(x) | CP | inode(x) | dnode(F)
2. inode(x) | CP | inode(F) | dnode(F)
3. inode(x) | CP | dnode(F) | inode(x) | inode(F)
4. inode(x) | CP | dnode(F) | inode(F)
5. CP | inode(x) | dnode(F) | inode(DF)
6. CP | inode(DF) | dnode(F)
7. CP | dnode(F) | inode(DF)
8. CP | dnode(F) | inode(x) | inode(DF)

For example, #3, the three conditions should be changed as follows.

   inode(x) | CP | dnode(F) | inode(x) | inode(F)
a)    x       o      o          o          o
b)    x       x      x          x          o
c)    x       o      o          x          o

If f2fs_sync_file stops   ------^,
 it should write inode(F)    --------------^

So, the need_inode_block_update should return true, since
 c) get_nat_flag(e, HAS_LAST_FSYNC), is false.

For example, #8,
      CP | alloc | dnode(F) | inode(x) | inode(DF)
a)    o      x        x          x          x
b)    x               x          x          o
c)    o               o          x          o

If f2fs_sync_file stops   -------^,
 it should write inode(DF)    --------------^

Note that, the roll-forward policy should follow this rule, which means,
if there are any missing blocks, we doesn't need to recover that inode.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c |  3 ---
 fs/f2fs/f2fs.h |  6 +++---
 fs/f2fs/file.c | 10 ++++++----
 fs/f2fs/node.c | 56 +++++++++++++++++++++++++++++++++++---------------------
 fs/f2fs/node.h | 21 ++++++++++++---------
 5 files changed, 56 insertions(+), 40 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 0e37658..fdc3dbe 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1089,9 +1089,6 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
 	if (check_direct_IO(inode, rw, iter, offset))
 		return 0;
 
-	/* clear fsync mark to recover these blocks */
-	fsync_mark_clear(F2FS_I_SB(inode), inode->i_ino);
-
 	trace_f2fs_direct_IO_enter(inode, offset, count, rw);
 
 	err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9fc1bcd..fd44083 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1224,9 +1224,9 @@ struct dnode_of_data;
 struct node_info;
 
 bool available_free_memory(struct f2fs_sb_info *, int);
-int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
-bool fsync_mark_done(struct f2fs_sb_info *, nid_t);
-void fsync_mark_clear(struct f2fs_sb_info *, nid_t);
+bool is_checkpointed_node(struct f2fs_sb_info *, nid_t);
+bool has_fsynced_inode(struct f2fs_sb_info *, nid_t);
+bool need_inode_block_update(struct f2fs_sb_info *, nid_t);
 void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
 int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
 int truncate_inode_blocks(struct inode *, pgoff_t);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index af06e22..3035c79 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -207,15 +207,17 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 			up_write(&fi->i_sem);
 		}
 	} else {
-		/* if there is no written node page, write its inode page */
-		while (!sync_node_pages(sbi, ino, &wbc)) {
-			if (fsync_mark_done(sbi, ino))
-				goto out;
+sync_nodes:
+		sync_node_pages(sbi, ino, &wbc);
+
+		if (need_inode_block_update(sbi, ino)) {
 			mark_inode_dirty_sync(inode);
 			ret = f2fs_write_inode(inode, NULL);
 			if (ret)
 				goto out;
+			goto sync_nodes;
 		}
+
 		ret = wait_on_node_pages_writeback(sbi, ino);
 		if (ret)
 			goto out;
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index d19d6b1..7a2d9c9 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -123,44 +123,48 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
 	kmem_cache_free(nat_entry_slab, e);
 }
 
-int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
+bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct nat_entry *e;
-	int is_cp = 1;
+	bool is_cp = true;
 
 	read_lock(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
 	if (e && !get_nat_flag(e, IS_CHECKPOINTED))
-		is_cp = 0;
+		is_cp = false;
 	read_unlock(&nm_i->nat_tree_lock);
 	return is_cp;
 }
 
-bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid)
+bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct nat_entry *e;
-	bool fsync_done = false;
+	bool fsynced = false;
 
 	read_lock(&nm_i->nat_tree_lock);
-	e = __lookup_nat_cache(nm_i, nid);
-	if (e)
-		fsync_done = get_nat_flag(e, HAS_FSYNC_MARK);
+	e = __lookup_nat_cache(nm_i, ino);
+	if (e && get_nat_flag(e, HAS_FSYNCED_INODE))
+		fsynced = true;
 	read_unlock(&nm_i->nat_tree_lock);
-	return fsync_done;
+	return fsynced;
 }
 
-void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid)
+bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct nat_entry *e;
+	bool need_update = true;
 
-	write_lock(&nm_i->nat_tree_lock);
-	e = __lookup_nat_cache(nm_i, nid);
-	if (e)
-		set_nat_flag(e, HAS_FSYNC_MARK, false);
-	write_unlock(&nm_i->nat_tree_lock);
+	read_lock(&nm_i->nat_tree_lock);
+	e = __lookup_nat_cache(nm_i, ino);
+	if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
+			(get_nat_flag(e, IS_CHECKPOINTED) ||
+			 get_nat_flag(e, HAS_FSYNCED_INODE)))
+		need_update = false;
+	read_unlock(&nm_i->nat_tree_lock);
+	return need_update;
 }
 
 static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
@@ -176,7 +180,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
 	}
 	memset(new, 0, sizeof(struct nat_entry));
 	nat_set_nid(new, nid);
-	set_nat_flag(new, IS_CHECKPOINTED, true);
+	nat_reset_flag(new);
 	list_add_tail(&new->list, &nm_i->nat_entries);
 	nm_i->nat_cnt++;
 	return new;
@@ -244,12 +248,17 @@ retry:
 
 	/* change address */
 	nat_set_blkaddr(e, new_blkaddr);
+	if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
+		set_nat_flag(e, IS_CHECKPOINTED, false);
 	__set_nat_cache_dirty(nm_i, e);
 
 	/* update fsync_mark if its inode nat entry is still alive */
 	e = __lookup_nat_cache(nm_i, ni->ino);
-	if (e)
-		set_nat_flag(e, HAS_FSYNC_MARK, fsync_done);
+	if (e) {
+		if (fsync_done && ni->nid == ni->ino)
+			set_nat_flag(e, HAS_FSYNCED_INODE, true);
+		set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
+	}
 	write_unlock(&nm_i->nat_tree_lock);
 }
 
@@ -1121,10 +1130,14 @@ continue_unlock:
 
 			/* called by fsync() */
 			if (ino && IS_DNODE(page)) {
-				int mark = !is_checkpointed_node(sbi, ino);
 				set_fsync_mark(page, 1);
-				if (IS_INODE(page))
-					set_dentry_mark(page, mark);
+				if (IS_INODE(page)) {
+					if (!is_checkpointed_node(sbi, ino) &&
+						!has_fsynced_inode(sbi, ino))
+						set_dentry_mark(page, 1);
+					else
+						set_dentry_mark(page, 0);
+				}
 				nwritten++;
 			} else {
 				set_fsync_mark(page, 0);
@@ -1912,6 +1925,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
 				write_unlock(&nm_i->nat_tree_lock);
 			} else {
 				write_lock(&nm_i->nat_tree_lock);
+				nat_reset_flag(ne);
 				__clear_nat_cache_dirty(nm_i, ne);
 				write_unlock(&nm_i->nat_tree_lock);
 			}
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 3043778..b8ba63c 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -41,7 +41,8 @@ struct node_info {
 
 enum {
 	IS_CHECKPOINTED,	/* is it checkpointed before? */
-	HAS_FSYNC_MARK,		/* has the latest node fsync mark? */
+	HAS_FSYNCED_INODE,	/* is the inode fsynced before? */
+	HAS_LAST_FSYNC,		/* has the latest node fsync mark? */
 };
 
 struct nat_entry {
@@ -60,15 +61,9 @@ struct nat_entry {
 #define nat_set_version(nat, v)		(nat->ni.version = v)
 
 #define __set_nat_cache_dirty(nm_i, ne)					\
-	do {								\
-		set_nat_flag(ne, IS_CHECKPOINTED, false);		\
-		list_move_tail(&ne->list, &nm_i->dirty_nat_entries);	\
-	} while (0)
+		list_move_tail(&ne->list, &nm_i->dirty_nat_entries);
 #define __clear_nat_cache_dirty(nm_i, ne)				\
-	do {								\
-		set_nat_flag(ne, IS_CHECKPOINTED, true);		\
-		list_move_tail(&ne->list, &nm_i->nat_entries);		\
-	} while (0)
+		list_move_tail(&ne->list, &nm_i->nat_entries);
 #define inc_node_version(version)	(++version)
 
 static inline void set_nat_flag(struct nat_entry *ne,
@@ -87,6 +82,14 @@ static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type)
 	return ne->flag & mask;
 }
 
+static inline void nat_reset_flag(struct nat_entry *ne)
+{
+	/* these states can be set only after checkpoint was done */
+	set_nat_flag(ne, IS_CHECKPOINTED, true);
+	set_nat_flag(ne, HAS_FSYNCED_INODE, false);
+	set_nat_flag(ne, HAS_LAST_FSYNC, true);
+}
+
 static inline void node_info_from_raw_nat(struct node_info *ni,
 						struct f2fs_nat_entry *raw_ne)
 {
-- 
1.8.5.2 (Apple Git-48)


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/3] f2fs: fix roll-forward missing scenarios
  2014-09-18  5:51 [PATCH 1/3] f2fs: introduce a flag to represent each nat entry information Jaegeuk Kim
  2014-09-18  5:51 ` [PATCH 2/3] f2fs: fix conditions to remain recovery information in f2fs_sync_file Jaegeuk Kim
@ 2014-09-18  5:51 ` Jaegeuk Kim
       [not found]   ` <CAC=cRTPotgNXbuPwG95HHuWiicS2OiD5R3HsOqdMG+2b=8ZyEg@mail.gmail.com>
  1 sibling, 1 reply; 12+ messages in thread
From: Jaegeuk Kim @ 2014-09-18  5:51 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim, Huang Ying

We can summarize the roll forward recovery scenarios as follows.

[Term] F: fsync_mark, D: dentry_mark

1. inode(x) | CP | inode(x) | dnode(F)
-> Update the latest inode(x).

2. inode(x) | CP | inode(F) | dnode(F)
-> No problem.

3. inode(x) | CP | dnode(F) | inode(x)
-> Recover to the latest dnode(F), and drop the last inode(x)

4. inode(x) | CP | dnode(F) | inode(F)
-> No problem.

5. CP | inode(x) | dnode(F)
-> The inode(DF) was missing. Should drop this dnode(F).

6. CP | inode(DF) | dnode(F)
-> No problem.

7. CP | dnode(F) | inode(DF)
-> If f2fs_iget fails, then goto next to find inode(DF).

8. CP | dnode(F) | inode(x)
-> If f2fs_iget fails, then goto next to find inode(DF).
   But it will fail due to no inode(DF).

So, this patch adds some missing points such as #1, #5, #7, and #8.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/recovery.c | 71 +++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 60 insertions(+), 11 deletions(-)

diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 36d4f73..a4eb978 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -14,6 +14,37 @@
 #include "node.h"
 #include "segment.h"
 
+/*
+ * Roll forward recovery scenarios.
+ *
+ * [Term] F: fsync_mark, D: dentry_mark
+ *
+ * 1. inode(x) | CP | inode(x) | dnode(F)
+ * -> Update the latest inode(x).
+ *
+ * 2. inode(x) | CP | inode(F) | dnode(F)
+ * -> No problem.
+ *
+ * 3. inode(x) | CP | dnode(F) | inode(x)
+ * -> Recover to the latest dnode(F), and drop the last inode(x)
+ *
+ * 4. inode(x) | CP | dnode(F) | inode(F)
+ * -> No problem.
+ *
+ * 5. CP | inode(x) | dnode(F)
+ * -> The inode(DF) was missing. Should drop this dnode(F).
+ *
+ * 6. CP | inode(DF) | dnode(F)
+ * -> No problem.
+ *
+ * 7. CP | dnode(F) | inode(DF)
+ * -> If f2fs_iget fails, then goto next to find inode(DF).
+ *
+ * 8. CP | dnode(F) | inode(x)
+ * -> If f2fs_iget fails, then goto next to find inode(DF).
+ *    But it will fail due to no inode(DF).
+ */
+
 static struct kmem_cache *fsync_entry_slab;
 
 bool space_for_roll_forward(struct f2fs_sb_info *sbi)
@@ -110,27 +141,32 @@ out:
 	return err;
 }
 
-static int recover_inode(struct inode *inode, struct page *node_page)
+static void __recover_inode(struct inode *inode, struct page *page)
 {
-	struct f2fs_inode *raw_inode = F2FS_INODE(node_page);
+	struct f2fs_inode *raw = F2FS_INODE(page);
+
+	inode->i_mode = le16_to_cpu(raw->i_mode);
+	i_size_write(inode, le64_to_cpu(raw->i_size));
+	inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
+	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
+	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
+	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
+	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
+	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
+}
 
+static int recover_inode(struct inode *inode, struct page *node_page)
+{
 	if (!IS_INODE(node_page))
 		return 0;
 
-	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
-	i_size_write(inode, le64_to_cpu(raw_inode->i_size));
-	inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
-	inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
-	inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
-	inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
-	inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
-	inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
+	__recover_inode(inode, node_page);
 
 	if (is_dent_dnode(node_page))
 		return recover_dentry(node_page, inode);
 
 	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
-			ino_of_node(node_page), raw_inode->i_name);
+			ino_of_node(node_page), F2FS_INODE(node_page)->i_name);
 	return 0;
 }
 
@@ -183,10 +219,16 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
 				break;
 			}
 
+			/*
+			 * CP | dnode(F) | inode(DF)
+			 * For this case, we should not give up now.
+			 */
 			entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
 			if (IS_ERR(entry->inode)) {
 				err = PTR_ERR(entry->inode);
 				kmem_cache_free(fsync_entry_slab, entry);
+				if (err == -ENOENT)
+					goto next;
 				break;
 			}
 			list_add_tail(&entry->list, head);
@@ -423,6 +465,13 @@ static int recover_data(struct f2fs_sb_info *sbi,
 		entry = get_fsync_inode(head, ino_of_node(page));
 		if (!entry)
 			goto next;
+		/*
+		 * inode(x) | CP | inode(x) | dnode(F)
+		 * In this case, we can lose the latest inode(x).
+		 * So, call __recover_inode for the inode update.
+		 */
+		if (IS_INODE(page))
+			__recover_inode(entry->inode, page);
 
 		err = do_recover_data(sbi, entry->inode, page, blkaddr);
 		if (err) {
-- 
1.8.5.2 (Apple Git-48)


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 3/3] f2fs: fix roll-forward missing scenarios
       [not found]   ` <CAC=cRTPotgNXbuPwG95HHuWiicS2OiD5R3HsOqdMG+2b=8ZyEg@mail.gmail.com>
@ 2014-09-20 16:23     ` Jaegeuk Kim
  2014-09-20 23:22       ` Huang Ying
  0 siblings, 1 reply; 12+ messages in thread
From: Jaegeuk Kim @ 2014-09-20 16:23 UTC (permalink / raw)
  To: huang ying; +Cc: LKML, linux-fsdevel, linux-f2fs-devel, Huang Ying

On Thu, Sep 18, 2014 at 09:04:11PM +0800, huang ying wrote:
> On Thu, Sep 18, 2014 at 1:51 PM, Jaegeuk Kim <jaegeuk@kernel.org> wrote:
> 
> > We can summarize the roll forward recovery scenarios as follows.
> >
> > [Term] F: fsync_mark, D: dentry_mark
> >
> > 1. inode(x) | CP | inode(x) | dnode(F)
> > -> Update the latest inode(x).
> >
> > 2. inode(x) | CP | inode(F) | dnode(F)
> > -> No problem.
> >
> > 3. inode(x) | CP | dnode(F) | inode(x)
> > -> Recover to the latest dnode(F), and drop the last inode(x)
> >
> > 4. inode(x) | CP | dnode(F) | inode(F)
> > -> No problem.
> >
> > 5. CP | inode(x) | dnode(F)
> > -> The inode(DF) was missing. Should drop this dnode(F).
> >
> > 6. CP | inode(DF) | dnode(F)
> > -> No problem.
> >
> > 7. CP | dnode(F) | inode(DF)
> > -> If f2fs_iget fails, then goto next to find inode(DF).
> >
> > 8. CP | dnode(F) | inode(x)
> > -> If f2fs_iget fails, then goto next to find inode(DF).
> >    But it will fail due to no inode(DF).
> >
> > So, this patch adds some missing points such as #1, #5, #7, and #8.
> >
> > Signed-off-by: Huang Ying <ying.huang@intel.com>
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > ---
> >  fs/f2fs/recovery.c | 71
> > +++++++++++++++++++++++++++++++++++++++++++++---------
> >  1 file changed, 60 insertions(+), 11 deletions(-)
> >
> > diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> > index 36d4f73..a4eb978 100644
> > --- a/fs/f2fs/recovery.c
> > +++ b/fs/f2fs/recovery.c
> > @@ -14,6 +14,37 @@
> >  #include "node.h"
> >  #include "segment.h"
> >
> > +/*
> > + * Roll forward recovery scenarios.
> > + *
> > + * [Term] F: fsync_mark, D: dentry_mark
> > + *
> > + * 1. inode(x) | CP | inode(x) | dnode(F)
> > + * -> Update the latest inode(x).
> > + *
> > + * 2. inode(x) | CP | inode(F) | dnode(F)
> > + * -> No problem.
> > + *
> > + * 3. inode(x) | CP | dnode(F) | inode(x)
> > + * -> Recover to the latest dnode(F), and drop the last inode(x)
> > + *
> > + * 4. inode(x) | CP | dnode(F) | inode(F)
> > + * -> No problem.
> > + *
> > + * 5. CP | inode(x) | dnode(F)
> > + * -> The inode(DF) was missing. Should drop this dnode(F).
> > + *
> > + * 6. CP | inode(DF) | dnode(F)
> > + * -> No problem.
> > + *
> > + * 7. CP | dnode(F) | inode(DF)
> > + * -> If f2fs_iget fails, then goto next to find inode(DF).
> > + *
> > + * 8. CP | dnode(F) | inode(x)
> > + * -> If f2fs_iget fails, then goto next to find inode(DF).
> > + *    But it will fail due to no inode(DF).
> > + */
> > +
> >  static struct kmem_cache *fsync_entry_slab;
> >
> >  bool space_for_roll_forward(struct f2fs_sb_info *sbi)
> > @@ -110,27 +141,32 @@ out:
> >         return err;
> >  }
> >
> > -static int recover_inode(struct inode *inode, struct page *node_page)
> > +static void __recover_inode(struct inode *inode, struct page *page)
> >  {
> > -       struct f2fs_inode *raw_inode = F2FS_INODE(node_page);
> > +       struct f2fs_inode *raw = F2FS_INODE(page);
> > +
> > +       inode->i_mode = le16_to_cpu(raw->i_mode);
> > +       i_size_write(inode, le64_to_cpu(raw->i_size));
> > +       inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
> > +       inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
> > +       inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
> > +       inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
> > +       inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
> > +       inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
> > +}
> >
> > +static int recover_inode(struct inode *inode, struct page *node_page)
> > +{
> >         if (!IS_INODE(node_page))
> >                 return 0;
> >
> > -       inode->i_mode = le16_to_cpu(raw_inode->i_mode);
> > -       i_size_write(inode, le64_to_cpu(raw_inode->i_size));
> > -       inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
> > -       inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
> > -       inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
> > -       inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
> > -       inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
> > -       inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
> > +       __recover_inode(inode, node_page);
> >
> >         if (is_dent_dnode(node_page))
> >                 return recover_dentry(node_page, inode);
> >
> >         f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name
> > = %s",
> > -                       ino_of_node(node_page), raw_inode->i_name);
> > +                       ino_of_node(node_page),
> > F2FS_INODE(node_page)->i_name);
> >         return 0;
> >  }
> >
> > @@ -183,10 +219,16 @@ static int find_fsync_dnodes(struct f2fs_sb_info
> > *sbi, struct list_head *head)
> >                                 break;
> >                         }
> >
> > +                       /*
> > +                        * CP | dnode(F) | inode(DF)
> > +                        * For this case, we should not give up now.
> > +                        */
> >                         entry->inode = f2fs_iget(sbi->sb,
> > ino_of_node(page));
> >                         if (IS_ERR(entry->inode)) {
> >                                 err = PTR_ERR(entry->inode);
> >                                 kmem_cache_free(fsync_entry_slab, entry);
> > +                               if (err == -ENOENT)
> > +                                       goto next;
> >                                 break;
> >                         }
> >                         list_add_tail(&entry->list, head);
> > @@ -423,6 +465,13 @@ static int recover_data(struct f2fs_sb_info *sbi,
> >                 entry = get_fsync_inode(head, ino_of_node(page));
> >                 if (!entry)
> >                         goto next;
> > +               /*
> > +                * inode(x) | CP | inode(x) | dnode(F)
> > +                * In this case, we can lose the latest inode(x).
> > +                * So, call __recover_inode for the inode update.
> > +                */
> >
> 
> With 2/3, because both IS_CHECKPOINTED and HAS_FSYNCED_INODE flag are unset
> for inode, we will append a inode(F).

No, inode(F) is not appended.
Please check the fsync rule #1.
Thanks,

> So we do not need to call
> __recover_inode here?
> 
> Best Regards,
> Huang, Ying
> 
> 
> > +               if (IS_INODE(page))
> > +                       __recover_inode(entry->inode, page);
> >
> >                 err = do_recover_data(sbi, entry->inode, page, blkaddr);
> >                 if (err) {
> > --
> >

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 3/3] f2fs: fix roll-forward missing scenarios
  2014-09-20 16:23     ` Jaegeuk Kim
@ 2014-09-20 23:22       ` Huang Ying
  2014-09-21  3:36         ` Jaegeuk Kim
  0 siblings, 1 reply; 12+ messages in thread
From: Huang Ying @ 2014-09-20 23:22 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: huang ying, LKML, linux-fsdevel, linux-f2fs-devel

On Sat, 2014-09-20 at 09:23 -0700, Jaegeuk Kim wrote:
> On Thu, Sep 18, 2014 at 09:04:11PM +0800, huang ying wrote:
> > On Thu, Sep 18, 2014 at 1:51 PM, Jaegeuk Kim <jaegeuk@kernel.org> wrote:
> > 
> > > We can summarize the roll forward recovery scenarios as follows.
> > >
> > > [Term] F: fsync_mark, D: dentry_mark
> > >
> > > 1. inode(x) | CP | inode(x) | dnode(F)
> > > -> Update the latest inode(x).
> > >
> > > 2. inode(x) | CP | inode(F) | dnode(F)
> > > -> No problem.
> > >
> > > 3. inode(x) | CP | dnode(F) | inode(x)
> > > -> Recover to the latest dnode(F), and drop the last inode(x)
> > >
> > > 4. inode(x) | CP | dnode(F) | inode(F)
> > > -> No problem.
> > >
> > > 5. CP | inode(x) | dnode(F)
> > > -> The inode(DF) was missing. Should drop this dnode(F).
> > >
> > > 6. CP | inode(DF) | dnode(F)
> > > -> No problem.
> > >
> > > 7. CP | dnode(F) | inode(DF)
> > > -> If f2fs_iget fails, then goto next to find inode(DF).
> > >
> > > 8. CP | dnode(F) | inode(x)
> > > -> If f2fs_iget fails, then goto next to find inode(DF).
> > >    But it will fail due to no inode(DF).
> > >
> > > So, this patch adds some missing points such as #1, #5, #7, and #8.
> > >
> > > Signed-off-by: Huang Ying <ying.huang@intel.com>
> > > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > > ---
> > >  fs/f2fs/recovery.c | 71
> > > +++++++++++++++++++++++++++++++++++++++++++++---------
> > >  1 file changed, 60 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> > > index 36d4f73..a4eb978 100644
> > > --- a/fs/f2fs/recovery.c
> > > +++ b/fs/f2fs/recovery.c
> > > @@ -14,6 +14,37 @@
> > >  #include "node.h"
> > >  #include "segment.h"
> > >
> > > +/*
> > > + * Roll forward recovery scenarios.
> > > + *
> > > + * [Term] F: fsync_mark, D: dentry_mark
> > > + *
> > > + * 1. inode(x) | CP | inode(x) | dnode(F)
> > > + * -> Update the latest inode(x).
> > > + *
> > > + * 2. inode(x) | CP | inode(F) | dnode(F)
> > > + * -> No problem.
> > > + *
> > > + * 3. inode(x) | CP | dnode(F) | inode(x)
> > > + * -> Recover to the latest dnode(F), and drop the last inode(x)
> > > + *
> > > + * 4. inode(x) | CP | dnode(F) | inode(F)
> > > + * -> No problem.
> > > + *
> > > + * 5. CP | inode(x) | dnode(F)
> > > + * -> The inode(DF) was missing. Should drop this dnode(F).
> > > + *
> > > + * 6. CP | inode(DF) | dnode(F)
> > > + * -> No problem.
> > > + *
> > > + * 7. CP | dnode(F) | inode(DF)
> > > + * -> If f2fs_iget fails, then goto next to find inode(DF).
> > > + *
> > > + * 8. CP | dnode(F) | inode(x)
> > > + * -> If f2fs_iget fails, then goto next to find inode(DF).
> > > + *    But it will fail due to no inode(DF).
> > > + */
> > > +
> > >  static struct kmem_cache *fsync_entry_slab;
> > >
> > >  bool space_for_roll_forward(struct f2fs_sb_info *sbi)
> > > @@ -110,27 +141,32 @@ out:
> > >         return err;
> > >  }
> > >
> > > -static int recover_inode(struct inode *inode, struct page *node_page)
> > > +static void __recover_inode(struct inode *inode, struct page *page)
> > >  {
> > > -       struct f2fs_inode *raw_inode = F2FS_INODE(node_page);
> > > +       struct f2fs_inode *raw = F2FS_INODE(page);
> > > +
> > > +       inode->i_mode = le16_to_cpu(raw->i_mode);
> > > +       i_size_write(inode, le64_to_cpu(raw->i_size));
> > > +       inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
> > > +       inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
> > > +       inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
> > > +       inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
> > > +       inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
> > > +       inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
> > > +}
> > >
> > > +static int recover_inode(struct inode *inode, struct page *node_page)
> > > +{
> > >         if (!IS_INODE(node_page))
> > >                 return 0;
> > >
> > > -       inode->i_mode = le16_to_cpu(raw_inode->i_mode);
> > > -       i_size_write(inode, le64_to_cpu(raw_inode->i_size));
> > > -       inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
> > > -       inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
> > > -       inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
> > > -       inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
> > > -       inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
> > > -       inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
> > > +       __recover_inode(inode, node_page);
> > >
> > >         if (is_dent_dnode(node_page))
> > >                 return recover_dentry(node_page, inode);
> > >
> > >         f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name
> > > = %s",
> > > -                       ino_of_node(node_page), raw_inode->i_name);
> > > +                       ino_of_node(node_page),
> > > F2FS_INODE(node_page)->i_name);
> > >         return 0;
> > >  }
> > >
> > > @@ -183,10 +219,16 @@ static int find_fsync_dnodes(struct f2fs_sb_info
> > > *sbi, struct list_head *head)
> > >                                 break;
> > >                         }
> > >
> > > +                       /*
> > > +                        * CP | dnode(F) | inode(DF)
> > > +                        * For this case, we should not give up now.
> > > +                        */
> > >                         entry->inode = f2fs_iget(sbi->sb,
> > > ino_of_node(page));
> > >                         if (IS_ERR(entry->inode)) {
> > >                                 err = PTR_ERR(entry->inode);
> > >                                 kmem_cache_free(fsync_entry_slab, entry);
> > > +                               if (err == -ENOENT)
> > > +                                       goto next;
> > >                                 break;
> > >                         }
> > >                         list_add_tail(&entry->list, head);
> > > @@ -423,6 +465,13 @@ static int recover_data(struct f2fs_sb_info *sbi,
> > >                 entry = get_fsync_inode(head, ino_of_node(page));
> > >                 if (!entry)
> > >                         goto next;
> > > +               /*
> > > +                * inode(x) | CP | inode(x) | dnode(F)
> > > +                * In this case, we can lose the latest inode(x).
> > > +                * So, call __recover_inode for the inode update.
> > > +                */
> > >
> > 
> > With 2/3, because both IS_CHECKPOINTED and HAS_FSYNCED_INODE flag are unset
> > for inode, we will append a inode(F).
> 
> No, inode(F) is not appended.
> Please check the fsync rule #1.

>From implementation of need_inode_block_update, we do not append an
inode(F) or inode(DF), only if:

  get_nat_flag(e, HAS_LAST_FSYNC) &&
          (get_nat_flag(e, IS_CHECKPOINTED) ||
           get_nat_flag(e, HAS_FSYNCED_INODE)))

e is nat entry for the inode.

For inode(x) | CP | inode(x) | dnode(F)

We have:

HAS_LAST_FSYNC:    true
IS_CHECKPOINTED:   false
HAS_FSYNCED_INODE: false

So we will append a inode(DF) here.

> > So we do not need to call
> > __recover_inode here?

Thought again, __recover_inode here may be helpful here for the
following situation:

inode(x) | CP | inode(x) | dnode(F) | inode(DF)
                                    |
                                    v
                               Sudden power off

That is, sudden power off before writing inode(DF).

So I think we should keep the code, but maybe change the comments?

Best Regards,
Huang, Ying

> > 
> > 
> > > +               if (IS_INODE(page))
> > > +                       __recover_inode(entry->inode, page);
> > >
> > >                 err = do_recover_data(sbi, entry->inode, page, blkaddr);
> > >                 if (err) {
> > > --
> > >



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 3/3] f2fs: fix roll-forward missing scenarios
  2014-09-20 23:22       ` Huang Ying
@ 2014-09-21  3:36         ` Jaegeuk Kim
  2014-09-22  2:13           ` Huang Ying
  0 siblings, 1 reply; 12+ messages in thread
From: Jaegeuk Kim @ 2014-09-21  3:36 UTC (permalink / raw)
  To: Huang Ying; +Cc: huang ying, LKML, linux-fsdevel, linux-f2fs-devel

On Sun, Sep 21, 2014 at 07:22:32AM +0800, Huang Ying wrote:
> On Sat, 2014-09-20 at 09:23 -0700, Jaegeuk Kim wrote:
> > On Thu, Sep 18, 2014 at 09:04:11PM +0800, huang ying wrote:
> > > On Thu, Sep 18, 2014 at 1:51 PM, Jaegeuk Kim <jaegeuk@kernel.org> wrote:
> > > 
> > > > We can summarize the roll forward recovery scenarios as follows.
> > > >
> > > > [Term] F: fsync_mark, D: dentry_mark
> > > >
> > > > 1. inode(x) | CP | inode(x) | dnode(F)
> > > > -> Update the latest inode(x).
> > > >
> > > > 2. inode(x) | CP | inode(F) | dnode(F)
> > > > -> No problem.
> > > >
> > > > 3. inode(x) | CP | dnode(F) | inode(x)
> > > > -> Recover to the latest dnode(F), and drop the last inode(x)
> > > >
> > > > 4. inode(x) | CP | dnode(F) | inode(F)
> > > > -> No problem.
> > > >
> > > > 5. CP | inode(x) | dnode(F)
> > > > -> The inode(DF) was missing. Should drop this dnode(F).
> > > >
> > > > 6. CP | inode(DF) | dnode(F)
> > > > -> No problem.
> > > >
> > > > 7. CP | dnode(F) | inode(DF)
> > > > -> If f2fs_iget fails, then goto next to find inode(DF).
> > > >
> > > > 8. CP | dnode(F) | inode(x)
> > > > -> If f2fs_iget fails, then goto next to find inode(DF).
> > > >    But it will fail due to no inode(DF).
> > > >
> > > > So, this patch adds some missing points such as #1, #5, #7, and #8.
> > > >
> > > > Signed-off-by: Huang Ying <ying.huang@intel.com>
> > > > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > > > ---
> > > >  fs/f2fs/recovery.c | 71
> > > > +++++++++++++++++++++++++++++++++++++++++++++---------
> > > >  1 file changed, 60 insertions(+), 11 deletions(-)
> > > >
> > > > diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> > > > index 36d4f73..a4eb978 100644
> > > > --- a/fs/f2fs/recovery.c
> > > > +++ b/fs/f2fs/recovery.c
> > > > @@ -14,6 +14,37 @@
> > > >  #include "node.h"
> > > >  #include "segment.h"
> > > >
> > > > +/*
> > > > + * Roll forward recovery scenarios.
> > > > + *
> > > > + * [Term] F: fsync_mark, D: dentry_mark
> > > > + *
> > > > + * 1. inode(x) | CP | inode(x) | dnode(F)
> > > > + * -> Update the latest inode(x).
> > > > + *
> > > > + * 2. inode(x) | CP | inode(F) | dnode(F)
> > > > + * -> No problem.
> > > > + *
> > > > + * 3. inode(x) | CP | dnode(F) | inode(x)
> > > > + * -> Recover to the latest dnode(F), and drop the last inode(x)
> > > > + *
> > > > + * 4. inode(x) | CP | dnode(F) | inode(F)
> > > > + * -> No problem.
> > > > + *
> > > > + * 5. CP | inode(x) | dnode(F)
> > > > + * -> The inode(DF) was missing. Should drop this dnode(F).
> > > > + *
> > > > + * 6. CP | inode(DF) | dnode(F)
> > > > + * -> No problem.
> > > > + *
> > > > + * 7. CP | dnode(F) | inode(DF)
> > > > + * -> If f2fs_iget fails, then goto next to find inode(DF).
> > > > + *
> > > > + * 8. CP | dnode(F) | inode(x)
> > > > + * -> If f2fs_iget fails, then goto next to find inode(DF).
> > > > + *    But it will fail due to no inode(DF).
> > > > + */
> > > > +
> > > >  static struct kmem_cache *fsync_entry_slab;
> > > >
> > > >  bool space_for_roll_forward(struct f2fs_sb_info *sbi)
> > > > @@ -110,27 +141,32 @@ out:
> > > >         return err;
> > > >  }
> > > >
> > > > -static int recover_inode(struct inode *inode, struct page *node_page)
> > > > +static void __recover_inode(struct inode *inode, struct page *page)
> > > >  {
> > > > -       struct f2fs_inode *raw_inode = F2FS_INODE(node_page);
> > > > +       struct f2fs_inode *raw = F2FS_INODE(page);
> > > > +
> > > > +       inode->i_mode = le16_to_cpu(raw->i_mode);
> > > > +       i_size_write(inode, le64_to_cpu(raw->i_size));
> > > > +       inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
> > > > +       inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
> > > > +       inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
> > > > +       inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
> > > > +       inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
> > > > +       inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
> > > > +}
> > > >
> > > > +static int recover_inode(struct inode *inode, struct page *node_page)
> > > > +{
> > > >         if (!IS_INODE(node_page))
> > > >                 return 0;
> > > >
> > > > -       inode->i_mode = le16_to_cpu(raw_inode->i_mode);
> > > > -       i_size_write(inode, le64_to_cpu(raw_inode->i_size));
> > > > -       inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
> > > > -       inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
> > > > -       inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
> > > > -       inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
> > > > -       inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
> > > > -       inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
> > > > +       __recover_inode(inode, node_page);
> > > >
> > > >         if (is_dent_dnode(node_page))
> > > >                 return recover_dentry(node_page, inode);
> > > >
> > > >         f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name
> > > > = %s",
> > > > -                       ino_of_node(node_page), raw_inode->i_name);
> > > > +                       ino_of_node(node_page),
> > > > F2FS_INODE(node_page)->i_name);
> > > >         return 0;
> > > >  }
> > > >
> > > > @@ -183,10 +219,16 @@ static int find_fsync_dnodes(struct f2fs_sb_info
> > > > *sbi, struct list_head *head)
> > > >                                 break;
> > > >                         }
> > > >
> > > > +                       /*
> > > > +                        * CP | dnode(F) | inode(DF)
> > > > +                        * For this case, we should not give up now.
> > > > +                        */
> > > >                         entry->inode = f2fs_iget(sbi->sb,
> > > > ino_of_node(page));
> > > >                         if (IS_ERR(entry->inode)) {
> > > >                                 err = PTR_ERR(entry->inode);
> > > >                                 kmem_cache_free(fsync_entry_slab, entry);
> > > > +                               if (err == -ENOENT)
> > > > +                                       goto next;
> > > >                                 break;
> > > >                         }
> > > >                         list_add_tail(&entry->list, head);
> > > > @@ -423,6 +465,13 @@ static int recover_data(struct f2fs_sb_info *sbi,
> > > >                 entry = get_fsync_inode(head, ino_of_node(page));
> > > >                 if (!entry)
> > > >                         goto next;
> > > > +               /*
> > > > +                * inode(x) | CP | inode(x) | dnode(F)
> > > > +                * In this case, we can lose the latest inode(x).
> > > > +                * So, call __recover_inode for the inode update.
> > > > +                */
> > > >
> > > 
> > > With 2/3, because both IS_CHECKPOINTED and HAS_FSYNCED_INODE flag are unset
> > > for inode, we will append a inode(F).
> > 
> > No, inode(F) is not appended.
> > Please check the fsync rule #1.
> 
> >From implementation of need_inode_block_update, we do not append an
> inode(F) or inode(DF), only if:
> 
>   get_nat_flag(e, HAS_LAST_FSYNC) &&
>           (get_nat_flag(e, IS_CHECKPOINTED) ||
>            get_nat_flag(e, HAS_FSYNCED_INODE)))
> 
> e is nat entry for the inode.
> 
> For inode(x) | CP | inode(x) | dnode(F)
> 
> We have:
> 
> HAS_LAST_FSYNC:    true
> IS_CHECKPOINTED:   false

IS_CHECKPOINTED: true

I changed the rule for CHECKPOINTED too.

Thanks,

> HAS_FSYNCED_INODE: false
> 
> So we will append a inode(DF) here.
> 
> > > So we do not need to call
> > > __recover_inode here?
> 
> Thought again, __recover_inode here may be helpful here for the
> following situation:
> 
> inode(x) | CP | inode(x) | dnode(F) | inode(DF)
>                                     |
>                                     v
>                                Sudden power off
> 
> That is, sudden power off before writing inode(DF).
> 
> So I think we should keep the code, but maybe change the comments?
> 
> Best Regards,
> Huang, Ying
> 
> > > 
> > > 
> > > > +               if (IS_INODE(page))
> > > > +                       __recover_inode(entry->inode, page);
> > > >
> > > >                 err = do_recover_data(sbi, entry->inode, page, blkaddr);
> > > >                 if (err) {
> > > > --
> > > >

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 3/3] f2fs: fix roll-forward missing scenarios
  2014-09-21  3:36         ` Jaegeuk Kim
@ 2014-09-22  2:13           ` Huang Ying
  0 siblings, 0 replies; 12+ messages in thread
From: Huang Ying @ 2014-09-22  2:13 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: huang ying, LKML, linux-fsdevel, linux-f2fs-devel

On Sat, 2014-09-20 at 20:36 -0700, Jaegeuk Kim wrote:
> On Sun, Sep 21, 2014 at 07:22:32AM +0800, Huang Ying wrote:
> > On Sat, 2014-09-20 at 09:23 -0700, Jaegeuk Kim wrote:
> > > On Thu, Sep 18, 2014 at 09:04:11PM +0800, huang ying wrote:
> > > > On Thu, Sep 18, 2014 at 1:51 PM, Jaegeuk Kim <jaegeuk@kernel.org> wrote:
> > > > 
> > > > > We can summarize the roll forward recovery scenarios as follows.
> > > > >
> > > > > [Term] F: fsync_mark, D: dentry_mark
> > > > >
> > > > > 1. inode(x) | CP | inode(x) | dnode(F)
> > > > > -> Update the latest inode(x).
> > > > >
> > > > > 2. inode(x) | CP | inode(F) | dnode(F)
> > > > > -> No problem.
> > > > >
> > > > > 3. inode(x) | CP | dnode(F) | inode(x)
> > > > > -> Recover to the latest dnode(F), and drop the last inode(x)
> > > > >
> > > > > 4. inode(x) | CP | dnode(F) | inode(F)
> > > > > -> No problem.
> > > > >
> > > > > 5. CP | inode(x) | dnode(F)
> > > > > -> The inode(DF) was missing. Should drop this dnode(F).
> > > > >
> > > > > 6. CP | inode(DF) | dnode(F)
> > > > > -> No problem.
> > > > >
> > > > > 7. CP | dnode(F) | inode(DF)
> > > > > -> If f2fs_iget fails, then goto next to find inode(DF).
> > > > >
> > > > > 8. CP | dnode(F) | inode(x)
> > > > > -> If f2fs_iget fails, then goto next to find inode(DF).
> > > > >    But it will fail due to no inode(DF).
> > > > >
> > > > > So, this patch adds some missing points such as #1, #5, #7, and #8.
> > > > >
> > > > > Signed-off-by: Huang Ying <ying.huang@intel.com>
> > > > > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > > > > ---
> > > > >  fs/f2fs/recovery.c | 71
> > > > > +++++++++++++++++++++++++++++++++++++++++++++---------
> > > > >  1 file changed, 60 insertions(+), 11 deletions(-)
> > > > >
> > > > > diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> > > > > index 36d4f73..a4eb978 100644
> > > > > --- a/fs/f2fs/recovery.c
> > > > > +++ b/fs/f2fs/recovery.c
> > > > > @@ -14,6 +14,37 @@
> > > > >  #include "node.h"
> > > > >  #include "segment.h"
> > > > >
> > > > > +/*
> > > > > + * Roll forward recovery scenarios.
> > > > > + *
> > > > > + * [Term] F: fsync_mark, D: dentry_mark
> > > > > + *
> > > > > + * 1. inode(x) | CP | inode(x) | dnode(F)
> > > > > + * -> Update the latest inode(x).
> > > > > + *
> > > > > + * 2. inode(x) | CP | inode(F) | dnode(F)
> > > > > + * -> No problem.
> > > > > + *
> > > > > + * 3. inode(x) | CP | dnode(F) | inode(x)
> > > > > + * -> Recover to the latest dnode(F), and drop the last inode(x)
> > > > > + *
> > > > > + * 4. inode(x) | CP | dnode(F) | inode(F)
> > > > > + * -> No problem.
> > > > > + *
> > > > > + * 5. CP | inode(x) | dnode(F)
> > > > > + * -> The inode(DF) was missing. Should drop this dnode(F).
> > > > > + *
> > > > > + * 6. CP | inode(DF) | dnode(F)
> > > > > + * -> No problem.
> > > > > + *
> > > > > + * 7. CP | dnode(F) | inode(DF)
> > > > > + * -> If f2fs_iget fails, then goto next to find inode(DF).
> > > > > + *
> > > > > + * 8. CP | dnode(F) | inode(x)
> > > > > + * -> If f2fs_iget fails, then goto next to find inode(DF).
> > > > > + *    But it will fail due to no inode(DF).
> > > > > + */
> > > > > +
> > > > >  static struct kmem_cache *fsync_entry_slab;
> > > > >
> > > > >  bool space_for_roll_forward(struct f2fs_sb_info *sbi)
> > > > > @@ -110,27 +141,32 @@ out:
> > > > >         return err;
> > > > >  }
> > > > >
> > > > > -static int recover_inode(struct inode *inode, struct page *node_page)
> > > > > +static void __recover_inode(struct inode *inode, struct page *page)
> > > > >  {
> > > > > -       struct f2fs_inode *raw_inode = F2FS_INODE(node_page);
> > > > > +       struct f2fs_inode *raw = F2FS_INODE(page);
> > > > > +
> > > > > +       inode->i_mode = le16_to_cpu(raw->i_mode);
> > > > > +       i_size_write(inode, le64_to_cpu(raw->i_size));
> > > > > +       inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
> > > > > +       inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
> > > > > +       inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
> > > > > +       inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
> > > > > +       inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
> > > > > +       inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
> > > > > +}
> > > > >
> > > > > +static int recover_inode(struct inode *inode, struct page *node_page)
> > > > > +{
> > > > >         if (!IS_INODE(node_page))
> > > > >                 return 0;
> > > > >
> > > > > -       inode->i_mode = le16_to_cpu(raw_inode->i_mode);
> > > > > -       i_size_write(inode, le64_to_cpu(raw_inode->i_size));
> > > > > -       inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
> > > > > -       inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
> > > > > -       inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
> > > > > -       inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
> > > > > -       inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
> > > > > -       inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
> > > > > +       __recover_inode(inode, node_page);
> > > > >
> > > > >         if (is_dent_dnode(node_page))
> > > > >                 return recover_dentry(node_page, inode);
> > > > >
> > > > >         f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name
> > > > > = %s",
> > > > > -                       ino_of_node(node_page), raw_inode->i_name);
> > > > > +                       ino_of_node(node_page),
> > > > > F2FS_INODE(node_page)->i_name);
> > > > >         return 0;
> > > > >  }
> > > > >
> > > > > @@ -183,10 +219,16 @@ static int find_fsync_dnodes(struct f2fs_sb_info
> > > > > *sbi, struct list_head *head)
> > > > >                                 break;
> > > > >                         }
> > > > >
> > > > > +                       /*
> > > > > +                        * CP | dnode(F) | inode(DF)
> > > > > +                        * For this case, we should not give up now.
> > > > > +                        */
> > > > >                         entry->inode = f2fs_iget(sbi->sb,
> > > > > ino_of_node(page));
> > > > >                         if (IS_ERR(entry->inode)) {
> > > > >                                 err = PTR_ERR(entry->inode);
> > > > >                                 kmem_cache_free(fsync_entry_slab, entry);
> > > > > +                               if (err == -ENOENT)
> > > > > +                                       goto next;
> > > > >                                 break;
> > > > >                         }
> > > > >                         list_add_tail(&entry->list, head);
> > > > > @@ -423,6 +465,13 @@ static int recover_data(struct f2fs_sb_info *sbi,
> > > > >                 entry = get_fsync_inode(head, ino_of_node(page));
> > > > >                 if (!entry)
> > > > >                         goto next;
> > > > > +               /*
> > > > > +                * inode(x) | CP | inode(x) | dnode(F)
> > > > > +                * In this case, we can lose the latest inode(x).
> > > > > +                * So, call __recover_inode for the inode update.
> > > > > +                */
> > > > >
> > > > 
> > > > With 2/3, because both IS_CHECKPOINTED and HAS_FSYNCED_INODE flag are unset
> > > > for inode, we will append a inode(F).
> > > 
> > > No, inode(F) is not appended.
> > > Please check the fsync rule #1.
> > 
> > >From implementation of need_inode_block_update, we do not append an
> > inode(F) or inode(DF), only if:
> > 
> >   get_nat_flag(e, HAS_LAST_FSYNC) &&
> >           (get_nat_flag(e, IS_CHECKPOINTED) ||
> >            get_nat_flag(e, HAS_FSYNCED_INODE)))
> > 
> > e is nat entry for the inode.
> > 
> > For inode(x) | CP | inode(x) | dnode(F)
> > 
> > We have:
> > 
> > HAS_LAST_FSYNC:    true
> > IS_CHECKPOINTED:   false
> 
> IS_CHECKPOINTED: true
> 
> I changed the rule for CHECKPOINTED too.

Sorry for the noise.  I did not notice that.

Best Regards,
Huang, Ying

> > HAS_FSYNCED_INODE: false
> > 
> > So we will append a inode(DF) here.
> > 
> > > > So we do not need to call
> > > > __recover_inode here?
> > 
> > Thought again, __recover_inode here may be helpful here for the
> > following situation:
> > 
> > inode(x) | CP | inode(x) | dnode(F) | inode(DF)
> >                                     |
> >                                     v
> >                                Sudden power off
> > 
> > That is, sudden power off before writing inode(DF).
> > 
> > So I think we should keep the code, but maybe change the comments?
> > 
> > Best Regards,
> > Huang, Ying
> > 
> > > > 
> > > > 
> > > > > +               if (IS_INODE(page))
> > > > > +                       __recover_inode(entry->inode, page);
> > > > >
> > > > >                 err = do_recover_data(sbi, entry->inode, page, blkaddr);
> > > > >                 if (err) {
> > > > > --
> > > > >



^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [f2fs-dev] [PATCH 2/3] f2fs: fix conditions to remain recovery information in f2fs_sync_file
  2014-09-18  5:51 ` [PATCH 2/3] f2fs: fix conditions to remain recovery information in f2fs_sync_file Jaegeuk Kim
@ 2014-09-22  7:24   ` Chao Yu
  2014-09-22  7:38     ` Huang Ying
  0 siblings, 1 reply; 12+ messages in thread
From: Chao Yu @ 2014-09-22  7:24 UTC (permalink / raw)
  To: 'Jaegeuk Kim', 'Huang Ying'
  Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Jaegeuk, Huang,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> Sent: Thursday, September 18, 2014 1:51 PM
> To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> linux-f2fs-devel@lists.sourceforge.net
> Cc: Jaegeuk Kim; Huang Ying
> Subject: [f2fs-dev] [PATCH 2/3] f2fs: fix conditions to remain recovery information in
> f2fs_sync_file
> 
> This patch revisited whole the recovery information during the f2fs_sync_file.
> 
> In this patch, there are three information to make a decision.
> 
> a) IS_CHECKPOINTED,	/* is it checkpointed before? */
> b) HAS_FSYNCED_INODE,	/* is the inode fsynced before? */
> c) HAS_LAST_FSYNC,	/* has the latest node fsync mark? */
> 
> And, the scenarios for our rule are based on:
> 
> [Term] F: fsync_mark, D: dentry_mark
> 
> 1. inode(x) | CP | inode(x) | dnode(F)
> 2. inode(x) | CP | inode(F) | dnode(F)
> 3. inode(x) | CP | dnode(F) | inode(x) | inode(F)
> 4. inode(x) | CP | dnode(F) | inode(F)
> 5. CP | inode(x) | dnode(F) | inode(DF)
> 6. CP | inode(DF) | dnode(F)
> 7. CP | dnode(F) | inode(DF)
> 8. CP | dnode(F) | inode(x) | inode(DF)

No sure, do we missed these cases:
inode(x) | CP | inode(F) | dnode(x) -> write inode(F)
CP | inode(DF) | dnode(x) -> write inode(F)

In these cases we will write another inode with fsync flag because our last
dnode is written out to disk by bdi-flusher (HAS_LAST_FSYNC is not marked). But
this appended inode is not useful.

AFAIK, HAS_LAST_FSYNC(AKA fsync_done) is introduced in commit 479f40c44ae3
("f2fs: skip unnecessary node writes during fsync") to avoid writting multiple
unneeded inode page by multiple redundant fsync calls. But for now, its role can
be taken by HAS_FSYNCED_INODE.
So, can we remove this flag to simplify our logic of fsync flow?

Then in fsync flow, the rule can be:
If CPed before, there must be a inode(F) written in warm node chain;
If not CPed before, there must be a inode(DF) written in warm node chain.

Thanks,
Yu

> 
> For example, #3, the three conditions should be changed as follows.
> 
>    inode(x) | CP | dnode(F) | inode(x) | inode(F)
> a)    x       o      o          o          o
> b)    x       x      x          x          o
> c)    x       o      o          x          o
> 
> If f2fs_sync_file stops   ------^,
>  it should write inode(F)    --------------^
> 
> So, the need_inode_block_update should return true, since
>  c) get_nat_flag(e, HAS_LAST_FSYNC), is false.
> 
> For example, #8,
>       CP | alloc | dnode(F) | inode(x) | inode(DF)
> a)    o      x        x          x          x
> b)    x               x          x          o
> c)    o               o          x          o
> 
> If f2fs_sync_file stops   -------^,
>  it should write inode(DF)    --------------^
> 
> Note that, the roll-forward policy should follow this rule, which means,
> if there are any missing blocks, we doesn't need to recover that inode.
> 
> Signed-off-by: Huang Ying <ying.huang@intel.com>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>  fs/f2fs/data.c |  3 ---
>  fs/f2fs/f2fs.h |  6 +++---
>  fs/f2fs/file.c | 10 ++++++----
>  fs/f2fs/node.c | 56 +++++++++++++++++++++++++++++++++++---------------------
>  fs/f2fs/node.h | 21 ++++++++++++---------
>  5 files changed, 56 insertions(+), 40 deletions(-)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 0e37658..fdc3dbe 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1089,9 +1089,6 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
>  	if (check_direct_IO(inode, rw, iter, offset))
>  		return 0;
> 
> -	/* clear fsync mark to recover these blocks */
> -	fsync_mark_clear(F2FS_I_SB(inode), inode->i_ino);
> -
>  	trace_f2fs_direct_IO_enter(inode, offset, count, rw);
> 
>  	err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 9fc1bcd..fd44083 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1224,9 +1224,9 @@ struct dnode_of_data;
>  struct node_info;
> 
>  bool available_free_memory(struct f2fs_sb_info *, int);
> -int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
> -bool fsync_mark_done(struct f2fs_sb_info *, nid_t);
> -void fsync_mark_clear(struct f2fs_sb_info *, nid_t);
> +bool is_checkpointed_node(struct f2fs_sb_info *, nid_t);
> +bool has_fsynced_inode(struct f2fs_sb_info *, nid_t);
> +bool need_inode_block_update(struct f2fs_sb_info *, nid_t);
>  void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
>  int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
>  int truncate_inode_blocks(struct inode *, pgoff_t);
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index af06e22..3035c79 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -207,15 +207,17 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int
> datasync)
>  			up_write(&fi->i_sem);
>  		}
>  	} else {
> -		/* if there is no written node page, write its inode page */
> -		while (!sync_node_pages(sbi, ino, &wbc)) {
> -			if (fsync_mark_done(sbi, ino))
> -				goto out;
> +sync_nodes:
> +		sync_node_pages(sbi, ino, &wbc);
> +
> +		if (need_inode_block_update(sbi, ino)) {
>  			mark_inode_dirty_sync(inode);
>  			ret = f2fs_write_inode(inode, NULL);
>  			if (ret)
>  				goto out;
> +			goto sync_nodes;
>  		}
> +
>  		ret = wait_on_node_pages_writeback(sbi, ino);
>  		if (ret)
>  			goto out;
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index d19d6b1..7a2d9c9 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -123,44 +123,48 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct
> nat_entry *e)
>  	kmem_cache_free(nat_entry_slab, e);
>  }
> 
> -int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
> +bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  	struct nat_entry *e;
> -	int is_cp = 1;
> +	bool is_cp = true;
> 
>  	read_lock(&nm_i->nat_tree_lock);
>  	e = __lookup_nat_cache(nm_i, nid);
>  	if (e && !get_nat_flag(e, IS_CHECKPOINTED))
> -		is_cp = 0;
> +		is_cp = false;
>  	read_unlock(&nm_i->nat_tree_lock);
>  	return is_cp;
>  }
> 
> -bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid)
> +bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  	struct nat_entry *e;
> -	bool fsync_done = false;
> +	bool fsynced = false;
> 
>  	read_lock(&nm_i->nat_tree_lock);
> -	e = __lookup_nat_cache(nm_i, nid);
> -	if (e)
> -		fsync_done = get_nat_flag(e, HAS_FSYNC_MARK);
> +	e = __lookup_nat_cache(nm_i, ino);
> +	if (e && get_nat_flag(e, HAS_FSYNCED_INODE))
> +		fsynced = true;
>  	read_unlock(&nm_i->nat_tree_lock);
> -	return fsync_done;
> +	return fsynced;
>  }
> 
> -void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid)
> +bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  	struct nat_entry *e;
> +	bool need_update = true;
> 
> -	write_lock(&nm_i->nat_tree_lock);
> -	e = __lookup_nat_cache(nm_i, nid);
> -	if (e)
> -		set_nat_flag(e, HAS_FSYNC_MARK, false);
> -	write_unlock(&nm_i->nat_tree_lock);
> +	read_lock(&nm_i->nat_tree_lock);
> +	e = __lookup_nat_cache(nm_i, ino);
> +	if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
> +			(get_nat_flag(e, IS_CHECKPOINTED) ||
> +			 get_nat_flag(e, HAS_FSYNCED_INODE)))
> +		need_update = false;
> +	read_unlock(&nm_i->nat_tree_lock);
> +	return need_update;
>  }
> 
>  static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
> @@ -176,7 +180,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t
> nid)
>  	}
>  	memset(new, 0, sizeof(struct nat_entry));
>  	nat_set_nid(new, nid);
> -	set_nat_flag(new, IS_CHECKPOINTED, true);
> +	nat_reset_flag(new);
>  	list_add_tail(&new->list, &nm_i->nat_entries);
>  	nm_i->nat_cnt++;
>  	return new;
> @@ -244,12 +248,17 @@ retry:
> 
>  	/* change address */
>  	nat_set_blkaddr(e, new_blkaddr);
> +	if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
> +		set_nat_flag(e, IS_CHECKPOINTED, false);
>  	__set_nat_cache_dirty(nm_i, e);
> 
>  	/* update fsync_mark if its inode nat entry is still alive */
>  	e = __lookup_nat_cache(nm_i, ni->ino);
> -	if (e)
> -		set_nat_flag(e, HAS_FSYNC_MARK, fsync_done);
> +	if (e) {
> +		if (fsync_done && ni->nid == ni->ino)
> +			set_nat_flag(e, HAS_FSYNCED_INODE, true);
> +		set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
> +	}
>  	write_unlock(&nm_i->nat_tree_lock);
>  }
> 
> @@ -1121,10 +1130,14 @@ continue_unlock:
> 
>  			/* called by fsync() */
>  			if (ino && IS_DNODE(page)) {
> -				int mark = !is_checkpointed_node(sbi, ino);
>  				set_fsync_mark(page, 1);
> -				if (IS_INODE(page))
> -					set_dentry_mark(page, mark);
> +				if (IS_INODE(page)) {
> +					if (!is_checkpointed_node(sbi, ino) &&
> +						!has_fsynced_inode(sbi, ino))
> +						set_dentry_mark(page, 1);
> +					else
> +						set_dentry_mark(page, 0);
> +				}
>  				nwritten++;
>  			} else {
>  				set_fsync_mark(page, 0);
> @@ -1912,6 +1925,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>  				write_unlock(&nm_i->nat_tree_lock);
>  			} else {
>  				write_lock(&nm_i->nat_tree_lock);
> +				nat_reset_flag(ne);
>  				__clear_nat_cache_dirty(nm_i, ne);
>  				write_unlock(&nm_i->nat_tree_lock);
>  			}
> diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
> index 3043778..b8ba63c 100644
> --- a/fs/f2fs/node.h
> +++ b/fs/f2fs/node.h
> @@ -41,7 +41,8 @@ struct node_info {
> 
>  enum {
>  	IS_CHECKPOINTED,	/* is it checkpointed before? */
> -	HAS_FSYNC_MARK,		/* has the latest node fsync mark? */
> +	HAS_FSYNCED_INODE,	/* is the inode fsynced before? */
> +	HAS_LAST_FSYNC,		/* has the latest node fsync mark? */
>  };
> 
>  struct nat_entry {
> @@ -60,15 +61,9 @@ struct nat_entry {
>  #define nat_set_version(nat, v)		(nat->ni.version = v)
> 
>  #define __set_nat_cache_dirty(nm_i, ne)					\
> -	do {								\
> -		set_nat_flag(ne, IS_CHECKPOINTED, false);		\
> -		list_move_tail(&ne->list, &nm_i->dirty_nat_entries);	\
> -	} while (0)
> +		list_move_tail(&ne->list, &nm_i->dirty_nat_entries);
>  #define __clear_nat_cache_dirty(nm_i, ne)				\
> -	do {								\
> -		set_nat_flag(ne, IS_CHECKPOINTED, true);		\
> -		list_move_tail(&ne->list, &nm_i->nat_entries);		\
> -	} while (0)
> +		list_move_tail(&ne->list, &nm_i->nat_entries);
>  #define inc_node_version(version)	(++version)
> 
>  static inline void set_nat_flag(struct nat_entry *ne,
> @@ -87,6 +82,14 @@ static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type)
>  	return ne->flag & mask;
>  }
> 
> +static inline void nat_reset_flag(struct nat_entry *ne)
> +{
> +	/* these states can be set only after checkpoint was done */
> +	set_nat_flag(ne, IS_CHECKPOINTED, true);
> +	set_nat_flag(ne, HAS_FSYNCED_INODE, false);
> +	set_nat_flag(ne, HAS_LAST_FSYNC, true);
> +}
> +
>  static inline void node_info_from_raw_nat(struct node_info *ni,
>  						struct f2fs_nat_entry *raw_ne)
>  {
> --
> 1.8.5.2 (Apple Git-48)
> 
> 
> ------------------------------------------------------------------------------
> Want excitement?
> Manually upgrade your production database.
> When you want reliability, choose Perforce
> Perforce version control. Predictably reliable.
> http://pubads.g.doubleclick.net/gampad/clk?id=157508191&iu=/4140/ostg.clktrk
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [f2fs-dev] [PATCH 2/3] f2fs: fix conditions to remain recovery information in f2fs_sync_file
  2014-09-22  7:24   ` [f2fs-dev] " Chao Yu
@ 2014-09-22  7:38     ` Huang Ying
  2014-09-22  9:20       ` Chao Yu
  0 siblings, 1 reply; 12+ messages in thread
From: Huang Ying @ 2014-09-22  7:38 UTC (permalink / raw)
  To: Chao Yu
  Cc: 'Jaegeuk Kim', linux-kernel, linux-fsdevel, linux-f2fs-devel

On Mon, 2014-09-22 at 15:24 +0800, Chao Yu wrote:
> Hi Jaegeuk, Huang,
> 
> > -----Original Message-----
> > From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> > Sent: Thursday, September 18, 2014 1:51 PM
> > To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> > linux-f2fs-devel@lists.sourceforge.net
> > Cc: Jaegeuk Kim; Huang Ying
> > Subject: [f2fs-dev] [PATCH 2/3] f2fs: fix conditions to remain recovery information in
> > f2fs_sync_file
> > 
> > This patch revisited whole the recovery information during the f2fs_sync_file.
> > 
> > In this patch, there are three information to make a decision.
> > 
> > a) IS_CHECKPOINTED,	/* is it checkpointed before? */
> > b) HAS_FSYNCED_INODE,	/* is the inode fsynced before? */
> > c) HAS_LAST_FSYNC,	/* has the latest node fsync mark? */
> > 
> > And, the scenarios for our rule are based on:
> > 
> > [Term] F: fsync_mark, D: dentry_mark
> > 
> > 1. inode(x) | CP | inode(x) | dnode(F)
> > 2. inode(x) | CP | inode(F) | dnode(F)
> > 3. inode(x) | CP | dnode(F) | inode(x) | inode(F)
> > 4. inode(x) | CP | dnode(F) | inode(F)
> > 5. CP | inode(x) | dnode(F) | inode(DF)
> > 6. CP | inode(DF) | dnode(F)
> > 7. CP | dnode(F) | inode(DF)
> > 8. CP | dnode(F) | inode(x) | inode(DF)
> 
> No sure, do we missed these cases:
> inode(x) | CP | inode(F) | dnode(x) -> write inode(F)
> CP | inode(DF) | dnode(x) -> write inode(F)
> 
> In these cases we will write another inode with fsync flag because our last
> dnode is written out to disk by bdi-flusher (HAS_LAST_FSYNC is not marked). But
> this appended inode is not useful.
> 
> AFAIK, HAS_LAST_FSYNC(AKA fsync_done) is introduced in commit 479f40c44ae3
> ("f2fs: skip unnecessary node writes during fsync") to avoid writting multiple
> unneeded inode page by multiple redundant fsync calls. But for now, its role can
> be taken by HAS_FSYNCED_INODE.
> So, can we remove this flag to simplify our logic of fsync flow?
> 
> Then in fsync flow, the rule can be:
> If CPed before, there must be a inode(F) written in warm node chain;

How about

inode(x) | CP | dnode(F)

> If not CPed before, there must be a inode(DF) written in warm node chain.

For example below:

1) checkpoint
2) create "a", change "a"
3) fsync "a"
4) open "a", change "a"

Do we want recovery to stop at dnode(F) in step 3) or stop at dnode(x)
produced by step 4)?

Best Regards,
Huang, Ying

> > 
> > For example, #3, the three conditions should be changed as follows.
> > 
> >    inode(x) | CP | dnode(F) | inode(x) | inode(F)
> > a)    x       o      o          o          o
> > b)    x       x      x          x          o
> > c)    x       o      o          x          o
> > 
> > If f2fs_sync_file stops   ------^,
> >  it should write inode(F)    --------------^
> > 
> > So, the need_inode_block_update should return true, since
> >  c) get_nat_flag(e, HAS_LAST_FSYNC), is false.
> > 
> > For example, #8,
> >       CP | alloc | dnode(F) | inode(x) | inode(DF)
> > a)    o      x        x          x          x
> > b)    x               x          x          o
> > c)    o               o          x          o
> > 
> > If f2fs_sync_file stops   -------^,
> >  it should write inode(DF)    --------------^
> > 
> > Note that, the roll-forward policy should follow this rule, which means,
> > if there are any missing blocks, we doesn't need to recover that inode.
> > 
> > Signed-off-by: Huang Ying <ying.huang@intel.com>
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > ---
> >  fs/f2fs/data.c |  3 ---
> >  fs/f2fs/f2fs.h |  6 +++---
> >  fs/f2fs/file.c | 10 ++++++----
> >  fs/f2fs/node.c | 56 +++++++++++++++++++++++++++++++++++---------------------
> >  fs/f2fs/node.h | 21 ++++++++++++---------
> >  5 files changed, 56 insertions(+), 40 deletions(-)
> > 
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 0e37658..fdc3dbe 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -1089,9 +1089,6 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
> >  	if (check_direct_IO(inode, rw, iter, offset))
> >  		return 0;
> > 
> > -	/* clear fsync mark to recover these blocks */
> > -	fsync_mark_clear(F2FS_I_SB(inode), inode->i_ino);
> > -
> >  	trace_f2fs_direct_IO_enter(inode, offset, count, rw);
> > 
> >  	err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 9fc1bcd..fd44083 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -1224,9 +1224,9 @@ struct dnode_of_data;
> >  struct node_info;
> > 
> >  bool available_free_memory(struct f2fs_sb_info *, int);
> > -int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
> > -bool fsync_mark_done(struct f2fs_sb_info *, nid_t);
> > -void fsync_mark_clear(struct f2fs_sb_info *, nid_t);
> > +bool is_checkpointed_node(struct f2fs_sb_info *, nid_t);
> > +bool has_fsynced_inode(struct f2fs_sb_info *, nid_t);
> > +bool need_inode_block_update(struct f2fs_sb_info *, nid_t);
> >  void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
> >  int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
> >  int truncate_inode_blocks(struct inode *, pgoff_t);
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index af06e22..3035c79 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -207,15 +207,17 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int
> > datasync)
> >  			up_write(&fi->i_sem);
> >  		}
> >  	} else {
> > -		/* if there is no written node page, write its inode page */
> > -		while (!sync_node_pages(sbi, ino, &wbc)) {
> > -			if (fsync_mark_done(sbi, ino))
> > -				goto out;
> > +sync_nodes:
> > +		sync_node_pages(sbi, ino, &wbc);
> > +
> > +		if (need_inode_block_update(sbi, ino)) {
> >  			mark_inode_dirty_sync(inode);
> >  			ret = f2fs_write_inode(inode, NULL);
> >  			if (ret)
> >  				goto out;
> > +			goto sync_nodes;
> >  		}
> > +
> >  		ret = wait_on_node_pages_writeback(sbi, ino);
> >  		if (ret)
> >  			goto out;
> > diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> > index d19d6b1..7a2d9c9 100644
> > --- a/fs/f2fs/node.c
> > +++ b/fs/f2fs/node.c
> > @@ -123,44 +123,48 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct
> > nat_entry *e)
> >  	kmem_cache_free(nat_entry_slab, e);
> >  }
> > 
> > -int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
> > +bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
> >  {
> >  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> >  	struct nat_entry *e;
> > -	int is_cp = 1;
> > +	bool is_cp = true;
> > 
> >  	read_lock(&nm_i->nat_tree_lock);
> >  	e = __lookup_nat_cache(nm_i, nid);
> >  	if (e && !get_nat_flag(e, IS_CHECKPOINTED))
> > -		is_cp = 0;
> > +		is_cp = false;
> >  	read_unlock(&nm_i->nat_tree_lock);
> >  	return is_cp;
> >  }
> > 
> > -bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid)
> > +bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino)
> >  {
> >  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> >  	struct nat_entry *e;
> > -	bool fsync_done = false;
> > +	bool fsynced = false;
> > 
> >  	read_lock(&nm_i->nat_tree_lock);
> > -	e = __lookup_nat_cache(nm_i, nid);
> > -	if (e)
> > -		fsync_done = get_nat_flag(e, HAS_FSYNC_MARK);
> > +	e = __lookup_nat_cache(nm_i, ino);
> > +	if (e && get_nat_flag(e, HAS_FSYNCED_INODE))
> > +		fsynced = true;
> >  	read_unlock(&nm_i->nat_tree_lock);
> > -	return fsync_done;
> > +	return fsynced;
> >  }
> > 
> > -void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid)
> > +bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
> >  {
> >  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> >  	struct nat_entry *e;
> > +	bool need_update = true;
> > 
> > -	write_lock(&nm_i->nat_tree_lock);
> > -	e = __lookup_nat_cache(nm_i, nid);
> > -	if (e)
> > -		set_nat_flag(e, HAS_FSYNC_MARK, false);
> > -	write_unlock(&nm_i->nat_tree_lock);
> > +	read_lock(&nm_i->nat_tree_lock);
> > +	e = __lookup_nat_cache(nm_i, ino);
> > +	if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
> > +			(get_nat_flag(e, IS_CHECKPOINTED) ||
> > +			 get_nat_flag(e, HAS_FSYNCED_INODE)))
> > +		need_update = false;
> > +	read_unlock(&nm_i->nat_tree_lock);
> > +	return need_update;
> >  }
> > 
> >  static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
> > @@ -176,7 +180,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t
> > nid)
> >  	}
> >  	memset(new, 0, sizeof(struct nat_entry));
> >  	nat_set_nid(new, nid);
> > -	set_nat_flag(new, IS_CHECKPOINTED, true);
> > +	nat_reset_flag(new);
> >  	list_add_tail(&new->list, &nm_i->nat_entries);
> >  	nm_i->nat_cnt++;
> >  	return new;
> > @@ -244,12 +248,17 @@ retry:
> > 
> >  	/* change address */
> >  	nat_set_blkaddr(e, new_blkaddr);
> > +	if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
> > +		set_nat_flag(e, IS_CHECKPOINTED, false);
> >  	__set_nat_cache_dirty(nm_i, e);
> > 
> >  	/* update fsync_mark if its inode nat entry is still alive */
> >  	e = __lookup_nat_cache(nm_i, ni->ino);
> > -	if (e)
> > -		set_nat_flag(e, HAS_FSYNC_MARK, fsync_done);
> > +	if (e) {
> > +		if (fsync_done && ni->nid == ni->ino)
> > +			set_nat_flag(e, HAS_FSYNCED_INODE, true);
> > +		set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
> > +	}
> >  	write_unlock(&nm_i->nat_tree_lock);
> >  }
> > 
> > @@ -1121,10 +1130,14 @@ continue_unlock:
> > 
> >  			/* called by fsync() */
> >  			if (ino && IS_DNODE(page)) {
> > -				int mark = !is_checkpointed_node(sbi, ino);
> >  				set_fsync_mark(page, 1);
> > -				if (IS_INODE(page))
> > -					set_dentry_mark(page, mark);
> > +				if (IS_INODE(page)) {
> > +					if (!is_checkpointed_node(sbi, ino) &&
> > +						!has_fsynced_inode(sbi, ino))
> > +						set_dentry_mark(page, 1);
> > +					else
> > +						set_dentry_mark(page, 0);
> > +				}
> >  				nwritten++;
> >  			} else {
> >  				set_fsync_mark(page, 0);
> > @@ -1912,6 +1925,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> >  				write_unlock(&nm_i->nat_tree_lock);
> >  			} else {
> >  				write_lock(&nm_i->nat_tree_lock);
> > +				nat_reset_flag(ne);
> >  				__clear_nat_cache_dirty(nm_i, ne);
> >  				write_unlock(&nm_i->nat_tree_lock);
> >  			}
> > diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
> > index 3043778..b8ba63c 100644
> > --- a/fs/f2fs/node.h
> > +++ b/fs/f2fs/node.h
> > @@ -41,7 +41,8 @@ struct node_info {
> > 
> >  enum {
> >  	IS_CHECKPOINTED,	/* is it checkpointed before? */
> > -	HAS_FSYNC_MARK,		/* has the latest node fsync mark? */
> > +	HAS_FSYNCED_INODE,	/* is the inode fsynced before? */
> > +	HAS_LAST_FSYNC,		/* has the latest node fsync mark? */
> >  };
> > 
> >  struct nat_entry {
> > @@ -60,15 +61,9 @@ struct nat_entry {
> >  #define nat_set_version(nat, v)		(nat->ni.version = v)
> > 
> >  #define __set_nat_cache_dirty(nm_i, ne)					\
> > -	do {								\
> > -		set_nat_flag(ne, IS_CHECKPOINTED, false);		\
> > -		list_move_tail(&ne->list, &nm_i->dirty_nat_entries);	\
> > -	} while (0)
> > +		list_move_tail(&ne->list, &nm_i->dirty_nat_entries);
> >  #define __clear_nat_cache_dirty(nm_i, ne)				\
> > -	do {								\
> > -		set_nat_flag(ne, IS_CHECKPOINTED, true);		\
> > -		list_move_tail(&ne->list, &nm_i->nat_entries);		\
> > -	} while (0)
> > +		list_move_tail(&ne->list, &nm_i->nat_entries);
> >  #define inc_node_version(version)	(++version)
> > 
> >  static inline void set_nat_flag(struct nat_entry *ne,
> > @@ -87,6 +82,14 @@ static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type)
> >  	return ne->flag & mask;
> >  }
> > 
> > +static inline void nat_reset_flag(struct nat_entry *ne)
> > +{
> > +	/* these states can be set only after checkpoint was done */
> > +	set_nat_flag(ne, IS_CHECKPOINTED, true);
> > +	set_nat_flag(ne, HAS_FSYNCED_INODE, false);
> > +	set_nat_flag(ne, HAS_LAST_FSYNC, true);
> > +}
> > +
> >  static inline void node_info_from_raw_nat(struct node_info *ni,
> >  						struct f2fs_nat_entry *raw_ne)
> >  {
> > --
> > 1.8.5.2 (Apple Git-48)
> > 
> > 
> > ------------------------------------------------------------------------------
> > Want excitement?
> > Manually upgrade your production database.
> > When you want reliability, choose Perforce
> > Perforce version control. Predictably reliable.
> > http://pubads.g.doubleclick.net/gampad/clk?id=157508191&iu=/4140/ostg.clktrk
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > Linux-f2fs-devel@lists.sourceforge.net
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> 



^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [f2fs-dev] [PATCH 2/3] f2fs: fix conditions to remain recovery information in f2fs_sync_file
  2014-09-22  7:38     ` Huang Ying
@ 2014-09-22  9:20       ` Chao Yu
  2014-09-23  4:52         ` Jaegeuk Kim
  0 siblings, 1 reply; 12+ messages in thread
From: Chao Yu @ 2014-09-22  9:20 UTC (permalink / raw)
  To: 'Huang Ying'
  Cc: 'Jaegeuk Kim', linux-kernel, linux-fsdevel, linux-f2fs-devel

> -----Original Message-----
> From: Huang Ying [mailto:ying.huang@intel.com]
> Sent: Monday, September 22, 2014 3:39 PM
> To: Chao Yu
> Cc: 'Jaegeuk Kim'; linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> linux-f2fs-devel@lists.sourceforge.net
> Subject: Re: [f2fs-dev] [PATCH 2/3] f2fs: fix conditions to remain recovery information in
> f2fs_sync_file
> 
> On Mon, 2014-09-22 at 15:24 +0800, Chao Yu wrote:
> > Hi Jaegeuk, Huang,
> >
> > > -----Original Message-----
> > > From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> > > Sent: Thursday, September 18, 2014 1:51 PM
> > > To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> > > linux-f2fs-devel@lists.sourceforge.net
> > > Cc: Jaegeuk Kim; Huang Ying
> > > Subject: [f2fs-dev] [PATCH 2/3] f2fs: fix conditions to remain recovery information in
> > > f2fs_sync_file
> > >
> > > This patch revisited whole the recovery information during the f2fs_sync_file.
> > >
> > > In this patch, there are three information to make a decision.
> > >
> > > a) IS_CHECKPOINTED,	/* is it checkpointed before? */
> > > b) HAS_FSYNCED_INODE,	/* is the inode fsynced before? */
> > > c) HAS_LAST_FSYNC,	/* has the latest node fsync mark? */
> > >
> > > And, the scenarios for our rule are based on:
> > >
> > > [Term] F: fsync_mark, D: dentry_mark
> > >
> > > 1. inode(x) | CP | inode(x) | dnode(F)
> > > 2. inode(x) | CP | inode(F) | dnode(F)
> > > 3. inode(x) | CP | dnode(F) | inode(x) | inode(F)
> > > 4. inode(x) | CP | dnode(F) | inode(F)
> > > 5. CP | inode(x) | dnode(F) | inode(DF)
> > > 6. CP | inode(DF) | dnode(F)
> > > 7. CP | dnode(F) | inode(DF)
> > > 8. CP | dnode(F) | inode(x) | inode(DF)
> >
> > No sure, do we missed these cases:
> > inode(x) | CP | inode(F) | dnode(x) -> write inode(F)
> > CP | inode(DF) | dnode(x) -> write inode(F)
> >
> > In these cases we will write another inode with fsync flag because our last
> > dnode is written out to disk by bdi-flusher (HAS_LAST_FSYNC is not marked). But
> > this appended inode is not useful.
> >
> > AFAIK, HAS_LAST_FSYNC(AKA fsync_done) is introduced in commit 479f40c44ae3
> > ("f2fs: skip unnecessary node writes during fsync") to avoid writting multiple
> > unneeded inode page by multiple redundant fsync calls. But for now, its role can
> > be taken by HAS_FSYNCED_INODE.
> > So, can we remove this flag to simplify our logic of fsync flow?
> >
> > Then in fsync flow, the rule can be:
> > If CPed before, there must be a inode(F) written in warm node chain;
> 
> How about
> 
> inode(x) | CP | dnode(F)

Oh, I missed this one, thanks for remindering that.

There is another case:
inode(x) | CP | dnode(F) | dnode(x) -> write inode(F)
It seems we will append another unneeded inode(F) in this patch also due to
no HAS_LAST_FSYNC in nat entry cache of inode.

> 
> > If not CPed before, there must be a inode(DF) written in warm node chain.
> 
> For example below:
> 
> 1) checkpoint
> 2) create "a", change "a"
> 3) fsync "a"
> 4) open "a", change "a"
> 
> Do we want recovery to stop at dnode(F) in step 3) or stop at dnode(x)
> produced by step 4)?

To my understanding, we will recover all info related to fsynced nodes in warm
node chain. So will produce to step 4 if changed nodes in step 4 are flushed to
device.

Thanks,
Yu
> 
> Best Regards,
> Huang, Ying
> 
> > >
> > > For example, #3, the three conditions should be changed as follows.
> > >
> > >    inode(x) | CP | dnode(F) | inode(x) | inode(F)
> > > a)    x       o      o          o          o
> > > b)    x       x      x          x          o
> > > c)    x       o      o          x          o
> > >
> > > If f2fs_sync_file stops   ------^,
> > >  it should write inode(F)    --------------^
> > >
> > > So, the need_inode_block_update should return true, since
> > >  c) get_nat_flag(e, HAS_LAST_FSYNC), is false.
> > >
> > > For example, #8,
> > >       CP | alloc | dnode(F) | inode(x) | inode(DF)
> > > a)    o      x        x          x          x
> > > b)    x               x          x          o
> > > c)    o               o          x          o
> > >
> > > If f2fs_sync_file stops   -------^,
> > >  it should write inode(DF)    --------------^
> > >
> > > Note that, the roll-forward policy should follow this rule, which means,
> > > if there are any missing blocks, we doesn't need to recover that inode.
> > >
> > > Signed-off-by: Huang Ying <ying.huang@intel.com>
> > > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > > ---
> > >  fs/f2fs/data.c |  3 ---
> > >  fs/f2fs/f2fs.h |  6 +++---
> > >  fs/f2fs/file.c | 10 ++++++----
> > >  fs/f2fs/node.c | 56 +++++++++++++++++++++++++++++++++++---------------------
> > >  fs/f2fs/node.h | 21 ++++++++++++---------
> > >  5 files changed, 56 insertions(+), 40 deletions(-)
> > >
> > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > > index 0e37658..fdc3dbe 100644
> > > --- a/fs/f2fs/data.c
> > > +++ b/fs/f2fs/data.c
> > > @@ -1089,9 +1089,6 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
> > >  	if (check_direct_IO(inode, rw, iter, offset))
> > >  		return 0;
> > >
> > > -	/* clear fsync mark to recover these blocks */
> > > -	fsync_mark_clear(F2FS_I_SB(inode), inode->i_ino);
> > > -
> > >  	trace_f2fs_direct_IO_enter(inode, offset, count, rw);
> > >
> > >  	err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
> > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > index 9fc1bcd..fd44083 100644
> > > --- a/fs/f2fs/f2fs.h
> > > +++ b/fs/f2fs/f2fs.h
> > > @@ -1224,9 +1224,9 @@ struct dnode_of_data;
> > >  struct node_info;
> > >
> > >  bool available_free_memory(struct f2fs_sb_info *, int);
> > > -int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
> > > -bool fsync_mark_done(struct f2fs_sb_info *, nid_t);
> > > -void fsync_mark_clear(struct f2fs_sb_info *, nid_t);
> > > +bool is_checkpointed_node(struct f2fs_sb_info *, nid_t);
> > > +bool has_fsynced_inode(struct f2fs_sb_info *, nid_t);
> > > +bool need_inode_block_update(struct f2fs_sb_info *, nid_t);
> > >  void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
> > >  int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
> > >  int truncate_inode_blocks(struct inode *, pgoff_t);
> > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > index af06e22..3035c79 100644
> > > --- a/fs/f2fs/file.c
> > > +++ b/fs/f2fs/file.c
> > > @@ -207,15 +207,17 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int
> > > datasync)
> > >  			up_write(&fi->i_sem);
> > >  		}
> > >  	} else {
> > > -		/* if there is no written node page, write its inode page */
> > > -		while (!sync_node_pages(sbi, ino, &wbc)) {
> > > -			if (fsync_mark_done(sbi, ino))
> > > -				goto out;
> > > +sync_nodes:
> > > +		sync_node_pages(sbi, ino, &wbc);
> > > +
> > > +		if (need_inode_block_update(sbi, ino)) {
> > >  			mark_inode_dirty_sync(inode);
> > >  			ret = f2fs_write_inode(inode, NULL);
> > >  			if (ret)
> > >  				goto out;
> > > +			goto sync_nodes;
> > >  		}
> > > +
> > >  		ret = wait_on_node_pages_writeback(sbi, ino);
> > >  		if (ret)
> > >  			goto out;
> > > diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> > > index d19d6b1..7a2d9c9 100644
> > > --- a/fs/f2fs/node.c
> > > +++ b/fs/f2fs/node.c
> > > @@ -123,44 +123,48 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct
> > > nat_entry *e)
> > >  	kmem_cache_free(nat_entry_slab, e);
> > >  }
> > >
> > > -int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
> > > +bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
> > >  {
> > >  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> > >  	struct nat_entry *e;
> > > -	int is_cp = 1;
> > > +	bool is_cp = true;
> > >
> > >  	read_lock(&nm_i->nat_tree_lock);
> > >  	e = __lookup_nat_cache(nm_i, nid);
> > >  	if (e && !get_nat_flag(e, IS_CHECKPOINTED))
> > > -		is_cp = 0;
> > > +		is_cp = false;
> > >  	read_unlock(&nm_i->nat_tree_lock);
> > >  	return is_cp;
> > >  }
> > >
> > > -bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid)
> > > +bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino)
> > >  {
> > >  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> > >  	struct nat_entry *e;
> > > -	bool fsync_done = false;
> > > +	bool fsynced = false;
> > >
> > >  	read_lock(&nm_i->nat_tree_lock);
> > > -	e = __lookup_nat_cache(nm_i, nid);
> > > -	if (e)
> > > -		fsync_done = get_nat_flag(e, HAS_FSYNC_MARK);
> > > +	e = __lookup_nat_cache(nm_i, ino);
> > > +	if (e && get_nat_flag(e, HAS_FSYNCED_INODE))
> > > +		fsynced = true;
> > >  	read_unlock(&nm_i->nat_tree_lock);
> > > -	return fsync_done;
> > > +	return fsynced;
> > >  }
> > >
> > > -void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid)
> > > +bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
> > >  {
> > >  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> > >  	struct nat_entry *e;
> > > +	bool need_update = true;
> > >
> > > -	write_lock(&nm_i->nat_tree_lock);
> > > -	e = __lookup_nat_cache(nm_i, nid);
> > > -	if (e)
> > > -		set_nat_flag(e, HAS_FSYNC_MARK, false);
> > > -	write_unlock(&nm_i->nat_tree_lock);
> > > +	read_lock(&nm_i->nat_tree_lock);
> > > +	e = __lookup_nat_cache(nm_i, ino);
> > > +	if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
> > > +			(get_nat_flag(e, IS_CHECKPOINTED) ||
> > > +			 get_nat_flag(e, HAS_FSYNCED_INODE)))
> > > +		need_update = false;
> > > +	read_unlock(&nm_i->nat_tree_lock);
> > > +	return need_update;
> > >  }
> > >
> > >  static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
> > > @@ -176,7 +180,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t
> > > nid)
> > >  	}
> > >  	memset(new, 0, sizeof(struct nat_entry));
> > >  	nat_set_nid(new, nid);
> > > -	set_nat_flag(new, IS_CHECKPOINTED, true);
> > > +	nat_reset_flag(new);
> > >  	list_add_tail(&new->list, &nm_i->nat_entries);
> > >  	nm_i->nat_cnt++;
> > >  	return new;
> > > @@ -244,12 +248,17 @@ retry:
> > >
> > >  	/* change address */
> > >  	nat_set_blkaddr(e, new_blkaddr);
> > > +	if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
> > > +		set_nat_flag(e, IS_CHECKPOINTED, false);
> > >  	__set_nat_cache_dirty(nm_i, e);
> > >
> > >  	/* update fsync_mark if its inode nat entry is still alive */
> > >  	e = __lookup_nat_cache(nm_i, ni->ino);
> > > -	if (e)
> > > -		set_nat_flag(e, HAS_FSYNC_MARK, fsync_done);
> > > +	if (e) {
> > > +		if (fsync_done && ni->nid == ni->ino)
> > > +			set_nat_flag(e, HAS_FSYNCED_INODE, true);
> > > +		set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
> > > +	}
> > >  	write_unlock(&nm_i->nat_tree_lock);
> > >  }
> > >
> > > @@ -1121,10 +1130,14 @@ continue_unlock:
> > >
> > >  			/* called by fsync() */
> > >  			if (ino && IS_DNODE(page)) {
> > > -				int mark = !is_checkpointed_node(sbi, ino);
> > >  				set_fsync_mark(page, 1);
> > > -				if (IS_INODE(page))
> > > -					set_dentry_mark(page, mark);
> > > +				if (IS_INODE(page)) {
> > > +					if (!is_checkpointed_node(sbi, ino) &&
> > > +						!has_fsynced_inode(sbi, ino))
> > > +						set_dentry_mark(page, 1);
> > > +					else
> > > +						set_dentry_mark(page, 0);
> > > +				}
> > >  				nwritten++;
> > >  			} else {
> > >  				set_fsync_mark(page, 0);
> > > @@ -1912,6 +1925,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> > >  				write_unlock(&nm_i->nat_tree_lock);
> > >  			} else {
> > >  				write_lock(&nm_i->nat_tree_lock);
> > > +				nat_reset_flag(ne);
> > >  				__clear_nat_cache_dirty(nm_i, ne);
> > >  				write_unlock(&nm_i->nat_tree_lock);
> > >  			}
> > > diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
> > > index 3043778..b8ba63c 100644
> > > --- a/fs/f2fs/node.h
> > > +++ b/fs/f2fs/node.h
> > > @@ -41,7 +41,8 @@ struct node_info {
> > >
> > >  enum {
> > >  	IS_CHECKPOINTED,	/* is it checkpointed before? */
> > > -	HAS_FSYNC_MARK,		/* has the latest node fsync mark? */
> > > +	HAS_FSYNCED_INODE,	/* is the inode fsynced before? */
> > > +	HAS_LAST_FSYNC,		/* has the latest node fsync mark? */
> > >  };
> > >
> > >  struct nat_entry {
> > > @@ -60,15 +61,9 @@ struct nat_entry {
> > >  #define nat_set_version(nat, v)		(nat->ni.version = v)
> > >
> > >  #define __set_nat_cache_dirty(nm_i, ne)					\
> > > -	do {								\
> > > -		set_nat_flag(ne, IS_CHECKPOINTED, false);		\
> > > -		list_move_tail(&ne->list, &nm_i->dirty_nat_entries);	\
> > > -	} while (0)
> > > +		list_move_tail(&ne->list, &nm_i->dirty_nat_entries);
> > >  #define __clear_nat_cache_dirty(nm_i, ne)				\
> > > -	do {								\
> > > -		set_nat_flag(ne, IS_CHECKPOINTED, true);		\
> > > -		list_move_tail(&ne->list, &nm_i->nat_entries);		\
> > > -	} while (0)
> > > +		list_move_tail(&ne->list, &nm_i->nat_entries);
> > >  #define inc_node_version(version)	(++version)
> > >
> > >  static inline void set_nat_flag(struct nat_entry *ne,
> > > @@ -87,6 +82,14 @@ static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type)
> > >  	return ne->flag & mask;
> > >  }
> > >
> > > +static inline void nat_reset_flag(struct nat_entry *ne)
> > > +{
> > > +	/* these states can be set only after checkpoint was done */
> > > +	set_nat_flag(ne, IS_CHECKPOINTED, true);
> > > +	set_nat_flag(ne, HAS_FSYNCED_INODE, false);
> > > +	set_nat_flag(ne, HAS_LAST_FSYNC, true);
> > > +}
> > > +
> > >  static inline void node_info_from_raw_nat(struct node_info *ni,
> > >  						struct f2fs_nat_entry *raw_ne)
> > >  {
> > > --
> > > 1.8.5.2 (Apple Git-48)
> > >
> > >
> > > ------------------------------------------------------------------------------
> > > Want excitement?
> > > Manually upgrade your production database.
> > > When you want reliability, choose Perforce
> > > Perforce version control. Predictably reliable.
> > > http://pubads.g.doubleclick.net/gampad/clk?id=157508191&iu=/4140/ostg.clktrk
> > > _______________________________________________
> > > Linux-f2fs-devel mailing list
> > > Linux-f2fs-devel@lists.sourceforge.net
> > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> >



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [f2fs-dev] [PATCH 2/3] f2fs: fix conditions to remain recovery information in f2fs_sync_file
  2014-09-22  9:20       ` Chao Yu
@ 2014-09-23  4:52         ` Jaegeuk Kim
  2014-09-23  8:50           ` Chao Yu
  0 siblings, 1 reply; 12+ messages in thread
From: Jaegeuk Kim @ 2014-09-23  4:52 UTC (permalink / raw)
  To: Chao Yu
  Cc: 'Huang Ying', linux-kernel, linux-fsdevel, linux-f2fs-devel

On Mon, Sep 22, 2014 at 05:20:19PM +0800, Chao Yu wrote:
> > -----Original Message-----
> > From: Huang Ying [mailto:ying.huang@intel.com]
> > Sent: Monday, September 22, 2014 3:39 PM
> > To: Chao Yu
> > Cc: 'Jaegeuk Kim'; linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> > linux-f2fs-devel@lists.sourceforge.net
> > Subject: Re: [f2fs-dev] [PATCH 2/3] f2fs: fix conditions to remain recovery information in
> > f2fs_sync_file
> > 
> > On Mon, 2014-09-22 at 15:24 +0800, Chao Yu wrote:
> > > Hi Jaegeuk, Huang,
> > >
> > > > -----Original Message-----
> > > > From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> > > > Sent: Thursday, September 18, 2014 1:51 PM
> > > > To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> > > > linux-f2fs-devel@lists.sourceforge.net
> > > > Cc: Jaegeuk Kim; Huang Ying
> > > > Subject: [f2fs-dev] [PATCH 2/3] f2fs: fix conditions to remain recovery information in
> > > > f2fs_sync_file
> > > >
> > > > This patch revisited whole the recovery information during the f2fs_sync_file.
> > > >
> > > > In this patch, there are three information to make a decision.
> > > >
> > > > a) IS_CHECKPOINTED,	/* is it checkpointed before? */
> > > > b) HAS_FSYNCED_INODE,	/* is the inode fsynced before? */
> > > > c) HAS_LAST_FSYNC,	/* has the latest node fsync mark? */
> > > >
> > > > And, the scenarios for our rule are based on:
> > > >
> > > > [Term] F: fsync_mark, D: dentry_mark
> > > >
> > > > 1. inode(x) | CP | inode(x) | dnode(F)
> > > > 2. inode(x) | CP | inode(F) | dnode(F)
> > > > 3. inode(x) | CP | dnode(F) | inode(x) | inode(F)
> > > > 4. inode(x) | CP | dnode(F) | inode(F)
> > > > 5. CP | inode(x) | dnode(F) | inode(DF)
> > > > 6. CP | inode(DF) | dnode(F)
> > > > 7. CP | dnode(F) | inode(DF)
> > > > 8. CP | dnode(F) | inode(x) | inode(DF)
> > >
> > > No sure, do we missed these cases:
> > > inode(x) | CP | inode(F) | dnode(x) -> write inode(F)
> > > CP | inode(DF) | dnode(x) -> write inode(F)
> > >
> > > In these cases we will write another inode with fsync flag because our last
> > > dnode is written out to disk by bdi-flusher (HAS_LAST_FSYNC is not marked). But
> > > this appended inode is not useful.
> > >
> > > AFAIK, HAS_LAST_FSYNC(AKA fsync_done) is introduced in commit 479f40c44ae3
> > > ("f2fs: skip unnecessary node writes during fsync") to avoid writting multiple
> > > unneeded inode page by multiple redundant fsync calls. But for now, its role can
> > > be taken by HAS_FSYNCED_INODE.
> > > So, can we remove this flag to simplify our logic of fsync flow?
> > >
> > > Then in fsync flow, the rule can be:
> > > If CPed before, there must be a inode(F) written in warm node chain;
> > 
> > How about
> > 
> > inode(x) | CP | dnode(F)
> 
> Oh, I missed this one, thanks for remindering that.
> 
> There is another case:
> inode(x) | CP | dnode(F) | dnode(x) -> write inode(F)
> It seems we will append another unneeded inode(F) in this patch also due to
> no HAS_LAST_FSYNC in nat entry cache of inode.

As the current rule for roll-forward recovery, we need inode(F) to find the
latest mark. This can also be used to distinguish fsynced inode from writebacked
inodes.

> 
> > 
> > > If not CPed before, there must be a inode(DF) written in warm node chain.
> > 
> > For example below:
> > 
> > 1) checkpoint
> > 2) create "a", change "a"
> > 3) fsync "a"
> > 4) open "a", change "a"
> > 
> > Do we want recovery to stop at dnode(F) in step 3) or stop at dnode(x)
> > produced by step 4)?
> 
> To my understanding, we will recover all info related to fsynced nodes in warm
> node chain. So will produce to step 4 if changed nodes in step 4 are flushed to
> device.

Current rule should stop at 3) fsync "a". It won't recover 4)'s inode, since it
was just writebacked.

If we'd like to recover whole the inode and its data, we should traverse all the
recovery paths from the sketch.

Thanks,

> 
> Thanks,
> Yu
> > 
> > Best Regards,
> > Huang, Ying
> > 
> > > >
> > > > For example, #3, the three conditions should be changed as follows.
> > > >
> > > >    inode(x) | CP | dnode(F) | inode(x) | inode(F)
> > > > a)    x       o      o          o          o
> > > > b)    x       x      x          x          o
> > > > c)    x       o      o          x          o
> > > >
> > > > If f2fs_sync_file stops   ------^,
> > > >  it should write inode(F)    --------------^
> > > >
> > > > So, the need_inode_block_update should return true, since
> > > >  c) get_nat_flag(e, HAS_LAST_FSYNC), is false.
> > > >
> > > > For example, #8,
> > > >       CP | alloc | dnode(F) | inode(x) | inode(DF)
> > > > a)    o      x        x          x          x
> > > > b)    x               x          x          o
> > > > c)    o               o          x          o
> > > >
> > > > If f2fs_sync_file stops   -------^,
> > > >  it should write inode(DF)    --------------^
> > > >
> > > > Note that, the roll-forward policy should follow this rule, which means,
> > > > if there are any missing blocks, we doesn't need to recover that inode.
> > > >
> > > > Signed-off-by: Huang Ying <ying.huang@intel.com>
> > > > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > > > ---
> > > >  fs/f2fs/data.c |  3 ---
> > > >  fs/f2fs/f2fs.h |  6 +++---
> > > >  fs/f2fs/file.c | 10 ++++++----
> > > >  fs/f2fs/node.c | 56 +++++++++++++++++++++++++++++++++++---------------------
> > > >  fs/f2fs/node.h | 21 ++++++++++++---------
> > > >  5 files changed, 56 insertions(+), 40 deletions(-)
> > > >
> > > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > > > index 0e37658..fdc3dbe 100644
> > > > --- a/fs/f2fs/data.c
> > > > +++ b/fs/f2fs/data.c
> > > > @@ -1089,9 +1089,6 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
> > > >  	if (check_direct_IO(inode, rw, iter, offset))
> > > >  		return 0;
> > > >
> > > > -	/* clear fsync mark to recover these blocks */
> > > > -	fsync_mark_clear(F2FS_I_SB(inode), inode->i_ino);
> > > > -
> > > >  	trace_f2fs_direct_IO_enter(inode, offset, count, rw);
> > > >
> > > >  	err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
> > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > > index 9fc1bcd..fd44083 100644
> > > > --- a/fs/f2fs/f2fs.h
> > > > +++ b/fs/f2fs/f2fs.h
> > > > @@ -1224,9 +1224,9 @@ struct dnode_of_data;
> > > >  struct node_info;
> > > >
> > > >  bool available_free_memory(struct f2fs_sb_info *, int);
> > > > -int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
> > > > -bool fsync_mark_done(struct f2fs_sb_info *, nid_t);
> > > > -void fsync_mark_clear(struct f2fs_sb_info *, nid_t);
> > > > +bool is_checkpointed_node(struct f2fs_sb_info *, nid_t);
> > > > +bool has_fsynced_inode(struct f2fs_sb_info *, nid_t);
> > > > +bool need_inode_block_update(struct f2fs_sb_info *, nid_t);
> > > >  void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
> > > >  int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
> > > >  int truncate_inode_blocks(struct inode *, pgoff_t);
> > > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > > index af06e22..3035c79 100644
> > > > --- a/fs/f2fs/file.c
> > > > +++ b/fs/f2fs/file.c
> > > > @@ -207,15 +207,17 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int
> > > > datasync)
> > > >  			up_write(&fi->i_sem);
> > > >  		}
> > > >  	} else {
> > > > -		/* if there is no written node page, write its inode page */
> > > > -		while (!sync_node_pages(sbi, ino, &wbc)) {
> > > > -			if (fsync_mark_done(sbi, ino))
> > > > -				goto out;
> > > > +sync_nodes:
> > > > +		sync_node_pages(sbi, ino, &wbc);
> > > > +
> > > > +		if (need_inode_block_update(sbi, ino)) {
> > > >  			mark_inode_dirty_sync(inode);
> > > >  			ret = f2fs_write_inode(inode, NULL);
> > > >  			if (ret)
> > > >  				goto out;
> > > > +			goto sync_nodes;
> > > >  		}
> > > > +
> > > >  		ret = wait_on_node_pages_writeback(sbi, ino);
> > > >  		if (ret)
> > > >  			goto out;
> > > > diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> > > > index d19d6b1..7a2d9c9 100644
> > > > --- a/fs/f2fs/node.c
> > > > +++ b/fs/f2fs/node.c
> > > > @@ -123,44 +123,48 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct
> > > > nat_entry *e)
> > > >  	kmem_cache_free(nat_entry_slab, e);
> > > >  }
> > > >
> > > > -int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
> > > > +bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
> > > >  {
> > > >  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> > > >  	struct nat_entry *e;
> > > > -	int is_cp = 1;
> > > > +	bool is_cp = true;
> > > >
> > > >  	read_lock(&nm_i->nat_tree_lock);
> > > >  	e = __lookup_nat_cache(nm_i, nid);
> > > >  	if (e && !get_nat_flag(e, IS_CHECKPOINTED))
> > > > -		is_cp = 0;
> > > > +		is_cp = false;
> > > >  	read_unlock(&nm_i->nat_tree_lock);
> > > >  	return is_cp;
> > > >  }
> > > >
> > > > -bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid)
> > > > +bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino)
> > > >  {
> > > >  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> > > >  	struct nat_entry *e;
> > > > -	bool fsync_done = false;
> > > > +	bool fsynced = false;
> > > >
> > > >  	read_lock(&nm_i->nat_tree_lock);
> > > > -	e = __lookup_nat_cache(nm_i, nid);
> > > > -	if (e)
> > > > -		fsync_done = get_nat_flag(e, HAS_FSYNC_MARK);
> > > > +	e = __lookup_nat_cache(nm_i, ino);
> > > > +	if (e && get_nat_flag(e, HAS_FSYNCED_INODE))
> > > > +		fsynced = true;
> > > >  	read_unlock(&nm_i->nat_tree_lock);
> > > > -	return fsync_done;
> > > > +	return fsynced;
> > > >  }
> > > >
> > > > -void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid)
> > > > +bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
> > > >  {
> > > >  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> > > >  	struct nat_entry *e;
> > > > +	bool need_update = true;
> > > >
> > > > -	write_lock(&nm_i->nat_tree_lock);
> > > > -	e = __lookup_nat_cache(nm_i, nid);
> > > > -	if (e)
> > > > -		set_nat_flag(e, HAS_FSYNC_MARK, false);
> > > > -	write_unlock(&nm_i->nat_tree_lock);
> > > > +	read_lock(&nm_i->nat_tree_lock);
> > > > +	e = __lookup_nat_cache(nm_i, ino);
> > > > +	if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
> > > > +			(get_nat_flag(e, IS_CHECKPOINTED) ||
> > > > +			 get_nat_flag(e, HAS_FSYNCED_INODE)))
> > > > +		need_update = false;
> > > > +	read_unlock(&nm_i->nat_tree_lock);
> > > > +	return need_update;
> > > >  }
> > > >
> > > >  static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
> > > > @@ -176,7 +180,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t
> > > > nid)
> > > >  	}
> > > >  	memset(new, 0, sizeof(struct nat_entry));
> > > >  	nat_set_nid(new, nid);
> > > > -	set_nat_flag(new, IS_CHECKPOINTED, true);
> > > > +	nat_reset_flag(new);
> > > >  	list_add_tail(&new->list, &nm_i->nat_entries);
> > > >  	nm_i->nat_cnt++;
> > > >  	return new;
> > > > @@ -244,12 +248,17 @@ retry:
> > > >
> > > >  	/* change address */
> > > >  	nat_set_blkaddr(e, new_blkaddr);
> > > > +	if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
> > > > +		set_nat_flag(e, IS_CHECKPOINTED, false);
> > > >  	__set_nat_cache_dirty(nm_i, e);
> > > >
> > > >  	/* update fsync_mark if its inode nat entry is still alive */
> > > >  	e = __lookup_nat_cache(nm_i, ni->ino);
> > > > -	if (e)
> > > > -		set_nat_flag(e, HAS_FSYNC_MARK, fsync_done);
> > > > +	if (e) {
> > > > +		if (fsync_done && ni->nid == ni->ino)
> > > > +			set_nat_flag(e, HAS_FSYNCED_INODE, true);
> > > > +		set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
> > > > +	}
> > > >  	write_unlock(&nm_i->nat_tree_lock);
> > > >  }
> > > >
> > > > @@ -1121,10 +1130,14 @@ continue_unlock:
> > > >
> > > >  			/* called by fsync() */
> > > >  			if (ino && IS_DNODE(page)) {
> > > > -				int mark = !is_checkpointed_node(sbi, ino);
> > > >  				set_fsync_mark(page, 1);
> > > > -				if (IS_INODE(page))
> > > > -					set_dentry_mark(page, mark);
> > > > +				if (IS_INODE(page)) {
> > > > +					if (!is_checkpointed_node(sbi, ino) &&
> > > > +						!has_fsynced_inode(sbi, ino))
> > > > +						set_dentry_mark(page, 1);
> > > > +					else
> > > > +						set_dentry_mark(page, 0);
> > > > +				}
> > > >  				nwritten++;
> > > >  			} else {
> > > >  				set_fsync_mark(page, 0);
> > > > @@ -1912,6 +1925,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> > > >  				write_unlock(&nm_i->nat_tree_lock);
> > > >  			} else {
> > > >  				write_lock(&nm_i->nat_tree_lock);
> > > > +				nat_reset_flag(ne);
> > > >  				__clear_nat_cache_dirty(nm_i, ne);
> > > >  				write_unlock(&nm_i->nat_tree_lock);
> > > >  			}
> > > > diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
> > > > index 3043778..b8ba63c 100644
> > > > --- a/fs/f2fs/node.h
> > > > +++ b/fs/f2fs/node.h
> > > > @@ -41,7 +41,8 @@ struct node_info {
> > > >
> > > >  enum {
> > > >  	IS_CHECKPOINTED,	/* is it checkpointed before? */
> > > > -	HAS_FSYNC_MARK,		/* has the latest node fsync mark? */
> > > > +	HAS_FSYNCED_INODE,	/* is the inode fsynced before? */
> > > > +	HAS_LAST_FSYNC,		/* has the latest node fsync mark? */
> > > >  };
> > > >
> > > >  struct nat_entry {
> > > > @@ -60,15 +61,9 @@ struct nat_entry {
> > > >  #define nat_set_version(nat, v)		(nat->ni.version = v)
> > > >
> > > >  #define __set_nat_cache_dirty(nm_i, ne)					\
> > > > -	do {								\
> > > > -		set_nat_flag(ne, IS_CHECKPOINTED, false);		\
> > > > -		list_move_tail(&ne->list, &nm_i->dirty_nat_entries);	\
> > > > -	} while (0)
> > > > +		list_move_tail(&ne->list, &nm_i->dirty_nat_entries);
> > > >  #define __clear_nat_cache_dirty(nm_i, ne)				\
> > > > -	do {								\
> > > > -		set_nat_flag(ne, IS_CHECKPOINTED, true);		\
> > > > -		list_move_tail(&ne->list, &nm_i->nat_entries);		\
> > > > -	} while (0)
> > > > +		list_move_tail(&ne->list, &nm_i->nat_entries);
> > > >  #define inc_node_version(version)	(++version)
> > > >
> > > >  static inline void set_nat_flag(struct nat_entry *ne,
> > > > @@ -87,6 +82,14 @@ static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type)
> > > >  	return ne->flag & mask;
> > > >  }
> > > >
> > > > +static inline void nat_reset_flag(struct nat_entry *ne)
> > > > +{
> > > > +	/* these states can be set only after checkpoint was done */
> > > > +	set_nat_flag(ne, IS_CHECKPOINTED, true);
> > > > +	set_nat_flag(ne, HAS_FSYNCED_INODE, false);
> > > > +	set_nat_flag(ne, HAS_LAST_FSYNC, true);
> > > > +}
> > > > +
> > > >  static inline void node_info_from_raw_nat(struct node_info *ni,
> > > >  						struct f2fs_nat_entry *raw_ne)
> > > >  {
> > > > --
> > > > 1.8.5.2 (Apple Git-48)
> > > >
> > > >
> > > > ------------------------------------------------------------------------------
> > > > Want excitement?
> > > > Manually upgrade your production database.
> > > > When you want reliability, choose Perforce
> > > > Perforce version control. Predictably reliable.
> > > > http://pubads.g.doubleclick.net/gampad/clk?id=157508191&iu=/4140/ostg.clktrk
> > > > _______________________________________________
> > > > Linux-f2fs-devel mailing list
> > > > Linux-f2fs-devel@lists.sourceforge.net
> > > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> > >

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [f2fs-dev] [PATCH 2/3] f2fs: fix conditions to remain recovery information in f2fs_sync_file
  2014-09-23  4:52         ` Jaegeuk Kim
@ 2014-09-23  8:50           ` Chao Yu
  0 siblings, 0 replies; 12+ messages in thread
From: Chao Yu @ 2014-09-23  8:50 UTC (permalink / raw)
  To: 'Jaegeuk Kim', 'Huang Ying'
  Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

> -----Original Message-----
> From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> Sent: Tuesday, September 23, 2014 12:53 PM
> To: Chao Yu
> Cc: 'Huang Ying'; linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> linux-f2fs-devel@lists.sourceforge.net
> Subject: Re: [f2fs-dev] [PATCH 2/3] f2fs: fix conditions to remain recovery information in
> f2fs_sync_file
> 
> On Mon, Sep 22, 2014 at 05:20:19PM +0800, Chao Yu wrote:
> > > -----Original Message-----
> > > From: Huang Ying [mailto:ying.huang@intel.com]
> > > Sent: Monday, September 22, 2014 3:39 PM
> > > To: Chao Yu
> > > Cc: 'Jaegeuk Kim'; linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> > > linux-f2fs-devel@lists.sourceforge.net
> > > Subject: Re: [f2fs-dev] [PATCH 2/3] f2fs: fix conditions to remain recovery information
> in
> > > f2fs_sync_file
> > >
> > > On Mon, 2014-09-22 at 15:24 +0800, Chao Yu wrote:
> > > > Hi Jaegeuk, Huang,
> > > >
> > > > > -----Original Message-----
> > > > > From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> > > > > Sent: Thursday, September 18, 2014 1:51 PM
> > > > > To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> > > > > linux-f2fs-devel@lists.sourceforge.net
> > > > > Cc: Jaegeuk Kim; Huang Ying
> > > > > Subject: [f2fs-dev] [PATCH 2/3] f2fs: fix conditions to remain recovery information
> in
> > > > > f2fs_sync_file
> > > > >
> > > > > This patch revisited whole the recovery information during the f2fs_sync_file.
> > > > >
> > > > > In this patch, there are three information to make a decision.
> > > > >
> > > > > a) IS_CHECKPOINTED,	/* is it checkpointed before? */
> > > > > b) HAS_FSYNCED_INODE,	/* is the inode fsynced before? */
> > > > > c) HAS_LAST_FSYNC,	/* has the latest node fsync mark? */
> > > > >
> > > > > And, the scenarios for our rule are based on:
> > > > >
> > > > > [Term] F: fsync_mark, D: dentry_mark
> > > > >
> > > > > 1. inode(x) | CP | inode(x) | dnode(F)
> > > > > 2. inode(x) | CP | inode(F) | dnode(F)
> > > > > 3. inode(x) | CP | dnode(F) | inode(x) | inode(F)
> > > > > 4. inode(x) | CP | dnode(F) | inode(F)
> > > > > 5. CP | inode(x) | dnode(F) | inode(DF)
> > > > > 6. CP | inode(DF) | dnode(F)
> > > > > 7. CP | dnode(F) | inode(DF)
> > > > > 8. CP | dnode(F) | inode(x) | inode(DF)
> > > >
> > > > No sure, do we missed these cases:
> > > > inode(x) | CP | inode(F) | dnode(x) -> write inode(F)
> > > > CP | inode(DF) | dnode(x) -> write inode(F)
> > > >
> > > > In these cases we will write another inode with fsync flag because our last
> > > > dnode is written out to disk by bdi-flusher (HAS_LAST_FSYNC is not marked). But
> > > > this appended inode is not useful.
> > > >
> > > > AFAIK, HAS_LAST_FSYNC(AKA fsync_done) is introduced in commit 479f40c44ae3
> > > > ("f2fs: skip unnecessary node writes during fsync") to avoid writting multiple
> > > > unneeded inode page by multiple redundant fsync calls. But for now, its role can
> > > > be taken by HAS_FSYNCED_INODE.
> > > > So, can we remove this flag to simplify our logic of fsync flow?
> > > >
> > > > Then in fsync flow, the rule can be:
> > > > If CPed before, there must be a inode(F) written in warm node chain;
> > >
> > > How about
> > >
> > > inode(x) | CP | dnode(F)
> >
> > Oh, I missed this one, thanks for remindering that.
> >
> > There is another case:
> > inode(x) | CP | dnode(F) | dnode(x) -> write inode(F)
> > It seems we will append another unneeded inode(F) in this patch also due to
> > no HAS_LAST_FSYNC in nat entry cache of inode.
> 
> As the current rule for roll-forward recovery, we need inode(F) to find the
> latest mark. This can also be used to distinguish fsynced inode from writebacked
> inodes.

Got it now, thanks for your explanation!

> 
> >
> > >
> > > > If not CPed before, there must be a inode(DF) written in warm node chain.
> > >
> > > For example below:
> > >
> > > 1) checkpoint
> > > 2) create "a", change "a"
> > > 3) fsync "a"
> > > 4) open "a", change "a"
> > >
> > > Do we want recovery to stop at dnode(F) in step 3) or stop at dnode(x)
> > > produced by step 4)?
> >
> > To my understanding, we will recover all info related to fsynced nodes in warm
> > node chain. So will produce to step 4 if changed nodes in step 4 are flushed to
> > device.

To Huang,
My mistaken for the wrong explanation due to my misunderstanding about the recover
code, sorry about that!

> 
> Current rule should stop at 3) fsync "a". It won't recover 4)'s inode, since it
> was just writebacked.
> 
> If we'd like to recover whole the inode and its data, we should traverse all the
> recovery paths from the sketch.

I am missing some codes when traverse the recover flow.
You're right, and thanks for correcting me.

Regards,
Yu

> 
> Thanks,
> 
> >
> > Thanks,
> > Yu
> > >
> > > Best Regards,
> > > Huang, Ying
> > >
> > > > >
> > > > > For example, #3, the three conditions should be changed as follows.
> > > > >
> > > > >    inode(x) | CP | dnode(F) | inode(x) | inode(F)
> > > > > a)    x       o      o          o          o
> > > > > b)    x       x      x          x          o
> > > > > c)    x       o      o          x          o
> > > > >
> > > > > If f2fs_sync_file stops   ------^,
> > > > >  it should write inode(F)    --------------^
> > > > >
> > > > > So, the need_inode_block_update should return true, since
> > > > >  c) get_nat_flag(e, HAS_LAST_FSYNC), is false.
> > > > >
> > > > > For example, #8,
> > > > >       CP | alloc | dnode(F) | inode(x) | inode(DF)
> > > > > a)    o      x        x          x          x
> > > > > b)    x               x          x          o
> > > > > c)    o               o          x          o
> > > > >
> > > > > If f2fs_sync_file stops   -------^,
> > > > >  it should write inode(DF)    --------------^
> > > > >
> > > > > Note that, the roll-forward policy should follow this rule, which means,
> > > > > if there are any missing blocks, we doesn't need to recover that inode.
> > > > >
> > > > > Signed-off-by: Huang Ying <ying.huang@intel.com>
> > > > > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > > > > ---
> > > > >  fs/f2fs/data.c |  3 ---
> > > > >  fs/f2fs/f2fs.h |  6 +++---
> > > > >  fs/f2fs/file.c | 10 ++++++----
> > > > >  fs/f2fs/node.c | 56 +++++++++++++++++++++++++++++++++++---------------------
> > > > >  fs/f2fs/node.h | 21 ++++++++++++---------
> > > > >  5 files changed, 56 insertions(+), 40 deletions(-)
> > > > >
> > > > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > > > > index 0e37658..fdc3dbe 100644
> > > > > --- a/fs/f2fs/data.c
> > > > > +++ b/fs/f2fs/data.c
> > > > > @@ -1089,9 +1089,6 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
> > > > >  	if (check_direct_IO(inode, rw, iter, offset))
> > > > >  		return 0;
> > > > >
> > > > > -	/* clear fsync mark to recover these blocks */
> > > > > -	fsync_mark_clear(F2FS_I_SB(inode), inode->i_ino);
> > > > > -
> > > > >  	trace_f2fs_direct_IO_enter(inode, offset, count, rw);
> > > > >
> > > > >  	err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
> > > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > > > index 9fc1bcd..fd44083 100644
> > > > > --- a/fs/f2fs/f2fs.h
> > > > > +++ b/fs/f2fs/f2fs.h
> > > > > @@ -1224,9 +1224,9 @@ struct dnode_of_data;
> > > > >  struct node_info;
> > > > >
> > > > >  bool available_free_memory(struct f2fs_sb_info *, int);
> > > > > -int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
> > > > > -bool fsync_mark_done(struct f2fs_sb_info *, nid_t);
> > > > > -void fsync_mark_clear(struct f2fs_sb_info *, nid_t);
> > > > > +bool is_checkpointed_node(struct f2fs_sb_info *, nid_t);
> > > > > +bool has_fsynced_inode(struct f2fs_sb_info *, nid_t);
> > > > > +bool need_inode_block_update(struct f2fs_sb_info *, nid_t);
> > > > >  void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
> > > > >  int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
> > > > >  int truncate_inode_blocks(struct inode *, pgoff_t);
> > > > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > > > index af06e22..3035c79 100644
> > > > > --- a/fs/f2fs/file.c
> > > > > +++ b/fs/f2fs/file.c
> > > > > @@ -207,15 +207,17 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end,
> int
> > > > > datasync)
> > > > >  			up_write(&fi->i_sem);
> > > > >  		}
> > > > >  	} else {
> > > > > -		/* if there is no written node page, write its inode page */
> > > > > -		while (!sync_node_pages(sbi, ino, &wbc)) {
> > > > > -			if (fsync_mark_done(sbi, ino))
> > > > > -				goto out;
> > > > > +sync_nodes:
> > > > > +		sync_node_pages(sbi, ino, &wbc);
> > > > > +
> > > > > +		if (need_inode_block_update(sbi, ino)) {
> > > > >  			mark_inode_dirty_sync(inode);
> > > > >  			ret = f2fs_write_inode(inode, NULL);
> > > > >  			if (ret)
> > > > >  				goto out;
> > > > > +			goto sync_nodes;
> > > > >  		}
> > > > > +
> > > > >  		ret = wait_on_node_pages_writeback(sbi, ino);
> > > > >  		if (ret)
> > > > >  			goto out;
> > > > > diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> > > > > index d19d6b1..7a2d9c9 100644
> > > > > --- a/fs/f2fs/node.c
> > > > > +++ b/fs/f2fs/node.c
> > > > > @@ -123,44 +123,48 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct
> > > > > nat_entry *e)
> > > > >  	kmem_cache_free(nat_entry_slab, e);
> > > > >  }
> > > > >
> > > > > -int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
> > > > > +bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
> > > > >  {
> > > > >  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> > > > >  	struct nat_entry *e;
> > > > > -	int is_cp = 1;
> > > > > +	bool is_cp = true;
> > > > >
> > > > >  	read_lock(&nm_i->nat_tree_lock);
> > > > >  	e = __lookup_nat_cache(nm_i, nid);
> > > > >  	if (e && !get_nat_flag(e, IS_CHECKPOINTED))
> > > > > -		is_cp = 0;
> > > > > +		is_cp = false;
> > > > >  	read_unlock(&nm_i->nat_tree_lock);
> > > > >  	return is_cp;
> > > > >  }
> > > > >
> > > > > -bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid)
> > > > > +bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino)
> > > > >  {
> > > > >  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> > > > >  	struct nat_entry *e;
> > > > > -	bool fsync_done = false;
> > > > > +	bool fsynced = false;
> > > > >
> > > > >  	read_lock(&nm_i->nat_tree_lock);
> > > > > -	e = __lookup_nat_cache(nm_i, nid);
> > > > > -	if (e)
> > > > > -		fsync_done = get_nat_flag(e, HAS_FSYNC_MARK);
> > > > > +	e = __lookup_nat_cache(nm_i, ino);
> > > > > +	if (e && get_nat_flag(e, HAS_FSYNCED_INODE))
> > > > > +		fsynced = true;
> > > > >  	read_unlock(&nm_i->nat_tree_lock);
> > > > > -	return fsync_done;
> > > > > +	return fsynced;
> > > > >  }
> > > > >
> > > > > -void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid)
> > > > > +bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
> > > > >  {
> > > > >  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> > > > >  	struct nat_entry *e;
> > > > > +	bool need_update = true;
> > > > >
> > > > > -	write_lock(&nm_i->nat_tree_lock);
> > > > > -	e = __lookup_nat_cache(nm_i, nid);
> > > > > -	if (e)
> > > > > -		set_nat_flag(e, HAS_FSYNC_MARK, false);
> > > > > -	write_unlock(&nm_i->nat_tree_lock);
> > > > > +	read_lock(&nm_i->nat_tree_lock);
> > > > > +	e = __lookup_nat_cache(nm_i, ino);
> > > > > +	if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
> > > > > +			(get_nat_flag(e, IS_CHECKPOINTED) ||
> > > > > +			 get_nat_flag(e, HAS_FSYNCED_INODE)))
> > > > > +		need_update = false;
> > > > > +	read_unlock(&nm_i->nat_tree_lock);
> > > > > +	return need_update;
> > > > >  }
> > > > >
> > > > >  static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
> > > > > @@ -176,7 +180,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i,
> nid_t
> > > > > nid)
> > > > >  	}
> > > > >  	memset(new, 0, sizeof(struct nat_entry));
> > > > >  	nat_set_nid(new, nid);
> > > > > -	set_nat_flag(new, IS_CHECKPOINTED, true);
> > > > > +	nat_reset_flag(new);
> > > > >  	list_add_tail(&new->list, &nm_i->nat_entries);
> > > > >  	nm_i->nat_cnt++;
> > > > >  	return new;
> > > > > @@ -244,12 +248,17 @@ retry:
> > > > >
> > > > >  	/* change address */
> > > > >  	nat_set_blkaddr(e, new_blkaddr);
> > > > > +	if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
> > > > > +		set_nat_flag(e, IS_CHECKPOINTED, false);
> > > > >  	__set_nat_cache_dirty(nm_i, e);
> > > > >
> > > > >  	/* update fsync_mark if its inode nat entry is still alive */
> > > > >  	e = __lookup_nat_cache(nm_i, ni->ino);
> > > > > -	if (e)
> > > > > -		set_nat_flag(e, HAS_FSYNC_MARK, fsync_done);
> > > > > +	if (e) {
> > > > > +		if (fsync_done && ni->nid == ni->ino)
> > > > > +			set_nat_flag(e, HAS_FSYNCED_INODE, true);
> > > > > +		set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
> > > > > +	}
> > > > >  	write_unlock(&nm_i->nat_tree_lock);
> > > > >  }
> > > > >
> > > > > @@ -1121,10 +1130,14 @@ continue_unlock:
> > > > >
> > > > >  			/* called by fsync() */
> > > > >  			if (ino && IS_DNODE(page)) {
> > > > > -				int mark = !is_checkpointed_node(sbi, ino);
> > > > >  				set_fsync_mark(page, 1);
> > > > > -				if (IS_INODE(page))
> > > > > -					set_dentry_mark(page, mark);
> > > > > +				if (IS_INODE(page)) {
> > > > > +					if (!is_checkpointed_node(sbi, ino) &&
> > > > > +						!has_fsynced_inode(sbi, ino))
> > > > > +						set_dentry_mark(page, 1);
> > > > > +					else
> > > > > +						set_dentry_mark(page, 0);
> > > > > +				}
> > > > >  				nwritten++;
> > > > >  			} else {
> > > > >  				set_fsync_mark(page, 0);
> > > > > @@ -1912,6 +1925,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> > > > >  				write_unlock(&nm_i->nat_tree_lock);
> > > > >  			} else {
> > > > >  				write_lock(&nm_i->nat_tree_lock);
> > > > > +				nat_reset_flag(ne);
> > > > >  				__clear_nat_cache_dirty(nm_i, ne);
> > > > >  				write_unlock(&nm_i->nat_tree_lock);
> > > > >  			}
> > > > > diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
> > > > > index 3043778..b8ba63c 100644
> > > > > --- a/fs/f2fs/node.h
> > > > > +++ b/fs/f2fs/node.h
> > > > > @@ -41,7 +41,8 @@ struct node_info {
> > > > >
> > > > >  enum {
> > > > >  	IS_CHECKPOINTED,	/* is it checkpointed before? */
> > > > > -	HAS_FSYNC_MARK,		/* has the latest node fsync mark? */
> > > > > +	HAS_FSYNCED_INODE,	/* is the inode fsynced before? */
> > > > > +	HAS_LAST_FSYNC,		/* has the latest node fsync mark? */
> > > > >  };
> > > > >
> > > > >  struct nat_entry {
> > > > > @@ -60,15 +61,9 @@ struct nat_entry {
> > > > >  #define nat_set_version(nat, v)		(nat->ni.version = v)
> > > > >
> > > > >  #define __set_nat_cache_dirty(nm_i, ne)					\
> > > > > -	do {								\
> > > > > -		set_nat_flag(ne, IS_CHECKPOINTED, false);		\
> > > > > -		list_move_tail(&ne->list, &nm_i->dirty_nat_entries);	\
> > > > > -	} while (0)
> > > > > +		list_move_tail(&ne->list, &nm_i->dirty_nat_entries);
> > > > >  #define __clear_nat_cache_dirty(nm_i, ne)				\
> > > > > -	do {								\
> > > > > -		set_nat_flag(ne, IS_CHECKPOINTED, true);		\
> > > > > -		list_move_tail(&ne->list, &nm_i->nat_entries);		\
> > > > > -	} while (0)
> > > > > +		list_move_tail(&ne->list, &nm_i->nat_entries);
> > > > >  #define inc_node_version(version)	(++version)
> > > > >
> > > > >  static inline void set_nat_flag(struct nat_entry *ne,
> > > > > @@ -87,6 +82,14 @@ static inline bool get_nat_flag(struct nat_entry *ne, unsigned int
> type)
> > > > >  	return ne->flag & mask;
> > > > >  }
> > > > >
> > > > > +static inline void nat_reset_flag(struct nat_entry *ne)
> > > > > +{
> > > > > +	/* these states can be set only after checkpoint was done */
> > > > > +	set_nat_flag(ne, IS_CHECKPOINTED, true);
> > > > > +	set_nat_flag(ne, HAS_FSYNCED_INODE, false);
> > > > > +	set_nat_flag(ne, HAS_LAST_FSYNC, true);
> > > > > +}
> > > > > +
> > > > >  static inline void node_info_from_raw_nat(struct node_info *ni,
> > > > >  						struct f2fs_nat_entry *raw_ne)
> > > > >  {
> > > > > --
> > > > > 1.8.5.2 (Apple Git-48)
> > > > >
> > > > >
> > > > > ------------------------------------------------------------------------------
> > > > > Want excitement?
> > > > > Manually upgrade your production database.
> > > > > When you want reliability, choose Perforce
> > > > > Perforce version control. Predictably reliable.
> > > > > http://pubads.g.doubleclick.net/gampad/clk?id=157508191&iu=/4140/ostg.clktrk
> > > > > _______________________________________________
> > > > > Linux-f2fs-devel mailing list
> > > > > Linux-f2fs-devel@lists.sourceforge.net
> > > > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> > > >


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2014-09-23  8:51 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-18  5:51 [PATCH 1/3] f2fs: introduce a flag to represent each nat entry information Jaegeuk Kim
2014-09-18  5:51 ` [PATCH 2/3] f2fs: fix conditions to remain recovery information in f2fs_sync_file Jaegeuk Kim
2014-09-22  7:24   ` [f2fs-dev] " Chao Yu
2014-09-22  7:38     ` Huang Ying
2014-09-22  9:20       ` Chao Yu
2014-09-23  4:52         ` Jaegeuk Kim
2014-09-23  8:50           ` Chao Yu
2014-09-18  5:51 ` [PATCH 3/3] f2fs: fix roll-forward missing scenarios Jaegeuk Kim
     [not found]   ` <CAC=cRTPotgNXbuPwG95HHuWiicS2OiD5R3HsOqdMG+2b=8ZyEg@mail.gmail.com>
2014-09-20 16:23     ` Jaegeuk Kim
2014-09-20 23:22       ` Huang Ying
2014-09-21  3:36         ` Jaegeuk Kim
2014-09-22  2:13           ` Huang Ying

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).