All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] f2fs: f2fs_get_meta_page_nofail should not be failed
@ 2020-10-02 21:32 ` Jaegeuk Kim
  0 siblings, 0 replies; 8+ messages in thread
From: Jaegeuk Kim @ 2020-10-02 21:32 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel, kernel-team; +Cc: Jaegeuk Kim

Otherwise, f2fs can break the the consistency.
(e.g., BUG_ON in f2fs_get_sum_page)

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c | 9 +++------
 fs/f2fs/f2fs.h       | 2 --
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f18386d30f031..7bb3a741a8f16 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -110,15 +110,12 @@ struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
 struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
 {
 	struct page *page;
-	int count = 0;
-
 retry:
 	page = __get_meta_page(sbi, index, true);
 	if (IS_ERR(page)) {
-		if (PTR_ERR(page) == -EIO &&
-				++count <= DEFAULT_RETRY_IO_COUNT)
-			goto retry;
-		f2fs_stop_checkpoint(sbi, false);
+		f2fs_flush_merged_writes(sbi);
+		congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+		goto retry;
 	}
 	return page;
 }
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9d58fd5dae139..d905edb42c327 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -595,8 +595,6 @@ enum {
 					 */
 };
 
-#define DEFAULT_RETRY_IO_COUNT	8	/* maximum retry read IO count */
-
 /* congestion wait timeout value, default: 20ms */
 #define	DEFAULT_IO_TIMEOUT	(msecs_to_jiffies(20))
 
-- 
2.28.0.806.g8561365e88-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [f2fs-dev] [PATCH] f2fs: f2fs_get_meta_page_nofail should not be failed
@ 2020-10-02 21:32 ` Jaegeuk Kim
  0 siblings, 0 replies; 8+ messages in thread
From: Jaegeuk Kim @ 2020-10-02 21:32 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel, kernel-team; +Cc: Jaegeuk Kim

Otherwise, f2fs can break the the consistency.
(e.g., BUG_ON in f2fs_get_sum_page)

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c | 9 +++------
 fs/f2fs/f2fs.h       | 2 --
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f18386d30f031..7bb3a741a8f16 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -110,15 +110,12 @@ struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
 struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
 {
 	struct page *page;
-	int count = 0;
-
 retry:
 	page = __get_meta_page(sbi, index, true);
 	if (IS_ERR(page)) {
-		if (PTR_ERR(page) == -EIO &&
-				++count <= DEFAULT_RETRY_IO_COUNT)
-			goto retry;
-		f2fs_stop_checkpoint(sbi, false);
+		f2fs_flush_merged_writes(sbi);
+		congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+		goto retry;
 	}
 	return page;
 }
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9d58fd5dae139..d905edb42c327 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -595,8 +595,6 @@ enum {
 					 */
 };
 
-#define DEFAULT_RETRY_IO_COUNT	8	/* maximum retry read IO count */
-
 /* congestion wait timeout value, default: 20ms */
 #define	DEFAULT_IO_TIMEOUT	(msecs_to_jiffies(20))
 
-- 
2.28.0.806.g8561365e88-goog



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] f2fs: f2fs_get_meta_page_nofail should not be failed
  2020-10-02 21:32 ` [f2fs-dev] " Jaegeuk Kim
@ 2020-10-09  4:31   ` jaegeuk
  -1 siblings, 0 replies; 8+ messages in thread
From: jaegeuk @ 2020-10-09  4:31 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel, kernel-team

Otherwise, f2fs can break the the consistency.
(e.g., BUG_ON in f2fs_get_sum_page)

Then, this reveals another issue where we go into an infinite loop on normal
error case. It turns out we abused f2fs_get_meta_page_nofail() in this path.

- f2fs_fill_super
 - f2fs_build_segment_manager
  - build_sit_entries
   - get_current_sit_page

Actually, we didn't have to use _nofail in this case, since we could return
error to mount(2) already with the error handler.

This was caught by syzbot as follows.

INFO: task syz-executor178:6870 can't die for more than 143 seconds.
task:syz-executor178 state:R
 stack:26960 pid: 6870 ppid:  6869 flags:0x00004006
Call Trace:

Showing all locks held in the system:
1 lock held by khungtaskd/1179:
 #0: ffffffff8a554da0 (rcu_read_lock){....}-{1:2}, at: debug_show_all_locks+0x53/0x260 kernel/locking/lockdep.c:6242
1 lock held by systemd-journal/3920:
1 lock held by in:imklog/6769:
 #0: ffff88809eebc130 (&f->f_pos_lock){+.+.}-{3:3}, at: __fdget_pos+0xe9/0x100 fs/file.c:930
1 lock held by syz-executor178/6870:
 #0: ffff8880925120e0 (&type->s_umount_key#47/1){+.+.}-{3:3}, at: alloc_super+0x201/0xaf0 fs/super.c:229

Reported-by: syzbot+ee250ac8137be41d7b13@syzkaller.appspotmail.com
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---

v2 log:
 - fix syzbot issue

 fs/f2fs/checkpoint.c | 9 +++------
 fs/f2fs/f2fs.h       | 2 --
 fs/f2fs/segment.c    | 2 +-
 3 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f18386d30f031..7bb3a741a8f16 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -110,15 +110,12 @@ struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
 struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
 {
 	struct page *page;
-	int count = 0;
-
 retry:
 	page = __get_meta_page(sbi, index, true);
 	if (IS_ERR(page)) {
-		if (PTR_ERR(page) == -EIO &&
-				++count <= DEFAULT_RETRY_IO_COUNT)
-			goto retry;
-		f2fs_stop_checkpoint(sbi, false);
+		f2fs_flush_merged_writes(sbi);
+		congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+		goto retry;
 	}
 	return page;
 }
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ae46d44bd5211..ce79b9b5b1eff 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -593,8 +593,6 @@ enum {
 					 */
 };
 
-#define DEFAULT_RETRY_IO_COUNT	8	/* maximum retry read IO count */
-
 /* congestion wait timeout value, default: 20ms */
 #define	DEFAULT_IO_TIMEOUT	(msecs_to_jiffies(20))
 
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 13ecd2c2c361b..40001d80fa86d 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3964,7 +3964,7 @@ int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
 					unsigned int segno)
 {
-	return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
+	return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
 }
 
 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
-- 
2.28.0.1011.ga647a8990f-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [f2fs-dev] [PATCH v2] f2fs: f2fs_get_meta_page_nofail should not be failed
@ 2020-10-09  4:31   ` jaegeuk
  0 siblings, 0 replies; 8+ messages in thread
From: jaegeuk @ 2020-10-09  4:31 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel, kernel-team

Otherwise, f2fs can break the the consistency.
(e.g., BUG_ON in f2fs_get_sum_page)

Then, this reveals another issue where we go into an infinite loop on normal
error case. It turns out we abused f2fs_get_meta_page_nofail() in this path.

- f2fs_fill_super
 - f2fs_build_segment_manager
  - build_sit_entries
   - get_current_sit_page

Actually, we didn't have to use _nofail in this case, since we could return
error to mount(2) already with the error handler.

This was caught by syzbot as follows.

INFO: task syz-executor178:6870 can't die for more than 143 seconds.
task:syz-executor178 state:R
 stack:26960 pid: 6870 ppid:  6869 flags:0x00004006
Call Trace:

Showing all locks held in the system:
1 lock held by khungtaskd/1179:
 #0: ffffffff8a554da0 (rcu_read_lock){....}-{1:2}, at: debug_show_all_locks+0x53/0x260 kernel/locking/lockdep.c:6242
1 lock held by systemd-journal/3920:
1 lock held by in:imklog/6769:
 #0: ffff88809eebc130 (&f->f_pos_lock){+.+.}-{3:3}, at: __fdget_pos+0xe9/0x100 fs/file.c:930
1 lock held by syz-executor178/6870:
 #0: ffff8880925120e0 (&type->s_umount_key#47/1){+.+.}-{3:3}, at: alloc_super+0x201/0xaf0 fs/super.c:229

Reported-by: syzbot+ee250ac8137be41d7b13@syzkaller.appspotmail.com
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---

v2 log:
 - fix syzbot issue

 fs/f2fs/checkpoint.c | 9 +++------
 fs/f2fs/f2fs.h       | 2 --
 fs/f2fs/segment.c    | 2 +-
 3 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f18386d30f031..7bb3a741a8f16 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -110,15 +110,12 @@ struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
 struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
 {
 	struct page *page;
-	int count = 0;
-
 retry:
 	page = __get_meta_page(sbi, index, true);
 	if (IS_ERR(page)) {
-		if (PTR_ERR(page) == -EIO &&
-				++count <= DEFAULT_RETRY_IO_COUNT)
-			goto retry;
-		f2fs_stop_checkpoint(sbi, false);
+		f2fs_flush_merged_writes(sbi);
+		congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+		goto retry;
 	}
 	return page;
 }
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ae46d44bd5211..ce79b9b5b1eff 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -593,8 +593,6 @@ enum {
 					 */
 };
 
-#define DEFAULT_RETRY_IO_COUNT	8	/* maximum retry read IO count */
-
 /* congestion wait timeout value, default: 20ms */
 #define	DEFAULT_IO_TIMEOUT	(msecs_to_jiffies(20))
 
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 13ecd2c2c361b..40001d80fa86d 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3964,7 +3964,7 @@ int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
 					unsigned int segno)
 {
-	return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
+	return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
 }
 
 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
-- 
2.28.0.1011.ga647a8990f-goog



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: handle errors of f2fs_get_meta_page_nofail be failed
  2020-10-09  4:31   ` [f2fs-dev] " jaegeuk
@ 2020-10-13  3:07     ` jaegeuk
  -1 siblings, 0 replies; 8+ messages in thread
From: jaegeuk @ 2020-10-13  3:07 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel, kernel-team

First problem is we hit BUG_ON() in f2fs_get_sum_page given EIO on
f2fs_get_meta_page_nofail().

Quick fix was not to give any error with infinite loop, but syzbot caught
a case where it goes to that loop from fuzzed image. In turned out we abused
f2fs_get_meta_page_nofail() like in the below call stack.

- f2fs_fill_super
 - f2fs_build_segment_manager
  - build_sit_entries
   - get_current_sit_page

INFO: task syz-executor178:6870 can't die for more than 143 seconds.
task:syz-executor178 state:R
 stack:26960 pid: 6870 ppid:  6869 flags:0x00004006
Call Trace:

Showing all locks held in the system:
1 lock held by khungtaskd/1179:
 #0: ffffffff8a554da0 (rcu_read_lock){....}-{1:2}, at: debug_show_all_locks+0x53/0x260 kernel/locking/lockdep.c:6242
1 lock held by systemd-journal/3920:
1 lock held by in:imklog/6769:
 #0: ffff88809eebc130 (&f->f_pos_lock){+.+.}-{3:3}, at: __fdget_pos+0xe9/0x100 fs/file.c:930
1 lock held by syz-executor178/6870:
 #0: ffff8880925120e0 (&type->s_umount_key#47/1){+.+.}-{3:3}, at: alloc_super+0x201/0xaf0 fs/super.c:229

Actually, we didn't have to use _nofail in this case, since we could return
error to mount(2) already with the error handler.

As a result, this patch tries to 1) remove _nofail callers as much as possible,
2) deal with error case in last remaining caller, f2fs_get_sum_page().

Reported-by: syzbot+ee250ac8137be41d7b13@syzkaller.appspotmail.com
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---

Change log from v2:
 - avoid _nofail and add error handler

 fs/f2fs/checkpoint.c |  2 +-
 fs/f2fs/f2fs.h       |  2 +-
 fs/f2fs/node.c       |  2 +-
 fs/f2fs/segment.c    | 12 +++++++++---
 4 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f18386d30f031..023462e80e58d 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -107,7 +107,7 @@ struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
 	return __get_meta_page(sbi, index, true);
 }
 
-struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
+struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index)
 {
 	struct page *page;
 	int count = 0;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ae46d44bd5211..adda53d20a399 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3422,7 +3422,7 @@ unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
 void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
 struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
 struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
-struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index);
+struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index);
 struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
 bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
 					block_t blkaddr, int type);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 93fb34d636eb5..d5d8ce077f295 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -109,7 +109,7 @@ static void clear_node_page_dirty(struct page *page)
 
 static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
 {
-	return f2fs_get_meta_page_nofail(sbi, current_nat_addr(sbi, nid));
+	return f2fs_get_meta_page(sbi, current_nat_addr(sbi, nid));
 }
 
 static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 13ecd2c2c361b..05ab5ae2b5f7f 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2379,7 +2379,9 @@ int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
  */
 struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
 {
-	return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
+	if (unlikely(f2fs_cp_error(sbi)))
+		return ERR_PTR(-EIO);
+	return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
 }
 
 void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
@@ -2669,7 +2671,11 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush)
 	__next_free_blkoff(sbi, curseg, 0);
 
 	sum_page = f2fs_get_sum_page(sbi, new_segno);
-	f2fs_bug_on(sbi, IS_ERR(sum_page));
+	if (IS_ERR(sum_page)) {
+		/* GC won't be able to use stale summary pages by cp_error */
+		memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
+		return;
+	}
 	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
 	memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
 	f2fs_put_page(sum_page, 1);
@@ -3964,7 +3970,7 @@ int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
 					unsigned int segno)
 {
-	return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
+	return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
 }
 
 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
-- 
2.29.0.rc1.297.gfa9743e501-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: handle errors of f2fs_get_meta_page_nofail be failed
@ 2020-10-13  3:07     ` jaegeuk
  0 siblings, 0 replies; 8+ messages in thread
From: jaegeuk @ 2020-10-13  3:07 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel, kernel-team

First problem is we hit BUG_ON() in f2fs_get_sum_page given EIO on
f2fs_get_meta_page_nofail().

Quick fix was not to give any error with infinite loop, but syzbot caught
a case where it goes to that loop from fuzzed image. In turned out we abused
f2fs_get_meta_page_nofail() like in the below call stack.

- f2fs_fill_super
 - f2fs_build_segment_manager
  - build_sit_entries
   - get_current_sit_page

INFO: task syz-executor178:6870 can't die for more than 143 seconds.
task:syz-executor178 state:R
 stack:26960 pid: 6870 ppid:  6869 flags:0x00004006
Call Trace:

Showing all locks held in the system:
1 lock held by khungtaskd/1179:
 #0: ffffffff8a554da0 (rcu_read_lock){....}-{1:2}, at: debug_show_all_locks+0x53/0x260 kernel/locking/lockdep.c:6242
1 lock held by systemd-journal/3920:
1 lock held by in:imklog/6769:
 #0: ffff88809eebc130 (&f->f_pos_lock){+.+.}-{3:3}, at: __fdget_pos+0xe9/0x100 fs/file.c:930
1 lock held by syz-executor178/6870:
 #0: ffff8880925120e0 (&type->s_umount_key#47/1){+.+.}-{3:3}, at: alloc_super+0x201/0xaf0 fs/super.c:229

Actually, we didn't have to use _nofail in this case, since we could return
error to mount(2) already with the error handler.

As a result, this patch tries to 1) remove _nofail callers as much as possible,
2) deal with error case in last remaining caller, f2fs_get_sum_page().

Reported-by: syzbot+ee250ac8137be41d7b13@syzkaller.appspotmail.com
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---

Change log from v2:
 - avoid _nofail and add error handler

 fs/f2fs/checkpoint.c |  2 +-
 fs/f2fs/f2fs.h       |  2 +-
 fs/f2fs/node.c       |  2 +-
 fs/f2fs/segment.c    | 12 +++++++++---
 4 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f18386d30f031..023462e80e58d 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -107,7 +107,7 @@ struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
 	return __get_meta_page(sbi, index, true);
 }
 
-struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
+struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index)
 {
 	struct page *page;
 	int count = 0;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ae46d44bd5211..adda53d20a399 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3422,7 +3422,7 @@ unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
 void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
 struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
 struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
-struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index);
+struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index);
 struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
 bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
 					block_t blkaddr, int type);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 93fb34d636eb5..d5d8ce077f295 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -109,7 +109,7 @@ static void clear_node_page_dirty(struct page *page)
 
 static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
 {
-	return f2fs_get_meta_page_nofail(sbi, current_nat_addr(sbi, nid));
+	return f2fs_get_meta_page(sbi, current_nat_addr(sbi, nid));
 }
 
 static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 13ecd2c2c361b..05ab5ae2b5f7f 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2379,7 +2379,9 @@ int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
  */
 struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
 {
-	return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
+	if (unlikely(f2fs_cp_error(sbi)))
+		return ERR_PTR(-EIO);
+	return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
 }
 
 void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
@@ -2669,7 +2671,11 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush)
 	__next_free_blkoff(sbi, curseg, 0);
 
 	sum_page = f2fs_get_sum_page(sbi, new_segno);
-	f2fs_bug_on(sbi, IS_ERR(sum_page));
+	if (IS_ERR(sum_page)) {
+		/* GC won't be able to use stale summary pages by cp_error */
+		memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
+		return;
+	}
 	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
 	memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
 	f2fs_put_page(sum_page, 1);
@@ -3964,7 +3970,7 @@ int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
 					unsigned int segno)
 {
-	return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
+	return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
 }
 
 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
-- 
2.29.0.rc1.297.gfa9743e501-goog



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: handle errors of f2fs_get_meta_page_nofail be failed
  2020-10-13  3:07     ` jaegeuk
@ 2020-10-14  6:21       ` Chao Yu
  -1 siblings, 0 replies; 8+ messages in thread
From: Chao Yu @ 2020-10-14  6:21 UTC (permalink / raw)
  To: jaegeuk, linux-kernel, linux-f2fs-devel, kernel-team

On 2020/10/13 11:07, jaegeuk@kernel.org wrote:
> First problem is we hit BUG_ON() in f2fs_get_sum_page given EIO on
> f2fs_get_meta_page_nofail().
> 
> Quick fix was not to give any error with infinite loop, but syzbot caught
> a case where it goes to that loop from fuzzed image. In turned out we abused
> f2fs_get_meta_page_nofail() like in the below call stack.
> 
> - f2fs_fill_super
>   - f2fs_build_segment_manager
>    - build_sit_entries
>     - get_current_sit_page
> 
> INFO: task syz-executor178:6870 can't die for more than 143 seconds.
> task:syz-executor178 state:R
>   stack:26960 pid: 6870 ppid:  6869 flags:0x00004006
> Call Trace:
> 
> Showing all locks held in the system:
> 1 lock held by khungtaskd/1179:
>   #0: ffffffff8a554da0 (rcu_read_lock){....}-{1:2}, at: debug_show_all_locks+0x53/0x260 kernel/locking/lockdep.c:6242
> 1 lock held by systemd-journal/3920:
> 1 lock held by in:imklog/6769:
>   #0: ffff88809eebc130 (&f->f_pos_lock){+.+.}-{3:3}, at: __fdget_pos+0xe9/0x100 fs/file.c:930
> 1 lock held by syz-executor178/6870:
>   #0: ffff8880925120e0 (&type->s_umount_key#47/1){+.+.}-{3:3}, at: alloc_super+0x201/0xaf0 fs/super.c:229
> 
> Actually, we didn't have to use _nofail in this case, since we could return
> error to mount(2) already with the error handler.
> 
> As a result, this patch tries to 1) remove _nofail callers as much as possible,
> 2) deal with error case in last remaining caller, f2fs_get_sum_page().
> 
> Reported-by: syzbot+ee250ac8137be41d7b13@syzkaller.appspotmail.com
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

I prefer to use f2fs_get_meta_page() as much as possible except change_curseg()
path, however, it's minor, and the time is closing to merge window, so:

Reviewed-by: Chao Yu <yuchao0@huawei.com>

Thanks,

> ---
> 
> Change log from v2:
>   - avoid _nofail and add error handler
> 
>   fs/f2fs/checkpoint.c |  2 +-
>   fs/f2fs/f2fs.h       |  2 +-
>   fs/f2fs/node.c       |  2 +-
>   fs/f2fs/segment.c    | 12 +++++++++---
>   4 files changed, 12 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index f18386d30f031..023462e80e58d 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -107,7 +107,7 @@ struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
>   	return __get_meta_page(sbi, index, true);
>   }
>   
> -struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
> +struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index)
>   {
>   	struct page *page;
>   	int count = 0;
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index ae46d44bd5211..adda53d20a399 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -3422,7 +3422,7 @@ unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
>   void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
>   struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
>   struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
> -struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index);
> +struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index);
>   struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
>   bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
>   					block_t blkaddr, int type);
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 93fb34d636eb5..d5d8ce077f295 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -109,7 +109,7 @@ static void clear_node_page_dirty(struct page *page)
>   
>   static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
>   {
> -	return f2fs_get_meta_page_nofail(sbi, current_nat_addr(sbi, nid));
> +	return f2fs_get_meta_page(sbi, current_nat_addr(sbi, nid));
>   }
>   
>   static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 13ecd2c2c361b..05ab5ae2b5f7f 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -2379,7 +2379,9 @@ int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
>    */
>   struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
>   {
> -	return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
> +	if (unlikely(f2fs_cp_error(sbi)))
> +		return ERR_PTR(-EIO);
> +	return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
>   }
>   
>   void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
> @@ -2669,7 +2671,11 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush)
>   	__next_free_blkoff(sbi, curseg, 0);
>   
>   	sum_page = f2fs_get_sum_page(sbi, new_segno);
> -	f2fs_bug_on(sbi, IS_ERR(sum_page));
> +	if (IS_ERR(sum_page)) {
> +		/* GC won't be able to use stale summary pages by cp_error */
> +		memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
> +		return;
> +	}
>   	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
>   	memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
>   	f2fs_put_page(sum_page, 1);
> @@ -3964,7 +3970,7 @@ int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
>   static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
>   					unsigned int segno)
>   {
> -	return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
> +	return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
>   }
>   
>   static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: handle errors of f2fs_get_meta_page_nofail be failed
@ 2020-10-14  6:21       ` Chao Yu
  0 siblings, 0 replies; 8+ messages in thread
From: Chao Yu @ 2020-10-14  6:21 UTC (permalink / raw)
  To: jaegeuk, linux-kernel, linux-f2fs-devel, kernel-team

On 2020/10/13 11:07, jaegeuk@kernel.org wrote:
> First problem is we hit BUG_ON() in f2fs_get_sum_page given EIO on
> f2fs_get_meta_page_nofail().
> 
> Quick fix was not to give any error with infinite loop, but syzbot caught
> a case where it goes to that loop from fuzzed image. In turned out we abused
> f2fs_get_meta_page_nofail() like in the below call stack.
> 
> - f2fs_fill_super
>   - f2fs_build_segment_manager
>    - build_sit_entries
>     - get_current_sit_page
> 
> INFO: task syz-executor178:6870 can't die for more than 143 seconds.
> task:syz-executor178 state:R
>   stack:26960 pid: 6870 ppid:  6869 flags:0x00004006
> Call Trace:
> 
> Showing all locks held in the system:
> 1 lock held by khungtaskd/1179:
>   #0: ffffffff8a554da0 (rcu_read_lock){....}-{1:2}, at: debug_show_all_locks+0x53/0x260 kernel/locking/lockdep.c:6242
> 1 lock held by systemd-journal/3920:
> 1 lock held by in:imklog/6769:
>   #0: ffff88809eebc130 (&f->f_pos_lock){+.+.}-{3:3}, at: __fdget_pos+0xe9/0x100 fs/file.c:930
> 1 lock held by syz-executor178/6870:
>   #0: ffff8880925120e0 (&type->s_umount_key#47/1){+.+.}-{3:3}, at: alloc_super+0x201/0xaf0 fs/super.c:229
> 
> Actually, we didn't have to use _nofail in this case, since we could return
> error to mount(2) already with the error handler.
> 
> As a result, this patch tries to 1) remove _nofail callers as much as possible,
> 2) deal with error case in last remaining caller, f2fs_get_sum_page().
> 
> Reported-by: syzbot+ee250ac8137be41d7b13@syzkaller.appspotmail.com
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

I prefer to use f2fs_get_meta_page() as much as possible except change_curseg()
path, however, it's minor, and the time is closing to merge window, so:

Reviewed-by: Chao Yu <yuchao0@huawei.com>

Thanks,

> ---
> 
> Change log from v2:
>   - avoid _nofail and add error handler
> 
>   fs/f2fs/checkpoint.c |  2 +-
>   fs/f2fs/f2fs.h       |  2 +-
>   fs/f2fs/node.c       |  2 +-
>   fs/f2fs/segment.c    | 12 +++++++++---
>   4 files changed, 12 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index f18386d30f031..023462e80e58d 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -107,7 +107,7 @@ struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
>   	return __get_meta_page(sbi, index, true);
>   }
>   
> -struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
> +struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index)
>   {
>   	struct page *page;
>   	int count = 0;
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index ae46d44bd5211..adda53d20a399 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -3422,7 +3422,7 @@ unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
>   void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
>   struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
>   struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
> -struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index);
> +struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index);
>   struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
>   bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
>   					block_t blkaddr, int type);
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 93fb34d636eb5..d5d8ce077f295 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -109,7 +109,7 @@ static void clear_node_page_dirty(struct page *page)
>   
>   static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
>   {
> -	return f2fs_get_meta_page_nofail(sbi, current_nat_addr(sbi, nid));
> +	return f2fs_get_meta_page(sbi, current_nat_addr(sbi, nid));
>   }
>   
>   static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 13ecd2c2c361b..05ab5ae2b5f7f 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -2379,7 +2379,9 @@ int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
>    */
>   struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
>   {
> -	return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
> +	if (unlikely(f2fs_cp_error(sbi)))
> +		return ERR_PTR(-EIO);
> +	return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
>   }
>   
>   void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
> @@ -2669,7 +2671,11 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush)
>   	__next_free_blkoff(sbi, curseg, 0);
>   
>   	sum_page = f2fs_get_sum_page(sbi, new_segno);
> -	f2fs_bug_on(sbi, IS_ERR(sum_page));
> +	if (IS_ERR(sum_page)) {
> +		/* GC won't be able to use stale summary pages by cp_error */
> +		memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
> +		return;
> +	}
>   	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
>   	memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
>   	f2fs_put_page(sum_page, 1);
> @@ -3964,7 +3970,7 @@ int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
>   static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
>   					unsigned int segno)
>   {
> -	return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
> +	return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
>   }
>   
>   static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
> 


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-10-14  6:26 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-02 21:32 [PATCH] f2fs: f2fs_get_meta_page_nofail should not be failed Jaegeuk Kim
2020-10-02 21:32 ` [f2fs-dev] " Jaegeuk Kim
2020-10-09  4:31 ` [PATCH v2] " jaegeuk
2020-10-09  4:31   ` [f2fs-dev] " jaegeuk
2020-10-13  3:07   ` [f2fs-dev] [PATCH v3] f2fs: handle errors of f2fs_get_meta_page_nofail " jaegeuk
2020-10-13  3:07     ` jaegeuk
2020-10-14  6:21     ` Chao Yu
2020-10-14  6:21       ` Chao Yu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.