linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3] f2fs: Fix indefinite loop in f2fs_gc()
@ 2019-08-07  8:52 Sahitya Tummala
  2019-08-07  9:29 ` Chao Yu
  0 siblings, 1 reply; 3+ messages in thread
From: Sahitya Tummala @ 2019-08-07  8:52 UTC (permalink / raw)
  To: Jaegeuk Kim, Chao Yu, linux-f2fs-devel; +Cc: Sahitya Tummala, linux-kernel

Policy - Foreground GC, LFS and greedy GC mode.

Under this policy, f2fs_gc() loops forever to GC as it doesn't have
enough free segments to proceed and thus it keeps calling gc_more
for the same victim segment.  This can happen if the selected victim
segment could not be GC'd due to a failed blkaddr validity check, i.e.
is_alive() returns false for the blocks set in the current validity map.

Fix this by keeping track of such invalid segments and skipping those
segments during selection in get_victim_by_default() to avoid an endless
GC loop under such error scenarios.

Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
---
v3: address Chao's comments and also add logic to clear invalid_segmap

 fs/f2fs/gc.c      | 25 +++++++++++++++++++++++--
 fs/f2fs/segment.c | 10 +++++++++-
 fs/f2fs/segment.h |  3 +++
 3 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 8974672..f7b9602 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -382,6 +382,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
 			nsearched++;
 		}
 
+		/*
+		 * skip selecting the invalid segno (that is failed due to block
+		 * validity check failure during GC) to avoid endless GC loop in
+		 * such cases.
+		 */
+		if (test_bit(segno, sm->invalid_segmap))
+			goto next;
+
 		secno = GET_SEC_FROM_SEG(sbi, segno);
 
 		if (sec_usage_check(sbi, secno))
@@ -602,8 +610,13 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
 {
 	struct page *node_page;
 	nid_t nid;
-	unsigned int ofs_in_node;
+	unsigned int ofs_in_node, segno;
 	block_t source_blkaddr;
+	unsigned long offset;
+	struct sit_info *sit_i = SIT_I(sbi);
+
+	segno = GET_SEGNO(sbi, blkaddr);
+	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
 
 	nid = le32_to_cpu(sum->nid);
 	ofs_in_node = le16_to_cpu(sum->ofs_in_node);
@@ -627,8 +640,16 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
 	source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node);
 	f2fs_put_page(node_page, 1);
 
-	if (source_blkaddr != blkaddr)
+	if (source_blkaddr != blkaddr) {
+		if (unlikely(check_valid_map(sbi, segno, offset))) {
+			if (!test_and_set_bit(segno, sit_i->invalid_segmap)) {
+				f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u\n",
+						blkaddr, source_blkaddr, segno);
+				f2fs_bug_on(sbi, 1);
+			}
+		}
 		return false;
+	}
 	return true;
 }
 
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index a661ac3..c3ba9e7 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -806,6 +806,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
 		enum dirty_type dirty_type)
 {
 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	struct sit_info *sit_i = SIT_I(sbi);
 
 	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
 		dirty_i->nr_dirty[dirty_type]--;
@@ -817,9 +818,11 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
 		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
 			dirty_i->nr_dirty[t]--;
 
-		if (get_valid_blocks(sbi, segno, true) == 0)
+		if (get_valid_blocks(sbi, segno, true) == 0) {
 			clear_bit(GET_SEC_FROM_SEG(sbi, segno),
 						dirty_i->victim_secmap);
+			clear_bit(segno, sit_i->invalid_segmap);
+		}
 	}
 }
 
@@ -4017,6 +4020,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
 		return -ENOMEM;
 #endif
 
+	sit_i->invalid_segmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
+	if (!sit_i->invalid_segmap)
+		return -ENOMEM;
+
 	/* init SIT information */
 	sit_i->s_ops = &default_salloc_ops;
 
@@ -4518,6 +4525,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
 #ifdef CONFIG_F2FS_CHECK_FS
 	kvfree(sit_i->sit_bitmap_mir);
 #endif
+	kvfree(sit_i->invalid_segmap);
 	kvfree(sit_i);
 }
 
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index b746028..3918155c 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -246,6 +246,9 @@ struct sit_info {
 	unsigned long long min_mtime;		/* min. modification time */
 	unsigned long long max_mtime;		/* max. modification time */
 
+	/* bitmap of segments to be ignored by GC in case of errors */
+	unsigned long *invalid_segmap;
+
 	unsigned int last_victim[MAX_GC_POLICY]; /* last victim segment # */
 };
 
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v3] f2fs: Fix indefinite loop in f2fs_gc()
  2019-08-07  8:52 [PATCH v3] f2fs: Fix indefinite loop in f2fs_gc() Sahitya Tummala
@ 2019-08-07  9:29 ` Chao Yu
  2019-08-07 13:24   ` Sahitya Tummala
  0 siblings, 1 reply; 3+ messages in thread
From: Chao Yu @ 2019-08-07  9:29 UTC (permalink / raw)
  To: Sahitya Tummala, Jaegeuk Kim, linux-f2fs-devel; +Cc: linux-kernel

On 2019/8/7 16:52, Sahitya Tummala wrote:
> Policy - Foreground GC, LFS and greedy GC mode.
> 
> Under this policy, f2fs_gc() loops forever to GC as it doesn't have
> enough free segments to proceed and thus it keeps calling gc_more
> for the same victim segment.  This can happen if the selected victim
> segment could not be GC'd due to failed blkaddr validity check i.e.
> is_alive() returns false for the blocks set in current validity map.
> 
> Fix this by keeping track of such invalid segments and skip those
> segments for selection in get_victim_by_default() to avoid endless
> GC loop under such error scenarios.
> 
> Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
> ---
> v3: address Chao's comments and also add logic to clear invalid_segmap

Hi Sahitya,

I meant we could cover all invalid_segmap-related code w/ CONFIG_F2FS_CHECK_FS
in upstream code, like we did for sit_info.sit_bitmap_mir. In private code
(Qualcomm or others), if this issue happens frequently, we can enable it by
default until it is fixed.

What do you think?

Btw, is there still no fsck log from the broken image?

Thanks,

> 
>  fs/f2fs/gc.c      | 25 +++++++++++++++++++++++--
>  fs/f2fs/segment.c | 10 +++++++++-
>  fs/f2fs/segment.h |  3 +++
>  3 files changed, 35 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index 8974672..f7b9602 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -382,6 +382,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
>  			nsearched++;
>  		}
>  
> +		/*
> +		 * skip selecting the invalid segno (that is failed due to block
> +		 * validity check failure during GC) to avoid endless GC loop in
> +		 * such cases.
> +		 */
> +		if (test_bit(segno, sm->invalid_segmap))
> +			goto next;
> +
>  		secno = GET_SEC_FROM_SEG(sbi, segno);
>  
>  		if (sec_usage_check(sbi, secno))
> @@ -602,8 +610,13 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
>  {
>  	struct page *node_page;
>  	nid_t nid;
> -	unsigned int ofs_in_node;
> +	unsigned int ofs_in_node, segno;
>  	block_t source_blkaddr;
> +	unsigned long offset;
> +	struct sit_info *sit_i = SIT_I(sbi);
> +
> +	segno = GET_SEGNO(sbi, blkaddr);
> +	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
>  
>  	nid = le32_to_cpu(sum->nid);
>  	ofs_in_node = le16_to_cpu(sum->ofs_in_node);
> @@ -627,8 +640,16 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
>  	source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node);
>  	f2fs_put_page(node_page, 1);
>  
> -	if (source_blkaddr != blkaddr)
> +	if (source_blkaddr != blkaddr) {
> +		if (unlikely(check_valid_map(sbi, segno, offset))) {
> +			if (!test_and_set_bit(segno, sit_i->invalid_segmap)) {
> +				f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u\n",
> +						blkaddr, source_blkaddr, segno);
> +				f2fs_bug_on(sbi, 1);
> +			}
> +		}
>  		return false;
> +	}
>  	return true;
>  }
>  
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index a661ac3..c3ba9e7 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -806,6 +806,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
>  		enum dirty_type dirty_type)
>  {
>  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
> +	struct sit_info *sit_i = SIT_I(sbi);
>  
>  	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
>  		dirty_i->nr_dirty[dirty_type]--;
> @@ -817,9 +818,11 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
>  		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
>  			dirty_i->nr_dirty[t]--;
>  
> -		if (get_valid_blocks(sbi, segno, true) == 0)
> +		if (get_valid_blocks(sbi, segno, true) == 0) {
>  			clear_bit(GET_SEC_FROM_SEG(sbi, segno),
>  						dirty_i->victim_secmap);
> +			clear_bit(segno, sit_i->invalid_segmap);
> +		}
>  	}
>  }
>  
> @@ -4017,6 +4020,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
>  		return -ENOMEM;
>  #endif
>  
> +	sit_i->invalid_segmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
> +	if (!sit_i->invalid_segmap)
> +		return -ENOMEM;
> +
>  	/* init SIT information */
>  	sit_i->s_ops = &default_salloc_ops;
>  
> @@ -4518,6 +4525,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
>  #ifdef CONFIG_F2FS_CHECK_FS
>  	kvfree(sit_i->sit_bitmap_mir);
>  #endif
> +	kvfree(sit_i->invalid_segmap);
>  	kvfree(sit_i);
>  }
>  
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index b746028..3918155c 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -246,6 +246,9 @@ struct sit_info {
>  	unsigned long long min_mtime;		/* min. modification time */
>  	unsigned long long max_mtime;		/* max. modification time */
>  
> +	/* bitmap of segments to be ignored by GC in case of errors */
> +	unsigned long *invalid_segmap;
> +
>  	unsigned int last_victim[MAX_GC_POLICY]; /* last victim segment # */
>  };
>  
> 

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v3] f2fs: Fix indefinite loop in f2fs_gc()
  2019-08-07  9:29 ` Chao Yu
@ 2019-08-07 13:24   ` Sahitya Tummala
  0 siblings, 0 replies; 3+ messages in thread
From: Sahitya Tummala @ 2019-08-07 13:24 UTC (permalink / raw)
  To: Chao Yu; +Cc: Jaegeuk Kim, linux-f2fs-devel, linux-kernel, stummala

Hi Chao,

On Wed, Aug 07, 2019 at 05:29:24PM +0800, Chao Yu wrote:
> On 2019/8/7 16:52, Sahitya Tummala wrote:
> > Policy - Foreground GC, LFS and greedy GC mode.
> > 
> > Under this policy, f2fs_gc() loops forever to GC as it doesn't have
> > enough free segments to proceed and thus it keeps calling gc_more
> > for the same victim segment.  This can happen if the selected victim
> > segment could not be GC'd due to failed blkaddr validity check i.e.
> > is_alive() returns false for the blocks set in current validity map.
> > 
> > Fix this by keeping track of such invalid segments and skip those
> > segments for selection in get_victim_by_default() to avoid endless
> > GC loop under such error scenarios.
> > 
> > Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
> > ---
> > v3: address Chao's comments and also add logic to clear invalid_segmap
> 
> Hi Sahitya,
> 
> I meant we could cover all invalid_segmap related codes w/ CONFIG_F2FS_CHECK_FS
> in upstream code, like we did for sit_info.sit_bitmap_mir. In private code
> (qualconn or others), if this issue happens frequently, we can enable it by
> default before it is fixed.
> 
> How do you think?
> 
Sure, we can do it that way.

> Btw, still no fsck log on broken image?
>
I have asked the customers to provide this log the next time the issue is
reproduced. I will update you once I get the log.

Thanks,

> Thanks,
> 
> > 
> >  fs/f2fs/gc.c      | 25 +++++++++++++++++++++++--
> >  fs/f2fs/segment.c | 10 +++++++++-
> >  fs/f2fs/segment.h |  3 +++
> >  3 files changed, 35 insertions(+), 3 deletions(-)
> > 
> > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > index 8974672..f7b9602 100644
> > --- a/fs/f2fs/gc.c
> > +++ b/fs/f2fs/gc.c
> > @@ -382,6 +382,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
> >  			nsearched++;
> >  		}
> >  
> > +		/*
> > +		 * skip selecting the invalid segno (that is failed due to block
> > +		 * validity check failure during GC) to avoid endless GC loop in
> > +		 * such cases.
> > +		 */
> > +		if (test_bit(segno, sm->invalid_segmap))
> > +			goto next;
> > +
> >  		secno = GET_SEC_FROM_SEG(sbi, segno);
> >  
> >  		if (sec_usage_check(sbi, secno))
> > @@ -602,8 +610,13 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
> >  {
> >  	struct page *node_page;
> >  	nid_t nid;
> > -	unsigned int ofs_in_node;
> > +	unsigned int ofs_in_node, segno;
> >  	block_t source_blkaddr;
> > +	unsigned long offset;
> > +	struct sit_info *sit_i = SIT_I(sbi);
> > +
> > +	segno = GET_SEGNO(sbi, blkaddr);
> > +	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
> >  
> >  	nid = le32_to_cpu(sum->nid);
> >  	ofs_in_node = le16_to_cpu(sum->ofs_in_node);
> > @@ -627,8 +640,16 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
> >  	source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node);
> >  	f2fs_put_page(node_page, 1);
> >  
> > -	if (source_blkaddr != blkaddr)
> > +	if (source_blkaddr != blkaddr) {
> > +		if (unlikely(check_valid_map(sbi, segno, offset))) {
> > +			if (!test_and_set_bit(segno, sit_i->invalid_segmap)) {
> > +				f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u\n",
> > +						blkaddr, source_blkaddr, segno);
> > +				f2fs_bug_on(sbi, 1);
> > +			}
> > +		}
> >  		return false;
> > +	}
> >  	return true;
> >  }
> >  
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index a661ac3..c3ba9e7 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -806,6 +806,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
> >  		enum dirty_type dirty_type)
> >  {
> >  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
> > +	struct sit_info *sit_i = SIT_I(sbi);
> >  
> >  	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
> >  		dirty_i->nr_dirty[dirty_type]--;
> > @@ -817,9 +818,11 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
> >  		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
> >  			dirty_i->nr_dirty[t]--;
> >  
> > -		if (get_valid_blocks(sbi, segno, true) == 0)
> > +		if (get_valid_blocks(sbi, segno, true) == 0) {
> >  			clear_bit(GET_SEC_FROM_SEG(sbi, segno),
> >  						dirty_i->victim_secmap);
> > +			clear_bit(segno, sit_i->invalid_segmap);
> > +		}
> >  	}
> >  }
> >  
> > @@ -4017,6 +4020,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
> >  		return -ENOMEM;
> >  #endif
> >  
> > +	sit_i->invalid_segmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
> > +	if (!sit_i->invalid_segmap)
> > +		return -ENOMEM;
> > +
> >  	/* init SIT information */
> >  	sit_i->s_ops = &default_salloc_ops;
> >  
> > @@ -4518,6 +4525,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
> >  #ifdef CONFIG_F2FS_CHECK_FS
> >  	kvfree(sit_i->sit_bitmap_mir);
> >  #endif
> > +	kvfree(sit_i->invalid_segmap);
> >  	kvfree(sit_i);
> >  }
> >  
> > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> > index b746028..3918155c 100644
> > --- a/fs/f2fs/segment.h
> > +++ b/fs/f2fs/segment.h
> > @@ -246,6 +246,9 @@ struct sit_info {
> >  	unsigned long long min_mtime;		/* min. modification time */
> >  	unsigned long long max_mtime;		/* max. modification time */
> >  
> > +	/* bitmap of segments to be ignored by GC in case of errors */
> > +	unsigned long *invalid_segmap;
> > +
> >  	unsigned int last_victim[MAX_GC_POLICY]; /* last victim segment # */
> >  };
> >  
> > 

-- 
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-08-07 13:24 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-07  8:52 [PATCH v3] f2fs: Fix indefinite loop in f2fs_gc() Sahitya Tummala
2019-08-07  9:29 ` Chao Yu
2019-08-07 13:24   ` Sahitya Tummala

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).