linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] f2fs: check the page status filled from disk
@ 2016-01-03  1:26 Jaegeuk Kim
  2016-01-03  1:26 ` [PATCH 2/3] f2fs: cover more area with nat_tree_lock Jaegeuk Kim
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Jaegeuk Kim @ 2016-01-03  1:26 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

After reading a page, we need to check whether there is any error.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 89a978c..11b2111 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -448,6 +448,14 @@ repeat:
 
 		/* wait for read completion */
 		lock_page(page);
+		if (unlikely(!PageUptodate(page))) {
+			f2fs_put_page(page, 1);
+			return ERR_PTR(-EIO);
+		}
+		if (unlikely(page->mapping != mapping)) {
+			f2fs_put_page(page, 1);
+			goto repeat;
+		}
 	}
 got_it:
 	if (new_i_size && i_size_read(inode) <
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/3] f2fs: cover more area with nat_tree_lock
  2016-01-03  1:26 [PATCH 1/3] f2fs: check the page status filled from disk Jaegeuk Kim
@ 2016-01-03  1:26 ` Jaegeuk Kim
  2016-01-05  9:33   ` [f2fs-dev] " Chao Yu
  2016-01-03  1:26 ` [PATCH 3/3] Revert "f2fs: check the node block address of newly allocated nid" Jaegeuk Kim
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 12+ messages in thread
From: Jaegeuk Kim @ 2016-01-03  1:26 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

There was a subtle bug on nat cache management which incurs wrong nid allocation
or wrong block addresses when try_to_free_nats is triggered heavily.
This patch enlarges the previous coverage of nat_tree_lock to avoid data race.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/node.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 669c44e..4dab09f 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -262,13 +262,11 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
 {
 	struct nat_entry *e;
 
-	down_write(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
 	if (!e) {
 		e = grab_nat_entry(nm_i, nid);
 		node_info_from_raw_nat(&e->ni, ne);
 	}
-	up_write(&nm_i->nat_tree_lock);
 }
 
 static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
@@ -380,6 +378,8 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
 
 	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
 
+	down_write(&nm_i->nat_tree_lock);
+
 	/* Check current segment summary */
 	mutex_lock(&curseg->curseg_mutex);
 	i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
@@ -400,6 +400,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
 cache:
 	/* cache nat entry */
 	cache_nat_entry(NM_I(sbi), nid, &ne);
+	up_write(&nm_i->nat_tree_lock);
 }
 
 /*
@@ -1459,13 +1460,10 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
 
 	if (build) {
 		/* do not add allocated nids */
-		down_read(&nm_i->nat_tree_lock);
 		ne = __lookup_nat_cache(nm_i, nid);
-		if (ne &&
-			(!get_nat_flag(ne, IS_CHECKPOINTED) ||
+		if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
 				nat_get_blkaddr(ne) != NULL_ADDR))
 			allocated = true;
-		up_read(&nm_i->nat_tree_lock);
 		if (allocated)
 			return 0;
 	}
@@ -1551,6 +1549,8 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
 	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
 							META_NAT, true);
 
+	down_read(&nm_i->nat_tree_lock);
+
 	while (1) {
 		struct page *page = get_current_nat_page(sbi, nid);
 
@@ -1579,6 +1579,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
 			remove_free_nid(nm_i, nid);
 	}
 	mutex_unlock(&curseg->curseg_mutex);
+	up_read(&nm_i->nat_tree_lock);
 
 	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
 					nm_i->ra_nid_pages, META_NAT, false);
@@ -1861,14 +1862,12 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
 
 		raw_ne = nat_in_journal(sum, i);
 
-		down_write(&nm_i->nat_tree_lock);
 		ne = __lookup_nat_cache(nm_i, nid);
 		if (!ne) {
 			ne = grab_nat_entry(nm_i, nid);
 			node_info_from_raw_nat(&ne->ni, &raw_ne);
 		}
 		__set_nat_cache_dirty(nm_i, ne);
-		up_write(&nm_i->nat_tree_lock);
 	}
 	update_nats_in_cursum(sum, -i);
 	mutex_unlock(&curseg->curseg_mutex);
@@ -1902,7 +1901,6 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 	struct f2fs_nat_block *nat_blk;
 	struct nat_entry *ne, *cur;
 	struct page *page = NULL;
-	struct f2fs_nm_info *nm_i = NM_I(sbi);
 
 	/*
 	 * there are two steps to flush nat entries:
@@ -1939,12 +1937,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 			raw_ne = &nat_blk->entries[nid - start_nid];
 		}
 		raw_nat_from_node_info(raw_ne, &ne->ni);
-
-		down_write(&NM_I(sbi)->nat_tree_lock);
 		nat_reset_flag(ne);
 		__clear_nat_cache_dirty(NM_I(sbi), ne);
-		up_write(&NM_I(sbi)->nat_tree_lock);
-
 		if (nat_get_blkaddr(ne) == NULL_ADDR)
 			add_free_nid(sbi, nid, false);
 	}
@@ -1956,9 +1950,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 
 	f2fs_bug_on(sbi, set->entry_cnt);
 
-	down_write(&nm_i->nat_tree_lock);
 	radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
-	up_write(&nm_i->nat_tree_lock);
 	kmem_cache_free(nat_entry_set_slab, set);
 }
 
@@ -1978,6 +1970,9 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
 
 	if (!nm_i->dirty_nat_cnt)
 		return;
+
+	down_write(&nm_i->nat_tree_lock);
+
 	/*
 	 * if there are no enough space in journal to store dirty nat
 	 * entries, remove all entries from journal and merge them
@@ -1986,7 +1981,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
 	if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
 		remove_nats_in_journal(sbi);
 
-	down_write(&nm_i->nat_tree_lock);
 	while ((found = __gang_lookup_nat_set(nm_i,
 					set_idx, SETVEC_SIZE, setvec))) {
 		unsigned idx;
@@ -1995,12 +1989,13 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
 			__adjust_nat_entry_set(setvec[idx], &sets,
 							MAX_NAT_JENTRIES(sum));
 	}
-	up_write(&nm_i->nat_tree_lock);
 
 	/* flush dirty nats in nat entry set */
 	list_for_each_entry_safe(set, tmp, &sets, set_list)
 		__flush_nat_entry_set(sbi, set);
 
+	up_write(&nm_i->nat_tree_lock);
+
 	f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
 }
 
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/3] Revert "f2fs: check the node block address of newly allocated nid"
  2016-01-03  1:26 [PATCH 1/3] f2fs: check the page status filled from disk Jaegeuk Kim
  2016-01-03  1:26 ` [PATCH 2/3] f2fs: cover more area with nat_tree_lock Jaegeuk Kim
@ 2016-01-03  1:26 ` Jaegeuk Kim
  2016-01-05  9:31 ` [f2fs-dev] [PATCH 1/3] f2fs: check the page status filled from disk Chao Yu
  2016-01-06  4:10 ` [PATCH 1/3 v2] " Jaegeuk Kim
  3 siblings, 0 replies; 12+ messages in thread
From: Jaegeuk Kim @ 2016-01-03  1:26 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

Original issue is fixed by:

  f2fs: cover more area with nat_tree_lock

This reverts commit 24928634f81b1592e83b37dcd89ed45c28f12feb.
---
 fs/f2fs/node.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 4dab09f..6d5f548 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1602,8 +1602,6 @@ retry:
 
 	/* We should not use stale free nids created by build_free_nids */
 	if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
-		struct node_info ni;
-
 		f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
 		list_for_each_entry(i, &nm_i->free_nid_list, list)
 			if (i->state == NID_NEW)
@@ -1614,13 +1612,6 @@ retry:
 		i->state = NID_ALLOC;
 		nm_i->fcnt--;
 		spin_unlock(&nm_i->free_nid_list_lock);
-
-		/* check nid is allocated already */
-		get_node_info(sbi, *nid, &ni);
-		if (ni.blk_addr != NULL_ADDR) {
-			alloc_nid_done(sbi, *nid);
-			goto retry;
-		}
 		return true;
 	}
 	spin_unlock(&nm_i->free_nid_list_lock);
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* RE: [f2fs-dev] [PATCH 1/3] f2fs: check the page status filled from disk
  2016-01-03  1:26 [PATCH 1/3] f2fs: check the page status filled from disk Jaegeuk Kim
  2016-01-03  1:26 ` [PATCH 2/3] f2fs: cover more area with nat_tree_lock Jaegeuk Kim
  2016-01-03  1:26 ` [PATCH 3/3] Revert "f2fs: check the node block address of newly allocated nid" Jaegeuk Kim
@ 2016-01-05  9:31 ` Chao Yu
  2016-01-05 17:48   ` Jaegeuk Kim
  2016-01-06  4:10 ` [PATCH 1/3 v2] " Jaegeuk Kim
  3 siblings, 1 reply; 12+ messages in thread
From: Chao Yu @ 2016-01-05  9:31 UTC (permalink / raw)
  To: 'Jaegeuk Kim'; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Jaegeuk,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> Sent: Sunday, January 03, 2016 9:26 AM
> To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> linux-f2fs-devel@lists.sourceforge.net
> Cc: Jaegeuk Kim
> Subject: [f2fs-dev] [PATCH 1/3] f2fs: check the page status filled from disk
> 
> After reading a page, we need to check whether there is any error.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>  fs/f2fs/data.c | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 89a978c..11b2111 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -448,6 +448,14 @@ repeat:
> 
>  		/* wait for read completion */
>  		lock_page(page);
> +		if (unlikely(!PageUptodate(page))) {
> +			f2fs_put_page(page, 1);
> +			return ERR_PTR(-EIO);

There is a convention in get_new_data_page, anyway we should release ipage
if there is any error occurs, but I think it will be ok to return directly
since it seems impossible the new dentry page has its real block address.

To avoid any bug here or wrong usage, how about add bug_on as following patch?

>From d92f0f34493b27ef28da67c446d552ce721b5d6f Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Tue, 5 Jan 2016 15:28:56 +0800
Subject: [PATCH] f2fs: add f2fs_bug_on in get_new_data_page

In get_new_data_page, locked inode page should not be hold before
get_read_data_page, this patch adds f2fs_bug_on to detect this
condition.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
---
 fs/f2fs/data.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 48f0bd3..2c5e3f6 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -440,6 +440,8 @@ repeat:
 		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
 		SetPageUptodate(page);
 	} else {
+		f2fs_bug_on(F2FS_I_SB(inode), ipage);
+
 		f2fs_put_page(page, 1);
 
 		page = get_read_data_page(inode, index, READ_SYNC, true);
-- 
2.6.3


> +		}
> +		if (unlikely(page->mapping != mapping)) {
> +			f2fs_put_page(page, 1);
> +			goto repeat;
> +		}

How about use get_lock_data_page to avoid duplicated code?

>  	}
>  got_it:
>  	if (new_i_size && i_size_read(inode) <
> --
> 2.6.3
> 
> 
> ------------------------------------------------------------------------------
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* RE: [f2fs-dev] [PATCH 2/3] f2fs: cover more area with nat_tree_lock
  2016-01-03  1:26 ` [PATCH 2/3] f2fs: cover more area with nat_tree_lock Jaegeuk Kim
@ 2016-01-05  9:33   ` Chao Yu
  2016-01-05 17:57     ` Jaegeuk Kim
  0 siblings, 1 reply; 12+ messages in thread
From: Chao Yu @ 2016-01-05  9:33 UTC (permalink / raw)
  To: 'Jaegeuk Kim'; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Jaegeuk,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> Sent: Sunday, January 03, 2016 9:26 AM
> To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> linux-f2fs-devel@lists.sourceforge.net
> Cc: Jaegeuk Kim
> Subject: [f2fs-dev] [PATCH 2/3] f2fs: cover more area with nat_tree_lock
> 
> There was a subtle bug on nat cache management which incurs wrong nid allocation
> or wrong block addresses when try_to_free_nats is triggered heavily.
> This patch enlarges the previous coverage of nat_tree_lock to avoid data race.

Have you figured out how this happen? I'm curious about this issue,
since still I can't reproduce it and find any clue by reviewing code
so far.

Thanks,

> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>  fs/f2fs/node.c | 29 ++++++++++++-----------------
>  1 file changed, 12 insertions(+), 17 deletions(-)
> 
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 669c44e..4dab09f 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -262,13 +262,11 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
>  {
>  	struct nat_entry *e;
> 
> -	down_write(&nm_i->nat_tree_lock);
>  	e = __lookup_nat_cache(nm_i, nid);
>  	if (!e) {
>  		e = grab_nat_entry(nm_i, nid);
>  		node_info_from_raw_nat(&e->ni, ne);
>  	}
> -	up_write(&nm_i->nat_tree_lock);
>  }
> 
>  static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
> @@ -380,6 +378,8 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info
> *ni)
> 
>  	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
> 
> +	down_write(&nm_i->nat_tree_lock);
> +
>  	/* Check current segment summary */
>  	mutex_lock(&curseg->curseg_mutex);
>  	i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
> @@ -400,6 +400,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info
> *ni)
>  cache:
>  	/* cache nat entry */
>  	cache_nat_entry(NM_I(sbi), nid, &ne);
> +	up_write(&nm_i->nat_tree_lock);
>  }
> 
>  /*
> @@ -1459,13 +1460,10 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
> 
>  	if (build) {
>  		/* do not add allocated nids */
> -		down_read(&nm_i->nat_tree_lock);
>  		ne = __lookup_nat_cache(nm_i, nid);
> -		if (ne &&
> -			(!get_nat_flag(ne, IS_CHECKPOINTED) ||
> +		if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
>  				nat_get_blkaddr(ne) != NULL_ADDR))
>  			allocated = true;
> -		up_read(&nm_i->nat_tree_lock);
>  		if (allocated)
>  			return 0;
>  	}
> @@ -1551,6 +1549,8 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
>  	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
>  							META_NAT, true);
> 
> +	down_read(&nm_i->nat_tree_lock);
> +
>  	while (1) {
>  		struct page *page = get_current_nat_page(sbi, nid);
> 
> @@ -1579,6 +1579,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
>  			remove_free_nid(nm_i, nid);
>  	}
>  	mutex_unlock(&curseg->curseg_mutex);
> +	up_read(&nm_i->nat_tree_lock);
> 
>  	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
>  					nm_i->ra_nid_pages, META_NAT, false);
> @@ -1861,14 +1862,12 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
> 
>  		raw_ne = nat_in_journal(sum, i);
> 
> -		down_write(&nm_i->nat_tree_lock);
>  		ne = __lookup_nat_cache(nm_i, nid);
>  		if (!ne) {
>  			ne = grab_nat_entry(nm_i, nid);
>  			node_info_from_raw_nat(&ne->ni, &raw_ne);
>  		}
>  		__set_nat_cache_dirty(nm_i, ne);
> -		up_write(&nm_i->nat_tree_lock);
>  	}
>  	update_nats_in_cursum(sum, -i);
>  	mutex_unlock(&curseg->curseg_mutex);
> @@ -1902,7 +1901,6 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>  	struct f2fs_nat_block *nat_blk;
>  	struct nat_entry *ne, *cur;
>  	struct page *page = NULL;
> -	struct f2fs_nm_info *nm_i = NM_I(sbi);
> 
>  	/*
>  	 * there are two steps to flush nat entries:
> @@ -1939,12 +1937,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>  			raw_ne = &nat_blk->entries[nid - start_nid];
>  		}
>  		raw_nat_from_node_info(raw_ne, &ne->ni);
> -
> -		down_write(&NM_I(sbi)->nat_tree_lock);
>  		nat_reset_flag(ne);
>  		__clear_nat_cache_dirty(NM_I(sbi), ne);
> -		up_write(&NM_I(sbi)->nat_tree_lock);
> -
>  		if (nat_get_blkaddr(ne) == NULL_ADDR)
>  			add_free_nid(sbi, nid, false);
>  	}
> @@ -1956,9 +1950,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> 
>  	f2fs_bug_on(sbi, set->entry_cnt);
> 
> -	down_write(&nm_i->nat_tree_lock);
>  	radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
> -	up_write(&nm_i->nat_tree_lock);
>  	kmem_cache_free(nat_entry_set_slab, set);
>  }
> 
> @@ -1978,6 +1970,9 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> 
>  	if (!nm_i->dirty_nat_cnt)
>  		return;
> +
> +	down_write(&nm_i->nat_tree_lock);
> +
>  	/*
>  	 * if there are no enough space in journal to store dirty nat
>  	 * entries, remove all entries from journal and merge them
> @@ -1986,7 +1981,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>  	if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
>  		remove_nats_in_journal(sbi);
> 
> -	down_write(&nm_i->nat_tree_lock);
>  	while ((found = __gang_lookup_nat_set(nm_i,
>  					set_idx, SETVEC_SIZE, setvec))) {
>  		unsigned idx;
> @@ -1995,12 +1989,13 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
>  			__adjust_nat_entry_set(setvec[idx], &sets,
>  							MAX_NAT_JENTRIES(sum));
>  	}
> -	up_write(&nm_i->nat_tree_lock);
> 
>  	/* flush dirty nats in nat entry set */
>  	list_for_each_entry_safe(set, tmp, &sets, set_list)
>  		__flush_nat_entry_set(sbi, set);
> 
> +	up_write(&nm_i->nat_tree_lock);
> +
>  	f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
>  }
> 
> --
> 2.6.3
> 
> 
> ------------------------------------------------------------------------------
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [f2fs-dev] [PATCH 1/3] f2fs: check the page status filled from disk
  2016-01-05  9:31 ` [f2fs-dev] [PATCH 1/3] f2fs: check the page status filled from disk Chao Yu
@ 2016-01-05 17:48   ` Jaegeuk Kim
  2016-01-06  1:21     ` Chao Yu
  0 siblings, 1 reply; 12+ messages in thread
From: Jaegeuk Kim @ 2016-01-05 17:48 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Chao,

On Tue, Jan 05, 2016 at 05:31:51PM +0800, Chao Yu wrote:
> Hi Jaegeuk,
> 
> > -----Original Message-----
> > From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> > Sent: Sunday, January 03, 2016 9:26 AM
> > To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> > linux-f2fs-devel@lists.sourceforge.net
> > Cc: Jaegeuk Kim
> > Subject: [f2fs-dev] [PATCH 1/3] f2fs: check the page status filled from disk
> > 
> > After reading a page, we need to check whether there is any error.
> > 
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > ---
> >  fs/f2fs/data.c | 8 ++++++++
> >  1 file changed, 8 insertions(+)
> > 
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 89a978c..11b2111 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -448,6 +448,14 @@ repeat:
> > 
> >  		/* wait for read completion */
> >  		lock_page(page);
> > +		if (unlikely(!PageUptodate(page))) {
> > +			f2fs_put_page(page, 1);
> > +			return ERR_PTR(-EIO);
> 
> There is a convention in get_new_data_page, anyway we should release ipage
> if there is any error occurs, but I think it will be ok to return directly
> since it seems impossible the new dentry page has its real block address.

Makes sense, but definitely ipage should be put. :)

> 
> To avoid any bug here or wrong usage, how about add bug_on as following patch?
> 
> >From d92f0f34493b27ef28da67c446d552ce721b5d6f Mon Sep 17 00:00:00 2001
> From: Chao Yu <chao2.yu@samsung.com>
> Date: Tue, 5 Jan 2016 15:28:56 +0800
> Subject: [PATCH] f2fs: add f2fs_bug_on in get_new_data_page
> 
> In get_new_data_page, locked inode page should not be hold before
> get_read_data_page, this patch adds f2fs_bug_on to detect this
> condition.
> 
> Signed-off-by: Chao Yu <chao2.yu@samsung.com>
> ---
>  fs/f2fs/data.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 48f0bd3..2c5e3f6 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -440,6 +440,8 @@ repeat:
>  		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
>  		SetPageUptodate(page);
>  	} else {
> +		f2fs_bug_on(F2FS_I_SB(inode), ipage);
> +
>  		f2fs_put_page(page, 1);
>  
>  		page = get_read_data_page(inode, index, READ_SYNC, true);
> -- 
> 2.6.3
> 
> 
> > +		}
> > +		if (unlikely(page->mapping != mapping)) {
> > +			f2fs_put_page(page, 1);
> > +			goto repeat;
> > +		}
> 
> How about use get_lock_data_page to avoid duplicated code?

Agreed.

How about this?

>From fef77fb244a706491e8e4c46cb245e99e22003c3 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Fri, 1 Jan 2016 22:03:47 -0800
Subject: [PATCH] f2fs: check the page status filled from disk

After reading a page, we need to check whether there is any error.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 89a978c..89d633a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -442,12 +442,16 @@ repeat:
 	} else {
 		f2fs_put_page(page, 1);
 
-		page = get_read_data_page(inode, index, READ_SYNC, true);
-		if (IS_ERR(page))
-			goto repeat;
+		f2fs_bug_on(F2FS_I_SB(inode), ipage);
 
-		/* wait for read completion */
-		lock_page(page);
+		page = get_lock_data_page(inode, index, true);
+		if (IS_ERR(page)) {
+			if (PTR_ERR(page) == -EIO) {
+				f2fs_put_page(ipage, 1);
+				return page;
+			}
+			goto repeat;
+		}
 	}
 got_it:
 	if (new_i_size && i_size_read(inode) <
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [f2fs-dev] [PATCH 2/3] f2fs: cover more area with nat_tree_lock
  2016-01-05  9:33   ` [f2fs-dev] " Chao Yu
@ 2016-01-05 17:57     ` Jaegeuk Kim
  2016-01-06  3:57       ` Chao Yu
  0 siblings, 1 reply; 12+ messages in thread
From: Jaegeuk Kim @ 2016-01-05 17:57 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Chao,

On Tue, Jan 05, 2016 at 05:33:31PM +0800, Chao Yu wrote:
> Hi Jaegeuk,
> 
> > -----Original Message-----
> > From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> > Sent: Sunday, January 03, 2016 9:26 AM
> > To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> > linux-f2fs-devel@lists.sourceforge.net
> > Cc: Jaegeuk Kim
> > Subject: [f2fs-dev] [PATCH 2/3] f2fs: cover more area with nat_tree_lock
> > 
> > There was a subtle bug on nat cache management which incurs wrong nid allocation
> > or wrong block addresses when try_to_free_nats is triggered heavily.
> > This patch enlarges the previous coverage of nat_tree_lock to avoid data race.
> 
> Have you figured out how this happen? I'm curious about this issue,
> since still I can't reproduce it and find any clue by reviewing code
> so far.

It's very very subtle bug. I got one panic after 2-days fsstress execution with
flushing caches very frequently.
The possible scenario was to mix a lot of directory operations, f2fs's shrinking
path, and fsyncing files.

And, the suspicious functions are
 - try_to_free_nats,
 - f2fs_find_entry->get_node_page->get_node_info->cache_nat_entry
 - fsync->checkpoint->flush_nat_entries
 - build_free_nids

I guess there is somewhat data race to grab and release nat cache entries when
flush_nat_entries is doing, since f2fs_find_entry is not covered by
f2fs_lock_op.

A good thing is that, with this patch, I couldn't get any bug again for 4 days;
still running tho.

Nevertheless, I couldn't describe this precisely, since I couldn't specify the
real root-cause.

Thanks,

> 
> Thanks,
> 
> > 
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > ---
> >  fs/f2fs/node.c | 29 ++++++++++++-----------------
> >  1 file changed, 12 insertions(+), 17 deletions(-)
> > 
> > diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> > index 669c44e..4dab09f 100644
> > --- a/fs/f2fs/node.c
> > +++ b/fs/f2fs/node.c
> > @@ -262,13 +262,11 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
> >  {
> >  	struct nat_entry *e;
> > 
> > -	down_write(&nm_i->nat_tree_lock);
> >  	e = __lookup_nat_cache(nm_i, nid);
> >  	if (!e) {
> >  		e = grab_nat_entry(nm_i, nid);
> >  		node_info_from_raw_nat(&e->ni, ne);
> >  	}
> > -	up_write(&nm_i->nat_tree_lock);
> >  }
> > 
> >  static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
> > @@ -380,6 +378,8 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info
> > *ni)
> > 
> >  	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
> > 
> > +	down_write(&nm_i->nat_tree_lock);
> > +
> >  	/* Check current segment summary */
> >  	mutex_lock(&curseg->curseg_mutex);
> >  	i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
> > @@ -400,6 +400,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info
> > *ni)
> >  cache:
> >  	/* cache nat entry */
> >  	cache_nat_entry(NM_I(sbi), nid, &ne);
> > +	up_write(&nm_i->nat_tree_lock);
> >  }
> > 
> >  /*
> > @@ -1459,13 +1460,10 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
> > 
> >  	if (build) {
> >  		/* do not add allocated nids */
> > -		down_read(&nm_i->nat_tree_lock);
> >  		ne = __lookup_nat_cache(nm_i, nid);
> > -		if (ne &&
> > -			(!get_nat_flag(ne, IS_CHECKPOINTED) ||
> > +		if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
> >  				nat_get_blkaddr(ne) != NULL_ADDR))
> >  			allocated = true;
> > -		up_read(&nm_i->nat_tree_lock);
> >  		if (allocated)
> >  			return 0;
> >  	}
> > @@ -1551,6 +1549,8 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
> >  	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
> >  							META_NAT, true);
> > 
> > +	down_read(&nm_i->nat_tree_lock);
> > +
> >  	while (1) {
> >  		struct page *page = get_current_nat_page(sbi, nid);
> > 
> > @@ -1579,6 +1579,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
> >  			remove_free_nid(nm_i, nid);
> >  	}
> >  	mutex_unlock(&curseg->curseg_mutex);
> > +	up_read(&nm_i->nat_tree_lock);
> > 
> >  	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
> >  					nm_i->ra_nid_pages, META_NAT, false);
> > @@ -1861,14 +1862,12 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
> > 
> >  		raw_ne = nat_in_journal(sum, i);
> > 
> > -		down_write(&nm_i->nat_tree_lock);
> >  		ne = __lookup_nat_cache(nm_i, nid);
> >  		if (!ne) {
> >  			ne = grab_nat_entry(nm_i, nid);
> >  			node_info_from_raw_nat(&ne->ni, &raw_ne);
> >  		}
> >  		__set_nat_cache_dirty(nm_i, ne);
> > -		up_write(&nm_i->nat_tree_lock);
> >  	}
> >  	update_nats_in_cursum(sum, -i);
> >  	mutex_unlock(&curseg->curseg_mutex);
> > @@ -1902,7 +1901,6 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >  	struct f2fs_nat_block *nat_blk;
> >  	struct nat_entry *ne, *cur;
> >  	struct page *page = NULL;
> > -	struct f2fs_nm_info *nm_i = NM_I(sbi);
> > 
> >  	/*
> >  	 * there are two steps to flush nat entries:
> > @@ -1939,12 +1937,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >  			raw_ne = &nat_blk->entries[nid - start_nid];
> >  		}
> >  		raw_nat_from_node_info(raw_ne, &ne->ni);
> > -
> > -		down_write(&NM_I(sbi)->nat_tree_lock);
> >  		nat_reset_flag(ne);
> >  		__clear_nat_cache_dirty(NM_I(sbi), ne);
> > -		up_write(&NM_I(sbi)->nat_tree_lock);
> > -
> >  		if (nat_get_blkaddr(ne) == NULL_ADDR)
> >  			add_free_nid(sbi, nid, false);
> >  	}
> > @@ -1956,9 +1950,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> > 
> >  	f2fs_bug_on(sbi, set->entry_cnt);
> > 
> > -	down_write(&nm_i->nat_tree_lock);
> >  	radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
> > -	up_write(&nm_i->nat_tree_lock);
> >  	kmem_cache_free(nat_entry_set_slab, set);
> >  }
> > 
> > @@ -1978,6 +1970,9 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> > 
> >  	if (!nm_i->dirty_nat_cnt)
> >  		return;
> > +
> > +	down_write(&nm_i->nat_tree_lock);
> > +
> >  	/*
> >  	 * if there are no enough space in journal to store dirty nat
> >  	 * entries, remove all entries from journal and merge them
> > @@ -1986,7 +1981,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> >  	if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> >  		remove_nats_in_journal(sbi);
> > 
> > -	down_write(&nm_i->nat_tree_lock);
> >  	while ((found = __gang_lookup_nat_set(nm_i,
> >  					set_idx, SETVEC_SIZE, setvec))) {
> >  		unsigned idx;
> > @@ -1995,12 +1989,13 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> >  			__adjust_nat_entry_set(setvec[idx], &sets,
> >  							MAX_NAT_JENTRIES(sum));
> >  	}
> > -	up_write(&nm_i->nat_tree_lock);
> > 
> >  	/* flush dirty nats in nat entry set */
> >  	list_for_each_entry_safe(set, tmp, &sets, set_list)
> >  		__flush_nat_entry_set(sbi, set);
> > 
> > +	up_write(&nm_i->nat_tree_lock);
> > +
> >  	f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
> >  }
> > 
> > --
> > 2.6.3
> > 
> > 
> > ------------------------------------------------------------------------------
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > Linux-f2fs-devel@lists.sourceforge.net
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [f2fs-dev] [PATCH 1/3] f2fs: check the page status filled from disk
  2016-01-05 17:48   ` Jaegeuk Kim
@ 2016-01-06  1:21     ` Chao Yu
  2016-01-06  2:30       ` Jaegeuk Kim
  0 siblings, 1 reply; 12+ messages in thread
From: Chao Yu @ 2016-01-06  1:21 UTC (permalink / raw)
  To: 'Jaegeuk Kim'; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Jaegeuk,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> Sent: Wednesday, January 06, 2016 1:49 AM
> To: Chao Yu
> Cc: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> linux-f2fs-devel@lists.sourceforge.net
> Subject: Re: [f2fs-dev] [PATCH 1/3] f2fs: check the page status filled from disk
> 
> Hi Chao,
> 
> On Tue, Jan 05, 2016 at 05:31:51PM +0800, Chao Yu wrote:
> > Hi Jaegeuk,
> >
> > > -----Original Message-----
> > > From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> > > Sent: Sunday, January 03, 2016 9:26 AM
> > > To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> > > linux-f2fs-devel@lists.sourceforge.net
> > > Cc: Jaegeuk Kim
> > > Subject: [f2fs-dev] [PATCH 1/3] f2fs: check the page status filled from disk
> > >
> > > After reading a page, we need to check whether there is any error.
> > >
> > > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > > ---
> > >  fs/f2fs/data.c | 8 ++++++++
> > >  1 file changed, 8 insertions(+)
> > >
> > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > > index 89a978c..11b2111 100644
> > > --- a/fs/f2fs/data.c
> > > +++ b/fs/f2fs/data.c
> > > @@ -448,6 +448,14 @@ repeat:
> > >
> > >  		/* wait for read completion */
> > >  		lock_page(page);
> > > +		if (unlikely(!PageUptodate(page))) {
> > > +			f2fs_put_page(page, 1);
> > > +			return ERR_PTR(-EIO);
> >
> > There is a convention in get_new_data_page, anyway we should release ipage
> > if there is any error occurs, but I think it will be ok to return directly
> > since it seems impossible the new dentry page has its real block address.
> 
> Makes sense, but definitely ipage should be put. :)

Alright. :)

> 
> >
> > To avoid any bug here or wrong usage, how about add bug_on as following patch?
> >
> > >From d92f0f34493b27ef28da67c446d552ce721b5d6f Mon Sep 17 00:00:00 2001
> > From: Chao Yu <chao2.yu@samsung.com>
> > Date: Tue, 5 Jan 2016 15:28:56 +0800
> > Subject: [PATCH] f2fs: add f2fs_bug_on in get_new_data_page
> >
> > In get_new_data_page, locked inode page should not be hold before
> > get_read_data_page, this patch adds f2fs_bug_on to detect this
> > condition.
> >
> > Signed-off-by: Chao Yu <chao2.yu@samsung.com>
> > ---
> >  fs/f2fs/data.c | 2 ++
> >  1 file changed, 2 insertions(+)
> >
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 48f0bd3..2c5e3f6 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -440,6 +440,8 @@ repeat:
> >  		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
> >  		SetPageUptodate(page);
> >  	} else {
> > +		f2fs_bug_on(F2FS_I_SB(inode), ipage);
> > +
> >  		f2fs_put_page(page, 1);
> >
> >  		page = get_read_data_page(inode, index, READ_SYNC, true);
> > --
> > 2.6.3
> >
> >
> > > +		}
> > > +		if (unlikely(page->mapping != mapping)) {
> > > +			f2fs_put_page(page, 1);
> > > +			goto repeat;
> > > +		}
> >
> > How about use get_lock_data_page to avoid duplicated code?
> 
> Agreed.
> 
> How about this?
> 
> From fef77fb244a706491e8e4c46cb245e99e22003c3 Mon Sep 17 00:00:00 2001
> From: Jaegeuk Kim <jaegeuk@kernel.org>
> Date: Fri, 1 Jan 2016 22:03:47 -0800
> Subject: [PATCH] f2fs: check the page status filled from disk
> 
> After reading a page, we need to check whether there is any error.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>  fs/f2fs/data.c | 14 +++++++++-----
>  1 file changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 89a978c..89d633a 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -442,12 +442,16 @@ repeat:
>  	} else {
>  		f2fs_put_page(page, 1);
> 
> -		page = get_read_data_page(inode, index, READ_SYNC, true);
> -		if (IS_ERR(page))
> -			goto repeat;
> +		f2fs_bug_on(F2FS_I_SB(inode), ipage);
> 
> -		/* wait for read completion */
> -		lock_page(page);
> +		page = get_lock_data_page(inode, index, true);
> +		if (IS_ERR(page)) {
> +			if (PTR_ERR(page) == -EIO) {
> +				f2fs_put_page(ipage, 1);
> +				return page;
> +			}
> +			goto repeat;

Seems if get_lock_data_page always return -EFAULT, we may run into an
infinite loop. IMO, it's not a bad thing to tolerate other error more
than EIO returned from get_lock_data_page. How about return directly
when error is returned? And add a bug_on for ENOENT which seems not
impossible here?

Thanks,

> +		}
>  	}
>  got_it:
>  	if (new_i_size && i_size_read(inode) <
> --
> 2.6.3



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [f2fs-dev] [PATCH 1/3] f2fs: check the page status filled from disk
  2016-01-06  1:21     ` Chao Yu
@ 2016-01-06  2:30       ` Jaegeuk Kim
  0 siblings, 0 replies; 12+ messages in thread
From: Jaegeuk Kim @ 2016-01-06  2:30 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi,

On Wed, Jan 06, 2016 at 09:21:29AM +0800, Chao Yu wrote:
> Hi Jaegeuk,
> 
> > -----Original Message-----
> > From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> > Sent: Wednesday, January 06, 2016 1:49 AM
> > To: Chao Yu
> > Cc: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> > linux-f2fs-devel@lists.sourceforge.net
> > Subject: Re: [f2fs-dev] [PATCH 1/3] f2fs: check the page status filled from disk
> > 
> > Hi Chao,
> > 
> > On Tue, Jan 05, 2016 at 05:31:51PM +0800, Chao Yu wrote:
> > > Hi Jaegeuk,
> > >
> > > > -----Original Message-----
> > > > From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> > > > Sent: Sunday, January 03, 2016 9:26 AM
> > > > To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> > > > linux-f2fs-devel@lists.sourceforge.net
> > > > Cc: Jaegeuk Kim
> > > > Subject: [f2fs-dev] [PATCH 1/3] f2fs: check the page status filled from disk
> > > >
> > > > After reading a page, we need to check whether there is any error.
> > > >
> > > > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > > > ---
> > > >  fs/f2fs/data.c | 8 ++++++++
> > > >  1 file changed, 8 insertions(+)
> > > >
> > > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > > > index 89a978c..11b2111 100644
> > > > --- a/fs/f2fs/data.c
> > > > +++ b/fs/f2fs/data.c
> > > > @@ -448,6 +448,14 @@ repeat:
> > > >
> > > >  		/* wait for read completion */
> > > >  		lock_page(page);
> > > > +		if (unlikely(!PageUptodate(page))) {
> > > > +			f2fs_put_page(page, 1);
> > > > +			return ERR_PTR(-EIO);
> > >
> > > There is a convention in get_new_data_page, anyway we should release ipage
> > > if there is any error occurs, but I think it will be ok to return directly
> > > since it seems impossible the new dentry page has its real block address.
> > 
> > Makes sense, but definitely ipage should be put. :)
> 
> Alright. :)
> 
> > 
> > >
> > > To avoid any bug here or wrong usage, how about add bug_on as following patch?
> > >
> > > >From d92f0f34493b27ef28da67c446d552ce721b5d6f Mon Sep 17 00:00:00 2001
> > > From: Chao Yu <chao2.yu@samsung.com>
> > > Date: Tue, 5 Jan 2016 15:28:56 +0800
> > > Subject: [PATCH] f2fs: add f2fs_bug_on in get_new_data_page
> > >
> > > In get_new_data_page, locked inode page should not be hold before
> > > get_read_data_page, this patch adds f2fs_bug_on to detect this
> > > condition.
> > >
> > > Signed-off-by: Chao Yu <chao2.yu@samsung.com>
> > > ---
> > >  fs/f2fs/data.c | 2 ++
> > >  1 file changed, 2 insertions(+)
> > >
> > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > > index 48f0bd3..2c5e3f6 100644
> > > --- a/fs/f2fs/data.c
> > > +++ b/fs/f2fs/data.c
> > > @@ -440,6 +440,8 @@ repeat:
> > >  		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
> > >  		SetPageUptodate(page);
> > >  	} else {
> > > +		f2fs_bug_on(F2FS_I_SB(inode), ipage);
> > > +
> > >  		f2fs_put_page(page, 1);
> > >
> > >  		page = get_read_data_page(inode, index, READ_SYNC, true);
> > > --
> > > 2.6.3
> > >
> > >
> > > > +		}
> > > > +		if (unlikely(page->mapping != mapping)) {
> > > > +			f2fs_put_page(page, 1);
> > > > +			goto repeat;
> > > > +		}
> > >
> > > How about use get_lock_data_page to avoid duplicated code?
> > 
> > Agreed.
> > 
> > How about this?
> > 
> > From fef77fb244a706491e8e4c46cb245e99e22003c3 Mon Sep 17 00:00:00 2001
> > From: Jaegeuk Kim <jaegeuk@kernel.org>
> > Date: Fri, 1 Jan 2016 22:03:47 -0800
> > Subject: [PATCH] f2fs: check the page status filled from disk
> > 
> > After reading a page, we need to check whether there is any error.
> > 
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > ---
> >  fs/f2fs/data.c | 14 +++++++++-----
> >  1 file changed, 9 insertions(+), 5 deletions(-)
> > 
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 89a978c..89d633a 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -442,12 +442,16 @@ repeat:
> >  	} else {
> >  		f2fs_put_page(page, 1);
> > 
> > -		page = get_read_data_page(inode, index, READ_SYNC, true);
> > -		if (IS_ERR(page))
> > -			goto repeat;
> > +		f2fs_bug_on(F2FS_I_SB(inode), ipage);
> > 
> > -		/* wait for read completion */
> > -		lock_page(page);
> > +		page = get_lock_data_page(inode, index, true);
> > +		if (IS_ERR(page)) {
> > +			if (PTR_ERR(page) == -EIO) {
> > +				f2fs_put_page(ipage, 1);
> > +				return page;
> > +			}
> > +			goto repeat;
> 
> Seems if get_lock_data_page always return -EFAULT, we may run into an
> infinite loop. IMO, it's not a bad thing to tolerate other error more
> than EIO returned from get_lock_data_page. How about return directly
> when error is returned? And add a bug_on for ENOENT which seems not
> impossible here?

Hmm. I can only expect EIO, ENOMEM, and ENOENT.
What condition can we get EFAULT?

Thanks,

> 
> Thanks,
> 
> > +		}
> >  	}
> >  got_it:
> >  	if (new_i_size && i_size_read(inode) <
> > --
> > 2.6.3
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [f2fs-dev] [PATCH 2/3] f2fs: cover more area with nat_tree_lock
  2016-01-05 17:57     ` Jaegeuk Kim
@ 2016-01-06  3:57       ` Chao Yu
  0 siblings, 0 replies; 12+ messages in thread
From: Chao Yu @ 2016-01-06  3:57 UTC (permalink / raw)
  To: 'Jaegeuk Kim'; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Jaegeuk,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> Sent: Wednesday, January 06, 2016 1:58 AM
> To: Chao Yu
> Cc: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> linux-f2fs-devel@lists.sourceforge.net
> Subject: Re: [f2fs-dev] [PATCH 2/3] f2fs: cover more area with nat_tree_lock
> 
> Hi Chao,
> 
> On Tue, Jan 05, 2016 at 05:33:31PM +0800, Chao Yu wrote:
> > Hi Jaegeuk,
> >
> > > -----Original Message-----
> > > From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> > > Sent: Sunday, January 03, 2016 9:26 AM
> > > To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> > > linux-f2fs-devel@lists.sourceforge.net
> > > Cc: Jaegeuk Kim
> > > Subject: [f2fs-dev] [PATCH 2/3] f2fs: cover more area with nat_tree_lock
> > >
> > > There was a subtle bug on nat cache management which incurs wrong nid allocation
> > > or wrong block addresses when try_to_free_nats is triggered heavily.
> > > This patch enlarges the previous coverage of nat_tree_lock to avoid data race.
> >
> > Have you figured out how this happen? I'm curious about this issue,
> > since still I can't reproduce it and find any clue by reviewing code
> > so far.
> 
> It's very very subtle bug. I got one panic after 2-days fsstress execution with
> flushing caches very frequently.
> The possible scenario was to mix a lot of directory operations, f2fs's shrinking
> path, and fsyncing files.
> 
> And, the suspicious functions are
>  - try_to_free_nats,
>  - f2fs_find_entry->get_node_page->get_node_info->cache_nat_entry
>  - fsync->checkpoint->flush_nat_entries
>  - build_free_nids

Thanks for your detailed explanation and clues. :)

After I investigate just now, I found one possible case:

- get_node_info
  1. miss cache in nat cache, because valid & dirty nat entry is exist in
     journal cache of curseg
  2. miss cache in journal cache of curseg, because concurrent checkpoint
     merge journal cache into nat cache before flushing.
  3. checkpoint flush valid nat entry (blkaddr is NULL) from nat cache
     to journal cache, and add it to free nid cache.
  4. try_to_free_nats free the clean nat entry in cache.
  5. get old nat entry(blkaddr is non-NULL, last valid one is store in
     journal of curseg) after get_current_nat_page, we update nat cache
     with old nat entry.

Is that right?

Thanks,

> 
> I guess there is somewhat data race to grab and release nat cache entries when
> flush_nat_entries is doing, since f2fs_find_entry is not covered by
> f2fs_lock_op.
> 
> A good thing is that, with this patch, I couldn't get any bug again for 4 days;
> still running tho.
> 
> Nevertheless, I couldn't describe this precisely, since I couldn't specify the
> real root-cause.
> 
> Thanks,
> 
> >
> > Thanks,
> >
> > >
> > > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > > ---
> > >  fs/f2fs/node.c | 29 ++++++++++++-----------------
> > >  1 file changed, 12 insertions(+), 17 deletions(-)
> > >
> > > diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> > > index 669c44e..4dab09f 100644
> > > --- a/fs/f2fs/node.c
> > > +++ b/fs/f2fs/node.c
> > > @@ -262,13 +262,11 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
> > >  {
> > >  	struct nat_entry *e;
> > >
> > > -	down_write(&nm_i->nat_tree_lock);
> > >  	e = __lookup_nat_cache(nm_i, nid);
> > >  	if (!e) {
> > >  		e = grab_nat_entry(nm_i, nid);
> > >  		node_info_from_raw_nat(&e->ni, ne);
> > >  	}
> > > -	up_write(&nm_i->nat_tree_lock);
> > >  }
> > >
> > >  static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
> > > @@ -380,6 +378,8 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info
> > > *ni)
> > >
> > >  	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
> > >
> > > +	down_write(&nm_i->nat_tree_lock);
> > > +
> > >  	/* Check current segment summary */
> > >  	mutex_lock(&curseg->curseg_mutex);
> > >  	i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
> > > @@ -400,6 +400,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info
> > > *ni)
> > >  cache:
> > >  	/* cache nat entry */
> > >  	cache_nat_entry(NM_I(sbi), nid, &ne);
> > > +	up_write(&nm_i->nat_tree_lock);
> > >  }
> > >
> > >  /*
> > > @@ -1459,13 +1460,10 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool
> build)
> > >
> > >  	if (build) {
> > >  		/* do not add allocated nids */
> > > -		down_read(&nm_i->nat_tree_lock);
> > >  		ne = __lookup_nat_cache(nm_i, nid);
> > > -		if (ne &&
> > > -			(!get_nat_flag(ne, IS_CHECKPOINTED) ||
> > > +		if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
> > >  				nat_get_blkaddr(ne) != NULL_ADDR))
> > >  			allocated = true;
> > > -		up_read(&nm_i->nat_tree_lock);
> > >  		if (allocated)
> > >  			return 0;
> > >  	}
> > > @@ -1551,6 +1549,8 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
> > >  	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
> > >  							META_NAT, true);
> > >
> > > +	down_read(&nm_i->nat_tree_lock);
> > > +
> > >  	while (1) {
> > >  		struct page *page = get_current_nat_page(sbi, nid);
> > >
> > > @@ -1579,6 +1579,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
> > >  			remove_free_nid(nm_i, nid);
> > >  	}
> > >  	mutex_unlock(&curseg->curseg_mutex);
> > > +	up_read(&nm_i->nat_tree_lock);
> > >
> > >  	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
> > >  					nm_i->ra_nid_pages, META_NAT, false);
> > > @@ -1861,14 +1862,12 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
> > >
> > >  		raw_ne = nat_in_journal(sum, i);
> > >
> > > -		down_write(&nm_i->nat_tree_lock);
> > >  		ne = __lookup_nat_cache(nm_i, nid);
> > >  		if (!ne) {
> > >  			ne = grab_nat_entry(nm_i, nid);
> > >  			node_info_from_raw_nat(&ne->ni, &raw_ne);
> > >  		}
> > >  		__set_nat_cache_dirty(nm_i, ne);
> > > -		up_write(&nm_i->nat_tree_lock);
> > >  	}
> > >  	update_nats_in_cursum(sum, -i);
> > >  	mutex_unlock(&curseg->curseg_mutex);
> > > @@ -1902,7 +1901,6 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> > >  	struct f2fs_nat_block *nat_blk;
> > >  	struct nat_entry *ne, *cur;
> > >  	struct page *page = NULL;
> > > -	struct f2fs_nm_info *nm_i = NM_I(sbi);
> > >
> > >  	/*
> > >  	 * there are two steps to flush nat entries:
> > > @@ -1939,12 +1937,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> > >  			raw_ne = &nat_blk->entries[nid - start_nid];
> > >  		}
> > >  		raw_nat_from_node_info(raw_ne, &ne->ni);
> > > -
> > > -		down_write(&NM_I(sbi)->nat_tree_lock);
> > >  		nat_reset_flag(ne);
> > >  		__clear_nat_cache_dirty(NM_I(sbi), ne);
> > > -		up_write(&NM_I(sbi)->nat_tree_lock);
> > > -
> > >  		if (nat_get_blkaddr(ne) == NULL_ADDR)
> > >  			add_free_nid(sbi, nid, false);
> > >  	}
> > > @@ -1956,9 +1950,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> > >
> > >  	f2fs_bug_on(sbi, set->entry_cnt);
> > >
> > > -	down_write(&nm_i->nat_tree_lock);
> > >  	radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
> > > -	up_write(&nm_i->nat_tree_lock);
> > >  	kmem_cache_free(nat_entry_set_slab, set);
> > >  }
> > >
> > > @@ -1978,6 +1970,9 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> > >
> > >  	if (!nm_i->dirty_nat_cnt)
> > >  		return;
> > > +
> > > +	down_write(&nm_i->nat_tree_lock);
> > > +
> > >  	/*
> > >  	 * if there are no enough space in journal to store dirty nat
> > >  	 * entries, remove all entries from journal and merge them
> > > @@ -1986,7 +1981,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> > >  	if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> > >  		remove_nats_in_journal(sbi);
> > >
> > > -	down_write(&nm_i->nat_tree_lock);
> > >  	while ((found = __gang_lookup_nat_set(nm_i,
> > >  					set_idx, SETVEC_SIZE, setvec))) {
> > >  		unsigned idx;
> > > @@ -1995,12 +1989,13 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> > >  			__adjust_nat_entry_set(setvec[idx], &sets,
> > >  							MAX_NAT_JENTRIES(sum));
> > >  	}
> > > -	up_write(&nm_i->nat_tree_lock);
> > >
> > >  	/* flush dirty nats in nat entry set */
> > >  	list_for_each_entry_safe(set, tmp, &sets, set_list)
> > >  		__flush_nat_entry_set(sbi, set);
> > >
> > > +	up_write(&nm_i->nat_tree_lock);
> > > +
> > >  	f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
> > >  }
> > >
> > > --
> > > 2.6.3
> > >
> > >
> > > ------------------------------------------------------------------------------
> > > _______________________________________________
> > > Linux-f2fs-devel mailing list
> > > Linux-f2fs-devel@lists.sourceforge.net
> > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3 v2] f2fs: check the page status filled from disk
  2016-01-03  1:26 [PATCH 1/3] f2fs: check the page status filled from disk Jaegeuk Kim
                   ` (2 preceding siblings ...)
  2016-01-05  9:31 ` [f2fs-dev] [PATCH 1/3] f2fs: check the page status filled from disk Chao Yu
@ 2016-01-06  4:10 ` Jaegeuk Kim
  2016-01-06  5:20   ` [f2fs-dev] " Chao Yu
  3 siblings, 1 reply; 12+ messages in thread
From: Jaegeuk Kim @ 2016-01-06  4:10 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel

Change log from v1:
 - use get_lock_data_page and return error, suggested by Chao

>From 4882f227e71b482c53e55a5c3ad559d33f620a20 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Fri, 1 Jan 2016 22:03:47 -0800
Subject: [PATCH] f2fs: check the page status filled from disk

After reading a page, we need to check whether there is any error.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 89a978c..0bfa02a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -442,12 +442,11 @@ repeat:
 	} else {
 		f2fs_put_page(page, 1);
 
-		page = get_read_data_page(inode, index, READ_SYNC, true);
+		/* if ipage exists, blkaddr should be NEW_ADDR */
+		f2fs_bug_on(F2FS_I_SB(inode), ipage);
+		page = get_lock_data_page(inode, index, true);
 		if (IS_ERR(page))
-			goto repeat;
-
-		/* wait for read completion */
-		lock_page(page);
+			return page;
 	}
 got_it:
 	if (new_i_size && i_size_read(inode) <
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* RE: [f2fs-dev] [PATCH 1/3 v2] f2fs: check the page status filled from disk
  2016-01-06  4:10 ` [PATCH 1/3 v2] " Jaegeuk Kim
@ 2016-01-06  5:20   ` Chao Yu
  0 siblings, 0 replies; 12+ messages in thread
From: Chao Yu @ 2016-01-06  5:20 UTC (permalink / raw)
  To: 'Jaegeuk Kim'; +Cc: linux-kernel, linux-fsdevel, linux-f2fs-devel

Hi Jaegeuk,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> Sent: Wednesday, January 06, 2016 12:11 PM
> To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> linux-f2fs-devel@lists.sourceforge.net
> Subject: Re: [f2fs-dev] [PATCH 1/3 v2] f2fs: check the page status filled from disk
> 
> Change log from v1:
>  - use get_lock_data_page and return error, suggested by Chao
> 
> >From 4882f227e71b482c53e55a5c3ad559d33f620a20 Mon Sep 17 00:00:00 2001
> From: Jaegeuk Kim <jaegeuk@kernel.org>
> Date: Fri, 1 Jan 2016 22:03:47 -0800
> Subject: [PATCH] f2fs: check the page status filled from disk
> 
> After reading a page, we need to check whether there is any error.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

Looks good to me! :)

Reviewed-by: Chao Yu <chao2.yu@samsung.com>


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2016-01-06  5:21 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-01-03  1:26 [PATCH 1/3] f2fs: check the page status filled from disk Jaegeuk Kim
2016-01-03  1:26 ` [PATCH 2/3] f2fs: cover more area with nat_tree_lock Jaegeuk Kim
2016-01-05  9:33   ` [f2fs-dev] " Chao Yu
2016-01-05 17:57     ` Jaegeuk Kim
2016-01-06  3:57       ` Chao Yu
2016-01-03  1:26 ` [PATCH 3/3] Revert "f2fs: check the node block address of newly allocated nid" Jaegeuk Kim
2016-01-05  9:31 ` [f2fs-dev] [PATCH 1/3] f2fs: check the page status filled from disk Chao Yu
2016-01-05 17:48   ` Jaegeuk Kim
2016-01-06  1:21     ` Chao Yu
2016-01-06  2:30       ` Jaegeuk Kim
2016-01-06  4:10 ` [PATCH 1/3 v2] " Jaegeuk Kim
2016-01-06  5:20   ` [f2fs-dev] " Chao Yu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).