All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] f2fs: use rwlock instead of rwsem for journal
@ 2021-07-22  1:41 ` Jaegeuk Kim
  0 siblings, 0 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2021-07-22  1:41 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This tries to fix priority inversion in the below condition resulting in
long checkpoint delay.

f2fs_get_node_info()
 - nat_tree_lock
  -> sleep in journal_rwsem
                                     checkpoint
                                     - waiting for nat_tree_lock

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/node.c    | 16 ++++++++--------
 fs/f2fs/segment.c | 22 +++++++++++-----------
 fs/f2fs/segment.h |  2 +-
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0be9e2d7120e..821a40e02fb4 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -567,13 +567,13 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
 	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
 
 	/* Check current segment summary */
-	down_read(&curseg->journal_rwsem);
+	read_lock(&curseg->journal_rwlock);
 	i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
 	if (i >= 0) {
 		ne = nat_in_journal(journal, i);
 		node_info_from_raw_nat(ni, &ne);
 	}
-	up_read(&curseg->journal_rwsem);
+	read_unlock(&curseg->journal_rwlock);
 	if (i >= 0) {
 		up_read(&nm_i->nat_tree_lock);
 		goto cache;
@@ -2338,7 +2338,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
 	struct f2fs_journal *journal = curseg->journal;
 	int i;
 
-	down_read(&curseg->journal_rwsem);
+	read_lock(&curseg->journal_rwlock);
 	for (i = 0; i < nats_in_cursum(journal); i++) {
 		block_t addr;
 		nid_t nid;
@@ -2350,7 +2350,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
 		else
 			remove_free_nid(sbi, nid);
 	}
-	up_read(&curseg->journal_rwsem);
+	read_unlock(&curseg->journal_rwlock);
 }
 
 static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
@@ -2799,7 +2799,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
 	struct f2fs_journal *journal = curseg->journal;
 	int i;
 
-	down_write(&curseg->journal_rwsem);
+	write_lock(&curseg->journal_rwlock);
 	for (i = 0; i < nats_in_cursum(journal); i++) {
 		struct nat_entry *ne;
 		struct f2fs_nat_entry raw_ne;
@@ -2831,7 +2831,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
 		__set_nat_cache_dirty(nm_i, ne);
 	}
 	update_nats_in_cursum(journal, -i);
-	up_write(&curseg->journal_rwsem);
+	write_unlock(&curseg->journal_rwlock);
 }
 
 static void __adjust_nat_entry_set(struct nat_entry_set *nes,
@@ -2906,7 +2906,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 		to_journal = false;
 
 	if (to_journal) {
-		down_write(&curseg->journal_rwsem);
+		write_lock(&curseg->journal_rwlock);
 	} else {
 		page = get_next_nat_page(sbi, start_nid);
 		if (IS_ERR(page))
@@ -2946,7 +2946,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 	}
 
 	if (to_journal) {
-		up_write(&curseg->journal_rwsem);
+		write_unlock(&curseg->journal_rwlock);
 	} else {
 		__update_nat_bits(sbi, start_nid, page);
 		f2fs_put_page(page, 1);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f9b7fb785e1d..c397c1c271ec 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2442,9 +2442,9 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
 
 	mutex_lock(&curseg->curseg_mutex);
 
-	down_read(&curseg->journal_rwsem);
+	read_lock(&curseg->journal_rwlock);
 	memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
-	up_read(&curseg->journal_rwsem);
+	read_unlock(&curseg->journal_rwlock);
 
 	memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
 	memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
@@ -3876,9 +3876,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
 	mutex_lock(&curseg->curseg_mutex);
 
 	/* update journal info */
-	down_write(&curseg->journal_rwsem);
+	write_lock(&curseg->journal_rwlock);
 	memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
-	up_write(&curseg->journal_rwsem);
+	write_unlock(&curseg->journal_rwlock);
 
 	memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
 	memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
@@ -4136,7 +4136,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
 	struct f2fs_journal *journal = curseg->journal;
 	int i;
 
-	down_write(&curseg->journal_rwsem);
+	write_lock(&curseg->journal_rwlock);
 	for (i = 0; i < sits_in_cursum(journal); i++) {
 		unsigned int segno;
 		bool dirtied;
@@ -4148,7 +4148,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
 			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
 	}
 	update_sits_in_cursum(journal, -i);
-	up_write(&curseg->journal_rwsem);
+	write_unlock(&curseg->journal_rwlock);
 }
 
 /*
@@ -4204,7 +4204,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 			to_journal = false;
 
 		if (to_journal) {
-			down_write(&curseg->journal_rwsem);
+			write_lock(&curseg->journal_rwlock);
 		} else {
 			page = get_next_sit_page(sbi, start_segno);
 			raw_sit = page_address(page);
@@ -4251,7 +4251,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 		}
 
 		if (to_journal)
-			up_write(&curseg->journal_rwsem);
+			write_unlock(&curseg->journal_rwlock);
 		else
 			f2fs_put_page(page, 1);
 
@@ -4432,7 +4432,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
 		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
 		if (!array[i].sum_blk)
 			return -ENOMEM;
-		init_rwsem(&array[i].journal_rwsem);
+		rwlock_init(&array[i].journal_rwlock);
 		array[i].journal = f2fs_kzalloc(sbi,
 				sizeof(struct f2fs_journal), GFP_KERNEL);
 		if (!array[i].journal)
@@ -4509,7 +4509,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
 		start_blk += readed;
 	} while (start_blk < sit_blk_cnt);
 
-	down_read(&curseg->journal_rwsem);
+	read_lock(&curseg->journal_rwlock);
 	for (i = 0; i < sits_in_cursum(journal); i++) {
 		unsigned int old_valid_blocks;
 
@@ -4551,7 +4551,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
 							old_valid_blocks;
 		}
 	}
-	up_read(&curseg->journal_rwsem);
+	read_unlock(&curseg->journal_rwlock);
 
 	if (!err && total_node_blocks != valid_node_count(sbi)) {
 		f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 050230c70a53..afb9041dc2e6 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -306,7 +306,7 @@ struct victim_selection {
 struct curseg_info {
 	struct mutex curseg_mutex;		/* lock for consistency */
 	struct f2fs_summary_block *sum_blk;	/* cached summary block */
-	struct rw_semaphore journal_rwsem;	/* protect journal area */
+	rwlock_t journal_rwlock;		/* protect journal area */
 	struct f2fs_journal *journal;		/* cached journal info */
 	unsigned char alloc_type;		/* current allocation type */
 	unsigned short seg_type;		/* segment type like CURSEG_XXX_TYPE */
-- 
2.32.0.402.g57bb445576-goog


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [f2fs-dev] [PATCH] f2fs: use rwlock instead of rwsem for journal
@ 2021-07-22  1:41 ` Jaegeuk Kim
  0 siblings, 0 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2021-07-22  1:41 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This tries to fix priority inversion in the below condition resulting in
long checkpoint delay.

f2fs_get_node_info()
 - nat_tree_lock
  -> sleep in journal_rwsem
                                     checkpoint
                                     - waiting for nat_tree_lock

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/node.c    | 16 ++++++++--------
 fs/f2fs/segment.c | 22 +++++++++++-----------
 fs/f2fs/segment.h |  2 +-
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0be9e2d7120e..821a40e02fb4 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -567,13 +567,13 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
 	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
 
 	/* Check current segment summary */
-	down_read(&curseg->journal_rwsem);
+	read_lock(&curseg->journal_rwlock);
 	i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
 	if (i >= 0) {
 		ne = nat_in_journal(journal, i);
 		node_info_from_raw_nat(ni, &ne);
 	}
-	up_read(&curseg->journal_rwsem);
+	read_unlock(&curseg->journal_rwlock);
 	if (i >= 0) {
 		up_read(&nm_i->nat_tree_lock);
 		goto cache;
@@ -2338,7 +2338,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
 	struct f2fs_journal *journal = curseg->journal;
 	int i;
 
-	down_read(&curseg->journal_rwsem);
+	read_lock(&curseg->journal_rwlock);
 	for (i = 0; i < nats_in_cursum(journal); i++) {
 		block_t addr;
 		nid_t nid;
@@ -2350,7 +2350,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
 		else
 			remove_free_nid(sbi, nid);
 	}
-	up_read(&curseg->journal_rwsem);
+	read_unlock(&curseg->journal_rwlock);
 }
 
 static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
@@ -2799,7 +2799,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
 	struct f2fs_journal *journal = curseg->journal;
 	int i;
 
-	down_write(&curseg->journal_rwsem);
+	write_lock(&curseg->journal_rwlock);
 	for (i = 0; i < nats_in_cursum(journal); i++) {
 		struct nat_entry *ne;
 		struct f2fs_nat_entry raw_ne;
@@ -2831,7 +2831,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
 		__set_nat_cache_dirty(nm_i, ne);
 	}
 	update_nats_in_cursum(journal, -i);
-	up_write(&curseg->journal_rwsem);
+	write_unlock(&curseg->journal_rwlock);
 }
 
 static void __adjust_nat_entry_set(struct nat_entry_set *nes,
@@ -2906,7 +2906,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 		to_journal = false;
 
 	if (to_journal) {
-		down_write(&curseg->journal_rwsem);
+		write_lock(&curseg->journal_rwlock);
 	} else {
 		page = get_next_nat_page(sbi, start_nid);
 		if (IS_ERR(page))
@@ -2946,7 +2946,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 	}
 
 	if (to_journal) {
-		up_write(&curseg->journal_rwsem);
+		write_unlock(&curseg->journal_rwlock);
 	} else {
 		__update_nat_bits(sbi, start_nid, page);
 		f2fs_put_page(page, 1);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f9b7fb785e1d..c397c1c271ec 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2442,9 +2442,9 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
 
 	mutex_lock(&curseg->curseg_mutex);
 
-	down_read(&curseg->journal_rwsem);
+	read_lock(&curseg->journal_rwlock);
 	memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
-	up_read(&curseg->journal_rwsem);
+	read_unlock(&curseg->journal_rwlock);
 
 	memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
 	memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
@@ -3876,9 +3876,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
 	mutex_lock(&curseg->curseg_mutex);
 
 	/* update journal info */
-	down_write(&curseg->journal_rwsem);
+	write_lock(&curseg->journal_rwlock);
 	memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
-	up_write(&curseg->journal_rwsem);
+	write_unlock(&curseg->journal_rwlock);
 
 	memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
 	memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
@@ -4136,7 +4136,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
 	struct f2fs_journal *journal = curseg->journal;
 	int i;
 
-	down_write(&curseg->journal_rwsem);
+	write_lock(&curseg->journal_rwlock);
 	for (i = 0; i < sits_in_cursum(journal); i++) {
 		unsigned int segno;
 		bool dirtied;
@@ -4148,7 +4148,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
 			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
 	}
 	update_sits_in_cursum(journal, -i);
-	up_write(&curseg->journal_rwsem);
+	write_unlock(&curseg->journal_rwlock);
 }
 
 /*
@@ -4204,7 +4204,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 			to_journal = false;
 
 		if (to_journal) {
-			down_write(&curseg->journal_rwsem);
+			write_lock(&curseg->journal_rwlock);
 		} else {
 			page = get_next_sit_page(sbi, start_segno);
 			raw_sit = page_address(page);
@@ -4251,7 +4251,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 		}
 
 		if (to_journal)
-			up_write(&curseg->journal_rwsem);
+			write_unlock(&curseg->journal_rwlock);
 		else
 			f2fs_put_page(page, 1);
 
@@ -4432,7 +4432,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
 		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
 		if (!array[i].sum_blk)
 			return -ENOMEM;
-		init_rwsem(&array[i].journal_rwsem);
+		rwlock_init(&array[i].journal_rwlock);
 		array[i].journal = f2fs_kzalloc(sbi,
 				sizeof(struct f2fs_journal), GFP_KERNEL);
 		if (!array[i].journal)
@@ -4509,7 +4509,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
 		start_blk += readed;
 	} while (start_blk < sit_blk_cnt);
 
-	down_read(&curseg->journal_rwsem);
+	read_lock(&curseg->journal_rwlock);
 	for (i = 0; i < sits_in_cursum(journal); i++) {
 		unsigned int old_valid_blocks;
 
@@ -4551,7 +4551,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
 							old_valid_blocks;
 		}
 	}
-	up_read(&curseg->journal_rwsem);
+	read_unlock(&curseg->journal_rwlock);
 
 	if (!err && total_node_blocks != valid_node_count(sbi)) {
 		f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 050230c70a53..afb9041dc2e6 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -306,7 +306,7 @@ struct victim_selection {
 struct curseg_info {
 	struct mutex curseg_mutex;		/* lock for consistency */
 	struct f2fs_summary_block *sum_blk;	/* cached summary block */
-	struct rw_semaphore journal_rwsem;	/* protect journal area */
+	rwlock_t journal_rwlock;		/* protect journal area */
 	struct f2fs_journal *journal;		/* cached journal info */
 	unsigned char alloc_type;		/* current allocation type */
 	unsigned short seg_type;		/* segment type like CURSEG_XXX_TYPE */
-- 
2.32.0.402.g57bb445576-goog



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH] f2fs: use rwlock instead of rwsem for journal
  2021-07-22  1:41 ` [f2fs-dev] " Jaegeuk Kim
  (?)
@ 2021-07-22  3:06 ` Hillf Danton
  -1 siblings, 0 replies; 16+ messages in thread
From: Hillf Danton @ 2021-07-22  3:06 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel

On Wed, 21 Jul 2021 18:41:49 -0700 Jaegeuk Kim wrote:
>
>This tries to fix priority inversion in the below condition resulting in
>long checkpoint delay.

If you are right and the patch makes real sense, feel free to specify a bit
more on the priority inversion you are trying to fix, and how the fix helps
shorten the checkpoint delay. And test data is welcome.

>
>f2fs_get_node_info()
> - nat_tree_lock
>  -> sleep in journal_rwsem
>                                     checkpoint
>                                     - waiting for nat_tree_lock


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH] f2fs: use rwlock instead of rwsem for journal
  2021-07-22  1:41 ` [f2fs-dev] " Jaegeuk Kim
@ 2021-07-22 13:41   ` Chao Yu
  -1 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2021-07-22 13:41 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-f2fs-devel

On 2021/7/22 9:41, Jaegeuk Kim wrote:
> This tries to fix priority inversion in the below condition resulting in
> long checkpoint delay.
> 
> f2fs_get_node_info()
>   - nat_tree_lock
>    -> sleep in journal_rwsem
>                                       checkpoint
>                                       - waiting for nat_tree_lock
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>   fs/f2fs/node.c    | 16 ++++++++--------
>   fs/f2fs/segment.c | 22 +++++++++++-----------
>   fs/f2fs/segment.h |  2 +-
>   3 files changed, 20 insertions(+), 20 deletions(-)
> 
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 0be9e2d7120e..821a40e02fb4 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -567,13 +567,13 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
>   	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
>   
>   	/* Check current segment summary */
> -	down_read(&curseg->journal_rwsem);
> +	read_lock(&curseg->journal_rwlock);

It seem journal_rwsem covers too many operations, e.g.

- scan_curseg_cache
  - add_free_nid
   - f2fs_kmem_cache_alloc
    - kmem_cache_alloc(__GFP_NOFAIL)

Thanks,

>   	i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
>   	if (i >= 0) {
>   		ne = nat_in_journal(journal, i);
>   		node_info_from_raw_nat(ni, &ne);
>   	}
> -	up_read(&curseg->journal_rwsem);
> +	read_unlock(&curseg->journal_rwlock);
>   	if (i >= 0) {
>   		up_read(&nm_i->nat_tree_lock);
>   		goto cache;
> @@ -2338,7 +2338,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
>   	struct f2fs_journal *journal = curseg->journal;
>   	int i;
>   
> -	down_read(&curseg->journal_rwsem);
> +	read_lock(&curseg->journal_rwlock);
>   	for (i = 0; i < nats_in_cursum(journal); i++) {
>   		block_t addr;
>   		nid_t nid;
> @@ -2350,7 +2350,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
>   		else
>   			remove_free_nid(sbi, nid);
>   	}
> -	up_read(&curseg->journal_rwsem);
> +	read_unlock(&curseg->journal_rwlock);
>   }
>   
>   static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
> @@ -2799,7 +2799,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
>   	struct f2fs_journal *journal = curseg->journal;
>   	int i;
>   
> -	down_write(&curseg->journal_rwsem);
> +	write_lock(&curseg->journal_rwlock);
>   	for (i = 0; i < nats_in_cursum(journal); i++) {
>   		struct nat_entry *ne;
>   		struct f2fs_nat_entry raw_ne;
> @@ -2831,7 +2831,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
>   		__set_nat_cache_dirty(nm_i, ne);
>   	}
>   	update_nats_in_cursum(journal, -i);
> -	up_write(&curseg->journal_rwsem);
> +	write_unlock(&curseg->journal_rwlock);
>   }
>   
>   static void __adjust_nat_entry_set(struct nat_entry_set *nes,
> @@ -2906,7 +2906,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>   		to_journal = false;
>   
>   	if (to_journal) {
> -		down_write(&curseg->journal_rwsem);
> +		write_lock(&curseg->journal_rwlock);
>   	} else {
>   		page = get_next_nat_page(sbi, start_nid);
>   		if (IS_ERR(page))
> @@ -2946,7 +2946,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>   	}
>   
>   	if (to_journal) {
> -		up_write(&curseg->journal_rwsem);
> +		write_unlock(&curseg->journal_rwlock);
>   	} else {
>   		__update_nat_bits(sbi, start_nid, page);
>   		f2fs_put_page(page, 1);
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index f9b7fb785e1d..c397c1c271ec 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -2442,9 +2442,9 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
>   
>   	mutex_lock(&curseg->curseg_mutex);
>   
> -	down_read(&curseg->journal_rwsem);
> +	read_lock(&curseg->journal_rwlock);
>   	memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
> -	up_read(&curseg->journal_rwsem);
> +	read_unlock(&curseg->journal_rwlock);
>   
>   	memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
>   	memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
> @@ -3876,9 +3876,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
>   	mutex_lock(&curseg->curseg_mutex);
>   
>   	/* update journal info */
> -	down_write(&curseg->journal_rwsem);
> +	write_lock(&curseg->journal_rwlock);
>   	memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
> -	up_write(&curseg->journal_rwsem);
> +	write_unlock(&curseg->journal_rwlock);
>   
>   	memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
>   	memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
> @@ -4136,7 +4136,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
>   	struct f2fs_journal *journal = curseg->journal;
>   	int i;
>   
> -	down_write(&curseg->journal_rwsem);
> +	write_lock(&curseg->journal_rwlock);
>   	for (i = 0; i < sits_in_cursum(journal); i++) {
>   		unsigned int segno;
>   		bool dirtied;
> @@ -4148,7 +4148,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
>   			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
>   	}
>   	update_sits_in_cursum(journal, -i);
> -	up_write(&curseg->journal_rwsem);
> +	write_unlock(&curseg->journal_rwlock);
>   }
>   
>   /*
> @@ -4204,7 +4204,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>   			to_journal = false;
>   
>   		if (to_journal) {
> -			down_write(&curseg->journal_rwsem);
> +			write_lock(&curseg->journal_rwlock);
>   		} else {
>   			page = get_next_sit_page(sbi, start_segno);
>   			raw_sit = page_address(page);
> @@ -4251,7 +4251,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>   		}
>   
>   		if (to_journal)
> -			up_write(&curseg->journal_rwsem);
> +			write_unlock(&curseg->journal_rwlock);
>   		else
>   			f2fs_put_page(page, 1);
>   
> @@ -4432,7 +4432,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
>   		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
>   		if (!array[i].sum_blk)
>   			return -ENOMEM;
> -		init_rwsem(&array[i].journal_rwsem);
> +		rwlock_init(&array[i].journal_rwlock);
>   		array[i].journal = f2fs_kzalloc(sbi,
>   				sizeof(struct f2fs_journal), GFP_KERNEL);
>   		if (!array[i].journal)
> @@ -4509,7 +4509,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
>   		start_blk += readed;
>   	} while (start_blk < sit_blk_cnt);
>   
> -	down_read(&curseg->journal_rwsem);
> +	read_lock(&curseg->journal_rwlock);
>   	for (i = 0; i < sits_in_cursum(journal); i++) {
>   		unsigned int old_valid_blocks;
>   
> @@ -4551,7 +4551,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
>   							old_valid_blocks;
>   		}
>   	}
> -	up_read(&curseg->journal_rwsem);
> +	read_unlock(&curseg->journal_rwlock);
>   
>   	if (!err && total_node_blocks != valid_node_count(sbi)) {
>   		f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index 050230c70a53..afb9041dc2e6 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -306,7 +306,7 @@ struct victim_selection {
>   struct curseg_info {
>   	struct mutex curseg_mutex;		/* lock for consistency */
>   	struct f2fs_summary_block *sum_blk;	/* cached summary block */
> -	struct rw_semaphore journal_rwsem;	/* protect journal area */
> +	rwlock_t journal_rwlock;		/* protect journal area */
>   	struct f2fs_journal *journal;		/* cached journal info */
>   	unsigned char alloc_type;		/* current allocation type */
>   	unsigned short seg_type;		/* segment type like CURSEG_XXX_TYPE */
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH] f2fs: use rwlock instead of rwsem for journal
@ 2021-07-22 13:41   ` Chao Yu
  0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2021-07-22 13:41 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-f2fs-devel

On 2021/7/22 9:41, Jaegeuk Kim wrote:
> This tries to fix priority inversion in the below condition resulting in
> long checkpoint delay.
> 
> f2fs_get_node_info()
>   - nat_tree_lock
>    -> sleep in journal_rwsem
>                                       checkpoint
>                                       - waiting for nat_tree_lock
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>   fs/f2fs/node.c    | 16 ++++++++--------
>   fs/f2fs/segment.c | 22 +++++++++++-----------
>   fs/f2fs/segment.h |  2 +-
>   3 files changed, 20 insertions(+), 20 deletions(-)
> 
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 0be9e2d7120e..821a40e02fb4 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -567,13 +567,13 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
>   	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
>   
>   	/* Check current segment summary */
> -	down_read(&curseg->journal_rwsem);
> +	read_lock(&curseg->journal_rwlock);

It seem journal_rwsem covers too many operations, e.g.

- scan_curseg_cache
  - add_free_nid
   - f2fs_kmem_cache_alloc
    - kmem_cache_alloc(__GFP_NOFAIL)

Thanks,

>   	i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
>   	if (i >= 0) {
>   		ne = nat_in_journal(journal, i);
>   		node_info_from_raw_nat(ni, &ne);
>   	}
> -	up_read(&curseg->journal_rwsem);
> +	read_unlock(&curseg->journal_rwlock);
>   	if (i >= 0) {
>   		up_read(&nm_i->nat_tree_lock);
>   		goto cache;
> @@ -2338,7 +2338,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
>   	struct f2fs_journal *journal = curseg->journal;
>   	int i;
>   
> -	down_read(&curseg->journal_rwsem);
> +	read_lock(&curseg->journal_rwlock);
>   	for (i = 0; i < nats_in_cursum(journal); i++) {
>   		block_t addr;
>   		nid_t nid;
> @@ -2350,7 +2350,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
>   		else
>   			remove_free_nid(sbi, nid);
>   	}
> -	up_read(&curseg->journal_rwsem);
> +	read_unlock(&curseg->journal_rwlock);
>   }
>   
>   static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
> @@ -2799,7 +2799,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
>   	struct f2fs_journal *journal = curseg->journal;
>   	int i;
>   
> -	down_write(&curseg->journal_rwsem);
> +	write_lock(&curseg->journal_rwlock);
>   	for (i = 0; i < nats_in_cursum(journal); i++) {
>   		struct nat_entry *ne;
>   		struct f2fs_nat_entry raw_ne;
> @@ -2831,7 +2831,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
>   		__set_nat_cache_dirty(nm_i, ne);
>   	}
>   	update_nats_in_cursum(journal, -i);
> -	up_write(&curseg->journal_rwsem);
> +	write_unlock(&curseg->journal_rwlock);
>   }
>   
>   static void __adjust_nat_entry_set(struct nat_entry_set *nes,
> @@ -2906,7 +2906,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>   		to_journal = false;
>   
>   	if (to_journal) {
> -		down_write(&curseg->journal_rwsem);
> +		write_lock(&curseg->journal_rwlock);
>   	} else {
>   		page = get_next_nat_page(sbi, start_nid);
>   		if (IS_ERR(page))
> @@ -2946,7 +2946,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>   	}
>   
>   	if (to_journal) {
> -		up_write(&curseg->journal_rwsem);
> +		write_unlock(&curseg->journal_rwlock);
>   	} else {
>   		__update_nat_bits(sbi, start_nid, page);
>   		f2fs_put_page(page, 1);
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index f9b7fb785e1d..c397c1c271ec 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -2442,9 +2442,9 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
>   
>   	mutex_lock(&curseg->curseg_mutex);
>   
> -	down_read(&curseg->journal_rwsem);
> +	read_lock(&curseg->journal_rwlock);
>   	memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
> -	up_read(&curseg->journal_rwsem);
> +	read_unlock(&curseg->journal_rwlock);
>   
>   	memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
>   	memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
> @@ -3876,9 +3876,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
>   	mutex_lock(&curseg->curseg_mutex);
>   
>   	/* update journal info */
> -	down_write(&curseg->journal_rwsem);
> +	write_lock(&curseg->journal_rwlock);
>   	memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
> -	up_write(&curseg->journal_rwsem);
> +	write_unlock(&curseg->journal_rwlock);
>   
>   	memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
>   	memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
> @@ -4136,7 +4136,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
>   	struct f2fs_journal *journal = curseg->journal;
>   	int i;
>   
> -	down_write(&curseg->journal_rwsem);
> +	write_lock(&curseg->journal_rwlock);
>   	for (i = 0; i < sits_in_cursum(journal); i++) {
>   		unsigned int segno;
>   		bool dirtied;
> @@ -4148,7 +4148,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
>   			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
>   	}
>   	update_sits_in_cursum(journal, -i);
> -	up_write(&curseg->journal_rwsem);
> +	write_unlock(&curseg->journal_rwlock);
>   }
>   
>   /*
> @@ -4204,7 +4204,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>   			to_journal = false;
>   
>   		if (to_journal) {
> -			down_write(&curseg->journal_rwsem);
> +			write_lock(&curseg->journal_rwlock);
>   		} else {
>   			page = get_next_sit_page(sbi, start_segno);
>   			raw_sit = page_address(page);
> @@ -4251,7 +4251,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>   		}
>   
>   		if (to_journal)
> -			up_write(&curseg->journal_rwsem);
> +			write_unlock(&curseg->journal_rwlock);
>   		else
>   			f2fs_put_page(page, 1);
>   
> @@ -4432,7 +4432,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
>   		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
>   		if (!array[i].sum_blk)
>   			return -ENOMEM;
> -		init_rwsem(&array[i].journal_rwsem);
> +		rwlock_init(&array[i].journal_rwlock);
>   		array[i].journal = f2fs_kzalloc(sbi,
>   				sizeof(struct f2fs_journal), GFP_KERNEL);
>   		if (!array[i].journal)
> @@ -4509,7 +4509,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
>   		start_blk += readed;
>   	} while (start_blk < sit_blk_cnt);
>   
> -	down_read(&curseg->journal_rwsem);
> +	read_lock(&curseg->journal_rwlock);
>   	for (i = 0; i < sits_in_cursum(journal); i++) {
>   		unsigned int old_valid_blocks;
>   
> @@ -4551,7 +4551,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
>   							old_valid_blocks;
>   		}
>   	}
> -	up_read(&curseg->journal_rwsem);
> +	read_unlock(&curseg->journal_rwlock);
>   
>   	if (!err && total_node_blocks != valid_node_count(sbi)) {
>   		f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index 050230c70a53..afb9041dc2e6 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -306,7 +306,7 @@ struct victim_selection {
>   struct curseg_info {
>   	struct mutex curseg_mutex;		/* lock for consistency */
>   	struct f2fs_summary_block *sum_blk;	/* cached summary block */
> -	struct rw_semaphore journal_rwsem;	/* protect journal area */
> +	rwlock_t journal_rwlock;		/* protect journal area */
>   	struct f2fs_journal *journal;		/* cached journal info */
>   	unsigned char alloc_type;		/* current allocation type */
>   	unsigned short seg_type;		/* segment type like CURSEG_XXX_TYPE */
> 


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] f2fs: don't sleep while grabing nat_tree_lock
  2021-07-22  1:41 ` [f2fs-dev] " Jaegeuk Kim
@ 2021-07-22 17:44   ` Jaegeuk Kim
  -1 siblings, 0 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2021-07-22 17:44 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel

This tries to fix priority inversion in the below condition resulting in
long checkpoint delay.

f2fs_get_node_info()
 - nat_tree_lock
  -> sleep to grab journal_rwsem by contention

                                     checkpoint
                                     - waiting for nat_tree_lock

In order to let checkpoint go, let's release nat_tree_lock, if there's a
journal_rwsem contention.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Change log from v1:
  - drop rwlock but take retry logic to reduce lock-holding time

 fs/f2fs/node.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0be9e2d7120e..c60ba4179bb2 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -552,7 +552,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
 	int i;
 
 	ni->nid = nid;
-
+retry:
 	/* Check nat cache */
 	down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
@@ -564,10 +564,16 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
 		return 0;
 	}
 
-	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
+	/*
+	 * Check current segment summary by trying to grab journal_rwsem first.
+	 * This sem is on the critical path on the checkpoint requiring the above
+	 * nat_tree_lock. Therefore, we should retry, if we failed to grab here.
+	 */
+	if (!down_read_trylock(&curseg->journal_rwsem)) {
+		up_read(&nm_i->nat_tree_lock);
+		goto retry;
+	}
 
-	/* Check current segment summary */
-	down_read(&curseg->journal_rwsem);
 	i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
 	if (i >= 0) {
 		ne = nat_in_journal(journal, i);
-- 
2.32.0.432.gabb21c7263-goog


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH v2] f2fs: don't sleep while grabing nat_tree_lock
@ 2021-07-22 17:44   ` Jaegeuk Kim
  0 siblings, 0 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2021-07-22 17:44 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel

This tries to fix priority inversion in the below condition resulting in
long checkpoint delay.

f2fs_get_node_info()
 - nat_tree_lock
  -> sleep to grab journal_rwsem by contention

                                     checkpoint
                                     - waiting for nat_tree_lock

In order to let checkpoint go, let's release nat_tree_lock, if there's a
journal_rwsem contention.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Change log from v1:
  - drop rwlock but take retry logic to reduce lock-holding time

 fs/f2fs/node.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0be9e2d7120e..c60ba4179bb2 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -552,7 +552,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
 	int i;
 
 	ni->nid = nid;
-
+retry:
 	/* Check nat cache */
 	down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
@@ -564,10 +564,16 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
 		return 0;
 	}
 
-	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
+	/*
+	 * Check current segment summary by trying to grab journal_rwsem first.
+	 * This sem is on the critical path on the checkpoint requiring the above
+	 * nat_tree_lock. Therefore, we should retry, if we failed to grab here.
+	 */
+	if (!down_read_trylock(&curseg->journal_rwsem)) {
+		up_read(&nm_i->nat_tree_lock);
+		goto retry;
+	}
 
-	/* Check current segment summary */
-	down_read(&curseg->journal_rwsem);
 	i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
 	if (i >= 0) {
 		ne = nat_in_journal(journal, i);
-- 
2.32.0.432.gabb21c7263-goog



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH v2] f2fs: don't sleep while grabing nat_tree_lock
  2021-07-22 17:44   ` [f2fs-dev] " Jaegeuk Kim
@ 2021-07-23  0:39     ` Chao Yu
  -1 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2021-07-23  0:39 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-f2fs-devel

On 2021/7/23 1:44, Jaegeuk Kim wrote:
> This tries to fix priority inversion in the below condition resulting in
> long checkpoint delay.
> 
> f2fs_get_node_info()
>   - nat_tree_lock
>    -> sleep to grab journal_rwsem by contention
> 
>                                       checkpoint
>                                       - waiting for nat_tree_lock
> 
> In order to let checkpoint go, let's release nat_tree_lock, if there's a
> journal_rwsem contention.

Write lock of nat_tree_lock is held from many places, how about just
retrying unlock/lock only if checkpoint() is flushing nat blocks?

---
  fs/f2fs/f2fs.h | 1 +
  fs/f2fs/node.c | 8 ++++++--
  2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ad8f99d7235f..05f41a15fda4 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -877,6 +877,7 @@ struct f2fs_nm_info {
  	spinlock_t nat_list_lock;	/* protect clean nat entry list */
  	unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */
  	unsigned int nat_blocks;	/* # of nat blocks */
+	bool flushing_nat;		/* indicate checkpoint() is flushing nat blocks */

  	/* free node ids management */
  	struct radix_tree_root free_nid_root;/* root of the free_nid cache */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index c60ba4179bb2..2caa171a68f8 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -569,7 +569,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
  	 * This sem is on the critical path on the checkpoint requiring the above
  	 * nat_tree_lock. Therefore, we should retry, if we failed to grab here.
  	 */
-	if (!down_read_trylock(&curseg->journal_rwsem)) {
+	if (!down_read_trylock(&curseg->journal_rwsem) && nm_i->flushing_nat) {
  		up_read(&nm_i->nat_tree_lock);
  		goto retry;
  	}
@@ -2981,6 +2981,8 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
  	LIST_HEAD(sets);
  	int err = 0;

+	nm_i->flushing_nat = true;
+
  	/*
  	 * during unmount, let's flush nat_bits before checking
  	 * nat_cnt[DIRTY_NAT].
@@ -2992,7 +2994,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
  	}

  	if (!nm_i->nat_cnt[DIRTY_NAT])
-		return 0;
+		goto out;

  	down_write(&nm_i->nat_tree_lock);

@@ -3026,6 +3028,8 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
  	up_write(&nm_i->nat_tree_lock);
  	/* Allow dirty nats by node block allocation in write_begin */

+out:
+	nm_i->flushing_nat = false;
  	return err;
  }

-- 
2.22.1



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH v2] f2fs: don't sleep while grabing nat_tree_lock
@ 2021-07-23  0:39     ` Chao Yu
  0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2021-07-23  0:39 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-f2fs-devel

On 2021/7/23 1:44, Jaegeuk Kim wrote:
> This tries to fix priority inversion in the below condition resulting in
> long checkpoint delay.
> 
> f2fs_get_node_info()
>   - nat_tree_lock
>    -> sleep to grab journal_rwsem by contention
> 
>                                       checkpoint
>                                       - waiting for nat_tree_lock
> 
> In order to let checkpoint go, let's release nat_tree_lock, if there's a
> journal_rwsem contention.

Write lock of nat_tree_lock is held from many places, how about just
retrying unlock/lock only if checkpoint() is flushing nat blocks?

---
  fs/f2fs/f2fs.h | 1 +
  fs/f2fs/node.c | 8 ++++++--
  2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ad8f99d7235f..05f41a15fda4 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -877,6 +877,7 @@ struct f2fs_nm_info {
  	spinlock_t nat_list_lock;	/* protect clean nat entry list */
  	unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */
  	unsigned int nat_blocks;	/* # of nat blocks */
+	bool flushing_nat;		/* indicate checkpoint() is flushing nat blocks */

  	/* free node ids management */
  	struct radix_tree_root free_nid_root;/* root of the free_nid cache */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index c60ba4179bb2..2caa171a68f8 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -569,7 +569,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
  	 * This sem is on the critical path on the checkpoint requiring the above
  	 * nat_tree_lock. Therefore, we should retry, if we failed to grab here.
  	 */
-	if (!down_read_trylock(&curseg->journal_rwsem)) {
+	if (!down_read_trylock(&curseg->journal_rwsem) && nm_i->flushing_nat) {
  		up_read(&nm_i->nat_tree_lock);
  		goto retry;
  	}
@@ -2981,6 +2981,8 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
  	LIST_HEAD(sets);
  	int err = 0;

+	nm_i->flushing_nat = true;
+
  	/*
  	 * during unmount, let's flush nat_bits before checking
  	 * nat_cnt[DIRTY_NAT].
@@ -2992,7 +2994,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
  	}

  	if (!nm_i->nat_cnt[DIRTY_NAT])
-		return 0;
+		goto out;

  	down_write(&nm_i->nat_tree_lock);

@@ -3026,6 +3028,8 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
  	up_write(&nm_i->nat_tree_lock);
  	/* Allow dirty nats by node block allocation in write_begin */

+out:
+	nm_i->flushing_nat = false;
  	return err;
  }

-- 
2.22.1




_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v3] f2fs: don't sleep while grabing nat_tree_lock
  2021-07-22 17:44   ` [f2fs-dev] " Jaegeuk Kim
@ 2021-07-23  2:35     ` Jaegeuk Kim
  -1 siblings, 0 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2021-07-23  2:35 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel

This tries to fix priority inversion in the below condition resulting in
long checkpoint delay.

f2fs_get_node_info()
 - nat_tree_lock
  -> sleep to grab journal_rwsem by contention

                                     checkpoint
                                     - waiting for nat_tree_lock

In order to let checkpoint go, let's release nat_tree_lock, if there's a
journal_rwsem contention.

Signed-off-by: Daeho Jeong <daehojeong@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Change log from v2:
  - don't bother checkpoint

 fs/f2fs/node.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0be9e2d7120e..b26642daa3d2 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -552,7 +552,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
 	int i;
 
 	ni->nid = nid;
-
+retry:
 	/* Check nat cache */
 	down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
@@ -564,10 +564,19 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
 		return 0;
 	}
 
-	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
+	/*
+	 * Check current segment summary by trying to grab journal_rwsem first.
+	 * This sem is on the critical path on the checkpoint requiring the above
+	 * nat_tree_lock. Therefore, we should retry, if we failed to grab here
+	 * while not bothering checkpoint.
+	 */
+	if (rwsem_is_locked(&sbi->cp_global_sem)) {
+		down_read(&curseg->journal_rwsem);
+	} else if (!down_read_trylock(&curseg->journal_rwsem)) {
+		up_read(&nm_i->nat_tree_lock);
+		goto retry;
+	}
 
-	/* Check current segment summary */
-	down_read(&curseg->journal_rwsem);
 	i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
 	if (i >= 0) {
 		ne = nat_in_journal(journal, i);
-- 
2.32.0.432.gabb21c7263-goog


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: don't sleep while grabing nat_tree_lock
@ 2021-07-23  2:35     ` Jaegeuk Kim
  0 siblings, 0 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2021-07-23  2:35 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel

This tries to fix priority inversion in the below condition resulting in
long checkpoint delay.

f2fs_get_node_info()
 - nat_tree_lock
  -> sleep to grab journal_rwsem by contention

                                     checkpoint
                                     - waiting for nat_tree_lock

In order to let checkpoint go, let's release nat_tree_lock, if there's a
journal_rwsem contention.

Signed-off-by: Daeho Jeong <daehojeong@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Change log from v2:
  - don't bother checkpoint

 fs/f2fs/node.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0be9e2d7120e..b26642daa3d2 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -552,7 +552,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
 	int i;
 
 	ni->nid = nid;
-
+retry:
 	/* Check nat cache */
 	down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
@@ -564,10 +564,19 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
 		return 0;
 	}
 
-	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
+	/*
+	 * Check current segment summary by trying to grab journal_rwsem first.
+	 * This sem is on the critical path on the checkpoint requiring the above
+	 * nat_tree_lock. Therefore, we should retry, if we failed to grab here
+	 * while not bothering checkpoint.
+	 */
+	if (rwsem_is_locked(&sbi->cp_global_sem)) {
+		down_read(&curseg->journal_rwsem);
+	} else if (!down_read_trylock(&curseg->journal_rwsem)) {
+		up_read(&nm_i->nat_tree_lock);
+		goto retry;
+	}
 
-	/* Check current segment summary */
-	down_read(&curseg->journal_rwsem);
 	i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
 	if (i >= 0) {
 		ne = nat_in_journal(journal, i);
-- 
2.32.0.432.gabb21c7263-goog



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: don't sleep while grabing nat_tree_lock
  2021-07-23  2:35     ` [f2fs-dev] " Jaegeuk Kim
@ 2021-07-23  2:41       ` Chao Yu
  -1 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2021-07-23  2:41 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-f2fs-devel

On 2021/7/23 10:35, Jaegeuk Kim wrote:
> This tries to fix priority inversion in the below condition resulting in
> long checkpoint delay.
> 
> f2fs_get_node_info()
>   - nat_tree_lock
>    -> sleep to grab journal_rwsem by contention
> 
>                                       checkpoint
>                                       - waiting for nat_tree_lock
> 
> In order to let checkpoint go, let's release nat_tree_lock, if there's a
> journal_rwsem contention.
> 
> Signed-off-by: Daeho Jeong <daehojeong@google.com>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>   Change log from v2:
>    - don't bother checkpoint
> 
>   fs/f2fs/node.c | 17 +++++++++++++----
>   1 file changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 0be9e2d7120e..b26642daa3d2 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -552,7 +552,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
>   	int i;
>   
>   	ni->nid = nid;
> -
> +retry:
>   	/* Check nat cache */
>   	down_read(&nm_i->nat_tree_lock);
>   	e = __lookup_nat_cache(nm_i, nid);
> @@ -564,10 +564,19 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
>   		return 0;
>   	}
>   
> -	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
> +	/*
> +	 * Check current segment summary by trying to grab journal_rwsem first.
> +	 * This sem is on the critical path on the checkpoint requiring the above
> +	 * nat_tree_lock. Therefore, we should retry, if we failed to grab here
> +	 * while not bothering checkpoint.
> +	 */
> +	if (rwsem_is_locked(&sbi->cp_global_sem)) {

You mean: if (!rwsem_is_locked()) ?

IMO, once CP is processing, all readers who needs read nat will be blocked...

Thanks,

> +		down_read(&curseg->journal_rwsem);
> +	} else if (!down_read_trylock(&curseg->journal_rwsem)) {
> +		up_read(&nm_i->nat_tree_lock);
> +		goto retry;
> +	}
>   
> -	/* Check current segment summary */
> -	down_read(&curseg->journal_rwsem);
>   	i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
>   	if (i >= 0) {
>   		ne = nat_in_journal(journal, i);
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH v3] f2fs: don't sleep while grabing nat_tree_lock
@ 2021-07-23  2:41       ` Chao Yu
  0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2021-07-23  2:41 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-f2fs-devel

On 2021/7/23 10:35, Jaegeuk Kim wrote:
> This tries to fix priority inversion in the below condition resulting in
> long checkpoint delay.
> 
> f2fs_get_node_info()
>   - nat_tree_lock
>    -> sleep to grab journal_rwsem by contention
> 
>                                       checkpoint
>                                       - waiting for nat_tree_lock
> 
> In order to let checkpoint go, let's release nat_tree_lock, if there's a
> journal_rwsem contention.
> 
> Signed-off-by: Daeho Jeong <daehojeong@google.com>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>   Change log from v2:
>    - don't bother checkpoint
> 
>   fs/f2fs/node.c | 17 +++++++++++++----
>   1 file changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 0be9e2d7120e..b26642daa3d2 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -552,7 +552,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
>   	int i;
>   
>   	ni->nid = nid;
> -
> +retry:
>   	/* Check nat cache */
>   	down_read(&nm_i->nat_tree_lock);
>   	e = __lookup_nat_cache(nm_i, nid);
> @@ -564,10 +564,19 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
>   		return 0;
>   	}
>   
> -	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
> +	/*
> +	 * Check current segment summary by trying to grab journal_rwsem first.
> +	 * This sem is on the critical path on the checkpoint requiring the above
> +	 * nat_tree_lock. Therefore, we should retry, if we failed to grab here
> +	 * while not bothering checkpoint.
> +	 */
> +	if (rwsem_is_locked(&sbi->cp_global_sem)) {

You mean: if (!rwsem_is_locked()) ?

IMO, once CP is processing, all readers who needs read nat will be blocked...

Thanks,

> +		down_read(&curseg->journal_rwsem);
> +	} else if (!down_read_trylock(&curseg->journal_rwsem)) {
> +		up_read(&nm_i->nat_tree_lock);
> +		goto retry;
> +	}
>   
> -	/* Check current segment summary */
> -	down_read(&curseg->journal_rwsem);
>   	i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
>   	if (i >= 0) {
>   		ne = nat_in_journal(journal, i);
> 


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH v4] f2fs: don't sleep while grabing nat_tree_lock
  2021-07-23  2:35     ` [f2fs-dev] " Jaegeuk Kim
@ 2021-07-23  4:27       ` Jaegeuk Kim
  -1 siblings, 0 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2021-07-23  4:27 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel

This tries to fix priority inversion in the below condition resulting in
long checkpoint delay.

f2fs_get_node_info()
 - nat_tree_lock
  -> sleep to grab journal_rwsem by contention

                                     checkpoint
                                     - waiting for nat_tree_lock

In order to let checkpoint go, let's release nat_tree_lock, if there's a
journal_rwsem contention.

Signed-off-by: Daeho Jeong <daehojeong@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---

 Change log from v3:
  - fix wrong condition check

 fs/f2fs/node.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0be9e2d7120e..c945a9730f3c 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -552,7 +552,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
 	int i;
 
 	ni->nid = nid;
-
+retry:
 	/* Check nat cache */
 	down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
@@ -564,10 +564,19 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
 		return 0;
 	}
 
-	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
+	/*
+	 * Check current segment summary by trying to grab journal_rwsem first.
+	 * This sem is on the critical path on the checkpoint requiring the above
+	 * nat_tree_lock. Therefore, we should retry, if we failed to grab here
+	 * while not bothering checkpoint.
+	 */
+	if (!rwsem_is_locked(&sbi->cp_global_sem)) {
+		down_read(&curseg->journal_rwsem);
+	} else if (!down_read_trylock(&curseg->journal_rwsem)) {
+		up_read(&nm_i->nat_tree_lock);
+		goto retry;
+	}
 
-	/* Check current segment summary */
-	down_read(&curseg->journal_rwsem);
 	i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
 	if (i >= 0) {
 		ne = nat_in_journal(journal, i);
-- 
2.32.0.432.gabb21c7263-goog


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH v4] f2fs: don't sleep while grabing nat_tree_lock
@ 2021-07-23  4:27       ` Jaegeuk Kim
  0 siblings, 0 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2021-07-23  4:27 UTC (permalink / raw)
  To: linux-kernel, linux-f2fs-devel

This tries to fix priority inversion in the below condition resulting in
long checkpoint delay.

f2fs_get_node_info()
 - nat_tree_lock
  -> sleep to grab journal_rwsem by contention

                                     checkpoint
                                     - waiting for nat_tree_lock

In order to let checkpoint go, let's release nat_tree_lock, if there's a
journal_rwsem contention.

Signed-off-by: Daeho Jeong <daehojeong@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---

 Change log from v3:
  - fix wrong condition check

 fs/f2fs/node.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0be9e2d7120e..c945a9730f3c 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -552,7 +552,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
 	int i;
 
 	ni->nid = nid;
-
+retry:
 	/* Check nat cache */
 	down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
@@ -564,10 +564,19 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
 		return 0;
 	}
 
-	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
+	/*
+	 * Check current segment summary by trying to grab journal_rwsem first.
+	 * This sem is on the critical path on the checkpoint requiring the above
+	 * nat_tree_lock. Therefore, we should retry, if we failed to grab here
+	 * while not bothering checkpoint.
+	 */
+	if (!rwsem_is_locked(&sbi->cp_global_sem)) {
+		down_read(&curseg->journal_rwsem);
+	} else if (!down_read_trylock(&curseg->journal_rwsem)) {
+		up_read(&nm_i->nat_tree_lock);
+		goto retry;
+	}
 
-	/* Check current segment summary */
-	down_read(&curseg->journal_rwsem);
 	i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
 	if (i >= 0) {
 		ne = nat_in_journal(journal, i);
-- 
2.32.0.432.gabb21c7263-goog



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH] f2fs: use rwlock instead of rwsem for journal
@ 2021-07-23  1:25 kernel test robot
  0 siblings, 0 replies; 16+ messages in thread
From: kernel test robot @ 2021-07-23  1:25 UTC (permalink / raw)
  To: kbuild

[-- Attachment #1: Type: text/plain, Size: 8805 bytes --]

CC: kbuild-all(a)lists.01.org
In-Reply-To: <20210722014149.525166-1-jaegeuk@kernel.org>
References: <20210722014149.525166-1-jaegeuk@kernel.org>
TO: Jaegeuk Kim <jaegeuk@kernel.org>
TO: linux-kernel(a)vger.kernel.org
TO: linux-f2fs-devel(a)lists.sourceforge.net
CC: Jaegeuk Kim <jaegeuk@kernel.org>

Hi Jaegeuk,

I love your patch! Perhaps something to improve:

[auto build test WARNING on f2fs/dev-test]
[also build test WARNING on linux/master linus/master v5.14-rc2 next-20210722]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Jaegeuk-Kim/f2fs-use-rwlock-instead-of-rwsem-for-journal/20210722-094334
base:   https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git dev-test
:::::: branch date: 24 hours ago
:::::: commit date: 24 hours ago
config: arm64-randconfig-s032-20210722 (attached as .config)
compiler: aarch64-linux-gcc (GCC) 10.3.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # apt-get install sparse
        # sparse version: v0.6.3-341-g8af24329-dirty
        # https://github.com/0day-ci/linux/commit/d6b325e856f8fd166ea9722d4489f897cf468edf
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Jaegeuk-Kim/f2fs-use-rwlock-instead-of-rwsem-for-journal/20210722-094334
        git checkout d6b325e856f8fd166ea9722d4489f897cf468edf
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-10.3.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=arm64 SHELL=/bin/bash fs/f2fs/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)
>> fs/f2fs/node.c:2956:9: sparse: sparse: context imbalance in '__flush_nat_entry_set' - different lock contexts for basic block
--
   fs/f2fs/segment.c: note: in included file (through include/linux/irqflags.h, include/linux/spinlock.h, include/linux/wait.h, ...):
   arch/arm64/include/asm/percpu.h:126:1: sparse: sparse: cast truncates bits from constant value (ffffffff becomes ff)
   arch/arm64/include/asm/percpu.h:126:1: sparse: sparse: cast truncates bits from constant value (ffffffff becomes ffff)
   arch/arm64/include/asm/percpu.h:126:1: sparse: sparse: cast truncates bits from constant value (ffffffff becomes ff)
   arch/arm64/include/asm/percpu.h:126:1: sparse: sparse: cast truncates bits from constant value (ffffffff becomes ffff)
>> fs/f2fs/segment.c:4258:17: sparse: sparse: context imbalance in 'f2fs_flush_sit_entries' - different lock contexts for basic block

vim +/__flush_nat_entry_set +2956 fs/f2fs/node.c

22ad0b6ab46683 Jaegeuk Kim  2017-02-09  2887  
edc55aaf0d1712 Jaegeuk Kim  2018-09-17  2888  static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
22ad0b6ab46683 Jaegeuk Kim  2017-02-09  2889  		struct nat_entry_set *set, struct cp_control *cpc)
309cc2b6e7ae66 Jaegeuk Kim  2014-09-22  2890  {
309cc2b6e7ae66 Jaegeuk Kim  2014-09-22  2891  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
b7ad7512b84b26 Chao Yu      2016-02-19  2892  	struct f2fs_journal *journal = curseg->journal;
309cc2b6e7ae66 Jaegeuk Kim  2014-09-22  2893  	nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
309cc2b6e7ae66 Jaegeuk Kim  2014-09-22  2894  	bool to_journal = true;
309cc2b6e7ae66 Jaegeuk Kim  2014-09-22  2895  	struct f2fs_nat_block *nat_blk;
309cc2b6e7ae66 Jaegeuk Kim  2014-09-22  2896  	struct nat_entry *ne, *cur;
309cc2b6e7ae66 Jaegeuk Kim  2014-09-22  2897  	struct page *page = NULL;
e05df3b115e730 Jaegeuk Kim  2012-11-02  2898  
e05df3b115e730 Jaegeuk Kim  2012-11-02  2899  	/*
aec71382c68135 Chao Yu      2014-06-24  2900  	 * there are two steps to flush nat entries:
aec71382c68135 Chao Yu      2014-06-24  2901  	 * #1, flush nat entries to journal in current hot data summary block.
aec71382c68135 Chao Yu      2014-06-24  2902  	 * #2, flush nat entries to nat page.
e05df3b115e730 Jaegeuk Kim  2012-11-02  2903  	 */
22ad0b6ab46683 Jaegeuk Kim  2017-02-09  2904  	if (enabled_nat_bits(sbi, cpc) ||
22ad0b6ab46683 Jaegeuk Kim  2017-02-09  2905  		!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
aec71382c68135 Chao Yu      2014-06-24  2906  		to_journal = false;
aec71382c68135 Chao Yu      2014-06-24  2907  
aec71382c68135 Chao Yu      2014-06-24  2908  	if (to_journal) {
d6b325e856f8fd Jaegeuk Kim  2021-07-21  2909  		write_lock(&curseg->journal_rwlock);
aec71382c68135 Chao Yu      2014-06-24  2910  	} else {
e05df3b115e730 Jaegeuk Kim  2012-11-02  2911  		page = get_next_nat_page(sbi, start_nid);
edc55aaf0d1712 Jaegeuk Kim  2018-09-17  2912  		if (IS_ERR(page))
edc55aaf0d1712 Jaegeuk Kim  2018-09-17  2913  			return PTR_ERR(page);
edc55aaf0d1712 Jaegeuk Kim  2018-09-17  2914  
e05df3b115e730 Jaegeuk Kim  2012-11-02  2915  		nat_blk = page_address(page);
9850cf4a890888 Jaegeuk Kim  2014-09-02  2916  		f2fs_bug_on(sbi, !nat_blk);
e05df3b115e730 Jaegeuk Kim  2012-11-02  2917  	}
e05df3b115e730 Jaegeuk Kim  2012-11-02  2918  
aec71382c68135 Chao Yu      2014-06-24  2919  	/* flush dirty nats in nat entry set */
309cc2b6e7ae66 Jaegeuk Kim  2014-09-22  2920  	list_for_each_entry_safe(ne, cur, &set->entry_list, list) {
aec71382c68135 Chao Yu      2014-06-24  2921  		struct f2fs_nat_entry *raw_ne;
aec71382c68135 Chao Yu      2014-06-24  2922  		nid_t nid = nat_get_nid(ne);
aec71382c68135 Chao Yu      2014-06-24  2923  		int offset;
e05df3b115e730 Jaegeuk Kim  2012-11-02  2924  
febeca6d375531 Chao Yu      2017-06-05  2925  		f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR);
309cc2b6e7ae66 Jaegeuk Kim  2014-09-22  2926  
aec71382c68135 Chao Yu      2014-06-24  2927  		if (to_journal) {
4d57b86dd86404 Chao Yu      2018-05-30  2928  			offset = f2fs_lookup_journal_in_cursum(journal,
aec71382c68135 Chao Yu      2014-06-24  2929  							NAT_JOURNAL, nid, 1);
9850cf4a890888 Jaegeuk Kim  2014-09-02  2930  			f2fs_bug_on(sbi, offset < 0);
dfc08a12e49a64 Chao Yu      2016-02-14  2931  			raw_ne = &nat_in_journal(journal, offset);
dfc08a12e49a64 Chao Yu      2016-02-14  2932  			nid_in_journal(journal, offset) = cpu_to_le32(nid);
aec71382c68135 Chao Yu      2014-06-24  2933  		} else {
aec71382c68135 Chao Yu      2014-06-24  2934  			raw_ne = &nat_blk->entries[nid - start_nid];
e05df3b115e730 Jaegeuk Kim  2012-11-02  2935  		}
aec71382c68135 Chao Yu      2014-06-24  2936  		raw_nat_from_node_info(raw_ne, &ne->ni);
88bd02c9472a16 Jaegeuk Kim  2014-09-15  2937  		nat_reset_flag(ne);
0b28b71e298fb7 Kinglong Mee 2017-02-28  2938  		__clear_nat_cache_dirty(NM_I(sbi), set, ne);
04d47e673863c6 Chao Yu      2016-11-17  2939  		if (nat_get_blkaddr(ne) == NULL_ADDR) {
5921aaa185908c LiFan        2017-11-22  2940  			add_free_nid(sbi, nid, false, true);
4ac912427c4214 Chao Yu      2017-02-23  2941  		} else {
4ac912427c4214 Chao Yu      2017-02-23  2942  			spin_lock(&NM_I(sbi)->nid_list_lock);
346fe752c431bc Chao Yu      2017-03-13  2943  			update_free_nid_bitmap(sbi, nid, false, false);
04d47e673863c6 Chao Yu      2016-11-17  2944  			spin_unlock(&NM_I(sbi)->nid_list_lock);
04d47e673863c6 Chao Yu      2016-11-17  2945  		}
e05df3b115e730 Jaegeuk Kim  2012-11-02  2946  	}
aec71382c68135 Chao Yu      2014-06-24  2947  
22ad0b6ab46683 Jaegeuk Kim  2017-02-09  2948  	if (to_journal) {
d6b325e856f8fd Jaegeuk Kim  2021-07-21  2949  		write_unlock(&curseg->journal_rwlock);
22ad0b6ab46683 Jaegeuk Kim  2017-02-09  2950  	} else {
22ad0b6ab46683 Jaegeuk Kim  2017-02-09  2951  		__update_nat_bits(sbi, start_nid, page);
e05df3b115e730 Jaegeuk Kim  2012-11-02  2952  		f2fs_put_page(page, 1);
22ad0b6ab46683 Jaegeuk Kim  2017-02-09  2953  	}
aec71382c68135 Chao Yu      2014-06-24  2954  
59c9081bc86ef0 Yunlei He    2017-03-13  2955  	/* Allow dirty nats by node block allocation in write_begin */
59c9081bc86ef0 Yunlei He    2017-03-13 @2956  	if (!set->entry_cnt) {
309cc2b6e7ae66 Jaegeuk Kim  2014-09-22  2957  		radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
309cc2b6e7ae66 Jaegeuk Kim  2014-09-22  2958  		kmem_cache_free(nat_entry_set_slab, set);
309cc2b6e7ae66 Jaegeuk Kim  2014-09-22  2959  	}
edc55aaf0d1712 Jaegeuk Kim  2018-09-17  2960  	return 0;
59c9081bc86ef0 Yunlei He    2017-03-13  2961  }
309cc2b6e7ae66 Jaegeuk Kim  2014-09-22  2962  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 32586 bytes --]

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2021-07-23  4:27 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-22  1:41 [PATCH] f2fs: use rwlock instead of rwsem for journal Jaegeuk Kim
2021-07-22  1:41 ` [f2fs-dev] " Jaegeuk Kim
2021-07-22  3:06 ` Hillf Danton
2021-07-22 13:41 ` Chao Yu
2021-07-22 13:41   ` Chao Yu
2021-07-22 17:44 ` [PATCH v2] f2fs: don't sleep while grabing nat_tree_lock Jaegeuk Kim
2021-07-22 17:44   ` [f2fs-dev] " Jaegeuk Kim
2021-07-23  0:39   ` Chao Yu
2021-07-23  0:39     ` Chao Yu
2021-07-23  2:35   ` [PATCH v3] " Jaegeuk Kim
2021-07-23  2:35     ` [f2fs-dev] " Jaegeuk Kim
2021-07-23  2:41     ` Chao Yu
2021-07-23  2:41       ` Chao Yu
2021-07-23  4:27     ` [f2fs-dev] [PATCH v4] " Jaegeuk Kim
2021-07-23  4:27       ` Jaegeuk Kim
2021-07-23  1:25 [PATCH] f2fs: use rwlock instead of rwsem for journal kernel test robot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.