* [PATCH] f2fs: use rwlock instead of rwsem for journal
@ 2021-07-22 1:41 ` Jaegeuk Kim
0 siblings, 0 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2021-07-22 1:41 UTC (permalink / raw)
To: linux-kernel, linux-f2fs-devel; +Cc: Jaegeuk Kim
This tries to fix priority inversion in the below condition resulting in
long checkpoint delay.
f2fs_get_node_info()
- nat_tree_lock
-> sleep in journal_rwsem
checkpoint
- waiting for nat_tree_lock
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
fs/f2fs/node.c | 16 ++++++++--------
fs/f2fs/segment.c | 22 +++++++++++-----------
fs/f2fs/segment.h | 2 +-
3 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0be9e2d7120e..821a40e02fb4 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -567,13 +567,13 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
memset(&ne, 0, sizeof(struct f2fs_nat_entry));
/* Check current segment summary */
- down_read(&curseg->journal_rwsem);
+ read_lock(&curseg->journal_rwlock);
i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
if (i >= 0) {
ne = nat_in_journal(journal, i);
node_info_from_raw_nat(ni, &ne);
}
- up_read(&curseg->journal_rwsem);
+ read_unlock(&curseg->journal_rwlock);
if (i >= 0) {
up_read(&nm_i->nat_tree_lock);
goto cache;
@@ -2338,7 +2338,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
struct f2fs_journal *journal = curseg->journal;
int i;
- down_read(&curseg->journal_rwsem);
+ read_lock(&curseg->journal_rwlock);
for (i = 0; i < nats_in_cursum(journal); i++) {
block_t addr;
nid_t nid;
@@ -2350,7 +2350,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
else
remove_free_nid(sbi, nid);
}
- up_read(&curseg->journal_rwsem);
+ read_unlock(&curseg->journal_rwlock);
}
static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
@@ -2799,7 +2799,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
struct f2fs_journal *journal = curseg->journal;
int i;
- down_write(&curseg->journal_rwsem);
+ write_lock(&curseg->journal_rwlock);
for (i = 0; i < nats_in_cursum(journal); i++) {
struct nat_entry *ne;
struct f2fs_nat_entry raw_ne;
@@ -2831,7 +2831,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
__set_nat_cache_dirty(nm_i, ne);
}
update_nats_in_cursum(journal, -i);
- up_write(&curseg->journal_rwsem);
+ write_unlock(&curseg->journal_rwlock);
}
static void __adjust_nat_entry_set(struct nat_entry_set *nes,
@@ -2906,7 +2906,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
to_journal = false;
if (to_journal) {
- down_write(&curseg->journal_rwsem);
+ write_lock(&curseg->journal_rwlock);
} else {
page = get_next_nat_page(sbi, start_nid);
if (IS_ERR(page))
@@ -2946,7 +2946,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
}
if (to_journal) {
- up_write(&curseg->journal_rwsem);
+ write_unlock(&curseg->journal_rwlock);
} else {
__update_nat_bits(sbi, start_nid, page);
f2fs_put_page(page, 1);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f9b7fb785e1d..c397c1c271ec 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2442,9 +2442,9 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
mutex_lock(&curseg->curseg_mutex);
- down_read(&curseg->journal_rwsem);
+ read_lock(&curseg->journal_rwlock);
memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
- up_read(&curseg->journal_rwsem);
+ read_unlock(&curseg->journal_rwlock);
memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
@@ -3876,9 +3876,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
mutex_lock(&curseg->curseg_mutex);
/* update journal info */
- down_write(&curseg->journal_rwsem);
+ write_lock(&curseg->journal_rwlock);
memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
- up_write(&curseg->journal_rwsem);
+ write_unlock(&curseg->journal_rwlock);
memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
@@ -4136,7 +4136,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
struct f2fs_journal *journal = curseg->journal;
int i;
- down_write(&curseg->journal_rwsem);
+ write_lock(&curseg->journal_rwlock);
for (i = 0; i < sits_in_cursum(journal); i++) {
unsigned int segno;
bool dirtied;
@@ -4148,7 +4148,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
}
update_sits_in_cursum(journal, -i);
- up_write(&curseg->journal_rwsem);
+ write_unlock(&curseg->journal_rwlock);
}
/*
@@ -4204,7 +4204,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
to_journal = false;
if (to_journal) {
- down_write(&curseg->journal_rwsem);
+ write_lock(&curseg->journal_rwlock);
} else {
page = get_next_sit_page(sbi, start_segno);
raw_sit = page_address(page);
@@ -4251,7 +4251,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
}
if (to_journal)
- up_write(&curseg->journal_rwsem);
+ write_unlock(&curseg->journal_rwlock);
else
f2fs_put_page(page, 1);
@@ -4432,7 +4432,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
if (!array[i].sum_blk)
return -ENOMEM;
- init_rwsem(&array[i].journal_rwsem);
+ rwlock_init(&array[i].journal_rwlock);
array[i].journal = f2fs_kzalloc(sbi,
sizeof(struct f2fs_journal), GFP_KERNEL);
if (!array[i].journal)
@@ -4509,7 +4509,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
start_blk += readed;
} while (start_blk < sit_blk_cnt);
- down_read(&curseg->journal_rwsem);
+ read_lock(&curseg->journal_rwlock);
for (i = 0; i < sits_in_cursum(journal); i++) {
unsigned int old_valid_blocks;
@@ -4551,7 +4551,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
old_valid_blocks;
}
}
- up_read(&curseg->journal_rwsem);
+ read_unlock(&curseg->journal_rwlock);
if (!err && total_node_blocks != valid_node_count(sbi)) {
f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 050230c70a53..afb9041dc2e6 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -306,7 +306,7 @@ struct victim_selection {
struct curseg_info {
struct mutex curseg_mutex; /* lock for consistency */
struct f2fs_summary_block *sum_blk; /* cached summary block */
- struct rw_semaphore journal_rwsem; /* protect journal area */
+ rwlock_t journal_rwlock; /* protect journal area */
struct f2fs_journal *journal; /* cached journal info */
unsigned char alloc_type; /* current allocation type */
unsigned short seg_type; /* segment type like CURSEG_XXX_TYPE */
--
2.32.0.402.g57bb445576-goog
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [f2fs-dev] [PATCH] f2fs: use rwlock instead of rwsem for journal
@ 2021-07-22 1:41 ` Jaegeuk Kim
0 siblings, 0 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2021-07-22 1:41 UTC (permalink / raw)
To: linux-kernel, linux-f2fs-devel; +Cc: Jaegeuk Kim
This tries to fix priority inversion in the below condition resulting in
long checkpoint delay.
f2fs_get_node_info()
- nat_tree_lock
-> sleep in journal_rwsem
checkpoint
- waiting for nat_tree_lock
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
fs/f2fs/node.c | 16 ++++++++--------
fs/f2fs/segment.c | 22 +++++++++++-----------
fs/f2fs/segment.h | 2 +-
3 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0be9e2d7120e..821a40e02fb4 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -567,13 +567,13 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
memset(&ne, 0, sizeof(struct f2fs_nat_entry));
/* Check current segment summary */
- down_read(&curseg->journal_rwsem);
+ read_lock(&curseg->journal_rwlock);
i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
if (i >= 0) {
ne = nat_in_journal(journal, i);
node_info_from_raw_nat(ni, &ne);
}
- up_read(&curseg->journal_rwsem);
+ read_unlock(&curseg->journal_rwlock);
if (i >= 0) {
up_read(&nm_i->nat_tree_lock);
goto cache;
@@ -2338,7 +2338,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
struct f2fs_journal *journal = curseg->journal;
int i;
- down_read(&curseg->journal_rwsem);
+ read_lock(&curseg->journal_rwlock);
for (i = 0; i < nats_in_cursum(journal); i++) {
block_t addr;
nid_t nid;
@@ -2350,7 +2350,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
else
remove_free_nid(sbi, nid);
}
- up_read(&curseg->journal_rwsem);
+ read_unlock(&curseg->journal_rwlock);
}
static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
@@ -2799,7 +2799,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
struct f2fs_journal *journal = curseg->journal;
int i;
- down_write(&curseg->journal_rwsem);
+ write_lock(&curseg->journal_rwlock);
for (i = 0; i < nats_in_cursum(journal); i++) {
struct nat_entry *ne;
struct f2fs_nat_entry raw_ne;
@@ -2831,7 +2831,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
__set_nat_cache_dirty(nm_i, ne);
}
update_nats_in_cursum(journal, -i);
- up_write(&curseg->journal_rwsem);
+ write_unlock(&curseg->journal_rwlock);
}
static void __adjust_nat_entry_set(struct nat_entry_set *nes,
@@ -2906,7 +2906,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
to_journal = false;
if (to_journal) {
- down_write(&curseg->journal_rwsem);
+ write_lock(&curseg->journal_rwlock);
} else {
page = get_next_nat_page(sbi, start_nid);
if (IS_ERR(page))
@@ -2946,7 +2946,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
}
if (to_journal) {
- up_write(&curseg->journal_rwsem);
+ write_unlock(&curseg->journal_rwlock);
} else {
__update_nat_bits(sbi, start_nid, page);
f2fs_put_page(page, 1);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f9b7fb785e1d..c397c1c271ec 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2442,9 +2442,9 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
mutex_lock(&curseg->curseg_mutex);
- down_read(&curseg->journal_rwsem);
+ read_lock(&curseg->journal_rwlock);
memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
- up_read(&curseg->journal_rwsem);
+ read_unlock(&curseg->journal_rwlock);
memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
@@ -3876,9 +3876,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
mutex_lock(&curseg->curseg_mutex);
/* update journal info */
- down_write(&curseg->journal_rwsem);
+ write_lock(&curseg->journal_rwlock);
memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
- up_write(&curseg->journal_rwsem);
+ write_unlock(&curseg->journal_rwlock);
memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
@@ -4136,7 +4136,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
struct f2fs_journal *journal = curseg->journal;
int i;
- down_write(&curseg->journal_rwsem);
+ write_lock(&curseg->journal_rwlock);
for (i = 0; i < sits_in_cursum(journal); i++) {
unsigned int segno;
bool dirtied;
@@ -4148,7 +4148,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
}
update_sits_in_cursum(journal, -i);
- up_write(&curseg->journal_rwsem);
+ write_unlock(&curseg->journal_rwlock);
}
/*
@@ -4204,7 +4204,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
to_journal = false;
if (to_journal) {
- down_write(&curseg->journal_rwsem);
+ write_lock(&curseg->journal_rwlock);
} else {
page = get_next_sit_page(sbi, start_segno);
raw_sit = page_address(page);
@@ -4251,7 +4251,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
}
if (to_journal)
- up_write(&curseg->journal_rwsem);
+ write_unlock(&curseg->journal_rwlock);
else
f2fs_put_page(page, 1);
@@ -4432,7 +4432,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
if (!array[i].sum_blk)
return -ENOMEM;
- init_rwsem(&array[i].journal_rwsem);
+ rwlock_init(&array[i].journal_rwlock);
array[i].journal = f2fs_kzalloc(sbi,
sizeof(struct f2fs_journal), GFP_KERNEL);
if (!array[i].journal)
@@ -4509,7 +4509,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
start_blk += readed;
} while (start_blk < sit_blk_cnt);
- down_read(&curseg->journal_rwsem);
+ read_lock(&curseg->journal_rwlock);
for (i = 0; i < sits_in_cursum(journal); i++) {
unsigned int old_valid_blocks;
@@ -4551,7 +4551,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
old_valid_blocks;
}
}
- up_read(&curseg->journal_rwsem);
+ read_unlock(&curseg->journal_rwlock);
if (!err && total_node_blocks != valid_node_count(sbi)) {
f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 050230c70a53..afb9041dc2e6 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -306,7 +306,7 @@ struct victim_selection {
struct curseg_info {
struct mutex curseg_mutex; /* lock for consistency */
struct f2fs_summary_block *sum_blk; /* cached summary block */
- struct rw_semaphore journal_rwsem; /* protect journal area */
+ rwlock_t journal_rwlock; /* protect journal area */
struct f2fs_journal *journal; /* cached journal info */
unsigned char alloc_type; /* current allocation type */
unsigned short seg_type; /* segment type like CURSEG_XXX_TYPE */
--
2.32.0.402.g57bb445576-goog
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH] f2fs: use rwlock instead of rwsem for journal
2021-07-22 1:41 ` [f2fs-dev] " Jaegeuk Kim
(?)
@ 2021-07-22 3:06 ` Hillf Danton
-1 siblings, 0 replies; 16+ messages in thread
From: Hillf Danton @ 2021-07-22 3:06 UTC (permalink / raw)
To: Jaegeuk Kim; +Cc: linux-kernel, linux-f2fs-devel
On Wed, 21 Jul 2021 18:41:49 -0700 Jaegeuk Kim wrote:
>
>This tries to fix priority inversion in the below condition resulting in
>long checkpoint delay.
If you are right and the patch makes real sense, feel free to specify a bit
more on the priority inversion you are trying to fix, and how the fix helps
shorten the checkpoint delay. And test data is welcome.
>
>f2fs_get_node_info()
> - nat_tree_lock
> -> sleep in journal_rwsem
> checkpoint
> - waiting for nat_tree_lock
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH] f2fs: use rwlock instead of rwsem for journal
2021-07-22 1:41 ` [f2fs-dev] " Jaegeuk Kim
@ 2021-07-22 13:41 ` Chao Yu
-1 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2021-07-22 13:41 UTC (permalink / raw)
To: Jaegeuk Kim, linux-kernel, linux-f2fs-devel
On 2021/7/22 9:41, Jaegeuk Kim wrote:
> This tries to fix priority inversion in the below condition resulting in
> long checkpoint delay.
>
> f2fs_get_node_info()
> - nat_tree_lock
> -> sleep in journal_rwsem
> checkpoint
> - waiting for nat_tree_lock
>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
> fs/f2fs/node.c | 16 ++++++++--------
> fs/f2fs/segment.c | 22 +++++++++++-----------
> fs/f2fs/segment.h | 2 +-
> 3 files changed, 20 insertions(+), 20 deletions(-)
>
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 0be9e2d7120e..821a40e02fb4 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -567,13 +567,13 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
> memset(&ne, 0, sizeof(struct f2fs_nat_entry));
>
> /* Check current segment summary */
> - down_read(&curseg->journal_rwsem);
> + read_lock(&curseg->journal_rwlock);
It seems journal_rwsem covers too many operations, e.g.
- scan_curseg_cache
- add_free_nid
- f2fs_kmem_cache_alloc
- kmem_cache_alloc(__GFP_NOFAIL)
Thanks,
> i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
> if (i >= 0) {
> ne = nat_in_journal(journal, i);
> node_info_from_raw_nat(ni, &ne);
> }
> - up_read(&curseg->journal_rwsem);
> + read_unlock(&curseg->journal_rwlock);
> if (i >= 0) {
> up_read(&nm_i->nat_tree_lock);
> goto cache;
> @@ -2338,7 +2338,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
> struct f2fs_journal *journal = curseg->journal;
> int i;
>
> - down_read(&curseg->journal_rwsem);
> + read_lock(&curseg->journal_rwlock);
> for (i = 0; i < nats_in_cursum(journal); i++) {
> block_t addr;
> nid_t nid;
> @@ -2350,7 +2350,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
> else
> remove_free_nid(sbi, nid);
> }
> - up_read(&curseg->journal_rwsem);
> + read_unlock(&curseg->journal_rwlock);
> }
>
> static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
> @@ -2799,7 +2799,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
> struct f2fs_journal *journal = curseg->journal;
> int i;
>
> - down_write(&curseg->journal_rwsem);
> + write_lock(&curseg->journal_rwlock);
> for (i = 0; i < nats_in_cursum(journal); i++) {
> struct nat_entry *ne;
> struct f2fs_nat_entry raw_ne;
> @@ -2831,7 +2831,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
> __set_nat_cache_dirty(nm_i, ne);
> }
> update_nats_in_cursum(journal, -i);
> - up_write(&curseg->journal_rwsem);
> + write_unlock(&curseg->journal_rwlock);
> }
>
> static void __adjust_nat_entry_set(struct nat_entry_set *nes,
> @@ -2906,7 +2906,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> to_journal = false;
>
> if (to_journal) {
> - down_write(&curseg->journal_rwsem);
> + write_lock(&curseg->journal_rwlock);
> } else {
> page = get_next_nat_page(sbi, start_nid);
> if (IS_ERR(page))
> @@ -2946,7 +2946,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> }
>
> if (to_journal) {
> - up_write(&curseg->journal_rwsem);
> + write_unlock(&curseg->journal_rwlock);
> } else {
> __update_nat_bits(sbi, start_nid, page);
> f2fs_put_page(page, 1);
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index f9b7fb785e1d..c397c1c271ec 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -2442,9 +2442,9 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
>
> mutex_lock(&curseg->curseg_mutex);
>
> - down_read(&curseg->journal_rwsem);
> + read_lock(&curseg->journal_rwlock);
> memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
> - up_read(&curseg->journal_rwsem);
> + read_unlock(&curseg->journal_rwlock);
>
> memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
> memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
> @@ -3876,9 +3876,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
> mutex_lock(&curseg->curseg_mutex);
>
> /* update journal info */
> - down_write(&curseg->journal_rwsem);
> + write_lock(&curseg->journal_rwlock);
> memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
> - up_write(&curseg->journal_rwsem);
> + write_unlock(&curseg->journal_rwlock);
>
> memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
> memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
> @@ -4136,7 +4136,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
> struct f2fs_journal *journal = curseg->journal;
> int i;
>
> - down_write(&curseg->journal_rwsem);
> + write_lock(&curseg->journal_rwlock);
> for (i = 0; i < sits_in_cursum(journal); i++) {
> unsigned int segno;
> bool dirtied;
> @@ -4148,7 +4148,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
> add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
> }
> update_sits_in_cursum(journal, -i);
> - up_write(&curseg->journal_rwsem);
> + write_unlock(&curseg->journal_rwlock);
> }
>
> /*
> @@ -4204,7 +4204,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> to_journal = false;
>
> if (to_journal) {
> - down_write(&curseg->journal_rwsem);
> + write_lock(&curseg->journal_rwlock);
> } else {
> page = get_next_sit_page(sbi, start_segno);
> raw_sit = page_address(page);
> @@ -4251,7 +4251,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> }
>
> if (to_journal)
> - up_write(&curseg->journal_rwsem);
> + write_unlock(&curseg->journal_rwlock);
> else
> f2fs_put_page(page, 1);
>
> @@ -4432,7 +4432,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
> array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
> if (!array[i].sum_blk)
> return -ENOMEM;
> - init_rwsem(&array[i].journal_rwsem);
> + rwlock_init(&array[i].journal_rwlock);
> array[i].journal = f2fs_kzalloc(sbi,
> sizeof(struct f2fs_journal), GFP_KERNEL);
> if (!array[i].journal)
> @@ -4509,7 +4509,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
> start_blk += readed;
> } while (start_blk < sit_blk_cnt);
>
> - down_read(&curseg->journal_rwsem);
> + read_lock(&curseg->journal_rwlock);
> for (i = 0; i < sits_in_cursum(journal); i++) {
> unsigned int old_valid_blocks;
>
> @@ -4551,7 +4551,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
> old_valid_blocks;
> }
> }
> - up_read(&curseg->journal_rwsem);
> + read_unlock(&curseg->journal_rwlock);
>
> if (!err && total_node_blocks != valid_node_count(sbi)) {
> f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index 050230c70a53..afb9041dc2e6 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -306,7 +306,7 @@ struct victim_selection {
> struct curseg_info {
> struct mutex curseg_mutex; /* lock for consistency */
> struct f2fs_summary_block *sum_blk; /* cached summary block */
> - struct rw_semaphore journal_rwsem; /* protect journal area */
> + rwlock_t journal_rwlock; /* protect journal area */
> struct f2fs_journal *journal; /* cached journal info */
> unsigned char alloc_type; /* current allocation type */
> unsigned short seg_type; /* segment type like CURSEG_XXX_TYPE */
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH] f2fs: use rwlock instead of rwsem for journal
@ 2021-07-22 13:41 ` Chao Yu
0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2021-07-22 13:41 UTC (permalink / raw)
To: Jaegeuk Kim, linux-kernel, linux-f2fs-devel
On 2021/7/22 9:41, Jaegeuk Kim wrote:
> This tries to fix priority inversion in the below condition resulting in
> long checkpoint delay.
>
> f2fs_get_node_info()
> - nat_tree_lock
> -> sleep in journal_rwsem
> checkpoint
> - waiting for nat_tree_lock
>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
> fs/f2fs/node.c | 16 ++++++++--------
> fs/f2fs/segment.c | 22 +++++++++++-----------
> fs/f2fs/segment.h | 2 +-
> 3 files changed, 20 insertions(+), 20 deletions(-)
>
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 0be9e2d7120e..821a40e02fb4 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -567,13 +567,13 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
> memset(&ne, 0, sizeof(struct f2fs_nat_entry));
>
> /* Check current segment summary */
> - down_read(&curseg->journal_rwsem);
> + read_lock(&curseg->journal_rwlock);
It seems journal_rwsem covers too many operations, e.g.
- scan_curseg_cache
- add_free_nid
- f2fs_kmem_cache_alloc
- kmem_cache_alloc(__GFP_NOFAIL)
Thanks,
> i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
> if (i >= 0) {
> ne = nat_in_journal(journal, i);
> node_info_from_raw_nat(ni, &ne);
> }
> - up_read(&curseg->journal_rwsem);
> + read_unlock(&curseg->journal_rwlock);
> if (i >= 0) {
> up_read(&nm_i->nat_tree_lock);
> goto cache;
> @@ -2338,7 +2338,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
> struct f2fs_journal *journal = curseg->journal;
> int i;
>
> - down_read(&curseg->journal_rwsem);
> + read_lock(&curseg->journal_rwlock);
> for (i = 0; i < nats_in_cursum(journal); i++) {
> block_t addr;
> nid_t nid;
> @@ -2350,7 +2350,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
> else
> remove_free_nid(sbi, nid);
> }
> - up_read(&curseg->journal_rwsem);
> + read_unlock(&curseg->journal_rwlock);
> }
>
> static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
> @@ -2799,7 +2799,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
> struct f2fs_journal *journal = curseg->journal;
> int i;
>
> - down_write(&curseg->journal_rwsem);
> + write_lock(&curseg->journal_rwlock);
> for (i = 0; i < nats_in_cursum(journal); i++) {
> struct nat_entry *ne;
> struct f2fs_nat_entry raw_ne;
> @@ -2831,7 +2831,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
> __set_nat_cache_dirty(nm_i, ne);
> }
> update_nats_in_cursum(journal, -i);
> - up_write(&curseg->journal_rwsem);
> + write_unlock(&curseg->journal_rwlock);
> }
>
> static void __adjust_nat_entry_set(struct nat_entry_set *nes,
> @@ -2906,7 +2906,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> to_journal = false;
>
> if (to_journal) {
> - down_write(&curseg->journal_rwsem);
> + write_lock(&curseg->journal_rwlock);
> } else {
> page = get_next_nat_page(sbi, start_nid);
> if (IS_ERR(page))
> @@ -2946,7 +2946,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> }
>
> if (to_journal) {
> - up_write(&curseg->journal_rwsem);
> + write_unlock(&curseg->journal_rwlock);
> } else {
> __update_nat_bits(sbi, start_nid, page);
> f2fs_put_page(page, 1);
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index f9b7fb785e1d..c397c1c271ec 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -2442,9 +2442,9 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
>
> mutex_lock(&curseg->curseg_mutex);
>
> - down_read(&curseg->journal_rwsem);
> + read_lock(&curseg->journal_rwlock);
> memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
> - up_read(&curseg->journal_rwsem);
> + read_unlock(&curseg->journal_rwlock);
>
> memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
> memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
> @@ -3876,9 +3876,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
> mutex_lock(&curseg->curseg_mutex);
>
> /* update journal info */
> - down_write(&curseg->journal_rwsem);
> + write_lock(&curseg->journal_rwlock);
> memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
> - up_write(&curseg->journal_rwsem);
> + write_unlock(&curseg->journal_rwlock);
>
> memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
> memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
> @@ -4136,7 +4136,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
> struct f2fs_journal *journal = curseg->journal;
> int i;
>
> - down_write(&curseg->journal_rwsem);
> + write_lock(&curseg->journal_rwlock);
> for (i = 0; i < sits_in_cursum(journal); i++) {
> unsigned int segno;
> bool dirtied;
> @@ -4148,7 +4148,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
> add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
> }
> update_sits_in_cursum(journal, -i);
> - up_write(&curseg->journal_rwsem);
> + write_unlock(&curseg->journal_rwlock);
> }
>
> /*
> @@ -4204,7 +4204,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> to_journal = false;
>
> if (to_journal) {
> - down_write(&curseg->journal_rwsem);
> + write_lock(&curseg->journal_rwlock);
> } else {
> page = get_next_sit_page(sbi, start_segno);
> raw_sit = page_address(page);
> @@ -4251,7 +4251,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> }
>
> if (to_journal)
> - up_write(&curseg->journal_rwsem);
> + write_unlock(&curseg->journal_rwlock);
> else
> f2fs_put_page(page, 1);
>
> @@ -4432,7 +4432,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
> array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
> if (!array[i].sum_blk)
> return -ENOMEM;
> - init_rwsem(&array[i].journal_rwsem);
> + rwlock_init(&array[i].journal_rwlock);
> array[i].journal = f2fs_kzalloc(sbi,
> sizeof(struct f2fs_journal), GFP_KERNEL);
> if (!array[i].journal)
> @@ -4509,7 +4509,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
> start_blk += readed;
> } while (start_blk < sit_blk_cnt);
>
> - down_read(&curseg->journal_rwsem);
> + read_lock(&curseg->journal_rwlock);
> for (i = 0; i < sits_in_cursum(journal); i++) {
> unsigned int old_valid_blocks;
>
> @@ -4551,7 +4551,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
> old_valid_blocks;
> }
> }
> - up_read(&curseg->journal_rwsem);
> + read_unlock(&curseg->journal_rwlock);
>
> if (!err && total_node_blocks != valid_node_count(sbi)) {
> f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index 050230c70a53..afb9041dc2e6 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -306,7 +306,7 @@ struct victim_selection {
> struct curseg_info {
> struct mutex curseg_mutex; /* lock for consistency */
> struct f2fs_summary_block *sum_blk; /* cached summary block */
> - struct rw_semaphore journal_rwsem; /* protect journal area */
> + rwlock_t journal_rwlock; /* protect journal area */
> struct f2fs_journal *journal; /* cached journal info */
> unsigned char alloc_type; /* current allocation type */
> unsigned short seg_type; /* segment type like CURSEG_XXX_TYPE */
>
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v2] f2fs: don't sleep while grabing nat_tree_lock
2021-07-22 1:41 ` [f2fs-dev] " Jaegeuk Kim
@ 2021-07-22 17:44 ` Jaegeuk Kim
-1 siblings, 0 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2021-07-22 17:44 UTC (permalink / raw)
To: linux-kernel, linux-f2fs-devel
This tries to fix priority inversion in the below condition resulting in
long checkpoint delay.
f2fs_get_node_info()
- nat_tree_lock
-> sleep to grab journal_rwsem by contention
checkpoint
- waiting for nat_tree_lock
In order to let checkpoint go, let's release nat_tree_lock, if there's a
journal_rwsem contention.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
Change log from v1:
- drop rwlock but take retry logic to reduce lock-holding time
fs/f2fs/node.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0be9e2d7120e..c60ba4179bb2 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -552,7 +552,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
int i;
ni->nid = nid;
-
+retry:
/* Check nat cache */
down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
@@ -564,10 +564,16 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
return 0;
}
- memset(&ne, 0, sizeof(struct f2fs_nat_entry));
+ /*
+ * Check current segment summary by trying to grab journal_rwsem first.
+ * This sem is on the critical path on the checkpoint requiring the above
+ * nat_tree_lock. Therefore, we should retry, if we failed to grab here.
+ */
+ if (!down_read_trylock(&curseg->journal_rwsem)) {
+ up_read(&nm_i->nat_tree_lock);
+ goto retry;
+ }
- /* Check current segment summary */
- down_read(&curseg->journal_rwsem);
i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
if (i >= 0) {
ne = nat_in_journal(journal, i);
--
2.32.0.432.gabb21c7263-goog
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH v2] f2fs: don't sleep while grabing nat_tree_lock
@ 2021-07-22 17:44 ` Jaegeuk Kim
0 siblings, 0 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2021-07-22 17:44 UTC (permalink / raw)
To: linux-kernel, linux-f2fs-devel
This tries to fix priority inversion in the below condition resulting in
long checkpoint delay.
f2fs_get_node_info()
- nat_tree_lock
-> sleep to grab journal_rwsem by contention
checkpoint
- waiting for nat_tree_lock
In order to let checkpoint go, let's release nat_tree_lock, if there's a
journal_rwsem contention.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
Change log from v1:
- drop rwlock but take retry logic to reduce lock-holding time
fs/f2fs/node.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0be9e2d7120e..c60ba4179bb2 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -552,7 +552,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
int i;
ni->nid = nid;
-
+retry:
/* Check nat cache */
down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
@@ -564,10 +564,16 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
return 0;
}
- memset(&ne, 0, sizeof(struct f2fs_nat_entry));
+ /*
+ * Check current segment summary by trying to grab journal_rwsem first.
+ * This sem is on the critical path on the checkpoint requiring the above
+ * nat_tree_lock. Therefore, we should retry, if we failed to grab here.
+ */
+ if (!down_read_trylock(&curseg->journal_rwsem)) {
+ up_read(&nm_i->nat_tree_lock);
+ goto retry;
+ }
- /* Check current segment summary */
- down_read(&curseg->journal_rwsem);
i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
if (i >= 0) {
ne = nat_in_journal(journal, i);
--
2.32.0.432.gabb21c7263-goog
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH v2] f2fs: don't sleep while grabing nat_tree_lock
2021-07-22 17:44 ` [f2fs-dev] " Jaegeuk Kim
@ 2021-07-23 0:39 ` Chao Yu
-1 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2021-07-23 0:39 UTC (permalink / raw)
To: Jaegeuk Kim, linux-kernel, linux-f2fs-devel
On 2021/7/23 1:44, Jaegeuk Kim wrote:
> This tries to fix priority inversion in the below condition resulting in
> long checkpoint delay.
>
> f2fs_get_node_info()
> - nat_tree_lock
> -> sleep to grab journal_rwsem by contention
>
> checkpoint
> - waiting for nat_tree_lock
>
> In order to let checkpoint go, let's release nat_tree_lock, if there's a
> journal_rwsem contention.
The write lock of nat_tree_lock is taken in many places, so how about
retrying the unlock/lock only while checkpoint() is flushing nat blocks?
---
fs/f2fs/f2fs.h | 1 +
fs/f2fs/node.c | 8 ++++++--
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ad8f99d7235f..05f41a15fda4 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -877,6 +877,7 @@ struct f2fs_nm_info {
spinlock_t nat_list_lock; /* protect clean nat entry list */
unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */
unsigned int nat_blocks; /* # of nat blocks */
+ bool flushing_nat; /* indicate checkpoint() is flushing nat blocks */
/* free node ids management */
struct radix_tree_root free_nid_root;/* root of the free_nid cache */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index c60ba4179bb2..2caa171a68f8 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -569,7 +569,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
* This sem is on the critical path on the checkpoint requiring the above
* nat_tree_lock. Therefore, we should retry, if we failed to grab here.
*/
- if (!down_read_trylock(&curseg->journal_rwsem)) {
+ if (!down_read_trylock(&curseg->journal_rwsem) && nm_i->flushing_nat) {
up_read(&nm_i->nat_tree_lock);
goto retry;
}
@@ -2981,6 +2981,8 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
LIST_HEAD(sets);
int err = 0;
+ nm_i->flushing_nat = true;
+
/*
* during unmount, let's flush nat_bits before checking
* nat_cnt[DIRTY_NAT].
@@ -2992,7 +2994,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
}
if (!nm_i->nat_cnt[DIRTY_NAT])
- return 0;
+ goto out;
down_write(&nm_i->nat_tree_lock);
@@ -3026,6 +3028,8 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
up_write(&nm_i->nat_tree_lock);
/* Allow dirty nats by node block allocation in write_begin */
+out:
+ nm_i->flushing_nat = false;
return err;
}
--
2.22.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH v2] f2fs: don't sleep while grabing nat_tree_lock
@ 2021-07-23 0:39 ` Chao Yu
0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2021-07-23 0:39 UTC (permalink / raw)
To: Jaegeuk Kim, linux-kernel, linux-f2fs-devel
On 2021/7/23 1:44, Jaegeuk Kim wrote:
> This tries to fix priority inversion in the below condition resulting in
> long checkpoint delay.
>
> f2fs_get_node_info()
> - nat_tree_lock
> -> sleep to grab journal_rwsem by contention
>
> checkpoint
> - waiting for nat_tree_lock
>
> In order to let checkpoint go, let's release nat_tree_lock, if there's a
> journal_rwsem contention.
The write lock of nat_tree_lock is taken in many places, so how about
retrying the unlock/lock only while checkpoint() is flushing nat blocks?
---
fs/f2fs/f2fs.h | 1 +
fs/f2fs/node.c | 8 ++++++--
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ad8f99d7235f..05f41a15fda4 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -877,6 +877,7 @@ struct f2fs_nm_info {
spinlock_t nat_list_lock; /* protect clean nat entry list */
unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */
unsigned int nat_blocks; /* # of nat blocks */
+ bool flushing_nat; /* indicate checkpoint() is flushing nat blocks */
/* free node ids management */
struct radix_tree_root free_nid_root;/* root of the free_nid cache */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index c60ba4179bb2..2caa171a68f8 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -569,7 +569,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
* This sem is on the critical path on the checkpoint requiring the above
* nat_tree_lock. Therefore, we should retry, if we failed to grab here.
*/
- if (!down_read_trylock(&curseg->journal_rwsem)) {
+ if (!down_read_trylock(&curseg->journal_rwsem) && nm_i->flushing_nat) {
up_read(&nm_i->nat_tree_lock);
goto retry;
}
@@ -2981,6 +2981,8 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
LIST_HEAD(sets);
int err = 0;
+ nm_i->flushing_nat = true;
+
/*
* during unmount, let's flush nat_bits before checking
* nat_cnt[DIRTY_NAT].
@@ -2992,7 +2994,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
}
if (!nm_i->nat_cnt[DIRTY_NAT])
- return 0;
+ goto out;
down_write(&nm_i->nat_tree_lock);
@@ -3026,6 +3028,8 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
up_write(&nm_i->nat_tree_lock);
/* Allow dirty nats by node block allocation in write_begin */
+out:
+ nm_i->flushing_nat = false;
return err;
}
--
2.22.1
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH v3] f2fs: don't sleep while grabing nat_tree_lock
2021-07-22 17:44 ` [f2fs-dev] " Jaegeuk Kim
@ 2021-07-23 2:35 ` Jaegeuk Kim
-1 siblings, 0 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2021-07-23 2:35 UTC (permalink / raw)
To: linux-kernel, linux-f2fs-devel
This tries to fix priority inversion in the below condition resulting in
long checkpoint delay.
f2fs_get_node_info()
- nat_tree_lock
-> sleep to grab journal_rwsem by contention
checkpoint
- waiting for nat_tree_lock
In order to let checkpoint go, let's release nat_tree_lock, if there's a
journal_rwsem contention.
Signed-off-by: Daeho Jeong <daehojeong@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
Change log from v2:
- don't bother checkpoint
fs/f2fs/node.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0be9e2d7120e..b26642daa3d2 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -552,7 +552,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
int i;
ni->nid = nid;
-
+retry:
/* Check nat cache */
down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
@@ -564,10 +564,19 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
return 0;
}
- memset(&ne, 0, sizeof(struct f2fs_nat_entry));
+ /*
+ * Check current segment summary by trying to grab journal_rwsem first.
+ * This sem is on the critical path on the checkpoint requiring the above
+ * nat_tree_lock. Therefore, we should retry, if we failed to grab here
+ * while not bothering checkpoint.
+ */
+ if (rwsem_is_locked(&sbi->cp_global_sem)) {
+ down_read(&curseg->journal_rwsem);
+ } else if (!down_read_trylock(&curseg->journal_rwsem)) {
+ up_read(&nm_i->nat_tree_lock);
+ goto retry;
+ }
- /* Check current segment summary */
- down_read(&curseg->journal_rwsem);
i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
if (i >= 0) {
ne = nat_in_journal(journal, i);
--
2.32.0.432.gabb21c7263-goog
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH v3] f2fs: don't sleep while grabing nat_tree_lock
@ 2021-07-23 2:35 ` Jaegeuk Kim
0 siblings, 0 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2021-07-23 2:35 UTC (permalink / raw)
To: linux-kernel, linux-f2fs-devel
This tries to fix priority inversion in the below condition resulting in
long checkpoint delay.
f2fs_get_node_info()
- nat_tree_lock
-> sleep to grab journal_rwsem by contention
checkpoint
- waiting for nat_tree_lock
In order to let checkpoint go, let's release nat_tree_lock, if there's a
journal_rwsem contention.
Signed-off-by: Daeho Jeong <daehojeong@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
Change log from v2:
- don't bother checkpoint
fs/f2fs/node.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0be9e2d7120e..b26642daa3d2 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -552,7 +552,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
int i;
ni->nid = nid;
-
+retry:
/* Check nat cache */
down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
@@ -564,10 +564,19 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
return 0;
}
- memset(&ne, 0, sizeof(struct f2fs_nat_entry));
+ /*
+ * Check current segment summary by trying to grab journal_rwsem first.
+ * This sem is on the critical path on the checkpoint requiring the above
+ * nat_tree_lock. Therefore, we should retry, if we failed to grab here
+ * while not bothering checkpoint.
+ */
+ if (rwsem_is_locked(&sbi->cp_global_sem)) {
+ down_read(&curseg->journal_rwsem);
+ } else if (!down_read_trylock(&curseg->journal_rwsem)) {
+ up_read(&nm_i->nat_tree_lock);
+ goto retry;
+ }
- /* Check current segment summary */
- down_read(&curseg->journal_rwsem);
i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
if (i >= 0) {
ne = nat_in_journal(journal, i);
--
2.32.0.432.gabb21c7263-goog
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH v3] f2fs: don't sleep while grabing nat_tree_lock
2021-07-23 2:35 ` [f2fs-dev] " Jaegeuk Kim
@ 2021-07-23 2:41 ` Chao Yu
-1 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2021-07-23 2:41 UTC (permalink / raw)
To: Jaegeuk Kim, linux-kernel, linux-f2fs-devel
On 2021/7/23 10:35, Jaegeuk Kim wrote:
> This tries to fix priority inversion in the below condition resulting in
> long checkpoint delay.
>
> f2fs_get_node_info()
> - nat_tree_lock
> -> sleep to grab journal_rwsem by contention
>
> checkpoint
> - waiting for nat_tree_lock
>
> In order to let checkpoint go, let's release nat_tree_lock, if there's a
> journal_rwsem contention.
>
> Signed-off-by: Daeho Jeong <daehojeong@google.com>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
> Change log from v2:
> - don't bother checkpoint
>
> fs/f2fs/node.c | 17 +++++++++++++----
> 1 file changed, 13 insertions(+), 4 deletions(-)
>
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 0be9e2d7120e..b26642daa3d2 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -552,7 +552,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
> int i;
>
> ni->nid = nid;
> -
> +retry:
> /* Check nat cache */
> down_read(&nm_i->nat_tree_lock);
> e = __lookup_nat_cache(nm_i, nid);
> @@ -564,10 +564,19 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
> return 0;
> }
>
> - memset(&ne, 0, sizeof(struct f2fs_nat_entry));
> + /*
> + * Check current segment summary by trying to grab journal_rwsem first.
> + * This sem is on the critical path on the checkpoint requiring the above
> + * nat_tree_lock. Therefore, we should retry, if we failed to grab here
> + * while not bothering checkpoint.
> + */
> + if (rwsem_is_locked(&sbi->cp_global_sem)) {
You mean: if (!rwsem_is_locked()) ?
IMO, once CP is in progress, all readers that need to read the nat will be blocked...
Thanks,
> + down_read(&curseg->journal_rwsem);
> + } else if (!down_read_trylock(&curseg->journal_rwsem)) {
> + up_read(&nm_i->nat_tree_lock);
> + goto retry;
> + }
>
> - /* Check current segment summary */
> - down_read(&curseg->journal_rwsem);
> i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
> if (i >= 0) {
> ne = nat_in_journal(journal, i);
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH v3] f2fs: don't sleep while grabing nat_tree_lock
@ 2021-07-23 2:41 ` Chao Yu
0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2021-07-23 2:41 UTC (permalink / raw)
To: Jaegeuk Kim, linux-kernel, linux-f2fs-devel
On 2021/7/23 10:35, Jaegeuk Kim wrote:
> This tries to fix priority inversion in the below condition resulting in
> long checkpoint delay.
>
> f2fs_get_node_info()
> - nat_tree_lock
> -> sleep to grab journal_rwsem by contention
>
> checkpoint
> - waiting for nat_tree_lock
>
> In order to let checkpoint go, let's release nat_tree_lock, if there's a
> journal_rwsem contention.
>
> Signed-off-by: Daeho Jeong <daehojeong@google.com>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
> Change log from v2:
> - don't bother checkpoint
>
> fs/f2fs/node.c | 17 +++++++++++++----
> 1 file changed, 13 insertions(+), 4 deletions(-)
>
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 0be9e2d7120e..b26642daa3d2 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -552,7 +552,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
> int i;
>
> ni->nid = nid;
> -
> +retry:
> /* Check nat cache */
> down_read(&nm_i->nat_tree_lock);
> e = __lookup_nat_cache(nm_i, nid);
> @@ -564,10 +564,19 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
> return 0;
> }
>
> - memset(&ne, 0, sizeof(struct f2fs_nat_entry));
> + /*
> + * Check current segment summary by trying to grab journal_rwsem first.
> + * This sem is on the critical path on the checkpoint requiring the above
> + * nat_tree_lock. Therefore, we should retry, if we failed to grab here
> + * while not bothering checkpoint.
> + */
> + if (rwsem_is_locked(&sbi->cp_global_sem)) {
You mean: if (!rwsem_is_locked()) ?
IMO, once CP is in progress, all readers that need to read the nat will be blocked...
Thanks,
> + down_read(&curseg->journal_rwsem);
> + } else if (!down_read_trylock(&curseg->journal_rwsem)) {
> + up_read(&nm_i->nat_tree_lock);
> + goto retry;
> + }
>
> - /* Check current segment summary */
> - down_read(&curseg->journal_rwsem);
> i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
> if (i >= 0) {
> ne = nat_in_journal(journal, i);
>
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH v4] f2fs: don't sleep while grabing nat_tree_lock
2021-07-23 2:35 ` [f2fs-dev] " Jaegeuk Kim
@ 2021-07-23 4:27 ` Jaegeuk Kim
-1 siblings, 0 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2021-07-23 4:27 UTC (permalink / raw)
To: linux-kernel, linux-f2fs-devel
This tries to fix priority inversion in the below condition resulting in
long checkpoint delay.
f2fs_get_node_info()
- nat_tree_lock
-> sleep to grab journal_rwsem by contention
checkpoint
- waiting for nat_tree_lock
In order to let checkpoint go, let's release nat_tree_lock, if there's a
journal_rwsem contention.
Signed-off-by: Daeho Jeong <daehojeong@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
Change log from v3:
- fix wrong condition check
fs/f2fs/node.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0be9e2d7120e..c945a9730f3c 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -552,7 +552,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
int i;
ni->nid = nid;
-
+retry:
/* Check nat cache */
down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
@@ -564,10 +564,19 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
return 0;
}
- memset(&ne, 0, sizeof(struct f2fs_nat_entry));
+ /*
+ * Check current segment summary by trying to grab journal_rwsem first.
+ * This sem is on the critical path on the checkpoint requiring the above
+ * nat_tree_lock. Therefore, we should retry, if we failed to grab here
+ * while not bothering checkpoint.
+ */
+ if (!rwsem_is_locked(&sbi->cp_global_sem)) {
+ down_read(&curseg->journal_rwsem);
+ } else if (!down_read_trylock(&curseg->journal_rwsem)) {
+ up_read(&nm_i->nat_tree_lock);
+ goto retry;
+ }
- /* Check current segment summary */
- down_read(&curseg->journal_rwsem);
i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
if (i >= 0) {
ne = nat_in_journal(journal, i);
--
2.32.0.432.gabb21c7263-goog
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [f2fs-dev] [PATCH v4] f2fs: don't sleep while grabing nat_tree_lock
@ 2021-07-23 4:27 ` Jaegeuk Kim
0 siblings, 0 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2021-07-23 4:27 UTC (permalink / raw)
To: linux-kernel, linux-f2fs-devel
This tries to fix priority inversion in the below condition resulting in
long checkpoint delay.
f2fs_get_node_info()
- nat_tree_lock
-> sleep to grab journal_rwsem by contention
checkpoint
- waiting for nat_tree_lock
In order to let checkpoint go, let's release nat_tree_lock, if there's a
journal_rwsem contention.
Signed-off-by: Daeho Jeong <daehojeong@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
Change log from v3:
- fix wrong condition check
fs/f2fs/node.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0be9e2d7120e..c945a9730f3c 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -552,7 +552,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
int i;
ni->nid = nid;
-
+retry:
/* Check nat cache */
down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
@@ -564,10 +564,19 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
return 0;
}
- memset(&ne, 0, sizeof(struct f2fs_nat_entry));
+ /*
+ * Check current segment summary by trying to grab journal_rwsem first.
+ * This sem is on the critical path on the checkpoint requiring the above
+ * nat_tree_lock. Therefore, we should retry, if we failed to grab here
+ * while not bothering checkpoint.
+ */
+ if (!rwsem_is_locked(&sbi->cp_global_sem)) {
+ down_read(&curseg->journal_rwsem);
+ } else if (!down_read_trylock(&curseg->journal_rwsem)) {
+ up_read(&nm_i->nat_tree_lock);
+ goto retry;
+ }
- /* Check current segment summary */
- down_read(&curseg->journal_rwsem);
i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
if (i >= 0) {
ne = nat_in_journal(journal, i);
--
2.32.0.432.gabb21c7263-goog
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH] f2fs: use rwlock instead of rwsem for journal
@ 2021-07-23 1:25 kernel test robot
0 siblings, 0 replies; 16+ messages in thread
From: kernel test robot @ 2021-07-23 1:25 UTC (permalink / raw)
To: kbuild
[-- Attachment #1: Type: text/plain, Size: 8805 bytes --]
CC: kbuild-all(a)lists.01.org
In-Reply-To: <20210722014149.525166-1-jaegeuk@kernel.org>
References: <20210722014149.525166-1-jaegeuk@kernel.org>
TO: Jaegeuk Kim <jaegeuk@kernel.org>
TO: linux-kernel(a)vger.kernel.org
TO: linux-f2fs-devel(a)lists.sourceforge.net
CC: Jaegeuk Kim <jaegeuk@kernel.org>
Hi Jaegeuk,
I love your patch! Perhaps something to improve:
[auto build test WARNING on f2fs/dev-test]
[also build test WARNING on linux/master linus/master v5.14-rc2 next-20210722]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Jaegeuk-Kim/f2fs-use-rwlock-instead-of-rwsem-for-journal/20210722-094334
base: https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git dev-test
:::::: branch date: 24 hours ago
:::::: commit date: 24 hours ago
config: arm64-randconfig-s032-20210722 (attached as .config)
compiler: aarch64-linux-gcc (GCC) 10.3.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# apt-get install sparse
# sparse version: v0.6.3-341-g8af24329-dirty
# https://github.com/0day-ci/linux/commit/d6b325e856f8fd166ea9722d4489f897cf468edf
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Jaegeuk-Kim/f2fs-use-rwlock-instead-of-rwsem-for-journal/20210722-094334
git checkout d6b325e856f8fd166ea9722d4489f897cf468edf
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-10.3.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=arm64 SHELL=/bin/bash fs/f2fs/
If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
sparse warnings: (new ones prefixed by >>)
>> fs/f2fs/node.c:2956:9: sparse: sparse: context imbalance in '__flush_nat_entry_set' - different lock contexts for basic block
--
fs/f2fs/segment.c: note: in included file (through include/linux/irqflags.h, include/linux/spinlock.h, include/linux/wait.h, ...):
arch/arm64/include/asm/percpu.h:126:1: sparse: sparse: cast truncates bits from constant value (ffffffff becomes ff)
arch/arm64/include/asm/percpu.h:126:1: sparse: sparse: cast truncates bits from constant value (ffffffff becomes ffff)
arch/arm64/include/asm/percpu.h:126:1: sparse: sparse: cast truncates bits from constant value (ffffffff becomes ff)
arch/arm64/include/asm/percpu.h:126:1: sparse: sparse: cast truncates bits from constant value (ffffffff becomes ffff)
>> fs/f2fs/segment.c:4258:17: sparse: sparse: context imbalance in 'f2fs_flush_sit_entries' - different lock contexts for basic block
vim +/__flush_nat_entry_set +2956 fs/f2fs/node.c
22ad0b6ab46683 Jaegeuk Kim 2017-02-09 2887
edc55aaf0d1712 Jaegeuk Kim 2018-09-17 2888 static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
22ad0b6ab46683 Jaegeuk Kim 2017-02-09 2889 struct nat_entry_set *set, struct cp_control *cpc)
309cc2b6e7ae66 Jaegeuk Kim 2014-09-22 2890 {
309cc2b6e7ae66 Jaegeuk Kim 2014-09-22 2891 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
b7ad7512b84b26 Chao Yu 2016-02-19 2892 struct f2fs_journal *journal = curseg->journal;
309cc2b6e7ae66 Jaegeuk Kim 2014-09-22 2893 nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
309cc2b6e7ae66 Jaegeuk Kim 2014-09-22 2894 bool to_journal = true;
309cc2b6e7ae66 Jaegeuk Kim 2014-09-22 2895 struct f2fs_nat_block *nat_blk;
309cc2b6e7ae66 Jaegeuk Kim 2014-09-22 2896 struct nat_entry *ne, *cur;
309cc2b6e7ae66 Jaegeuk Kim 2014-09-22 2897 struct page *page = NULL;
e05df3b115e730 Jaegeuk Kim 2012-11-02 2898
e05df3b115e730 Jaegeuk Kim 2012-11-02 2899 /*
aec71382c68135 Chao Yu 2014-06-24 2900 * there are two steps to flush nat entries:
aec71382c68135 Chao Yu 2014-06-24 2901 * #1, flush nat entries to journal in current hot data summary block.
aec71382c68135 Chao Yu 2014-06-24 2902 * #2, flush nat entries to nat page.
e05df3b115e730 Jaegeuk Kim 2012-11-02 2903 */
22ad0b6ab46683 Jaegeuk Kim 2017-02-09 2904 if (enabled_nat_bits(sbi, cpc) ||
22ad0b6ab46683 Jaegeuk Kim 2017-02-09 2905 !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
aec71382c68135 Chao Yu 2014-06-24 2906 to_journal = false;
aec71382c68135 Chao Yu 2014-06-24 2907
aec71382c68135 Chao Yu 2014-06-24 2908 if (to_journal) {
d6b325e856f8fd Jaegeuk Kim 2021-07-21 2909 write_lock(&curseg->journal_rwlock);
aec71382c68135 Chao Yu 2014-06-24 2910 } else {
e05df3b115e730 Jaegeuk Kim 2012-11-02 2911 page = get_next_nat_page(sbi, start_nid);
edc55aaf0d1712 Jaegeuk Kim 2018-09-17 2912 if (IS_ERR(page))
edc55aaf0d1712 Jaegeuk Kim 2018-09-17 2913 return PTR_ERR(page);
edc55aaf0d1712 Jaegeuk Kim 2018-09-17 2914
e05df3b115e730 Jaegeuk Kim 2012-11-02 2915 nat_blk = page_address(page);
9850cf4a890888 Jaegeuk Kim 2014-09-02 2916 f2fs_bug_on(sbi, !nat_blk);
e05df3b115e730 Jaegeuk Kim 2012-11-02 2917 }
e05df3b115e730 Jaegeuk Kim 2012-11-02 2918
aec71382c68135 Chao Yu 2014-06-24 2919 /* flush dirty nats in nat entry set */
309cc2b6e7ae66 Jaegeuk Kim 2014-09-22 2920 list_for_each_entry_safe(ne, cur, &set->entry_list, list) {
aec71382c68135 Chao Yu 2014-06-24 2921 struct f2fs_nat_entry *raw_ne;
aec71382c68135 Chao Yu 2014-06-24 2922 nid_t nid = nat_get_nid(ne);
aec71382c68135 Chao Yu 2014-06-24 2923 int offset;
e05df3b115e730 Jaegeuk Kim 2012-11-02 2924
febeca6d375531 Chao Yu 2017-06-05 2925 f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR);
309cc2b6e7ae66 Jaegeuk Kim 2014-09-22 2926
aec71382c68135 Chao Yu 2014-06-24 2927 if (to_journal) {
4d57b86dd86404 Chao Yu 2018-05-30 2928 offset = f2fs_lookup_journal_in_cursum(journal,
aec71382c68135 Chao Yu 2014-06-24 2929 NAT_JOURNAL, nid, 1);
9850cf4a890888 Jaegeuk Kim 2014-09-02 2930 f2fs_bug_on(sbi, offset < 0);
dfc08a12e49a64 Chao Yu 2016-02-14 2931 raw_ne = &nat_in_journal(journal, offset);
dfc08a12e49a64 Chao Yu 2016-02-14 2932 nid_in_journal(journal, offset) = cpu_to_le32(nid);
aec71382c68135 Chao Yu 2014-06-24 2933 } else {
aec71382c68135 Chao Yu 2014-06-24 2934 raw_ne = &nat_blk->entries[nid - start_nid];
e05df3b115e730 Jaegeuk Kim 2012-11-02 2935 }
aec71382c68135 Chao Yu 2014-06-24 2936 raw_nat_from_node_info(raw_ne, &ne->ni);
88bd02c9472a16 Jaegeuk Kim 2014-09-15 2937 nat_reset_flag(ne);
0b28b71e298fb7 Kinglong Mee 2017-02-28 2938 __clear_nat_cache_dirty(NM_I(sbi), set, ne);
04d47e673863c6 Chao Yu 2016-11-17 2939 if (nat_get_blkaddr(ne) == NULL_ADDR) {
5921aaa185908c LiFan 2017-11-22 2940 add_free_nid(sbi, nid, false, true);
4ac912427c4214 Chao Yu 2017-02-23 2941 } else {
4ac912427c4214 Chao Yu 2017-02-23 2942 spin_lock(&NM_I(sbi)->nid_list_lock);
346fe752c431bc Chao Yu 2017-03-13 2943 update_free_nid_bitmap(sbi, nid, false, false);
04d47e673863c6 Chao Yu 2016-11-17 2944 spin_unlock(&NM_I(sbi)->nid_list_lock);
04d47e673863c6 Chao Yu 2016-11-17 2945 }
e05df3b115e730 Jaegeuk Kim 2012-11-02 2946 }
aec71382c68135 Chao Yu 2014-06-24 2947
22ad0b6ab46683 Jaegeuk Kim 2017-02-09 2948 if (to_journal) {
d6b325e856f8fd Jaegeuk Kim 2021-07-21 2949 write_unlock(&curseg->journal_rwlock);
22ad0b6ab46683 Jaegeuk Kim 2017-02-09 2950 } else {
22ad0b6ab46683 Jaegeuk Kim 2017-02-09 2951 __update_nat_bits(sbi, start_nid, page);
e05df3b115e730 Jaegeuk Kim 2012-11-02 2952 f2fs_put_page(page, 1);
22ad0b6ab46683 Jaegeuk Kim 2017-02-09 2953 }
aec71382c68135 Chao Yu 2014-06-24 2954
59c9081bc86ef0 Yunlei He 2017-03-13 2955 /* Allow dirty nats by node block allocation in write_begin */
59c9081bc86ef0 Yunlei He 2017-03-13 @2956 if (!set->entry_cnt) {
309cc2b6e7ae66 Jaegeuk Kim 2014-09-22 2957 radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
309cc2b6e7ae66 Jaegeuk Kim 2014-09-22 2958 kmem_cache_free(nat_entry_set_slab, set);
309cc2b6e7ae66 Jaegeuk Kim 2014-09-22 2959 }
edc55aaf0d1712 Jaegeuk Kim 2018-09-17 2960 return 0;
59c9081bc86ef0 Yunlei He 2017-03-13 2961 }
309cc2b6e7ae66 Jaegeuk Kim 2014-09-22 2962
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org
[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 32586 bytes --]
^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2021-07-23 4:27 UTC | newest]
Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-22 1:41 [PATCH] f2fs: use rwlock instead of rwsem for journal Jaegeuk Kim
2021-07-22 1:41 ` [f2fs-dev] " Jaegeuk Kim
2021-07-22 3:06 ` Hillf Danton
2021-07-22 13:41 ` Chao Yu
2021-07-22 13:41 ` Chao Yu
2021-07-22 17:44 ` [PATCH v2] f2fs: don't sleep while grabing nat_tree_lock Jaegeuk Kim
2021-07-22 17:44 ` [f2fs-dev] " Jaegeuk Kim
2021-07-23 0:39 ` Chao Yu
2021-07-23 0:39 ` Chao Yu
2021-07-23 2:35 ` [PATCH v3] " Jaegeuk Kim
2021-07-23 2:35 ` [f2fs-dev] " Jaegeuk Kim
2021-07-23 2:41 ` Chao Yu
2021-07-23 2:41 ` Chao Yu
2021-07-23 4:27 ` [f2fs-dev] [PATCH v4] " Jaegeuk Kim
2021-07-23 4:27 ` Jaegeuk Kim
2021-07-23 1:25 [PATCH] f2fs: use rwlock instead of rwsem for journal kernel test robot
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.