All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jaegeuk Kim <jaegeuk@kernel.org>
To: linux-kernel@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Subject: [PATCH] f2fs: use rwlock instead of rwsem for journal
Date: Wed, 21 Jul 2021 18:41:49 -0700	[thread overview]
Message-ID: <20210722014149.525166-1-jaegeuk@kernel.org> (raw)

This tries to fix priority inversion in the below condition resulting in
long checkpoint delay.

f2fs_get_node_info()
 - nat_tree_lock
  -> sleep in journal_rwsem
                                     checkpoint
                                     - waiting for nat_tree_lock

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/node.c    | 16 ++++++++--------
 fs/f2fs/segment.c | 22 +++++++++++-----------
 fs/f2fs/segment.h |  2 +-
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0be9e2d7120e..821a40e02fb4 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -567,13 +567,13 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
 	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
 
 	/* Check current segment summary */
-	down_read(&curseg->journal_rwsem);
+	read_lock(&curseg->journal_rwlock);
 	i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
 	if (i >= 0) {
 		ne = nat_in_journal(journal, i);
 		node_info_from_raw_nat(ni, &ne);
 	}
-	up_read(&curseg->journal_rwsem);
+	read_unlock(&curseg->journal_rwlock);
 	if (i >= 0) {
 		up_read(&nm_i->nat_tree_lock);
 		goto cache;
@@ -2338,7 +2338,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
 	struct f2fs_journal *journal = curseg->journal;
 	int i;
 
-	down_read(&curseg->journal_rwsem);
+	read_lock(&curseg->journal_rwlock);
 	for (i = 0; i < nats_in_cursum(journal); i++) {
 		block_t addr;
 		nid_t nid;
@@ -2350,7 +2350,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
 		else
 			remove_free_nid(sbi, nid);
 	}
-	up_read(&curseg->journal_rwsem);
+	read_unlock(&curseg->journal_rwlock);
 }
 
 static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
@@ -2799,7 +2799,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
 	struct f2fs_journal *journal = curseg->journal;
 	int i;
 
-	down_write(&curseg->journal_rwsem);
+	write_lock(&curseg->journal_rwlock);
 	for (i = 0; i < nats_in_cursum(journal); i++) {
 		struct nat_entry *ne;
 		struct f2fs_nat_entry raw_ne;
@@ -2831,7 +2831,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
 		__set_nat_cache_dirty(nm_i, ne);
 	}
 	update_nats_in_cursum(journal, -i);
-	up_write(&curseg->journal_rwsem);
+	write_unlock(&curseg->journal_rwlock);
 }
 
 static void __adjust_nat_entry_set(struct nat_entry_set *nes,
@@ -2906,7 +2906,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 		to_journal = false;
 
 	if (to_journal) {
-		down_write(&curseg->journal_rwsem);
+		write_lock(&curseg->journal_rwlock);
 	} else {
 		page = get_next_nat_page(sbi, start_nid);
 		if (IS_ERR(page))
@@ -2946,7 +2946,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 	}
 
 	if (to_journal) {
-		up_write(&curseg->journal_rwsem);
+		write_unlock(&curseg->journal_rwlock);
 	} else {
 		__update_nat_bits(sbi, start_nid, page);
 		f2fs_put_page(page, 1);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f9b7fb785e1d..c397c1c271ec 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2442,9 +2442,9 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
 
 	mutex_lock(&curseg->curseg_mutex);
 
-	down_read(&curseg->journal_rwsem);
+	read_lock(&curseg->journal_rwlock);
 	memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
-	up_read(&curseg->journal_rwsem);
+	read_unlock(&curseg->journal_rwlock);
 
 	memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
 	memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
@@ -3876,9 +3876,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
 	mutex_lock(&curseg->curseg_mutex);
 
 	/* update journal info */
-	down_write(&curseg->journal_rwsem);
+	write_lock(&curseg->journal_rwlock);
 	memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
-	up_write(&curseg->journal_rwsem);
+	write_unlock(&curseg->journal_rwlock);
 
 	memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
 	memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
@@ -4136,7 +4136,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
 	struct f2fs_journal *journal = curseg->journal;
 	int i;
 
-	down_write(&curseg->journal_rwsem);
+	write_lock(&curseg->journal_rwlock);
 	for (i = 0; i < sits_in_cursum(journal); i++) {
 		unsigned int segno;
 		bool dirtied;
@@ -4148,7 +4148,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
 			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
 	}
 	update_sits_in_cursum(journal, -i);
-	up_write(&curseg->journal_rwsem);
+	write_unlock(&curseg->journal_rwlock);
 }
 
 /*
@@ -4204,7 +4204,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 			to_journal = false;
 
 		if (to_journal) {
-			down_write(&curseg->journal_rwsem);
+			write_lock(&curseg->journal_rwlock);
 		} else {
 			page = get_next_sit_page(sbi, start_segno);
 			raw_sit = page_address(page);
@@ -4251,7 +4251,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 		}
 
 		if (to_journal)
-			up_write(&curseg->journal_rwsem);
+			write_unlock(&curseg->journal_rwlock);
 		else
 			f2fs_put_page(page, 1);
 
@@ -4432,7 +4432,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
 		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
 		if (!array[i].sum_blk)
 			return -ENOMEM;
-		init_rwsem(&array[i].journal_rwsem);
+		rwlock_init(&array[i].journal_rwlock);
 		array[i].journal = f2fs_kzalloc(sbi,
 				sizeof(struct f2fs_journal), GFP_KERNEL);
 		if (!array[i].journal)
@@ -4509,7 +4509,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
 		start_blk += readed;
 	} while (start_blk < sit_blk_cnt);
 
-	down_read(&curseg->journal_rwsem);
+	read_lock(&curseg->journal_rwlock);
 	for (i = 0; i < sits_in_cursum(journal); i++) {
 		unsigned int old_valid_blocks;
 
@@ -4551,7 +4551,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
 							old_valid_blocks;
 		}
 	}
-	up_read(&curseg->journal_rwsem);
+	read_unlock(&curseg->journal_rwlock);
 
 	if (!err && total_node_blocks != valid_node_count(sbi)) {
 		f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 050230c70a53..afb9041dc2e6 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -306,7 +306,7 @@ struct victim_selection {
 struct curseg_info {
 	struct mutex curseg_mutex;		/* lock for consistency */
 	struct f2fs_summary_block *sum_blk;	/* cached summary block */
-	struct rw_semaphore journal_rwsem;	/* protect journal area */
+	rwlock_t journal_rwlock;		/* protect journal area */
 	struct f2fs_journal *journal;		/* cached journal info */
 	unsigned char alloc_type;		/* current allocation type */
 	unsigned short seg_type;		/* segment type like CURSEG_XXX_TYPE */
-- 
2.32.0.402.g57bb445576-goog


WARNING: multiple messages have this Message-ID (diff)
From: Jaegeuk Kim <jaegeuk@kernel.org>
To: linux-kernel@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Subject: [f2fs-dev] [PATCH] f2fs: use rwlock instead of rwsem for journal
Date: Wed, 21 Jul 2021 18:41:49 -0700	[thread overview]
Message-ID: <20210722014149.525166-1-jaegeuk@kernel.org> (raw)

This tries to fix priority inversion in the below condition resulting in
long checkpoint delay.

f2fs_get_node_info()
 - nat_tree_lock
  -> sleep in journal_rwsem
                                     checkpoint
                                     - waiting for nat_tree_lock

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/node.c    | 16 ++++++++--------
 fs/f2fs/segment.c | 22 +++++++++++-----------
 fs/f2fs/segment.h |  2 +-
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0be9e2d7120e..821a40e02fb4 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -567,13 +567,13 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
 	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
 
 	/* Check current segment summary */
-	down_read(&curseg->journal_rwsem);
+	read_lock(&curseg->journal_rwlock);
 	i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
 	if (i >= 0) {
 		ne = nat_in_journal(journal, i);
 		node_info_from_raw_nat(ni, &ne);
 	}
-	up_read(&curseg->journal_rwsem);
+	read_unlock(&curseg->journal_rwlock);
 	if (i >= 0) {
 		up_read(&nm_i->nat_tree_lock);
 		goto cache;
@@ -2338,7 +2338,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
 	struct f2fs_journal *journal = curseg->journal;
 	int i;
 
-	down_read(&curseg->journal_rwsem);
+	read_lock(&curseg->journal_rwlock);
 	for (i = 0; i < nats_in_cursum(journal); i++) {
 		block_t addr;
 		nid_t nid;
@@ -2350,7 +2350,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
 		else
 			remove_free_nid(sbi, nid);
 	}
-	up_read(&curseg->journal_rwsem);
+	read_unlock(&curseg->journal_rwlock);
 }
 
 static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
@@ -2799,7 +2799,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
 	struct f2fs_journal *journal = curseg->journal;
 	int i;
 
-	down_write(&curseg->journal_rwsem);
+	write_lock(&curseg->journal_rwlock);
 	for (i = 0; i < nats_in_cursum(journal); i++) {
 		struct nat_entry *ne;
 		struct f2fs_nat_entry raw_ne;
@@ -2831,7 +2831,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
 		__set_nat_cache_dirty(nm_i, ne);
 	}
 	update_nats_in_cursum(journal, -i);
-	up_write(&curseg->journal_rwsem);
+	write_unlock(&curseg->journal_rwlock);
 }
 
 static void __adjust_nat_entry_set(struct nat_entry_set *nes,
@@ -2906,7 +2906,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 		to_journal = false;
 
 	if (to_journal) {
-		down_write(&curseg->journal_rwsem);
+		write_lock(&curseg->journal_rwlock);
 	} else {
 		page = get_next_nat_page(sbi, start_nid);
 		if (IS_ERR(page))
@@ -2946,7 +2946,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 	}
 
 	if (to_journal) {
-		up_write(&curseg->journal_rwsem);
+		write_unlock(&curseg->journal_rwlock);
 	} else {
 		__update_nat_bits(sbi, start_nid, page);
 		f2fs_put_page(page, 1);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f9b7fb785e1d..c397c1c271ec 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2442,9 +2442,9 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
 
 	mutex_lock(&curseg->curseg_mutex);
 
-	down_read(&curseg->journal_rwsem);
+	read_lock(&curseg->journal_rwlock);
 	memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
-	up_read(&curseg->journal_rwsem);
+	read_unlock(&curseg->journal_rwlock);
 
 	memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
 	memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
@@ -3876,9 +3876,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
 	mutex_lock(&curseg->curseg_mutex);
 
 	/* update journal info */
-	down_write(&curseg->journal_rwsem);
+	write_lock(&curseg->journal_rwlock);
 	memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
-	up_write(&curseg->journal_rwsem);
+	write_unlock(&curseg->journal_rwlock);
 
 	memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
 	memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
@@ -4136,7 +4136,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
 	struct f2fs_journal *journal = curseg->journal;
 	int i;
 
-	down_write(&curseg->journal_rwsem);
+	write_lock(&curseg->journal_rwlock);
 	for (i = 0; i < sits_in_cursum(journal); i++) {
 		unsigned int segno;
 		bool dirtied;
@@ -4148,7 +4148,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
 			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
 	}
 	update_sits_in_cursum(journal, -i);
-	up_write(&curseg->journal_rwsem);
+	write_unlock(&curseg->journal_rwlock);
 }
 
 /*
@@ -4204,7 +4204,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 			to_journal = false;
 
 		if (to_journal) {
-			down_write(&curseg->journal_rwsem);
+			write_lock(&curseg->journal_rwlock);
 		} else {
 			page = get_next_sit_page(sbi, start_segno);
 			raw_sit = page_address(page);
@@ -4251,7 +4251,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 		}
 
 		if (to_journal)
-			up_write(&curseg->journal_rwsem);
+			write_unlock(&curseg->journal_rwlock);
 		else
 			f2fs_put_page(page, 1);
 
@@ -4432,7 +4432,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
 		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
 		if (!array[i].sum_blk)
 			return -ENOMEM;
-		init_rwsem(&array[i].journal_rwsem);
+		rwlock_init(&array[i].journal_rwlock);
 		array[i].journal = f2fs_kzalloc(sbi,
 				sizeof(struct f2fs_journal), GFP_KERNEL);
 		if (!array[i].journal)
@@ -4509,7 +4509,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
 		start_blk += readed;
 	} while (start_blk < sit_blk_cnt);
 
-	down_read(&curseg->journal_rwsem);
+	read_lock(&curseg->journal_rwlock);
 	for (i = 0; i < sits_in_cursum(journal); i++) {
 		unsigned int old_valid_blocks;
 
@@ -4551,7 +4551,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
 							old_valid_blocks;
 		}
 	}
-	up_read(&curseg->journal_rwsem);
+	read_unlock(&curseg->journal_rwlock);
 
 	if (!err && total_node_blocks != valid_node_count(sbi)) {
 		f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 050230c70a53..afb9041dc2e6 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -306,7 +306,7 @@ struct victim_selection {
 struct curseg_info {
 	struct mutex curseg_mutex;		/* lock for consistency */
 	struct f2fs_summary_block *sum_blk;	/* cached summary block */
-	struct rw_semaphore journal_rwsem;	/* protect journal area */
+	rwlock_t journal_rwlock;		/* protect journal area */
 	struct f2fs_journal *journal;		/* cached journal info */
 	unsigned char alloc_type;		/* current allocation type */
 	unsigned short seg_type;		/* segment type like CURSEG_XXX_TYPE */
-- 
2.32.0.402.g57bb445576-goog



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

             reply	other threads:[~2021-07-22  1:41 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-22  1:41 Jaegeuk Kim [this message]
2021-07-22  1:41 ` [f2fs-dev] [PATCH] f2fs: use rwlock instead of rwsem for journal Jaegeuk Kim
2021-07-22  3:06 ` Hillf Danton
2021-07-22 13:41 ` Chao Yu
2021-07-22 13:41   ` Chao Yu
2021-07-22 17:44 ` [PATCH v2] f2fs: don't sleep while grabing nat_tree_lock Jaegeuk Kim
2021-07-22 17:44   ` [f2fs-dev] " Jaegeuk Kim
2021-07-23  0:39   ` Chao Yu
2021-07-23  0:39     ` Chao Yu
2021-07-23  2:35   ` [PATCH v3] " Jaegeuk Kim
2021-07-23  2:35     ` [f2fs-dev] " Jaegeuk Kim
2021-07-23  2:41     ` Chao Yu
2021-07-23  2:41       ` Chao Yu
2021-07-23  4:27     ` [f2fs-dev] [PATCH v4] " Jaegeuk Kim
2021-07-23  4:27       ` Jaegeuk Kim
2021-07-23  1:25 [PATCH] f2fs: use rwlock instead of rwsem for journal kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210722014149.525166-1-jaegeuk@kernel.org \
    --to=jaegeuk@kernel.org \
    --cc=linux-f2fs-devel@lists.sourceforge.net \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.