From: Qu Wenruo <wqu@suse.com>
To: linux-btrfs@vger.kernel.org
Cc: josef@toxicpanda.com, Johannes.Thumshirn@wdc.com
Subject: [PATCH v4 2/6] btrfs: lock subpage ranges in one go for writepage_delalloc()
Date: Sat, 11 May 2024 09:45:18 +0930	[thread overview]
Message-ID: <91da2c388f3152f40af0e3e8c7ca7c7f8f6687db.1715386434.git.wqu@suse.com> (raw)
In-Reply-To: <cover.1715386434.git.wqu@suse.com>

If we have a subpage range like this for a 16K page with 4K sectorsize:

    0     4K     8K     12K     16K
    |/////|      |//////|       |

    |/////| = dirty range

Currently writepage_delalloc() goes through the following steps:

- lock range [0, 4K)
- run delalloc range for [0, 4K)
- lock range [8K, 12K)
- run delalloc range for [8K 12K)

So far it's fine for regular subpage, as btrfs_run_delalloc_range() can
only go into one of run_delalloc_nocow(), cow_file_range() or
run_delalloc_compressed().
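
For reference, the dispatch inside btrfs_run_delalloc_range() roughly
looks like this (a simplified sketch with paraphrased conditions, not
the exact kernel code):

    if (can_nocow)            /* NOCOW or preallocated extents */
            ret = run_delalloc_nocow(...);
    else if (should_compress) /* compression applies           */
            ret = run_delalloc_compressed(...);
    else
            ret = cow_file_range(...);

None of these paths causes the premature full-page unlock described
below.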

But there is a special pitfall for zoned subpage, where we go through
run_delalloc_cow(), which creates the ordered extent for the range and
immediately submits it.
This unlocks the whole page range, triggering all kinds of different
ASSERT()s related to the locked page.
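
In other words the zoned flow is roughly (paraphrased from the
behavior described above, not the exact call chain):

    run_delalloc_cow():
        create the ordered extent for the found range
        submit the range immediately
            -> unlocks the *whole* page range, not just the
               range being run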

This patch does not yet address the run_delalloc_cow() problem, but
prepares for the proper fix by changing the workflow to the following
one:

- lock range [0, 4K)
- lock range [8K, 12K)
- run delalloc range for [0, 4K)
- run delalloc range for [8K, 12K)

So later for subpage cases we can do extra work to ensure
run_delalloc_cow() only unlocks the full page when the last lock
user releases it.
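
In pseudo-code the new flow of writepage_delalloc() becomes (helper
names as introduced in the diff below):

    while (find_lock_delalloc_range(...))
            add_locked_delalloc_range(...);    /* phase 1: lock only */
    for each saved range:
            btrfs_run_delalloc_range(...);     /* phase 2: run them  */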

To save the previously locked ranges, we utilize a structure called
locked_delalloc_list, which caches the last hit delalloc range.
For non-subpage cases only that cached value is ever needed.

For subpage cases, since we can hit multiple delalloc ranges inside a
page, a list is utilized and we allocate memory for its entries.
This introduces a new memory allocation and thus extra error paths,
but the method is only temporary; we will later use the subpage bitmap
to avoid the memory allocation.
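
Using the 16K page above as an example, the structure evolves like
this (the last hit range always sits in the cached slot):

    add [0, 4K):    cached = [0, 4K)      list = {}
    add [8K, 12K):  cached = [8K, 12K)    list = { [0, 4K) }

So for regular sectorsize only the cached slot is used and no list
entry is allocated, while unlock_locked_delalloc_list() walks the list
first and the cached slot last, releasing everything on error.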

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/extent_io.c | 140 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 136 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 8a4a7d00795f..6f237c77699a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1213,6 +1213,101 @@ static inline void contiguous_readpages(struct page *pages[], int nr_pages,
 	}
 }
 
+struct locked_delalloc_range {
+	struct list_head list;
+	u64 delalloc_start;
+	u32 delalloc_len;
+};
+
+/*
+ * Save the locked delalloc range.
+ *
+ * This is for subpage only, as for regular sectorsize, there will be at most
+ * one locked delalloc range for a page.
+ */
+struct locked_delalloc_list {
+	u64 last_delalloc_start;
+	u32 last_delalloc_len;
+	struct list_head head;
+};
+
+static void init_locked_delalloc_list(struct locked_delalloc_list *locked_list)
+{
+	INIT_LIST_HEAD(&locked_list->head);
+	locked_list->last_delalloc_start = 0;
+	locked_list->last_delalloc_len = 0;
+}
+
+static void release_locked_delalloc_list(struct locked_delalloc_list *locked_list)
+{
+	while (!list_empty(&locked_list->head)) {
+		struct locked_delalloc_range *entry;
+
+		entry = list_entry(locked_list->head.next,
+				   struct locked_delalloc_range, list);
+
+		list_del_init(&entry->list);
+		kfree(entry);
+	}
+}
+
+static int add_locked_delalloc_range(struct btrfs_fs_info *fs_info,
+				     struct locked_delalloc_list *locked_list,
+				     u64 start, u32 len)
+{
+	struct locked_delalloc_range *entry;
+
+	if (locked_list->last_delalloc_len == 0) {
+		locked_list->last_delalloc_start = start;
+		locked_list->last_delalloc_len = len;
+		return 0;
+	}
+	entry = kmalloc(sizeof(*entry), GFP_NOFS);
+	if (!entry)
+		return -ENOMEM;
+
+	/* The new entry must be beyond the current one. */
+	ASSERT(start >= locked_list->last_delalloc_start +
+			locked_list->last_delalloc_len);
+
+	/* Only the subpage case can have more than one delalloc range inside a page. */
+	ASSERT(fs_info->sectorsize < PAGE_SIZE);
+
+	entry->delalloc_start = locked_list->last_delalloc_start;
+	entry->delalloc_len = locked_list->last_delalloc_len;
+	locked_list->last_delalloc_start = start;
+	locked_list->last_delalloc_len = len;
+	list_add_tail(&entry->list, &locked_list->head);
+	return 0;
+}
+
+static void __cold unlock_one_locked_delalloc_range(struct btrfs_inode *binode,
+						    struct page *locked_page,
+						    u64 start, u32 len)
+{
+	u64 delalloc_end = start + len - 1;
+
+	unlock_extent(&binode->io_tree, start, delalloc_end, NULL);
+	__unlock_for_delalloc(&binode->vfs_inode, locked_page, start,
+			      delalloc_end);
+}
+
+static void unlock_locked_delalloc_list(struct btrfs_inode *binode,
+					struct page *locked_page,
+					struct locked_delalloc_list *locked_list)
+{
+	struct locked_delalloc_range *entry;
+
+	list_for_each_entry(entry, &locked_list->head, list)
+		unlock_one_locked_delalloc_range(binode, locked_page,
+				entry->delalloc_start, entry->delalloc_len);
+	if (locked_list->last_delalloc_len) {
+		unlock_one_locked_delalloc_range(binode, locked_page,
+				locked_list->last_delalloc_start,
+				locked_list->last_delalloc_len);
+	}
+}
+
 /*
  * helper for __extent_writepage, doing all of the delayed allocation setup.
  *
@@ -1226,13 +1321,17 @@ static inline void contiguous_readpages(struct page *pages[], int nr_pages,
 static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 		struct page *page, struct writeback_control *wbc)
 {
+	struct btrfs_fs_info *fs_info = inode_to_fs_info(&inode->vfs_inode);
 	const u64 page_start = page_offset(page);
 	const u64 page_end = page_start + PAGE_SIZE - 1;
+	struct locked_delalloc_list locked_list;
+	struct locked_delalloc_range *entry;
 	u64 delalloc_start = page_start;
 	u64 delalloc_end = page_end;
 	u64 delalloc_to_write = 0;
 	int ret = 0;
 
+	init_locked_delalloc_list(&locked_list);
 	while (delalloc_start < page_end) {
 		delalloc_end = page_end;
 		if (!find_lock_delalloc_range(&inode->vfs_inode, page,
@@ -1240,14 +1339,47 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 			delalloc_start = delalloc_end + 1;
 			continue;
 		}
-
-		ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
-					       delalloc_end, wbc);
-		if (ret < 0)
+		ret = add_locked_delalloc_range(fs_info, &locked_list,
+				delalloc_start, delalloc_end + 1 - delalloc_start);
+		if (ret < 0) {
+			unlock_locked_delalloc_list(inode, page, &locked_list);
+			release_locked_delalloc_list(&locked_list);
 			return ret;
+		}
 
 		delalloc_start = delalloc_end + 1;
 	}
+	list_for_each_entry(entry, &locked_list.head, list) {
+		delalloc_end = entry->delalloc_start + entry->delalloc_len - 1;
+
+		/*
+		 * Hit error in the previous run, cleanup the locked
+		 * extents/pages.
+		 */
+		if (ret < 0) {
+			unlock_one_locked_delalloc_range(inode, page,
+					entry->delalloc_start, entry->delalloc_len);
+			continue;
+		}
+		ret = btrfs_run_delalloc_range(inode, page, entry->delalloc_start,
+					       delalloc_end, wbc);
+	}
+	if (locked_list.last_delalloc_len) {
+		delalloc_end = locked_list.last_delalloc_start +
+			       locked_list.last_delalloc_len - 1;
+
+		if (ret < 0)
+			unlock_one_locked_delalloc_range(inode, page,
+					locked_list.last_delalloc_start,
+					locked_list.last_delalloc_len);
+		else
+			ret = btrfs_run_delalloc_range(inode, page,
+					locked_list.last_delalloc_start,
+					delalloc_end, wbc);
+	}
+	release_locked_delalloc_list(&locked_list);
+	if (ret < 0)
+		return ret;
 
 	/*
 	 * delalloc_end is already one less than the total length, so
-- 
2.45.0

