All of lore.kernel.org
 help / color / mirror / Atom feed
From: Qu Wenruo <wqu@suse.com>
To: linux-btrfs@vger.kernel.org
Subject: [PATCH v4 21/30] btrfs: make relocate_one_page() to handle subpage case
Date: Mon, 31 May 2021 16:50:57 +0800	[thread overview]
Message-ID: <20210531085106.259490-22-wqu@suse.com> (raw)
In-Reply-To: <20210531085106.259490-1-wqu@suse.com>

For the subpage case, one page of the data reloc inode can contain several
file extents, like this:

|<--- File extent A --->| FE B | FE C |<--- File extent D -->|
		|<--------- Page --------->|

We can no longer use PAGE_SIZE directly for various operations.

This patch makes relocate_one_page() handle the subpage case by:
- Iterating through all extents of a cluster when marking pages
  When marking pages dirty and delalloc, we need to check the cluster
  extent boundary.
  Now we introduce a loop to go through a page extent by extent, until we
  either finish the last extent or reach the page end.

  This way, the regular sectorsize == PAGE_SIZE case still works as usual,
  since we will run that loop only once.

- Iteration starts from max(page_start, extent_start)
  Since we can have the following case:
			| FE B | FE C |<--- File extent D -->|
		|<--------- Page --------->|
  Thus we can't always start from page_start, but must start from
  max(page_start, extent_start) instead.

- Iteration ends when the cluster is exhausted
  Similar to the previous case, the last file extent can end before the
  page end:
|<--- File extent A --->| FE B | FE C |
		|<--------- Page --------->|
  In this case, we need to manually exit the loop after we have finished
  the last extent of the cluster.

- Reserve metadata space for each extent range
  Since now we can hit multiple ranges in one page, we should reserve
  metadata for each range, not simply PAGE_SIZE.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/relocation.c | 108 ++++++++++++++++++++++++++++++------------
 1 file changed, 79 insertions(+), 29 deletions(-)

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 862fe5247c76..cd50559c6d17 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -24,6 +24,7 @@
 #include "block-group.h"
 #include "backref.h"
 #include "misc.h"
+#include "subpage.h"
 
 /*
  * Relocation overview
@@ -2885,6 +2886,17 @@ noinline int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
 }
 ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
 
+static u64 get_cluster_boundary_end(struct file_extent_cluster *cluster,
+				    int cluster_nr)
+{
+	/* Last extent, use cluster end directly */
+	if (cluster_nr >= cluster->nr - 1)
+		return cluster->end;
+
+	/* Use next boundary start*/
+	return cluster->boundary[cluster_nr + 1] - 1;
+}
+
 static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
 			     struct file_extent_cluster *cluster,
 			     int *cluster_nr, unsigned long page_index)
@@ -2896,22 +2908,17 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
 	struct page *page;
 	u64 page_start;
 	u64 page_end;
+	u64 cur;
 	int ret;
 
 	ASSERT(page_index <= last_index);
-	ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), PAGE_SIZE);
-	if (ret)
-		return ret;
-
 	page = find_lock_page(inode->i_mapping, page_index);
 	if (!page) {
 		page_cache_sync_readahead(inode->i_mapping, ra, NULL,
 				page_index, last_index + 1 - page_index);
 		page = find_or_create_page(inode->i_mapping, page_index, mask);
-		if (!page) {
-			ret = -ENOMEM;
-			goto release_delalloc;
-		}
+		if (!page)
+			return -ENOMEM;
 	}
 	ret = set_page_extent_mapped(page);
 	if (ret < 0)
@@ -2933,30 +2940,76 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
 	page_start = page_offset(page);
 	page_end = page_start + PAGE_SIZE - 1;
 
-	lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end);
-
-	if (*cluster_nr < cluster->nr &&
-	    page_start + offset == cluster->boundary[*cluster_nr]) {
-		set_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
-				EXTENT_BOUNDARY);
-		(*cluster_nr)++;
-	}
+	/*
+	 * Start from the cluster, as for subpage case, the cluster can start
+	 * inside the page.
+	 */
+	cur = max(page_start, cluster->boundary[*cluster_nr] - offset);
+	while (cur <= page_end) {
+		u64 extent_start = cluster->boundary[*cluster_nr] - offset;
+		u64 extent_end = get_cluster_boundary_end(cluster,
+						*cluster_nr) - offset;
+		u64 clamped_start = max(page_start, extent_start);
+		u64 clamped_end = min(page_end, extent_end);
+		u32 clamped_len = clamped_end + 1 - clamped_start;
+
+		/* Reserve metadata for this range */
+		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
+						      clamped_len);
+		if (ret)
+			goto release_page;
 
-	ret = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, page_end,
-					0, NULL);
-	if (ret) {
-		clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start,
-				  page_end, EXTENT_LOCKED | EXTENT_BOUNDARY);
-		goto release_page;
+		/* Mark the range delalloc and dirty for later writeback */
+		lock_extent(&BTRFS_I(inode)->io_tree, clamped_start,
+				clamped_end);
+		ret = btrfs_set_extent_delalloc(BTRFS_I(inode), clamped_start,
+				clamped_end, 0, NULL);
+		if (ret) {
+			clear_extent_bits(&BTRFS_I(inode)->io_tree,
+					clamped_start, clamped_end,
+					EXTENT_LOCKED | EXTENT_BOUNDARY);
+			btrfs_delalloc_release_metadata(BTRFS_I(inode),
+							clamped_len, true);
+			btrfs_delalloc_release_extents(BTRFS_I(inode),
+							clamped_len);
+			goto release_page;
+		}
+		btrfs_page_set_dirty(fs_info, page, clamped_start, clamped_len);
 
+		/*
+		 * Set the boundary if it's inside the page.
+		 * Data relocation requires the destination extents have the
+		 * same size as the source.
+		 * EXTENT_BOUNDARY bit prevent current extent from being merged
+		 * with previous extent.
+		 */
+		if (in_range(cluster->boundary[*cluster_nr] - offset,
+			     page_start, PAGE_SIZE)) {
+			u64 boundary_start = cluster->boundary[*cluster_nr] -
+						offset;
+			u64 boundary_end = boundary_start +
+					   fs_info->sectorsize - 1;
+
+			set_extent_bits(&BTRFS_I(inode)->io_tree,
+					boundary_start, boundary_end,
+					EXTENT_BOUNDARY);
+		}
+		unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start,
+			      clamped_end);
+		btrfs_delalloc_release_extents(BTRFS_I(inode), clamped_len);
+		cur += clamped_len;
+
+		/* Crossed extent end, go to next extent */
+		if (cur >= extent_end) {
+			(*cluster_nr)++;
+			/* Just finished the last extent of the cluster, exit. */
+			if (*cluster_nr >= cluster->nr)
+				break;
+		}
 	}
-	set_page_dirty(page);
-
-	unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end);
 	unlock_page(page);
 	put_page(page);
 
-	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
 	balance_dirty_pages_ratelimited(inode->i_mapping);
 	btrfs_throttle(fs_info);
 	if (btrfs_should_cancel_balance(fs_info))
@@ -2966,9 +3019,6 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
 release_page:
 	unlock_page(page);
 	put_page(page);
-release_delalloc:
-	btrfs_delalloc_release_metadata(BTRFS_I(inode), PAGE_SIZE, true);
-	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
 	return ret;
 }
 
-- 
2.31.1


  parent reply	other threads:[~2021-05-31  8:52 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-31  8:50 [PATCH v4 00/30] btrfs: add data write support for subpage Qu Wenruo
2021-05-31  8:50 ` [PATCH v4 01/30] btrfs: pass bytenr directly to __process_pages_contig() Qu Wenruo
2021-05-31  8:50 ` [PATCH v4 02/30] btrfs: refactor the page status update into process_one_page() Qu Wenruo
2021-05-31  8:50 ` [PATCH v4 03/30] btrfs: provide btrfs_page_clamp_*() helpers Qu Wenruo
2021-05-31  8:50 ` [PATCH v4 04/30] btrfs: only require sector size alignment for end_bio_extent_writepage() Qu Wenruo
2021-05-31  8:50 ` [PATCH v4 05/30] btrfs: make btrfs_dirty_pages() to be subpage compatible Qu Wenruo
2021-05-31  8:50 ` [PATCH v4 06/30] btrfs: make __process_pages_contig() to handle subpage dirty/error/writeback status Qu Wenruo
2021-05-31  8:50 ` [PATCH v4 07/30] btrfs: make end_bio_extent_writepage() to be subpage compatible Qu Wenruo
2021-05-31  8:50 ` [PATCH v4 08/30] btrfs: make process_one_page() to handle subpage locking Qu Wenruo
2021-05-31  8:50 ` [PATCH v4 09/30] btrfs: introduce helpers for subpage ordered status Qu Wenruo
2021-05-31  8:50 ` [PATCH v4 10/30] btrfs: make page Ordered bit to be subpage compatible Qu Wenruo
2021-05-31  8:50 ` [PATCH v4 11/30] btrfs: update locked page dirty/writeback/error bits in __process_pages_contig Qu Wenruo
2021-05-31  8:50 ` [PATCH v4 12/30] btrfs: prevent extent_clear_unlock_delalloc() to unlock page not locked by __process_pages_contig() Qu Wenruo
2021-05-31  8:50 ` [PATCH v4 13/30] btrfs: make btrfs_set_range_writeback() subpage compatible Qu Wenruo
2021-05-31  8:50 ` [PATCH v4 14/30] btrfs: make __extent_writepage_io() only submit dirty range for subpage Qu Wenruo
2021-06-04 14:58   ` Qu Wenruo
2021-05-31  8:50 ` [PATCH v4 15/30] btrfs: make btrfs_truncate_block() to be subpage compatible Qu Wenruo
2021-05-31  8:50 ` [PATCH v4 16/30] btrfs: make btrfs_page_mkwrite() " Qu Wenruo
2021-05-31  8:50 ` [PATCH v4 17/30] btrfs: reflink: make copy_inline_to_page() " Qu Wenruo
2021-05-31  8:50 ` [PATCH v4 18/30] btrfs: fix the filemap_range_has_page() call in btrfs_punch_hole_lock_range() Qu Wenruo
2021-05-31  8:50 ` [PATCH v4 19/30] btrfs: don't clear page extent mapped if we're not invalidating the full page Qu Wenruo
2021-05-31  8:50 ` [PATCH v4 20/30] btrfs: extract relocation page read and dirty part into its own function Qu Wenruo
2021-05-31  8:50 ` Qu Wenruo [this message]
2021-05-31  8:50 ` [PATCH v4 22/30] btrfs: fix wild subpage writeback which does not have ordered extent Qu Wenruo
2021-06-02 16:25   ` David Sterba
2021-05-31  8:50 ` [PATCH v4 23/30] btrfs: disable inline extent creation for subpage Qu Wenruo
2021-05-31  8:51 ` [PATCH v4 24/30] btrfs: allow submit_extent_page() to do bio split " Qu Wenruo
2021-05-31  8:51 ` [PATCH v4 25/30] btrfs: reject raid5/6 fs " Qu Wenruo
2021-05-31  8:51 ` [PATCH v4 26/30] btrfs: fix a crash caused by race between prepare_pages() and btrfs_releasepage() Qu Wenruo
2021-05-31  8:51 ` [PATCH v4 27/30] btrfs: fix a use-after-free bug in writeback subpage helper Qu Wenruo
2021-06-02 16:48   ` David Sterba
2021-05-31  8:51 ` [PATCH v4 28/30] btrfs: fix a subpage false alert for relocating partial preallocated data extents Qu Wenruo
2021-05-31  8:51 ` [PATCH v4 29/30] btrfs: fix a subpage relocation data corruption Qu Wenruo
2021-05-31 10:26   ` Qu Wenruo
2021-06-01  1:07     ` Qu Wenruo
2021-06-02 17:10       ` David Sterba
2021-05-31  8:51 ` [PATCH v4 30/30] btrfs: allow read-write for 4K sectorsize on 64K page size systems Qu Wenruo
2021-06-02 17:37   ` David Sterba
2021-05-31  9:47 ` [PATCH v4 00/30] btrfs: add data write support for subpage Neal Gompa
2021-05-31  9:50   ` Qu Wenruo
2021-05-31 12:17     ` Neal Gompa
2021-05-31 13:08       ` Qu Wenruo
2021-05-31 14:09 ` David Sterba
2021-06-01  0:21   ` Qu Wenruo
2021-06-02  2:22 ` riteshh
2021-06-02  2:24   ` Qu Wenruo
2021-06-02  2:27     ` riteshh
2021-06-02 17:39   ` David Sterba
2021-06-02 17:57 ` David Sterba
2021-06-03  6:20   ` Qu Wenruo
2021-06-08  8:23 ` Anand Jain
2021-06-08  9:02   ` Qu Wenruo
2021-06-08  9:45     ` Anand Jain
2021-06-08  9:50       ` Qu Wenruo
2021-06-08 11:11         ` Anand Jain
2021-06-17 20:40           ` David Sterba

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210531085106.259490-22-wqu@suse.com \
    --to=wqu@suse.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.