All of lore.kernel.org
 help / color / mirror / Atom feed
From: Qu Wenruo <wqu@suse.com>
To: linux-btrfs@vger.kernel.org
Subject: [Patch v2 38/42] btrfs: allow submit_extent_page() to do bio split for subpage
Date: Wed, 28 Apr 2021 07:03:45 +0800	[thread overview]
Message-ID: <20210427230349.369603-39-wqu@suse.com> (raw)
In-Reply-To: <20210427230349.369603-1-wqu@suse.com>

Current submit_extent_page() just if the current page range can fit into
the current bio, and if not, submit then re-add.

But this behavior has a problem, it can't handle subpage cases.

For subpage case, the problem is in the page size, 64K, which is also
the same size as stripe size.

This means, if we can't fit a full 64K into a bio, due to stripe limit,
then it won't fit into next bio without crossing stripe either.

The proper way to handle it is:
- Check how many bytes we can put into current bio
- Put as many bytes as possible into current bio first
- Submit current bio
- Create new bio
- Add the remaining bytes into the new bio

Refactor submit_extent_page() so that it does the above iteration.

The main loop inside submit_extent_page() will look like this:

	cur = pg_offset;
	while (cur < pg_offset + size) {
		u32 offset = cur - pg_offset;
		int added;
		if (!bio_ctrl->bio) {
			/* Allocate new bio if needed */
		}
		/* Add as many bytes into the bio */
		if (added < size - offset) {
			/* The current bio is full, submit it */
		}
		cur += added;
	}

Also, since we're doing new bio allocation deep inside the main loop,
extra that code into a new function, alloc_new_bio().

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/extent_io.c | 183 ++++++++++++++++++++++++++++---------------
 1 file changed, 122 insertions(+), 61 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c4615e087446..14ab11381d49 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -172,6 +172,7 @@ int __must_check submit_one_bio(struct bio *bio, int mirror_num,
 
 	bio->bi_private = NULL;
 
+	ASSERT(bio->bi_iter.bi_size);
 	if (is_data_inode(tree->private_data))
 		ret = btrfs_submit_data_bio(tree->private_data, bio, mirror_num,
 					    bio_flags);
@@ -3201,13 +3202,13 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
  * @size:	portion of page that we want to write
  * @prev_bio_flags:  flags of previous bio to see if we can merge the current one
  * @bio_flags:	flags of the current bio to see if we can merge them
- * @return:	true if page was added, false otherwise
  *
  * Attempt to add a page to bio considering stripe alignment etc.
  *
- * Return true if successfully page added. Otherwise, return false.
+ * Return >= 0 for the number of bytes added to the bio.
+ * Return <0 for error.
  */
-static bool btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
+static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
 			       struct page *page,
 			       u64 disk_bytenr, unsigned int size,
 			       unsigned int pg_offset,
@@ -3215,6 +3216,7 @@ static bool btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
 {
 	struct bio *bio = bio_ctrl->bio;
 	u32 bio_size = bio->bi_iter.bi_size;
+	u32 real_size;
 	const sector_t sector = disk_bytenr >> SECTOR_SHIFT;
 	bool contig;
 	int ret;
@@ -3223,26 +3225,33 @@ static bool btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
 	/* The limit should be calculated when bio_ctrl->bio is allocated */
 	ASSERT(bio_ctrl->len_to_oe_boundary &&
 	       bio_ctrl->len_to_stripe_boundary);
+
 	if (bio_ctrl->bio_flags != bio_flags)
-		return false;
+		return 0;
 
 	if (bio_ctrl->bio_flags & EXTENT_BIO_COMPRESSED)
 		contig = bio->bi_iter.bi_sector == sector;
 	else
 		contig = bio_end_sector(bio) == sector;
 	if (!contig)
-		return false;
+		return 0;
 
-	if (bio_size + size > bio_ctrl->len_to_oe_boundary ||
-	    bio_size + size > bio_ctrl->len_to_stripe_boundary)
-		return false;
+	real_size = min(bio_ctrl->len_to_oe_boundary,
+			bio_ctrl->len_to_stripe_boundary) - bio_size;
+	real_size = min(real_size, size);
+	/*
+	 * If real_size is 0, never call bio_add_*_page(), as even size is 0,
+	 * bio will still execute its endio function on the page!
+	 */
+	if (real_size == 0)
+		return 0;
 
 	if (bio_op(bio) == REQ_OP_ZONE_APPEND)
-		ret = bio_add_zone_append_page(bio, page, size, pg_offset);
+		ret = bio_add_zone_append_page(bio, page, real_size, pg_offset);
 	else
-		ret = bio_add_page(bio, page, size, pg_offset);
+		ret = bio_add_page(bio, page, real_size, pg_offset);
 
-	return ret == size;
+	return ret;
 }
 
 static int calc_bio_boundaries(struct btrfs_bio_ctrl *bio_ctrl,
@@ -3301,6 +3310,61 @@ static int calc_bio_boundaries(struct btrfs_bio_ctrl *bio_ctrl,
 	return 0;
 }
 
+static int alloc_new_bio(struct btrfs_inode *inode,
+			 struct btrfs_bio_ctrl *bio_ctrl,
+			 unsigned int opf,
+			 bio_end_io_t end_io_func,
+			 u64 disk_bytenr, u32 offset,
+			 unsigned long bio_flags)
+{
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+	struct bio *bio;
+	int ret;
+
+	/*
+	 * For compressed page range, its disk_bytenr is always
+	 * @disk_bytenr passed in, no matter if we have added
+	 * any range into previous bio.
+	 */
+	if (bio_flags & EXTENT_BIO_COMPRESSED)
+		bio = btrfs_bio_alloc(disk_bytenr);
+	else
+		bio = btrfs_bio_alloc(disk_bytenr + offset);
+	bio_ctrl->bio = bio;
+	bio_ctrl->bio_flags = bio_flags;
+	ret = calc_bio_boundaries(bio_ctrl, inode);
+	if (ret < 0) {
+		bio_ctrl->bio = NULL;
+		bio->bi_status = errno_to_blk_status(ret);
+		bio_endio(bio);
+		return ret;
+	}
+	bio->bi_end_io = end_io_func;
+	bio->bi_private = &inode->io_tree;
+	bio->bi_write_hint = inode->vfs_inode.i_write_hint;
+	bio->bi_opf = opf;
+	if (btrfs_is_zoned(fs_info) && bio_op(bio) == REQ_OP_ZONE_APPEND) {
+		struct extent_map *em;
+		struct map_lookup *map;
+
+		em = btrfs_get_chunk_map(fs_info, disk_bytenr,
+					 fs_info->sectorsize);
+		if (IS_ERR(em)) {
+			bio_ctrl->bio = NULL;
+			bio->bi_status = errno_to_blk_status(ret);
+			bio_endio(bio);
+			return ret;
+		}
+
+		map = em->map_lookup;
+		/* We only support single profile for now */
+		ASSERT(map->num_stripes == 1);
+		btrfs_io_bio(bio)->device = map->stripes[0].dev;
+
+		free_extent_map(em);
+	}
+	return 0;
+}
 /*
  * @opf:	bio REQ_OP_* and REQ_* flags as one value
  * @wbc:	optional writeback control for io accounting
@@ -3326,67 +3390,64 @@ static int submit_extent_page(unsigned int opf,
 			      bool force_bio_submit)
 {
 	int ret = 0;
-	struct bio *bio;
-	size_t io_size = min_t(size_t, size, PAGE_SIZE);
 	struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
-	struct extent_io_tree *tree = &inode->io_tree;
-	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+	unsigned int cur = pg_offset;
 
 	ASSERT(bio_ctrl);
 
 	ASSERT(pg_offset < PAGE_SIZE && size <= PAGE_SIZE &&
 	       pg_offset + size <= PAGE_SIZE);
-	if (bio_ctrl->bio) {
-		bio = bio_ctrl->bio;
-		if (force_bio_submit ||
-		    !btrfs_bio_add_page(bio_ctrl, page, disk_bytenr, io_size,
-					pg_offset, bio_flags)) {
-			ret = submit_one_bio(bio, mirror_num, bio_ctrl->bio_flags);
+	if (force_bio_submit && bio_ctrl->bio) {
+		ret = submit_one_bio(bio_ctrl->bio, mirror_num,
+				     bio_ctrl->bio_flags);
+		bio_ctrl->bio = NULL;
+		if (ret < 0)
+			return ret;
+	}
+	while (cur < pg_offset + size) {
+		u32 offset = cur - pg_offset;
+		int added;
+		/* Allocate new bio if needed */
+		if (!bio_ctrl->bio) {
+			ret = alloc_new_bio(inode, bio_ctrl, opf, end_io_func,
+					    disk_bytenr, offset, bio_flags);
+			if (ret < 0)
+				return ret;
+		}
+		/*
+		 * We must go through btrfs_bio_add_page() to ensure each
+		 * page range won't cross various boundaries.
+		 */
+		if (bio_flags & EXTENT_BIO_COMPRESSED)
+			added = btrfs_bio_add_page(bio_ctrl, page, disk_bytenr,
+					size - offset, pg_offset + offset,
+					bio_flags);
+		else
+			added = btrfs_bio_add_page(bio_ctrl, page,
+					disk_bytenr + offset, size - offset,
+					pg_offset + offset, bio_flags);
+
+		/* Metadata page range should never be split */
+		if (!is_data_inode(&inode->vfs_inode))
+			ASSERT(added == 0 || added == size);
+
+		/* At least we added some page, update the account */
+		if (wbc && added)
+			wbc_account_cgroup_owner(wbc, page, added);
+
+		/* We have reached boundary, submit right now */
+		if (added < size - offset) {
+			/* The bio should contain some page(s) */
+			ASSERT(bio_ctrl->bio->bi_iter.bi_size);
+			ret = submit_one_bio(bio_ctrl->bio, mirror_num,
+					bio_ctrl->bio_flags);
 			bio_ctrl->bio = NULL;
 			if (ret < 0)
 				return ret;
-		} else {
-			if (wbc)
-				wbc_account_cgroup_owner(wbc, page, io_size);
-			return 0;
 		}
+		cur += added;
 	}
-
-	bio = btrfs_bio_alloc(disk_bytenr);
-	bio_add_page(bio, page, io_size, pg_offset);
-	bio->bi_end_io = end_io_func;
-	bio->bi_private = tree;
-	bio->bi_write_hint = page->mapping->host->i_write_hint;
-	bio->bi_opf = opf;
-	if (wbc) {
-		struct block_device *bdev;
-
-		bdev = fs_info->fs_devices->latest_bdev;
-		bio_set_dev(bio, bdev);
-		wbc_init_bio(wbc, bio);
-		wbc_account_cgroup_owner(wbc, page, io_size);
-	}
-	if (btrfs_is_zoned(fs_info) && bio_op(bio) == REQ_OP_ZONE_APPEND) {
-		struct extent_map *em;
-		struct map_lookup *map;
-
-		em = btrfs_get_chunk_map(fs_info, disk_bytenr, io_size);
-		if (IS_ERR(em))
-			return PTR_ERR(em);
-
-		map = em->map_lookup;
-		/* We only support single profile for now */
-		ASSERT(map->num_stripes == 1);
-		btrfs_io_bio(bio)->device = map->stripes[0].dev;
-
-		free_extent_map(em);
-	}
-
-	bio_ctrl->bio = bio;
-	bio_ctrl->bio_flags = bio_flags;
-	ret = calc_bio_boundaries(bio_ctrl, inode);
-
-	return ret;
+	return 0;
 }
 
 static int attach_extent_buffer_page(struct extent_buffer *eb,
-- 
2.31.1


  parent reply	other threads:[~2021-04-27 23:05 UTC|newest]

Thread overview: 118+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-27 23:03 [Patch v2 00/42] btrfs: add data write support for subpage Qu Wenruo
2021-04-27 23:03 ` [Patch v2 01/42] btrfs: scrub: fix subpage scrub repair error caused by hardcoded PAGE_SIZE Qu Wenruo
2021-05-13 22:57   ` David Sterba
2021-05-13 23:32     ` Qu Wenruo
2021-04-27 23:03 ` [Patch v2 02/42] btrfs: make free space cache size consistent across different PAGE_SIZE Qu Wenruo
2021-04-27 23:03 ` [Patch v2 03/42] btrfs: remove the unused parameter @len for btrfs_bio_fits_in_stripe() Qu Wenruo
2021-05-13 22:58   ` David Sterba
2021-05-13 23:07   ` David Sterba
2021-04-27 23:03 ` [Patch v2 04/42] btrfs: allow btrfs_bio_fits_in_stripe() to accept bio without any page Qu Wenruo
2021-04-27 23:03 ` [Patch v2 05/42] btrfs: refactor submit_extent_page() to make bio and its flag tracing easier Qu Wenruo
2021-05-13 23:03   ` David Sterba
2021-05-21 11:06   ` Johannes Thumshirn
2021-05-21 11:26     ` Qu Wenruo
2021-05-21 13:30       ` David Sterba
2021-04-27 23:03 ` [Patch v2 06/42] btrfs: make subpage metadata write path to call its own endio functions Qu Wenruo
2021-04-27 23:03 ` [Patch v2 07/42] btrfs: pass btrfs_inode into btrfs_writepage_endio_finish_ordered() Qu Wenruo
2021-05-13 23:06   ` David Sterba
2021-05-13 23:35     ` Qu Wenruo
2021-05-21 14:27   ` Josef Bacik
2021-05-21 20:22     ` David Sterba
2021-05-22  0:24     ` Qu Wenruo
2021-05-23  7:40       ` Qu Wenruo
2021-05-23 13:43         ` Josef Bacik
2021-05-23 13:50           ` Qu Wenruo
2021-05-23 14:08             ` Josef Bacik
2021-04-27 23:03 ` [Patch v2 08/42] btrfs: make Private2 lifespan more consistent Qu Wenruo
2021-04-27 23:03 ` [Patch v2 09/42] btrfs: refactor how we finish ordered extent io for endio functions Qu Wenruo
2021-05-13 23:11   ` David Sterba
2021-04-27 23:03 ` [Patch v2 10/42] btrfs: update the comments in btrfs_invalidatepage() Qu Wenruo
2021-04-27 23:03 ` [Patch v2 11/42] btrfs: introduce btrfs_lookup_first_ordered_range() Qu Wenruo
2021-05-13 23:13   ` David Sterba
2021-04-27 23:03 ` [Patch v2 12/42] btrfs: refactor btrfs_invalidatepage() Qu Wenruo
2021-04-27 23:03 ` [Patch v2 13/42] btrfs: rename PagePrivate2 to PageOrdered inside btrfs Qu Wenruo
2021-04-27 23:03 ` [Patch v2 14/42] btrfs: pass bytenr directly to __process_pages_contig() Qu Wenruo
2021-04-27 23:03 ` [Patch v2 15/42] btrfs: refactor the page status update into process_one_page() Qu Wenruo
2021-04-27 23:03 ` [Patch v2 16/42] btrfs: provide btrfs_page_clamp_*() helpers Qu Wenruo
2021-04-27 23:03 ` [Patch v2 17/42] btrfs: only require sector size alignment for end_bio_extent_writepage() Qu Wenruo
2021-04-27 23:03 ` [Patch v2 18/42] btrfs: make btrfs_dirty_pages() to be subpage compatible Qu Wenruo
2021-04-27 23:03 ` [Patch v2 19/42] btrfs: make __process_pages_contig() to handle subpage dirty/error/writeback status Qu Wenruo
2021-04-27 23:03 ` [Patch v2 20/42] btrfs: make end_bio_extent_writepage() to be subpage compatible Qu Wenruo
2021-04-27 23:03 ` [Patch v2 21/42] btrfs: make process_one_page() to handle subpage locking Qu Wenruo
2021-04-27 23:03 ` [Patch v2 22/42] btrfs: introduce helpers for subpage ordered status Qu Wenruo
2021-04-27 23:03 ` [Patch v2 23/42] btrfs: make page Ordered bit to be subpage compatible Qu Wenruo
2021-04-27 23:03 ` [Patch v2 24/42] btrfs: update locked page dirty/writeback/error bits in __process_pages_contig Qu Wenruo
2021-04-27 23:03 ` [Patch v2 25/42] btrfs: prevent extent_clear_unlock_delalloc() to unlock page not locked by __process_pages_contig() Qu Wenruo
2021-04-27 23:03 ` [Patch v2 26/42] btrfs: make btrfs_set_range_writeback() subpage compatible Qu Wenruo
2021-04-27 23:03 ` [Patch v2 27/42] btrfs: make __extent_writepage_io() only submit dirty range for subpage Qu Wenruo
2021-04-27 23:03 ` [Patch v2 28/42] btrfs: make btrfs_truncate_block() to be subpage compatible Qu Wenruo
2021-04-27 23:03 ` [Patch v2 29/42] btrfs: make btrfs_page_mkwrite() " Qu Wenruo
2021-04-27 23:03 ` [Patch v2 30/42] btrfs: reflink: make copy_inline_to_page() " Qu Wenruo
2021-04-27 23:03 ` [Patch v2 31/42] btrfs: fix the filemap_range_has_page() call in btrfs_punch_hole_lock_range() Qu Wenruo
2021-04-27 23:03 ` [Patch v2 32/42] btrfs: don't clear page extent mapped if we're not invalidating the full page Qu Wenruo
2021-04-27 23:03 ` [Patch v2 33/42] btrfs: extract relocation page read and dirty part into its own function Qu Wenruo
2021-04-27 23:03 ` [Patch v2 34/42] btrfs: make relocate_one_page() to handle subpage case Qu Wenruo
2021-04-27 23:03 ` [Patch v2 35/42] btrfs: fix wild subpage writeback which does not have ordered extent Qu Wenruo
2021-04-27 23:03 ` [Patch v2 36/42] btrfs: disable inline extent creation for subpage Qu Wenruo
2021-05-04  4:28   ` Qu Wenruo
2021-04-27 23:03 ` [Patch v2 37/42] btrfs: skip validation for subpage read repair Qu Wenruo
2021-04-27 23:03 ` Qu Wenruo [this message]
2021-04-27 23:03 ` [Patch v2 39/42] btrfs: reject raid5/6 fs for subpage Qu Wenruo
2021-04-28 14:22   ` Neal Gompa
2021-04-28 23:11     ` Qu Wenruo
2021-05-12 22:04       ` David Sterba
2021-04-27 23:03 ` [Patch v2 40/42] btrfs: fix a crash caused by race between prepare_pages() and btrfs_releasepage() Qu Wenruo
2021-04-28 10:56   ` Filipe Manana
2021-04-27 23:03 ` [Patch v2 41/42] btrfs: fix the use-after-free bug in writeback subpage helper Qu Wenruo
2021-05-06 23:46   ` Qu Wenruo
2021-05-07  4:57     ` Ritesh Harjani
2021-05-07  5:14       ` Qu Wenruo
2021-05-10  8:38         ` Qu Wenruo
2021-05-10 12:29           ` Ritesh Harjani
2021-05-10 13:10             ` Qu Wenruo
2021-05-11 10:48               ` Ritesh Harjani
2021-05-11 11:15                 ` Qu Wenruo
2021-05-12  1:49                   ` Qu Wenruo
2021-05-12  7:09                     ` Ritesh Harjani
2021-05-13 16:33                       ` Ritesh Harjani
2021-05-13 21:36                         ` Ritesh Harjani
2021-05-13 23:41                           ` Qu Wenruo
2021-05-14 15:08                             ` Ritesh Harjani
2021-05-14 17:53                               ` Ritesh Harjani
2021-05-14 22:22                                 ` Qu Wenruo
2021-05-15  9:59                                   ` Ritesh Harjani
2021-05-15 10:15                                     ` Qu Wenruo
2021-05-25  4:43                                       ` Ritesh Harjani
2021-05-25  5:52                                         ` Qu Wenruo
2021-05-25  6:14                                           ` Qu Wenruo
2021-05-25  9:23                                             ` Ritesh Harjani
2021-05-25  9:45                                               ` Qu Wenruo
2021-05-25  9:49                                                 ` Qu Wenruo
2021-05-25 10:20                                                   ` Ritesh Harjani
2021-05-25 11:41                                                     ` Qu Wenruo
2021-05-25 13:02                                                       ` Ritesh Harjani
2021-05-26  5:29                                                         ` Ritesh Harjani
2021-05-26  5:58                                                           ` Qu Wenruo
2021-05-26 13:45                                                             ` Ritesh Harjani
2021-05-28  8:26                                                               ` Qu Wenruo
2021-05-28  8:59                                                                 ` Ritesh Harjani
2021-05-28 10:25                                                                   ` Qu Wenruo
2021-05-30  1:50                                                                     ` Qu Wenruo
2021-04-27 23:03 ` [Patch v2 42/42] btrfs: allow read-write for 4K sectorsize on 64K page size systems Qu Wenruo
2021-05-12 22:18 ` [Patch v2 00/42] btrfs: add data write support for subpage David Sterba
2021-05-12 23:48   ` Qu Wenruo
2021-05-13  2:21     ` Qu Wenruo
2021-05-13 22:54       ` David Sterba
2021-05-14  1:41         ` Qu Wenruo
2021-05-14  2:26           ` riteshh
2021-05-14 10:28             ` riteshh
2021-05-14 11:28               ` David Sterba
2021-05-14 14:38                 ` riteshh
2021-05-14 11:30       ` David Sterba
2021-05-14 22:25         ` David Sterba
2021-05-14 22:45         ` Qu Wenruo
2021-05-14 23:05           ` David Sterba
2021-05-14 23:17             ` Qu Wenruo
2021-05-17 13:22               ` David Sterba
2021-05-17 23:20                 ` Qu Wenruo
2021-04-28  4:14 [Patch v2 38/42] btrfs: allow submit_extent_page() to do bio split " kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210427230349.369603-39-wqu@suse.com \
    --to=wqu@suse.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.