All of lore.kernel.org
 help / color / mirror / Atom feed
* cleanup btrfs bio handling, part 2 v4
@ 2022-05-26  7:36 Christoph Hellwig
  2022-05-26  7:36 ` [PATCH 01/10] btrfs: move more work into btrfs_end_bioc Christoph Hellwig
                   ` (10 more replies)
  0 siblings, 11 replies; 31+ messages in thread
From: Christoph Hellwig @ 2022-05-26  7:36 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs

Hi all,

this series removes the need to allocate a separate object for I/O
completions for all read and some write I/Os, and reduced the memory
usage of the low-level bios cloned by btrfs_map_bio by using plain bios
instead of the much larger btrfs_bio.

Changes since v3:
 - rebased to the latest for-next tree
 - move "btrfs: don't double-defer bio completions for compressed reads"
   back to where it was before in the patch order

Changes since v2:
 - rebased to the latests misc-next tree
 - fixed an incorrectly assigned bi_end_io handler in the raid56 code

Changes since v1:
 - rebased to the latests misc-next tree
 - don't call destroy_workqueue on NULL workqueues
 - drop a marginal and slightly controversial cleanup to btrfs_map_bio

Diffstat:

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 01/10] btrfs: move more work into btrfs_end_bioc
  2022-05-26  7:36 cleanup btrfs bio handling, part 2 v4 Christoph Hellwig
@ 2022-05-26  7:36 ` Christoph Hellwig
  2022-05-26  7:36 ` [PATCH 02/10] btrfs: cleanup btrfs_submit_dio_bio Christoph Hellwig
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 31+ messages in thread
From: Christoph Hellwig @ 2022-05-26  7:36 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs, Johannes Thumshirn

Assign ->mirror_num and ->bi_status in btrfs_end_bioc instead of
duplicating the logic in the callers.  Also remove the bio argument as
it always must be bioc->orig_bio and the now pointless bioc_error that
did nothing but assign bi_sector to the same value just sampled in the
caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/volumes.c | 72 ++++++++++++++--------------------------------
 1 file changed, 22 insertions(+), 50 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a2bb0928dc066..03d941c19f4cb 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6626,19 +6626,29 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 	return __btrfs_map_block(fs_info, op, logical, length, bioc_ret, 0, 1);
 }
 
-static inline void btrfs_end_bioc(struct btrfs_io_context *bioc, struct bio *bio)
+static inline void btrfs_end_bioc(struct btrfs_io_context *bioc)
 {
-	bio->bi_private = bioc->private;
-	bio->bi_end_io = bioc->end_io;
-	bio_endio(bio);
+	struct bio *orig_bio = bioc->orig_bio;
 
+	btrfs_bio(orig_bio)->mirror_num = bioc->mirror_num;
+	orig_bio->bi_private = bioc->private;
+	orig_bio->bi_end_io = bioc->end_io;
+
+	/*
+	 * Only send an error to the higher layers if it is beyond the tolerance
+	 * threshold.
+	 */
+	if (atomic_read(&bioc->error) > bioc->max_errors)
+		orig_bio->bi_status = BLK_STS_IOERR;
+	else
+		orig_bio->bi_status = BLK_STS_OK;
+	bio_endio(orig_bio);
 	btrfs_put_bioc(bioc);
 }
 
 static void btrfs_end_bio(struct bio *bio)
 {
 	struct btrfs_io_context *bioc = bio->bi_private;
-	int is_orig_bio = 0;
 
 	if (bio->bi_status) {
 		atomic_inc(&bioc->error);
@@ -6659,35 +6669,12 @@ static void btrfs_end_bio(struct bio *bio)
 		}
 	}
 
-	if (bio == bioc->orig_bio)
-		is_orig_bio = 1;
+	if (bio != bioc->orig_bio)
+		bio_put(bio);
 
 	btrfs_bio_counter_dec(bioc->fs_info);
-
-	if (atomic_dec_and_test(&bioc->stripes_pending)) {
-		if (!is_orig_bio) {
-			bio_put(bio);
-			bio = bioc->orig_bio;
-		}
-
-		btrfs_bio(bio)->mirror_num = bioc->mirror_num;
-		/* only send an error to the higher layers if it is
-		 * beyond the tolerance of the btrfs bio
-		 */
-		if (atomic_read(&bioc->error) > bioc->max_errors) {
-			bio->bi_status = BLK_STS_IOERR;
-		} else {
-			/*
-			 * this bio is actually up to date, we didn't
-			 * go over the max number of errors
-			 */
-			bio->bi_status = BLK_STS_OK;
-		}
-
-		btrfs_end_bioc(bioc, bio);
-	} else if (!is_orig_bio) {
-		bio_put(bio);
-	}
+	if (atomic_dec_and_test(&bioc->stripes_pending))
+		btrfs_end_bioc(bioc);
 }
 
 static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio,
@@ -6725,23 +6712,6 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio,
 	submit_bio(bio);
 }
 
-static void bioc_error(struct btrfs_io_context *bioc, struct bio *bio, u64 logical)
-{
-	atomic_inc(&bioc->error);
-	if (atomic_dec_and_test(&bioc->stripes_pending)) {
-		/* Should be the original bio. */
-		WARN_ON(bio != bioc->orig_bio);
-
-		btrfs_bio(bio)->mirror_num = bioc->mirror_num;
-		bio->bi_iter.bi_sector = logical >> 9;
-		if (atomic_read(&bioc->error) > bioc->max_errors)
-			bio->bi_status = BLK_STS_IOERR;
-		else
-			bio->bi_status = BLK_STS_OK;
-		btrfs_end_bioc(bioc, bio);
-	}
-}
-
 blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 			   int mirror_num)
 {
@@ -6800,7 +6770,9 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 						   &dev->dev_state) ||
 		    (btrfs_op(first_bio) == BTRFS_MAP_WRITE &&
 		    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
-			bioc_error(bioc, first_bio, logical);
+			atomic_inc(&bioc->error);
+			if (atomic_dec_and_test(&bioc->stripes_pending))
+				btrfs_end_bioc(bioc);
 			continue;
 		}
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 02/10] btrfs: cleanup btrfs_submit_dio_bio
  2022-05-26  7:36 cleanup btrfs bio handling, part 2 v4 Christoph Hellwig
  2022-05-26  7:36 ` [PATCH 01/10] btrfs: move more work into btrfs_end_bioc Christoph Hellwig
@ 2022-05-26  7:36 ` Christoph Hellwig
  2022-06-01 19:32   ` David Sterba
  2022-05-26  7:36 ` [PATCH 03/10] btrfs: split btrfs_submit_data_bio Christoph Hellwig
                   ` (8 subsequent siblings)
  10 siblings, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2022-05-26  7:36 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs, Johannes Thumshirn

Remove the pointless goto just to return err and clean up the code flow
to be a little more straight forward.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/inode.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 67bb7876fdfa8..03335ce83a949 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7966,39 +7966,33 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
 	bool write = btrfs_op(bio) == BTRFS_MAP_WRITE;
 	blk_status_t ret;
 
-	/* Check btrfs_submit_bio_hook() for rules about async submit. */
-	if (async_submit)
-		async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
-
 	if (!write) {
 		ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
 		if (ret)
-			goto err;
+			return ret;
 	}
 
 	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
 		goto map;
 
-	if (write && async_submit) {
-		ret = btrfs_wq_submit_bio(inode, bio, 0, file_offset,
-					  btrfs_submit_bio_start_direct_io);
-		goto err;
-	} else if (write) {
+	if (write) {
+		/* check btrfs_submit_data_bio() for async submit rules */
+		if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers))
+			return btrfs_wq_submit_bio(inode, bio, 0, file_offset,
+					btrfs_submit_bio_start_direct_io);
 		/*
 		 * If we aren't doing async submit, calculate the csum of the
 		 * bio now.
 		 */
 		ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, false);
 		if (ret)
-			goto err;
+			return ret;
 	} else {
 		btrfs_bio(bio)->csum = btrfs_csum_ptr(fs_info, dip->csums,
 						      file_offset - dip->file_offset);
 	}
 map:
-	ret = btrfs_map_bio(fs_info, bio, 0);
-err:
-	return ret;
+	return btrfs_map_bio(fs_info, bio, 0);
 }
 
 static void btrfs_submit_direct(const struct iomap_iter *iter,
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-05-26  7:36 cleanup btrfs bio handling, part 2 v4 Christoph Hellwig
  2022-05-26  7:36 ` [PATCH 01/10] btrfs: move more work into btrfs_end_bioc Christoph Hellwig
  2022-05-26  7:36 ` [PATCH 02/10] btrfs: cleanup btrfs_submit_dio_bio Christoph Hellwig
@ 2022-05-26  7:36 ` Christoph Hellwig
  2022-05-26  7:36 ` [PATCH 04/10] btrfs: defer I/O completion based on the btrfs_raid_bio Christoph Hellwig
                   ` (7 subsequent siblings)
  10 siblings, 0 replies; 31+ messages in thread
From: Christoph Hellwig @ 2022-05-26  7:36 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs

Split btrfs_submit_data_bio into one helper for reads and one for writes.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/ctree.h     |   6 +-
 fs/btrfs/extent_io.c |  14 +++--
 fs/btrfs/inode.c     | 132 ++++++++++++++++++++-----------------------
 3 files changed, 75 insertions(+), 77 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 55dee1564e909..a1ee0f5f2af09 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3259,8 +3259,10 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
 u64 btrfs_file_extent_end(const struct btrfs_path *path);
 
 /* inode.c */
-void btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
-			   int mirror_num, enum btrfs_compression_type compress_type);
+void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio,
+		int mirror_num);
+void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
+		int mirror_num, enum btrfs_compression_type compress_type);
 int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
 			    u32 pgoff, u8 *csum, const u8 * const csum_expected);
 unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3dda178c7d778..00c38811ee0f5 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -182,17 +182,21 @@ static void submit_one_bio(struct bio *bio, int mirror_num,
 			   enum btrfs_compression_type compress_type)
 {
 	struct extent_io_tree *tree = bio->bi_private;
+	struct inode *inode = tree->private_data;
 
 	bio->bi_private = NULL;
 
 	/* Caller should ensure the bio has at least some range added */
 	ASSERT(bio->bi_iter.bi_size);
 
-	if (is_data_inode(tree->private_data))
-		btrfs_submit_data_bio(tree->private_data, bio, mirror_num,
-					    compress_type);
+	if (!is_data_inode(tree->private_data))
+		btrfs_submit_metadata_bio(inode, bio, mirror_num);
+	else if (btrfs_op(bio) == BTRFS_MAP_WRITE)
+		btrfs_submit_data_write_bio(inode, bio, mirror_num);
 	else
-		btrfs_submit_metadata_bio(tree->private_data, bio, mirror_num);
+		btrfs_submit_data_read_bio(inode, bio, mirror_num,
+					   compress_type);
+
 	/*
 	 * Above submission hooks will handle the error by ending the bio,
 	 * which will do the cleanup properly.  So here we should not return
@@ -2786,7 +2790,7 @@ static void submit_data_read_repair(struct inode *inode, struct bio *failed_bio,
 		ret = btrfs_repair_one_sector(inode, failed_bio,
 				bio_offset + offset,
 				page, pgoff + offset, start + offset,
-				failed_mirror, btrfs_submit_data_bio);
+				failed_mirror, btrfs_submit_data_read_bio);
 		if (!ret) {
 			/*
 			 * We have submitted the read repair, the page release
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 03335ce83a949..7129bcaa53297 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2580,90 +2580,82 @@ static blk_status_t extract_ordered_extent(struct btrfs_inode *inode,
 	return errno_to_blk_status(ret);
 }
 
-/*
- * extent_io.c submission hook. This does the right thing for csum calculation
- * on write, or reading the csums from the tree before a read.
- *
- * Rules about async/sync submit,
- * a) read:				sync submit
- *
- * b) write without checksum:		sync submit
- *
- * c) write with checksum:
- *    c-1) if bio is issued by fsync:	sync submit
- *         (sync_writers != 0)
- *
- *    c-2) if root is reloc root:	sync submit
- *         (only in case of buffered IO)
- *
- *    c-3) otherwise:			async submit
- */
-void btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
-			   int mirror_num, enum btrfs_compression_type compress_type)
+void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio,
+		int mirror_num)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
-	blk_status_t ret = 0;
-	int skip_sum;
-	int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
-
-	skip_sum = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
-		test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
-
-	if (btrfs_is_free_space_inode(BTRFS_I(inode)))
-		metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
+	struct btrfs_inode *bi = BTRFS_I(inode);
+	blk_status_t ret;
 
 	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
-		struct page *page = bio_first_bvec_all(bio)->bv_page;
-		loff_t file_offset = page_offset(page);
-
-		ret = extract_ordered_extent(BTRFS_I(inode), bio, file_offset);
+		ret = extract_ordered_extent(bi, bio,
+				page_offset(bio_first_bvec_all(bio)->bv_page));
 		if (ret)
 			goto out;
 	}
 
-	if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
-		ret = btrfs_bio_wq_end_io(fs_info, bio, metadata);
-		if (ret)
-			goto out;
-
-		if (compress_type != BTRFS_COMPRESS_NONE) {
-			/*
-			 * btrfs_submit_compressed_read will handle completing
-			 * the bio if there were any errors, so just return
-			 * here.
-			 */
-			btrfs_submit_compressed_read(inode, bio, mirror_num);
-			return;
-		} else {
-			/*
-			 * Lookup bio sums does extra checks around whether we
-			 * need to csum or not, which is why we ignore skip_sum
-			 * here.
-			 */
-			ret = btrfs_lookup_bio_sums(inode, bio, NULL);
+	/*
+	 * Rules for async/sync submit:
+	 *   a) write without checksum:			sync submit
+	 *   b) write with checksum:
+	 *      b-1) if bio is issued by fsync:		sync submit
+	 *           (sync_writers != 0)
+	 *      b-2) if root is reloc root:		sync submit
+	 *           (only in case of buffered IO)
+	 *      b-3) otherwise:				async submit
+	 */
+	if (!(bi->flags & BTRFS_INODE_NODATASUM) &&
+	    !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) {
+		if (atomic_read(&bi->sync_writers)) {
+			ret = btrfs_csum_one_bio(bi, bio, (u64)-1, false);
 			if (ret)
 				goto out;
+		} else if (btrfs_is_data_reloc_root(bi->root)) {
+			; /* csum items have already been cloned */
+		} else {
+			ret = btrfs_wq_submit_bio(inode, bio,
+					mirror_num, 0,
+					btrfs_submit_bio_start);
+			goto out;
 		}
-		goto mapit;
-	} else if (async && !skip_sum) {
-		/* csum items have already been cloned */
-		if (btrfs_is_data_reloc_root(root))
-			goto mapit;
-		/* we're doing a write, do the async checksumming */
-		ret = btrfs_wq_submit_bio(inode, bio, mirror_num,
-					  0, btrfs_submit_bio_start);
+	}
+	ret = btrfs_map_bio(fs_info, bio, mirror_num);
+out:
+	if (ret) {
+		bio->bi_status = ret;
+		bio_endio(bio);
+	}
+}
+
+void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
+		int mirror_num, enum btrfs_compression_type compress_type)
+{
+	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	blk_status_t ret;
+
+	ret = btrfs_bio_wq_end_io(fs_info, bio,
+			btrfs_is_free_space_inode(BTRFS_I(inode)) ?
+			BTRFS_WQ_ENDIO_FREE_SPACE : BTRFS_WQ_ENDIO_DATA);
+	if (ret)
 		goto out;
-	} else if (!skip_sum) {
-		ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, (u64)-1, false);
-		if (ret)
-			goto out;
+
+	if (compress_type != BTRFS_COMPRESS_NONE) {
+		/*
+		 * btrfs_submit_compressed_read will handle completing the bio
+		 * if there were any errors, so just return here.
+		 */
+		btrfs_submit_compressed_read(inode, bio, mirror_num);
+		return;
 	}
 
-mapit:
+	/*
+	 * Lookup bio sums does extra checks around whether we need to csum or
+	 * not, which is why we ignore skip_sum here.
+	 */
+	ret = btrfs_lookup_bio_sums(inode, bio, NULL);
+	if (ret)
+		goto out;
 	ret = btrfs_map_bio(fs_info, bio, mirror_num);
-
 out:
 	if (ret) {
 		bio->bi_status = ret;
@@ -7976,7 +7968,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
 		goto map;
 
 	if (write) {
-		/* check btrfs_submit_data_bio() for async submit rules */
+		/* check btrfs_submit_data_write_bio() for async submit rules */
 		if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers))
 			return btrfs_wq_submit_bio(inode, bio, 0, file_offset,
 					btrfs_submit_bio_start_direct_io);
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 04/10] btrfs: defer I/O completion based on the btrfs_raid_bio
  2022-05-26  7:36 cleanup btrfs bio handling, part 2 v4 Christoph Hellwig
                   ` (2 preceding siblings ...)
  2022-05-26  7:36 ` [PATCH 03/10] btrfs: split btrfs_submit_data_bio Christoph Hellwig
@ 2022-05-26  7:36 ` Christoph Hellwig
  2022-05-26  7:36 ` [PATCH 05/10] btrfs: don't double-defer bio completions for compressed reads Christoph Hellwig
                   ` (6 subsequent siblings)
  10 siblings, 0 replies; 31+ messages in thread
From: Christoph Hellwig @ 2022-05-26  7:36 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs

Instead of attaching a an extra allocation an indirect call to each
low-level bio issued by the RAID code, add a work_struct to struct
btrfs_raid_bio and only defer the per-rbio completion action.  The
per-bio action for all the I/Os are trivial and can be safely done
from interrupt context.

As a nice side effect this also allows sharing the boilerplate code
for the per-bio completions

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/ctree.h   |   2 +-
 fs/btrfs/disk-io.c |  12 ++---
 fs/btrfs/disk-io.h |   1 -
 fs/btrfs/raid56.c  | 109 ++++++++++++++++++---------------------------
 4 files changed, 48 insertions(+), 76 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a1ee0f5f2af09..a7baae1e8fc7d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -853,7 +853,7 @@ struct btrfs_fs_info {
 	struct btrfs_workqueue *flush_workers;
 	struct btrfs_workqueue *endio_workers;
 	struct btrfs_workqueue *endio_meta_workers;
-	struct btrfs_workqueue *endio_raid56_workers;
+	struct workqueue_struct *endio_raid56_workers;
 	struct workqueue_struct *rmw_workers;
 	struct btrfs_workqueue *endio_meta_write_workers;
 	struct btrfs_workqueue *endio_write_workers;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5812ab64fa2c3..a05925dac3467 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -753,14 +753,10 @@ static void end_workqueue_bio(struct bio *bio)
 			wq = fs_info->endio_meta_write_workers;
 		else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
 			wq = fs_info->endio_freespace_worker;
-		else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
-			wq = fs_info->endio_raid56_workers;
 		else
 			wq = fs_info->endio_write_workers;
 	} else {
-		if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
-			wq = fs_info->endio_raid56_workers;
-		else if (end_io_wq->metadata)
+		if (end_io_wq->metadata)
 			wq = fs_info->endio_meta_workers;
 		else
 			wq = fs_info->endio_workers;
@@ -2273,7 +2269,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
 	btrfs_destroy_workqueue(fs_info->hipri_workers);
 	btrfs_destroy_workqueue(fs_info->workers);
 	btrfs_destroy_workqueue(fs_info->endio_workers);
-	btrfs_destroy_workqueue(fs_info->endio_raid56_workers);
+	if (fs_info->endio_raid56_workers)
+		destroy_workqueue(fs_info->endio_raid56_workers);
 	if (fs_info->rmw_workers)
 		destroy_workqueue(fs_info->rmw_workers);
 	btrfs_destroy_workqueue(fs_info->endio_write_workers);
@@ -2475,8 +2472,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
 		btrfs_alloc_workqueue(fs_info, "endio-meta-write", flags,
 				      max_active, 2);
 	fs_info->endio_raid56_workers =
-		btrfs_alloc_workqueue(fs_info, "endio-raid56", flags,
-				      max_active, 4);
+		alloc_workqueue("btrfs-endio-raid56", flags, max_active);
 	fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active);
 	fs_info->endio_write_workers =
 		btrfs_alloc_workqueue(fs_info, "endio-write", flags,
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 4ee8c42c9f783..809ef065f1666 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -21,7 +21,6 @@ enum btrfs_wq_endio_type {
 	BTRFS_WQ_ENDIO_DATA,
 	BTRFS_WQ_ENDIO_METADATA,
 	BTRFS_WQ_ENDIO_FREE_SPACE,
-	BTRFS_WQ_ENDIO_RAID56,
 };
 
 static inline u64 btrfs_sb_offset(int mirror)
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index e03a38af12cdb..ac91881b275b5 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -164,6 +164,9 @@ struct btrfs_raid_bio {
 	atomic_t stripes_pending;
 
 	atomic_t error;
+
+	struct work_struct end_io_work;
+
 	/*
 	 * these are two arrays of pointers.  We allocate the
 	 * rbio big enough to hold them both and setup their
@@ -1552,15 +1555,7 @@ static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio)
 	}
 }
 
-/*
- * end io for the read phase of the rmw cycle.  All the bios here are physical
- * stripe bios we've read from the disk so we can recalculate the parity of the
- * stripe.
- *
- * This will usually kick off finish_rmw once all the bios are read in, but it
- * may trigger parity reconstruction if we had any errors along the way
- */
-static void raid_rmw_end_io(struct bio *bio)
+static void raid56_bio_end_io(struct bio *bio)
 {
 	struct btrfs_raid_bio *rbio = bio->bi_private;
 
@@ -1571,23 +1566,34 @@ static void raid_rmw_end_io(struct bio *bio)
 
 	bio_put(bio);
 
-	if (!atomic_dec_and_test(&rbio->stripes_pending))
-		return;
+	if (atomic_dec_and_test(&rbio->stripes_pending))
+		queue_work(rbio->bioc->fs_info->endio_raid56_workers,
+			   &rbio->end_io_work);
+}
 
-	if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
-		goto cleanup;
+/*
+ * End io handler for the read phase of the rmw cycle.  All the bios here are
+ * physical stripe bios we've read from the disk so we can recalculate the
+ * parity of the stripe.
+ *
+ * This will usually kick off finish_rmw once all the bios are read in, but it
+ * may trigger parity reconstruction if we had any errors along the way
+ */
+static void raid56_rmw_end_io_work(struct work_struct *work)
+{
+	struct btrfs_raid_bio *rbio =
+		container_of(work, struct btrfs_raid_bio, end_io_work);
+
+	if (atomic_read(&rbio->error) > rbio->bioc->max_errors) {
+		rbio_orig_end_io(rbio, BLK_STS_IOERR);
+		return;
+	}
 
 	/*
-	 * this will normally call finish_rmw to start our write
-	 * but if there are any failed stripes we'll reconstruct
-	 * from parity first
+	 * This will normally call finish_rmw to start our write but if there
+	 * are any failed stripes we'll reconstruct from parity first.
 	 */
 	validate_rbio_for_rmw(rbio);
-	return;
-
-cleanup:
-
-	rbio_orig_end_io(rbio, BLK_STS_IOERR);
 }
 
 /*
@@ -1662,11 +1668,9 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
 	 * touch it after that.
 	 */
 	atomic_set(&rbio->stripes_pending, bios_to_read);
+	INIT_WORK(&rbio->end_io_work, raid56_rmw_end_io_work);
 	while ((bio = bio_list_pop(&bio_list))) {
-		bio->bi_end_io = raid_rmw_end_io;
-
-		btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
-
+		bio->bi_end_io = raid56_bio_end_io;
 		submit_bio(bio);
 	}
 	/* the actual write will happen once the reads are done */
@@ -2108,25 +2112,13 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
 }
 
 /*
- * This is called only for stripes we've read from disk to
- * reconstruct the parity.
+ * This is called only for stripes we've read from disk to reconstruct the
+ * parity.
  */
-static void raid_recover_end_io(struct bio *bio)
+static void raid_recover_end_io_work(struct work_struct *work)
 {
-	struct btrfs_raid_bio *rbio = bio->bi_private;
-
-	/*
-	 * we only read stripe pages off the disk, set them
-	 * up to date if there were no errors
-	 */
-	if (bio->bi_status)
-		fail_bio_stripe(rbio, bio);
-	else
-		set_bio_pages_uptodate(rbio, bio);
-	bio_put(bio);
-
-	if (!atomic_dec_and_test(&rbio->stripes_pending))
-		return;
+	struct btrfs_raid_bio *rbio =
+		container_of(work, struct btrfs_raid_bio, end_io_work);
 
 	if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
 		rbio_orig_end_io(rbio, BLK_STS_IOERR);
@@ -2209,11 +2201,9 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
 	 * touch it after that.
 	 */
 	atomic_set(&rbio->stripes_pending, bios_to_read);
+	INIT_WORK(&rbio->end_io_work, raid_recover_end_io_work);
 	while ((bio = bio_list_pop(&bio_list))) {
-		bio->bi_end_io = raid_recover_end_io;
-
-		btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
-
+		bio->bi_end_io = raid56_bio_end_io;
 		submit_bio(bio);
 	}
 
@@ -2583,7 +2573,6 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
 
 	while ((bio = bio_list_pop(&bio_list))) {
 		bio->bi_end_io = raid_write_end_io;
-
 		submit_bio(bio);
 	}
 	return;
@@ -2671,24 +2660,14 @@ static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
  * This will usually kick off finish_rmw once all the bios are read in, but it
  * may trigger parity reconstruction if we had any errors along the way
  */
-static void raid56_parity_scrub_end_io(struct bio *bio)
+static void raid56_parity_scrub_end_io_work(struct work_struct *work)
 {
-	struct btrfs_raid_bio *rbio = bio->bi_private;
-
-	if (bio->bi_status)
-		fail_bio_stripe(rbio, bio);
-	else
-		set_bio_pages_uptodate(rbio, bio);
-
-	bio_put(bio);
-
-	if (!atomic_dec_and_test(&rbio->stripes_pending))
-		return;
+	struct btrfs_raid_bio *rbio =
+		container_of(work, struct btrfs_raid_bio, end_io_work);
 
 	/*
-	 * this will normally call finish_rmw to start our write
-	 * but if there are any failed stripes we'll reconstruct
-	 * from parity first
+	 * This will normally call finish_rmw to start our write, but if there
+	 * are any failed stripes we'll reconstruct from parity first
 	 */
 	validate_rbio_for_parity_scrub(rbio);
 }
@@ -2758,11 +2737,9 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
 	 * touch it after that.
 	 */
 	atomic_set(&rbio->stripes_pending, bios_to_read);
+	INIT_WORK(&rbio->end_io_work, raid56_parity_scrub_end_io_work);
 	while ((bio = bio_list_pop(&bio_list))) {
-		bio->bi_end_io = raid56_parity_scrub_end_io;
-
-		btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
-
+		bio->bi_end_io = raid56_bio_end_io;
 		submit_bio(bio);
 	}
 	/* the actual write will happen once the reads are done */
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 05/10] btrfs: don't double-defer bio completions for compressed reads
  2022-05-26  7:36 cleanup btrfs bio handling, part 2 v4 Christoph Hellwig
                   ` (3 preceding siblings ...)
  2022-05-26  7:36 ` [PATCH 04/10] btrfs: defer I/O completion based on the btrfs_raid_bio Christoph Hellwig
@ 2022-05-26  7:36 ` Christoph Hellwig
  2022-05-26  7:36 ` [PATCH 06/10] btrfs: don't use btrfs_bio_wq_end_io for compressed writes Christoph Hellwig
                   ` (5 subsequent siblings)
  10 siblings, 0 replies; 31+ messages in thread
From: Christoph Hellwig @ 2022-05-26  7:36 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs

The bio completion handler of the bio used for the compressed data is
already run in a workqueue using btrfs_bio_wq_end_io, so don't schedule
the completion of the original bio to the same workqueue again but just
execute it directly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/inode.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7129bcaa53297..c01880fb5a646 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2633,12 +2633,6 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	blk_status_t ret;
 
-	ret = btrfs_bio_wq_end_io(fs_info, bio,
-			btrfs_is_free_space_inode(BTRFS_I(inode)) ?
-			BTRFS_WQ_ENDIO_FREE_SPACE : BTRFS_WQ_ENDIO_DATA);
-	if (ret)
-		goto out;
-
 	if (compress_type != BTRFS_COMPRESS_NONE) {
 		/*
 		 * btrfs_submit_compressed_read will handle completing the bio
@@ -2648,6 +2642,12 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
 		return;
 	}
 
+	ret = btrfs_bio_wq_end_io(fs_info, bio,
+			btrfs_is_free_space_inode(BTRFS_I(inode)) ?
+			BTRFS_WQ_ENDIO_FREE_SPACE : BTRFS_WQ_ENDIO_DATA);
+	if (ret)
+		goto out;
+
 	/*
 	 * Lookup bio sums does extra checks around whether we need to csum or
 	 * not, which is why we ignore skip_sum here.
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 06/10] btrfs: don't use btrfs_bio_wq_end_io for compressed writes
  2022-05-26  7:36 cleanup btrfs bio handling, part 2 v4 Christoph Hellwig
                   ` (4 preceding siblings ...)
  2022-05-26  7:36 ` [PATCH 05/10] btrfs: don't double-defer bio completions for compressed reads Christoph Hellwig
@ 2022-05-26  7:36 ` Christoph Hellwig
  2022-06-01 19:33   ` David Sterba
  2022-05-26  7:36 ` [PATCH 07/10] btrfs: centralize setting REQ_META Christoph Hellwig
                   ` (4 subsequent siblings)
  10 siblings, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2022-05-26  7:36 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs

Compressed write bio completion is the only user of btrfs_bio_wq_end_io
for writes, and the use of btrfs_bio_wq_end_io is a little suboptimal
here as we only real need user context for the final completion of a
compressed_bio structure, and not every single bio completion.

Add a work_struct to struct compressed_bio instead and use that to call
finish_compressed_bio_write.  This allows to remove all handling of
write bios in the btrfs_bio_wq_end_io infrastructure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/compression.c | 45 +++++++++++++++++++++---------------------
 fs/btrfs/compression.h |  7 +++++--
 fs/btrfs/ctree.h       |  2 +-
 fs/btrfs/disk-io.c     | 30 +++++++++++-----------------
 fs/btrfs/super.c       |  2 --
 5 files changed, 41 insertions(+), 45 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 6ab82e142f1f8..d966d917a49ed 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -398,6 +398,14 @@ static void finish_compressed_bio_write(struct compressed_bio *cb)
 	kfree(cb);
 }
 
+static void btrfs_finish_compressed_write_work(struct work_struct *work)
+{
+	struct compressed_bio *cb =
+		container_of(work, struct compressed_bio, write_end_work);
+
+	finish_compressed_bio_write(cb);
+}
+
 /*
  * Do the cleanup once all the compressed pages hit the disk.  This will clear
  * writeback on the file pages and free the compressed pages.
@@ -409,29 +417,16 @@ static void end_compressed_bio_write(struct bio *bio)
 {
 	struct compressed_bio *cb = bio->bi_private;
 
-	if (!dec_and_test_compressed_bio(cb, bio))
-		goto out;
-
-	btrfs_record_physical_zoned(cb->inode, cb->start, bio);
+	if (dec_and_test_compressed_bio(cb, bio)) {
+		struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
 
-	finish_compressed_bio_write(cb);
-out:
+		btrfs_record_physical_zoned(cb->inode, cb->start, bio);
+		queue_work(fs_info->compressed_write_workers,
+			   &cb->write_end_work);
+	}
 	bio_put(bio);
 }
 
-static blk_status_t submit_compressed_bio(struct btrfs_fs_info *fs_info,
-					  struct bio *bio, int mirror_num)
-{
-	blk_status_t ret;
-
-	ASSERT(bio->bi_iter.bi_size);
-	ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
-	if (ret)
-		return ret;
-	ret = btrfs_map_bio(fs_info, bio, mirror_num);
-	return ret;
-}
-
 /*
  * Allocate a compressed_bio, which will be used to read/write on-disk
  * (aka, compressed) * data.
@@ -528,7 +523,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 	cb->compressed_pages = compressed_pages;
 	cb->compressed_len = compressed_len;
 	cb->writeback = writeback;
-	cb->orig_bio = NULL;
+	INIT_WORK(&cb->write_end_work, btrfs_finish_compressed_write_work);
 	cb->nr_pages = nr_pages;
 
 	if (blkcg_css)
@@ -598,7 +593,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 					goto finish_cb;
 			}
 
-			ret = submit_compressed_bio(fs_info, bio, 0);
+			ASSERT(bio->bi_iter.bi_size);
+			ret = btrfs_map_bio(fs_info, bio, 0);
 			if (ret)
 				goto finish_cb;
 			bio = NULL;
@@ -936,7 +932,12 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 						  fs_info->sectorsize);
 			sums += fs_info->csum_size * nr_sectors;
 
-			ret = submit_compressed_bio(fs_info, comp_bio, mirror_num);
+			ASSERT(comp_bio->bi_iter.bi_size);
+			ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
+						  BTRFS_WQ_ENDIO_DATA);
+			if (ret)
+				goto finish_cb;
+			ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
 			if (ret)
 				goto finish_cb;
 			comp_bio = NULL;
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 2707404389a5d..4a40725cbf1db 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -61,8 +61,11 @@ struct compressed_bio {
 	blk_status_t status;
 	int mirror_num;
 
-	/* for reads, this is the bio we are copying the data into */
-	struct bio *orig_bio;
+	union {
+		/* for reads, this is the bio we are copying the data into */
+		struct bio *orig_bio;
+		struct work_struct write_end_work;
+	};
 
 	/*
 	 * the start of a variable length array of checksums only
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a7baae1e8fc7d..b5a67cdd1dc2f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -855,7 +855,7 @@ struct btrfs_fs_info {
 	struct btrfs_workqueue *endio_meta_workers;
 	struct workqueue_struct *endio_raid56_workers;
 	struct workqueue_struct *rmw_workers;
-	struct btrfs_workqueue *endio_meta_write_workers;
+	struct workqueue_struct *compressed_write_workers;
 	struct btrfs_workqueue *endio_write_workers;
 	struct btrfs_workqueue *endio_freespace_worker;
 	struct btrfs_workqueue *caching_workers;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a05925dac3467..2598c347a4bea 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -748,19 +748,10 @@ static void end_workqueue_bio(struct bio *bio)
 	fs_info = end_io_wq->info;
 	end_io_wq->status = bio->bi_status;
 
-	if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
-		if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
-			wq = fs_info->endio_meta_write_workers;
-		else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
-			wq = fs_info->endio_freespace_worker;
-		else
-			wq = fs_info->endio_write_workers;
-	} else {
-		if (end_io_wq->metadata)
-			wq = fs_info->endio_meta_workers;
-		else
-			wq = fs_info->endio_workers;
-	}
+	if (end_io_wq->metadata)
+		wq = fs_info->endio_meta_workers;
+	else
+		wq = fs_info->endio_workers;
 
 	btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
 	btrfs_queue_work(wq, &end_io_wq->work);
@@ -771,6 +762,9 @@ blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
 {
 	struct btrfs_end_io_wq *end_io_wq;
 
+	if (WARN_ON_ONCE(btrfs_op(bio) != BTRFS_MAP_WRITE))
+		return BLK_STS_IOERR;
+
 	end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
 	if (!end_io_wq)
 		return BLK_STS_RESOURCE;
@@ -2273,6 +2267,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
 		destroy_workqueue(fs_info->endio_raid56_workers);
 	if (fs_info->rmw_workers)
 		destroy_workqueue(fs_info->rmw_workers);
+	if (fs_info->compressed_write_workers)
+		destroy_workqueue(fs_info->compressed_write_workers);
 	btrfs_destroy_workqueue(fs_info->endio_write_workers);
 	btrfs_destroy_workqueue(fs_info->endio_freespace_worker);
 	btrfs_destroy_workqueue(fs_info->delayed_workers);
@@ -2287,7 +2283,6 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
 	 * queues can do metadata I/O operations.
 	 */
 	btrfs_destroy_workqueue(fs_info->endio_meta_workers);
-	btrfs_destroy_workqueue(fs_info->endio_meta_write_workers);
 }
 
 static void free_root_extent_buffers(struct btrfs_root *root)
@@ -2468,15 +2463,14 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
 	fs_info->endio_meta_workers =
 		btrfs_alloc_workqueue(fs_info, "endio-meta", flags,
 				      max_active, 4);
-	fs_info->endio_meta_write_workers =
-		btrfs_alloc_workqueue(fs_info, "endio-meta-write", flags,
-				      max_active, 2);
 	fs_info->endio_raid56_workers =
 		alloc_workqueue("btrfs-endio-raid56", flags, max_active);
 	fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active);
 	fs_info->endio_write_workers =
 		btrfs_alloc_workqueue(fs_info, "endio-write", flags,
 				      max_active, 2);
+	fs_info->compressed_write_workers =
+		alloc_workqueue("btrfs-compressed-write", flags, max_active);
 	fs_info->endio_freespace_worker =
 		btrfs_alloc_workqueue(fs_info, "freespace-write", flags,
 				      max_active, 0);
@@ -2491,7 +2485,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
 	if (!(fs_info->workers && fs_info->hipri_workers &&
 	      fs_info->delalloc_workers && fs_info->flush_workers &&
 	      fs_info->endio_workers && fs_info->endio_meta_workers &&
-	      fs_info->endio_meta_write_workers &&
+	      fs_info->compressed_write_workers &&
 	      fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
 	      fs_info->endio_freespace_worker && fs_info->rmw_workers &&
 	      fs_info->caching_workers && fs_info->fixup_workers &&
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9c202ad5e1262..2d5ac5f730a16 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1908,8 +1908,6 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
 	btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
 	btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
 	btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size);
-	btrfs_workqueue_set_max(fs_info->endio_meta_write_workers,
-				new_pool_size);
 	btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
 	btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
 	btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 07/10] btrfs: centralize setting REQ_META
  2022-05-26  7:36 cleanup btrfs bio handling, part 2 v4 Christoph Hellwig
                   ` (5 preceding siblings ...)
  2022-05-26  7:36 ` [PATCH 06/10] btrfs: don't use btrfs_bio_wq_end_io for compressed writes Christoph Hellwig
@ 2022-05-26  7:36 ` Christoph Hellwig
  2022-05-26  7:36 ` [PATCH 08/10] btrfs: remove btrfs_end_io_wq Christoph Hellwig
                   ` (3 subsequent siblings)
  10 siblings, 0 replies; 31+ messages in thread
From: Christoph Hellwig @ 2022-05-26  7:36 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs

Set REQ_META in btrfs_submit_metadata_bio instead of the various callers.
We'll start relying on this flag inside of btrfs in a bit, and this
ensures it is always set correctly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/disk-io.c   | 2 ++
 fs/btrfs/extent_io.c | 8 ++++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 2598c347a4bea..46de02ac11aac 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -913,6 +913,8 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	blk_status_t ret;
 
+	bio->bi_opf |= REQ_META;
+
 	if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
 		/*
 		 * called for a read, do the setup so that checksum validation
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 00c38811ee0f5..cbe04b3decee9 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4571,7 +4571,7 @@ static int write_one_subpage_eb(struct extent_buffer *eb,
 {
 	struct btrfs_fs_info *fs_info = eb->fs_info;
 	struct page *page = eb->pages[0];
-	unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
+	unsigned int write_flags = wbc_to_write_flags(wbc);
 	bool no_dirty_ebs = false;
 	int ret;
 
@@ -4616,7 +4616,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 {
 	u64 disk_bytenr = eb->start;
 	int i, num_pages;
-	unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
+	unsigned int write_flags = wbc_to_write_flags(wbc);
 	int ret = 0;
 
 	prepare_eb_write(eb);
@@ -6626,7 +6626,7 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
 	btrfs_subpage_clear_error(fs_info, page, eb->start, eb->len);
 
 	btrfs_subpage_start_reader(fs_info, page, eb->start, eb->len);
-	ret = submit_extent_page(REQ_OP_READ | REQ_META, NULL, &bio_ctrl,
+	ret = submit_extent_page(REQ_OP_READ, NULL, &bio_ctrl,
 				 page, eb->start, eb->len,
 				 eb->start - page_offset(page),
 				 end_bio_extent_readpage, mirror_num, 0,
@@ -6733,7 +6733,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
 			}
 
 			ClearPageError(page);
-			err = submit_extent_page(REQ_OP_READ | REQ_META, NULL,
+			err = submit_extent_page(REQ_OP_READ, NULL,
 					 &bio_ctrl, page, page_offset(page),
 					 PAGE_SIZE, 0, end_bio_extent_readpage,
 					 mirror_num, 0, false);
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 08/10] btrfs: remove btrfs_end_io_wq
  2022-05-26  7:36 cleanup btrfs bio handling, part 2 v4 Christoph Hellwig
                   ` (6 preceding siblings ...)
  2022-05-26  7:36 ` [PATCH 07/10] btrfs: centralize setting REQ_META Christoph Hellwig
@ 2022-05-26  7:36 ` Christoph Hellwig
  2022-05-26  7:36 ` [PATCH 09/10] btrfs: refactor btrfs_map_bio Christoph Hellwig
                   ` (2 subsequent siblings)
  10 siblings, 0 replies; 31+ messages in thread
From: Christoph Hellwig @ 2022-05-26  7:36 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs

All reads bio that go through btrfs_map_bio need to be completed in
user context.  And read I/Os are the most common and timing critical
in almost any file system workloads.

Embedd a work_struct into struct btrfs_bio and use it to complete all
read bios submitted through btrfs_map, using the REQ_META flag to decide
which workqueue they are placed on.

This removes the need for a separate 128 byte allocation (typically
rounded up to 192 bytes by slab) for all reads with a size increase
of 24 bytes for struct btrfs_bio.  Future patches will reorgnize
struct btrfs_bio to make use of this extra space for writes as well.

(all sizes are based a on typical 64-bit non-debug build)

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/compression.c |   4 --
 fs/btrfs/ctree.h       |   4 +-
 fs/btrfs/disk-io.c     | 120 +++--------------------------------------
 fs/btrfs/disk-io.h     |  10 ----
 fs/btrfs/inode.c       |  24 +--------
 fs/btrfs/super.c       |  11 +---
 fs/btrfs/volumes.c     |  33 ++++++++++--
 fs/btrfs/volumes.h     |   3 ++
 8 files changed, 42 insertions(+), 167 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index d966d917a49ed..d2d70a8b9ed08 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -933,10 +933,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			sums += fs_info->csum_size * nr_sectors;
 
 			ASSERT(comp_bio->bi_iter.bi_size);
-			ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
-						  BTRFS_WQ_ENDIO_DATA);
-			if (ret)
-				goto finish_cb;
 			ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
 			if (ret)
 				goto finish_cb;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b5a67cdd1dc2f..1e0300d098fc2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -851,8 +851,8 @@ struct btrfs_fs_info {
 	struct btrfs_workqueue *hipri_workers;
 	struct btrfs_workqueue *delalloc_workers;
 	struct btrfs_workqueue *flush_workers;
-	struct btrfs_workqueue *endio_workers;
-	struct btrfs_workqueue *endio_meta_workers;
+	struct workqueue_struct *endio_workers;
+	struct workqueue_struct *endio_meta_workers;
 	struct workqueue_struct *endio_raid56_workers;
 	struct workqueue_struct *rmw_workers;
 	struct workqueue_struct *compressed_write_workers;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 46de02ac11aac..0f926d18e6caf 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -50,7 +50,6 @@
 				 BTRFS_SUPER_FLAG_METADUMP |\
 				 BTRFS_SUPER_FLAG_METADUMP_V2)
 
-static void end_workqueue_fn(struct btrfs_work *work);
 static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 				      struct btrfs_fs_info *fs_info);
@@ -63,40 +62,6 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
 static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info);
 static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info);
 
-/*
- * btrfs_end_io_wq structs are used to do processing in task context when an IO
- * is complete.  This is used during reads to verify checksums, and it is used
- * by writes to insert metadata for new file extents after IO is complete.
- */
-struct btrfs_end_io_wq {
-	struct bio *bio;
-	bio_end_io_t *end_io;
-	void *private;
-	struct btrfs_fs_info *info;
-	blk_status_t status;
-	enum btrfs_wq_endio_type metadata;
-	struct btrfs_work work;
-};
-
-static struct kmem_cache *btrfs_end_io_wq_cache;
-
-int __init btrfs_end_io_wq_init(void)
-{
-	btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq",
-					sizeof(struct btrfs_end_io_wq),
-					0,
-					SLAB_MEM_SPREAD,
-					NULL);
-	if (!btrfs_end_io_wq_cache)
-		return -ENOMEM;
-	return 0;
-}
-
-void __cold btrfs_end_io_wq_exit(void)
-{
-	kmem_cache_destroy(btrfs_end_io_wq_cache);
-}
-
 static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
 {
 	if (fs_info->csum_shash)
@@ -739,48 +704,6 @@ int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
 	return ret;
 }
 
-static void end_workqueue_bio(struct bio *bio)
-{
-	struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
-	struct btrfs_fs_info *fs_info;
-	struct btrfs_workqueue *wq;
-
-	fs_info = end_io_wq->info;
-	end_io_wq->status = bio->bi_status;
-
-	if (end_io_wq->metadata)
-		wq = fs_info->endio_meta_workers;
-	else
-		wq = fs_info->endio_workers;
-
-	btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
-	btrfs_queue_work(wq, &end_io_wq->work);
-}
-
-blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
-			enum btrfs_wq_endio_type metadata)
-{
-	struct btrfs_end_io_wq *end_io_wq;
-
-	if (WARN_ON_ONCE(btrfs_op(bio) != BTRFS_MAP_WRITE))
-		return BLK_STS_IOERR;
-
-	end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
-	if (!end_io_wq)
-		return BLK_STS_RESOURCE;
-
-	end_io_wq->private = bio->bi_private;
-	end_io_wq->end_io = bio->bi_end_io;
-	end_io_wq->info = info;
-	end_io_wq->status = 0;
-	end_io_wq->bio = bio;
-	end_io_wq->metadata = metadata;
-
-	bio->bi_private = end_io_wq;
-	bio->bi_end_io = end_workqueue_bio;
-	return 0;
-}
-
 static void run_one_async_start(struct btrfs_work *work)
 {
 	struct async_submit_bio *async;
@@ -916,14 +839,7 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_
 	bio->bi_opf |= REQ_META;
 
 	if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
-		/*
-		 * called for a read, do the setup so that checksum validation
-		 * can happen in the async kernel threads
-		 */
-		ret = btrfs_bio_wq_end_io(fs_info, bio,
-					  BTRFS_WQ_ENDIO_METADATA);
-		if (!ret)
-			ret = btrfs_map_bio(fs_info, bio, mirror_num);
+		ret = btrfs_map_bio(fs_info, bio, mirror_num);
 	} else if (!should_async_write(fs_info, BTRFS_I(inode))) {
 		ret = btree_csum_one_bio(bio);
 		if (!ret)
@@ -1939,25 +1855,6 @@ struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
 	return root;
 }
 
-/*
- * called by the kthread helper functions to finally call the bio end_io
- * functions.  This is where read checksum verification actually happens
- */
-static void end_workqueue_fn(struct btrfs_work *work)
-{
-	struct bio *bio;
-	struct btrfs_end_io_wq *end_io_wq;
-
-	end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
-	bio = end_io_wq->bio;
-
-	bio->bi_status = end_io_wq->status;
-	bio->bi_private = end_io_wq->private;
-	bio->bi_end_io = end_io_wq->end_io;
-	bio_endio(bio);
-	kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
-}
-
 static int cleaner_kthread(void *arg)
 {
 	struct btrfs_fs_info *fs_info = arg;
@@ -2264,7 +2161,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
 	btrfs_destroy_workqueue(fs_info->delalloc_workers);
 	btrfs_destroy_workqueue(fs_info->hipri_workers);
 	btrfs_destroy_workqueue(fs_info->workers);
-	btrfs_destroy_workqueue(fs_info->endio_workers);
+	if (fs_info->endio_workers)
+		destroy_workqueue(fs_info->endio_workers);
 	if (fs_info->endio_raid56_workers)
 		destroy_workqueue(fs_info->endio_raid56_workers);
 	if (fs_info->rmw_workers)
@@ -2284,7 +2182,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
 	 * the queues used for metadata I/O, since tasks from those other work
 	 * queues can do metadata I/O operations.
 	 */
-	btrfs_destroy_workqueue(fs_info->endio_meta_workers);
+	if (fs_info->endio_meta_workers)
+		destroy_workqueue(fs_info->endio_meta_workers);
 }
 
 static void free_root_extent_buffers(struct btrfs_root *root)
@@ -2456,15 +2355,10 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
 	fs_info->fixup_workers =
 		btrfs_alloc_workqueue(fs_info, "fixup", flags, 1, 0);
 
-	/*
-	 * endios are largely parallel and should have a very
-	 * low idle thresh
-	 */
 	fs_info->endio_workers =
-		btrfs_alloc_workqueue(fs_info, "endio", flags, max_active, 4);
+		alloc_workqueue("btrfs-endio", flags, max_active);
 	fs_info->endio_meta_workers =
-		btrfs_alloc_workqueue(fs_info, "endio-meta", flags,
-				      max_active, 4);
+		alloc_workqueue("btrfs-endio-meta", flags, max_active);
 	fs_info->endio_raid56_workers =
 		alloc_workqueue("btrfs-endio-raid56", flags, max_active);
 	fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 809ef065f1666..05e779a41a997 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -17,12 +17,6 @@
  */
 #define BTRFS_BDEV_BLOCKSIZE	(4096)
 
-enum btrfs_wq_endio_type {
-	BTRFS_WQ_ENDIO_DATA,
-	BTRFS_WQ_ENDIO_METADATA,
-	BTRFS_WQ_ENDIO_FREE_SPACE,
-};
-
 static inline u64 btrfs_sb_offset(int mirror)
 {
 	u64 start = SZ_16K;
@@ -120,8 +114,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
 			  int atomic);
 int btrfs_read_extent_buffer(struct extent_buffer *buf, u64 parent_transid,
 			     int level, struct btrfs_key *first_key);
-blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
-			enum btrfs_wq_endio_type metadata);
 blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
 				 int mirror_num, u64 dio_file_offset,
 				 extent_submit_bio_start_t *submit_bio_start);
@@ -144,8 +136,6 @@ int btree_lock_page_hook(struct page *page, void *data,
 int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
 int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid);
 int btrfs_init_root_free_objectid(struct btrfs_root *root);
-int __init btrfs_end_io_wq_init(void);
-void __cold btrfs_end_io_wq_exit(void);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 void btrfs_set_buffer_lockdep_class(u64 objectid,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c01880fb5a646..eaeef64ea3486 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2642,12 +2642,6 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
 		return;
 	}
 
-	ret = btrfs_bio_wq_end_io(fs_info, bio,
-			btrfs_is_free_space_inode(BTRFS_I(inode)) ?
-			BTRFS_WQ_ENDIO_FREE_SPACE : BTRFS_WQ_ENDIO_DATA);
-	if (ret)
-		goto out;
-
 	/*
 	 * Lookup bio sums does extra checks around whether we need to csum or
 	 * not, which is why we ignore skip_sum here.
@@ -7864,9 +7858,6 @@ static void submit_dio_repair_bio(struct inode *inode, struct bio *bio,
 
 	BUG_ON(bio_op(bio) == REQ_OP_WRITE);
 
-	if (btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA))
-		return;
-
 	refcount_inc(&dip->refs);
 	if (btrfs_map_bio(fs_info, bio, mirror_num))
 		refcount_dec(&dip->refs);
@@ -7955,19 +7946,12 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_dio_private *dip = bio->bi_private;
-	bool write = btrfs_op(bio) == BTRFS_MAP_WRITE;
 	blk_status_t ret;
 
-	if (!write) {
-		ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
-		if (ret)
-			return ret;
-	}
-
 	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
 		goto map;
 
-	if (write) {
+	if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
 		/* check btrfs_submit_data_write_bio() for async submit rules */
 		if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers))
 			return btrfs_wq_submit_bio(inode, bio, 0, file_offset,
@@ -10294,12 +10278,6 @@ static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode,
 			return ret;
 	}
 
-	ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
-	if (ret) {
-		btrfs_bio_free_csum(bbio);
-		return ret;
-	}
-
 	atomic_inc(&priv->pending);
 	ret = btrfs_map_bio(fs_info, bio, mirror_num);
 	if (ret) {
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2d5ac5f730a16..938f42e53a8f8 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1906,8 +1906,6 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
 	btrfs_workqueue_set_max(fs_info->hipri_workers, new_pool_size);
 	btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
 	btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
-	btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
-	btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size);
 	btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
 	btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
 	btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
@@ -2676,13 +2674,9 @@ static int __init init_btrfs_fs(void)
 	if (err)
 		goto free_delayed_ref;
 
-	err = btrfs_end_io_wq_init();
-	if (err)
-		goto free_prelim_ref;
-
 	err = btrfs_interface_init();
 	if (err)
-		goto free_end_io_wq;
+		goto free_prelim_ref;
 
 	btrfs_print_mod_info();
 
@@ -2698,8 +2692,6 @@ static int __init init_btrfs_fs(void)
 
 unregister_ioctl:
 	btrfs_interface_exit();
-free_end_io_wq:
-	btrfs_end_io_wq_exit();
 free_prelim_ref:
 	btrfs_prelim_ref_exit();
 free_delayed_ref:
@@ -2737,7 +2729,6 @@ static void __exit exit_btrfs_fs(void)
 	extent_state_cache_exit();
 	extent_io_exit();
 	btrfs_interface_exit();
-	btrfs_end_io_wq_exit();
 	unregister_filesystem(&btrfs_fs_type);
 	btrfs_exit_sysfs();
 	btrfs_cleanup_fs_uuids();
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 03d941c19f4cb..55a6501a02cc0 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6626,11 +6626,27 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 	return __btrfs_map_block(fs_info, op, logical, length, bioc_ret, 0, 1);
 }
 
-static inline void btrfs_end_bioc(struct btrfs_io_context *bioc)
+static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_io_context *bioc)
+{
+	if (bioc->orig_bio->bi_opf & REQ_META)
+		return bioc->fs_info->endio_meta_workers;
+	return bioc->fs_info->endio_workers;
+}
+
+static void btrfs_end_bio_work(struct work_struct *work)
+{
+	struct btrfs_bio *bbio =
+		container_of(work, struct btrfs_bio, end_io_work);
+
+	bio_endio(&bbio->bio);
+}
+
+static void btrfs_end_bioc(struct btrfs_io_context *bioc, bool async)
 {
 	struct bio *orig_bio = bioc->orig_bio;
+	struct btrfs_bio *bbio = btrfs_bio(orig_bio);
 
-	btrfs_bio(orig_bio)->mirror_num = bioc->mirror_num;
+	bbio->mirror_num = bioc->mirror_num;
 	orig_bio->bi_private = bioc->private;
 	orig_bio->bi_end_io = bioc->end_io;
 
@@ -6642,7 +6658,14 @@ static inline void btrfs_end_bioc(struct btrfs_io_context *bioc)
 		orig_bio->bi_status = BLK_STS_IOERR;
 	else
 		orig_bio->bi_status = BLK_STS_OK;
-	bio_endio(orig_bio);
+
+	if (btrfs_op(orig_bio) == BTRFS_MAP_READ && async) {
+		INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
+		queue_work(btrfs_end_io_wq(bioc), &bbio->end_io_work);
+	} else {
+		bio_endio(orig_bio);
+	}
+
 	btrfs_put_bioc(bioc);
 }
 
@@ -6674,7 +6697,7 @@ static void btrfs_end_bio(struct bio *bio)
 
 	btrfs_bio_counter_dec(bioc->fs_info);
 	if (atomic_dec_and_test(&bioc->stripes_pending))
-		btrfs_end_bioc(bioc);
+		btrfs_end_bioc(bioc, true);
 }
 
 static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio,
@@ -6772,7 +6795,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 		    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
 			atomic_inc(&bioc->error);
 			if (atomic_dec_and_test(&bioc->stripes_pending))
-				btrfs_end_bioc(bioc);
+				btrfs_end_bioc(bioc, false);
 			continue;
 		}
 
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 1e86c48268edd..413969430893e 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -371,6 +371,9 @@ struct btrfs_bio {
 	u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
 	struct bvec_iter iter;
 
+	/* for read end I/O handling */
+	struct work_struct end_io_work;
+
 	/*
 	 * This member must come last, bio_alloc_bioset will allocate enough
 	 * bytes for entire btrfs_bio but relies on bio being last.
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 09/10] btrfs: refactor btrfs_map_bio
  2022-05-26  7:36 cleanup btrfs bio handling, part 2 v4 Christoph Hellwig
                   ` (7 preceding siblings ...)
  2022-05-26  7:36 ` [PATCH 08/10] btrfs: remove btrfs_end_io_wq Christoph Hellwig
@ 2022-05-26  7:36 ` Christoph Hellwig
  2022-06-01 19:36   ` David Sterba
  2022-05-26  7:36 ` [PATCH 10/10] btrfs: do not allocate a btrfs_bio for low-level bios Christoph Hellwig
  2022-06-01 19:29 ` cleanup btrfs bio handling, part 2 v4 David Sterba
  10 siblings, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2022-05-26  7:36 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs

Move all per-stripe handling into submit_stripe_bio and use a label to
cleanup instead of duplicating the logic.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/volumes.c | 77 +++++++++++++++++++++-------------------------
 1 file changed, 35 insertions(+), 42 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 55a6501a02cc0..bd82735fbfbbe 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6700,10 +6700,30 @@ static void btrfs_end_bio(struct bio *bio)
 		btrfs_end_bioc(bioc, true);
 }
 
-static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio,
-			      u64 physical, struct btrfs_device *dev)
+static void submit_stripe_bio(struct btrfs_io_context *bioc,
+		struct bio *orig_bio, int dev_nr, bool clone)
 {
 	struct btrfs_fs_info *fs_info = bioc->fs_info;
+	struct btrfs_device *dev = bioc->stripes[dev_nr].dev;
+	u64 physical = bioc->stripes[dev_nr].physical;
+	struct bio *bio;
+
+	if (!dev || !dev->bdev ||
+	    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
+	    (btrfs_op(orig_bio) == BTRFS_MAP_WRITE &&
+	     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
+		atomic_inc(&bioc->error);
+		if (atomic_dec_and_test(&bioc->stripes_pending))
+			btrfs_end_bioc(bioc, false);
+		return;
+	}
+
+	if (clone) {
+		bio = btrfs_bio_clone(dev->bdev, orig_bio);
+	} else {
+		bio = orig_bio;
+		bio_set_dev(bio, dev->bdev);
+	}
 
 	bio->bi_private = bioc;
 	btrfs_bio(bio)->device = dev;
@@ -6738,32 +6758,25 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio,
 blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 			   int mirror_num)
 {
-	struct btrfs_device *dev;
-	struct bio *first_bio = bio;
 	u64 logical = bio->bi_iter.bi_sector << 9;
-	u64 length = 0;
-	u64 map_length;
+	u64 length = bio->bi_iter.bi_size;
+	u64 map_length = length;
 	int ret;
 	int dev_nr;
 	int total_devs;
 	struct btrfs_io_context *bioc = NULL;
 
-	length = bio->bi_iter.bi_size;
-	map_length = length;
-
 	btrfs_bio_counter_inc_blocked(fs_info);
 	ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
 				&map_length, &bioc, mirror_num, 1);
-	if (ret) {
-		btrfs_bio_counter_dec(fs_info);
-		return errno_to_blk_status(ret);
-	}
+	if (ret)
+		goto out_dec;
 
 	total_devs = bioc->num_stripes;
-	bioc->orig_bio = first_bio;
-	bioc->private = first_bio->bi_private;
-	bioc->end_io = first_bio->bi_end_io;
-	atomic_set(&bioc->stripes_pending, bioc->num_stripes);
+	bioc->orig_bio = bio;
+	bioc->private = bio->bi_private;
+	bioc->end_io = bio->bi_end_io;
+	atomic_set(&bioc->stripes_pending, total_devs);
 
 	if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
 	    ((btrfs_op(bio) == BTRFS_MAP_WRITE) || (mirror_num > 1))) {
@@ -6775,9 +6788,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 			ret = raid56_parity_recover(bio, bioc, map_length,
 						    mirror_num, 1);
 		}
-
-		btrfs_bio_counter_dec(fs_info);
-		return errno_to_blk_status(ret);
+		goto out_dec;
 	}
 
 	if (map_length < length) {
@@ -6787,29 +6798,11 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 		BUG();
 	}
 
-	for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
-		dev = bioc->stripes[dev_nr].dev;
-		if (!dev || !dev->bdev || test_bit(BTRFS_DEV_STATE_MISSING,
-						   &dev->dev_state) ||
-		    (btrfs_op(first_bio) == BTRFS_MAP_WRITE &&
-		    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
-			atomic_inc(&bioc->error);
-			if (atomic_dec_and_test(&bioc->stripes_pending))
-				btrfs_end_bioc(bioc, false);
-			continue;
-		}
-
-		if (dev_nr < total_devs - 1) {
-			bio = btrfs_bio_clone(dev->bdev, first_bio);
-		} else {
-			bio = first_bio;
-			bio_set_dev(bio, dev->bdev);
-		}
-
-		submit_stripe_bio(bioc, bio, bioc->stripes[dev_nr].physical, dev);
-	}
+	for (dev_nr = 0; dev_nr < total_devs; dev_nr++)
+		submit_stripe_bio(bioc, bio, dev_nr, dev_nr < total_devs - 1);
+out_dec:
 	btrfs_bio_counter_dec(fs_info);
-	return BLK_STS_OK;
+	return errno_to_blk_status(ret);
 }
 
 static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args,
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 10/10] btrfs: do not allocate a btrfs_bio for low-level bios
  2022-05-26  7:36 cleanup btrfs bio handling, part 2 v4 Christoph Hellwig
                   ` (8 preceding siblings ...)
  2022-05-26  7:36 ` [PATCH 09/10] btrfs: refactor btrfs_map_bio Christoph Hellwig
@ 2022-05-26  7:36 ` Christoph Hellwig
  2022-06-01 19:39   ` David Sterba
  2022-06-01 19:29 ` cleanup btrfs bio handling, part 2 v4 David Sterba
  10 siblings, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2022-05-26  7:36 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs

The bios submitted from btrfs_map_bio don't really interact with the
rest of btrfs and the only btrfs_bio member actually used in the
low-level bios is the pointer to the btrfs_io_contex used for endio
handler.

Use a union in struct btrfs_io_stripe that allows the endio handler to
find the btrfs_io_context and remove the spurious ->device assignment
so that a plain fs_bio_set bio can be used for the low-level bios
allocated inside btrfs_map_bio.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/extent_io.c | 13 -------------
 fs/btrfs/extent_io.h |  1 -
 fs/btrfs/volumes.c   | 20 ++++++++++----------
 fs/btrfs/volumes.h   |  5 ++++-
 4 files changed, 14 insertions(+), 25 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index cbe04b3decee9..48463705ef0e6 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3202,19 +3202,6 @@ struct bio *btrfs_bio_alloc(unsigned int nr_iovecs)
 	return bio;
 }
 
-struct bio *btrfs_bio_clone(struct block_device *bdev, struct bio *bio)
-{
-	struct btrfs_bio *bbio;
-	struct bio *new;
-
-	/* Bio allocation backed by a bioset does not fail */
-	new = bio_alloc_clone(bdev, bio, GFP_NOFS, &btrfs_bioset);
-	bbio = btrfs_bio(new);
-	btrfs_bio_init(bbio);
-	bbio->iter = bio->bi_iter;
-	return new;
-}
-
 struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
 {
 	struct bio *bio;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 23d4103c88316..72966cf21961e 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -247,7 +247,6 @@ void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
 
 int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array);
 struct bio *btrfs_bio_alloc(unsigned int nr_iovecs);
-struct bio *btrfs_bio_clone(struct block_device *bdev, struct bio *bio);
 struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size);
 
 void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bd82735fbfbbe..1954a3e1c93a9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6671,23 +6671,21 @@ static void btrfs_end_bioc(struct btrfs_io_context *bioc, bool async)
 
 static void btrfs_end_bio(struct bio *bio)
 {
-	struct btrfs_io_context *bioc = bio->bi_private;
+	struct btrfs_io_stripe *stripe = bio->bi_private;
+	struct btrfs_io_context *bioc = stripe->bioc;
 
 	if (bio->bi_status) {
 		atomic_inc(&bioc->error);
 		if (bio->bi_status == BLK_STS_IOERR ||
 		    bio->bi_status == BLK_STS_TARGET) {
-			struct btrfs_device *dev = btrfs_bio(bio)->device;
-
-			ASSERT(dev->bdev);
 			if (btrfs_op(bio) == BTRFS_MAP_WRITE)
-				btrfs_dev_stat_inc_and_print(dev,
+				btrfs_dev_stat_inc_and_print(stripe->dev,
 						BTRFS_DEV_STAT_WRITE_ERRS);
 			else if (!(bio->bi_opf & REQ_RAHEAD))
-				btrfs_dev_stat_inc_and_print(dev,
+				btrfs_dev_stat_inc_and_print(stripe->dev,
 						BTRFS_DEV_STAT_READ_ERRS);
 			if (bio->bi_opf & REQ_PREFLUSH)
-				btrfs_dev_stat_inc_and_print(dev,
+				btrfs_dev_stat_inc_and_print(stripe->dev,
 						BTRFS_DEV_STAT_FLUSH_ERRS);
 		}
 	}
@@ -6719,14 +6717,16 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc,
 	}
 
 	if (clone) {
-		bio = btrfs_bio_clone(dev->bdev, orig_bio);
+		bio = bio_alloc_clone(dev->bdev, orig_bio, GFP_NOFS,
+				      &fs_bio_set);
 	} else {
 		bio = orig_bio;
 		bio_set_dev(bio, dev->bdev);
+		btrfs_bio(bio)->device = dev;
 	}
 
-	bio->bi_private = bioc;
-	btrfs_bio(bio)->device = dev;
+	bioc->stripes[dev_nr].bioc = bioc;
+	bio->bi_private = &bioc->stripes[dev_nr];
 	bio->bi_end_io = btrfs_end_bio;
 	bio->bi_iter.bi_sector = physical >> 9;
 	/*
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 413969430893e..517288d46ecf5 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -412,7 +412,10 @@ static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio)
 
 struct btrfs_io_stripe {
 	struct btrfs_device *dev;
-	u64 physical;
+	union {
+		u64 physical;			/* block mapping */
+		struct btrfs_io_context *bioc;	/* for the endio handler */
+	};
 	u64 length; /* only used for discard mappings */
 };
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: cleanup btrfs bio handling, part 2 v4
  2022-05-26  7:36 cleanup btrfs bio handling, part 2 v4 Christoph Hellwig
                   ` (9 preceding siblings ...)
  2022-05-26  7:36 ` [PATCH 10/10] btrfs: do not allocate a btrfs_bio for low-level bios Christoph Hellwig
@ 2022-06-01 19:29 ` David Sterba
  2022-06-03 16:47   ` David Sterba
  10 siblings, 1 reply; 31+ messages in thread
From: David Sterba @ 2022-06-01 19:29 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs

On Thu, May 26, 2022 at 09:36:32AM +0200, Christoph Hellwig wrote:
> Hi all,
> 
> this series removes the need to allocate a separate object for I/O
> completions for all read and some write I/Os, and reduced the memory
> usage of the low-level bios cloned by btrfs_map_bio by using plain bios
> instead of the much larger btrfs_bio.
> 
> Changes since v3:
>  - rebased to the latest for-next tree
>  - move "btrfs: don't double-defer bio completions for compressed reads"
>    back to where it was before in the patch order

This is a prerequisite for the raid-repair patches so I've added it to
for-next. There's a minor conflict in patch 4 after the recent changes
to raid56 from Qu, only in the context of the btrfs_bio_wq_end_io
removal and usage of differen work queue. There are also some style
comments I'll send as replies to the patches.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 02/10] btrfs: cleanup btrfs_submit_dio_bio
  2022-05-26  7:36 ` [PATCH 02/10] btrfs: cleanup btrfs_submit_dio_bio Christoph Hellwig
@ 2022-06-01 19:32   ` David Sterba
  0 siblings, 0 replies; 31+ messages in thread
From: David Sterba @ 2022-06-01 19:32 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs, Johannes Thumshirn

On Thu, May 26, 2022 at 09:36:34AM +0200, Christoph Hellwig wrote:
> Remove the pointless goto just to return err and clean up the code flow
> to be a little more straight forward.

Please use more descriptive subjects, "cleaup function" is too generic
and makes navigation in patches harder. The changelog text usually gives
hint what exactly is being cleaned up, in this case it's code flow so
I've used subject "simplify code flow in btrfs_submit_dio_bio".

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 06/10] btrfs: don't use btrfs_bio_wq_end_io for compressed writes
  2022-05-26  7:36 ` [PATCH 06/10] btrfs: don't use btrfs_bio_wq_end_io for compressed writes Christoph Hellwig
@ 2022-06-01 19:33   ` David Sterba
  0 siblings, 0 replies; 31+ messages in thread
From: David Sterba @ 2022-06-01 19:33 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs

On Thu, May 26, 2022 at 09:36:38AM +0200, Christoph Hellwig wrote:
> -	/* for reads, this is the bio we are copying the data into */
> -	struct bio *orig_bio;
> +	union {
> +		/* for reads, this is the bio we are copying the data into */

Comments should start witha capital letter unelss it's referring to an
identifier.

> +		struct bio *orig_bio;
> +		struct work_struct write_end_work;
> +	};
>  
>  	/*
>  	 * the start of a variable length array of checksums only

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 09/10] btrfs: refactor btrfs_map_bio
  2022-05-26  7:36 ` [PATCH 09/10] btrfs: refactor btrfs_map_bio Christoph Hellwig
@ 2022-06-01 19:36   ` David Sterba
  0 siblings, 0 replies; 31+ messages in thread
From: David Sterba @ 2022-06-01 19:36 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs

On Thu, May 26, 2022 at 09:36:41AM +0200, Christoph Hellwig wrote:
> Move all per-stripe handling into submit_stripe_bio and use a label to
> cleanup instead of duplicating the logic.

Same comment about subject here, refactoring is quite common and too
generic so "https://github.com/libfuse/sshfs".

> @@ -6787,29 +6798,11 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
>  		BUG();
>  	}
>  
> -	for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
> -		dev = bioc->stripes[dev_nr].dev;
> -		if (!dev || !dev->bdev || test_bit(BTRFS_DEV_STATE_MISSING,
> -						   &dev->dev_state) ||
> -		    (btrfs_op(first_bio) == BTRFS_MAP_WRITE &&
> -		    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
> -			atomic_inc(&bioc->error);
> -			if (atomic_dec_and_test(&bioc->stripes_pending))
> -				btrfs_end_bioc(bioc, false);
> -			continue;
> -		}
> -
> -		if (dev_nr < total_devs - 1) {
> -			bio = btrfs_bio_clone(dev->bdev, first_bio);
> -		} else {
> -			bio = first_bio;
> -			bio_set_dev(bio, dev->bdev);
> -		}
> -
> -		submit_stripe_bio(bioc, bio, bioc->stripes[dev_nr].physical, dev);
> -	}
> +	for (dev_nr = 0; dev_nr < total_devs; dev_nr++)
> +		submit_stripe_bio(bioc, bio, dev_nr, dev_nr < total_devs - 1);

Placing the conditional to parameter is really easy to overlook, it's
more clear when it's done via a temporary variable that can also
describe the semantics what the expression actually means.

I've added:

		const bool should_clone = (...);


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 10/10] btrfs: do not allocate a btrfs_bio for low-level bios
  2022-05-26  7:36 ` [PATCH 10/10] btrfs: do not allocate a btrfs_bio for low-level bios Christoph Hellwig
@ 2022-06-01 19:39   ` David Sterba
  0 siblings, 0 replies; 31+ messages in thread
From: David Sterba @ 2022-06-01 19:39 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs

On Thu, May 26, 2022 at 09:36:42AM +0200, Christoph Hellwig wrote:
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -412,7 +412,10 @@ static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio)
>  
>  struct btrfs_io_stripe {
>  	struct btrfs_device *dev;
> -	u64 physical;
> +	union {
> +		u64 physical;			/* block mapping */
> +		struct btrfs_io_context *bioc;	/* for the endio handler */

The preferred struct comment style is on a separate line, so

		/* Block mapping */
		u64 physical;
		/* For the endio handler */
		struct btrfs_io_context *bioc;

The mostly complete list of style preferences is in
https://btrfs.wiki.kernel.org/index.php/Development_notes#Coding_style_preferences

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: cleanup btrfs bio handling, part 2 v4
  2022-06-01 19:29 ` cleanup btrfs bio handling, part 2 v4 David Sterba
@ 2022-06-03 16:47   ` David Sterba
  0 siblings, 0 replies; 31+ messages in thread
From: David Sterba @ 2022-06-03 16:47 UTC (permalink / raw)
  To: dsterba, Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo,
	linux-btrfs

On Wed, Jun 01, 2022 at 09:29:29PM +0200, David Sterba wrote:
> On Thu, May 26, 2022 at 09:36:32AM +0200, Christoph Hellwig wrote:
> > Hi all,
> > 
> > this series removes the need to allocate a separate object for I/O
> > completions for all read and some write I/Os, and reduced the memory
> > usage of the low-level bios cloned by btrfs_map_bio by using plain bios
> > instead of the much larger btrfs_bio.
> > 
> > Changes since v3:
> >  - rebased to the latest for-next tree
> >  - move "btrfs: don't double-defer bio completions for compressed reads"
> >    back to where it was before in the patch order
> 
> This is a prerequisite for the raid-repair patches so I've added it to
> for-next.

Moved to misc-next, thanks.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-29 14:30 cleanup btrfs bio handling, part 2 v2 Christoph Hellwig
@ 2022-04-29 14:30 ` Christoph Hellwig
  0 siblings, 0 replies; 31+ messages in thread
From: Christoph Hellwig @ 2022-04-29 14:30 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs

Split btrfs_submit_data_bio into one helper for reads and one for writes.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/ctree.h     |   6 +-
 fs/btrfs/extent_io.c |  12 ++--
 fs/btrfs/inode.c     | 130 ++++++++++++++++++++-----------------------
 3 files changed, 73 insertions(+), 75 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 371f3f6c4ea47..40a6f61559348 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3252,8 +3252,10 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
 u64 btrfs_file_extent_end(const struct btrfs_path *path);
 
 /* inode.c */
-void btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
-			   int mirror_num, unsigned long bio_flags);
+void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio,
+		int mirror_num, unsigned long bio_flags);
+void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
+		int mirror_num, unsigned long bio_flags);
 unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
 				    u32 bio_offset, struct page *page,
 				    u64 start, u64 end);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f9d6dd310c42b..80b4482c477c6 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -186,11 +186,15 @@ static void submit_one_bio(struct bio *bio, int mirror_num, unsigned long bio_fl
 	/* Caller should ensure the bio has at least some range added */
 	ASSERT(bio->bi_iter.bi_size);
 
-	if (is_data_inode(tree->private_data))
-		btrfs_submit_data_bio(tree->private_data, bio, mirror_num,
+	if (!is_data_inode(tree->private_data))
+		btrfs_submit_metadata_bio(tree->private_data, bio, mirror_num);
+	else if (btrfs_op(bio) == BTRFS_MAP_WRITE)
+		btrfs_submit_data_write_bio(tree->private_data, bio, mirror_num,
 					    bio_flags);
 	else
-		btrfs_submit_metadata_bio(tree->private_data, bio, mirror_num);
+		btrfs_submit_data_read_bio(tree->private_data, bio, mirror_num,
+					    bio_flags);
+
 	/*
 	 * Above submission hooks will handle the error by ending the bio,
 	 * which will do the cleanup properly.  So here we should not return
@@ -2773,7 +2777,7 @@ static blk_status_t submit_data_read_repair(struct inode *inode,
 		ret = btrfs_repair_one_sector(inode, failed_bio,
 				bio_offset + offset,
 				page, pgoff + offset, start + offset,
-				failed_mirror, btrfs_submit_data_bio);
+				failed_mirror, btrfs_submit_data_read_bio);
 		if (!ret) {
 			/*
 			 * We have submitted the read repair, the page release
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3be52ed6ed196..6476a5d9c09e9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2552,90 +2552,82 @@ static blk_status_t extract_ordered_extent(struct btrfs_inode *inode,
 	return errno_to_blk_status(ret);
 }
 
-/*
- * extent_io.c submission hook. This does the right thing for csum calculation
- * on write, or reading the csums from the tree before a read.
- *
- * Rules about async/sync submit,
- * a) read:				sync submit
- *
- * b) write without checksum:		sync submit
- *
- * c) write with checksum:
- *    c-1) if bio is issued by fsync:	sync submit
- *         (sync_writers != 0)
- *
- *    c-2) if root is reloc root:	sync submit
- *         (only in case of buffered IO)
- *
- *    c-3) otherwise:			async submit
- */
-void btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
+void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio,
 			   int mirror_num, unsigned long bio_flags)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
-	blk_status_t ret = 0;
-	int skip_sum;
-	int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
-
-	skip_sum = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
-		test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
-
-	if (btrfs_is_free_space_inode(BTRFS_I(inode)))
-		metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
+	struct btrfs_inode *bi = BTRFS_I(inode);
+	blk_status_t ret;
 
 	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
-		struct page *page = bio_first_bvec_all(bio)->bv_page;
-		loff_t file_offset = page_offset(page);
-
-		ret = extract_ordered_extent(BTRFS_I(inode), bio, file_offset);
+		ret = extract_ordered_extent(bi, bio,
+				page_offset(bio_first_bvec_all(bio)->bv_page));
 		if (ret)
 			goto out;
 	}
 
-	if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
-		ret = btrfs_bio_wq_end_io(fs_info, bio, metadata);
-		if (ret)
-			goto out;
-
-		if (bio_flags & EXTENT_BIO_COMPRESSED) {
-			/*
-			 * btrfs_submit_compressed_read will handle completing
-			 * the bio if there were any errors, so just return
-			 * here.
-			 */
-			btrfs_submit_compressed_read(inode, bio, mirror_num);
-			return;
-		} else {
-			/*
-			 * Lookup bio sums does extra checks around whether we
-			 * need to csum or not, which is why we ignore skip_sum
-			 * here.
-			 */
-			ret = btrfs_lookup_bio_sums(inode, bio, NULL);
+	/*
+	 * Rules for async/sync submit:
+	 *   a) write without checksum:			sync submit
+	 *   b) write with checksum:
+	 *      b-1) if bio is issued by fsync:		sync submit
+	 *           (sync_writers != 0)
+	 *      b-2) if root is reloc root:		sync submit
+	 *           (only in case of buffered IO)
+	 *      b-3) otherwise:				async submit
+	 */
+	if (!(bi->flags & BTRFS_INODE_NODATASUM) &&
+	    !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) {
+		if (atomic_read(&bi->sync_writers)) {
+			ret = btrfs_csum_one_bio(bi, bio, (u64)-1, false);
 			if (ret)
 				goto out;
+		} else if (btrfs_is_data_reloc_root(bi->root)) {
+			; /* csum items have already been cloned */
+		} else {
+			ret = btrfs_wq_submit_bio(inode, bio,
+					mirror_num, bio_flags, 0,
+					btrfs_submit_bio_start);
+			goto out;
 		}
-		goto mapit;
-	} else if (async && !skip_sum) {
-		/* csum items have already been cloned */
-		if (btrfs_is_data_reloc_root(root))
-			goto mapit;
-		/* we're doing a write, do the async checksumming */
-		ret = btrfs_wq_submit_bio(inode, bio, mirror_num, bio_flags,
-					  0, btrfs_submit_bio_start);
+	}
+	ret = btrfs_map_bio(fs_info, bio, mirror_num);
+out:
+	if (ret) {
+		bio->bi_status = ret;
+		bio_endio(bio);
+	}
+}
+
+void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
+			   int mirror_num, unsigned long bio_flags)
+{
+	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	blk_status_t ret;
+
+	ret = btrfs_bio_wq_end_io(fs_info, bio,
+			btrfs_is_free_space_inode(BTRFS_I(inode)) ?
+			BTRFS_WQ_ENDIO_FREE_SPACE : BTRFS_WQ_ENDIO_DATA);
+	if (ret)
 		goto out;
-	} else if (!skip_sum) {
-		ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, (u64)-1, false);
-		if (ret)
-			goto out;
+
+	if (bio_flags & EXTENT_BIO_COMPRESSED) {
+		/*
+		 * btrfs_submit_compressed_read will handle completing the bio
+		 * if there were any errors, so just return here.
+		 */
+		btrfs_submit_compressed_read(inode, bio, mirror_num);
+		return;
 	}
 
-mapit:
+	/*
+	 * Lookup bio sums does extra checks around whether we need to csum or
+	 * not, which is why we ignore skip_sum here.
+	 */
+	ret = btrfs_lookup_bio_sums(inode, bio, NULL);
+	if (ret)
+		goto out;
 	ret = btrfs_map_bio(fs_info, bio, mirror_num);
-
 out:
 	if (ret) {
 		bio->bi_status = ret;
@@ -7909,7 +7901,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
 		goto map;
 
 	if (write) {
-		/* check btrfs_submit_data_bio() for async submit rules */
+		/* check btrfs_submit_data_write_bio() for async submit rules */
 		if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers))
 			return btrfs_wq_submit_bio(inode, bio, 0, 0,
 					file_offset,
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25 17:17                     ` Christoph Hellwig
@ 2022-04-26  1:24                       ` Qu Wenruo
  0 siblings, 0 replies; 31+ messages in thread
From: Qu Wenruo @ 2022-04-26  1:24 UTC (permalink / raw)
  To: Christoph Hellwig, Qu Wenruo
  Cc: Josef Bacik, David Sterba, Naohiro Aota, linux-btrfs



On 2022/4/26 01:17, Christoph Hellwig wrote:
> On Mon, Apr 25, 2022 at 07:40:40PM +0800, Qu Wenruo wrote:
>> That's only for RAID56, aren't you going to remove btrfs_bio usage
>> completely for all write (including buffered, non-compressing write)?
>
> There are just two uses of bbio->iter in the current btrfs for-net
> tree.  One is index_one_bio, which is removed by the patch I posted,
> and the other one is btrfs_check_read_dio_bio, which is clearly read
> specific.
>
> FYI, this is the next batch that's currently being tested:
>
> http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/btrfs-bio-cleanup-part4

My bad, I forgot the fact that, my *READ* repair is only for read, no
need to use btrfs_bio for write time.

So your cleanup is completely fine, my head just short-circuited last night.

Thanks,
Qu

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25 11:40                   ` Qu Wenruo
  2022-04-25 11:43                     ` Qu Wenruo
@ 2022-04-25 17:17                     ` Christoph Hellwig
  2022-04-26  1:24                       ` Qu Wenruo
  1 sibling, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2022-04-25 17:17 UTC (permalink / raw)
  To: Qu Wenruo
  Cc: Christoph Hellwig, Qu Wenruo, Josef Bacik, David Sterba,
	Naohiro Aota, linux-btrfs

On Mon, Apr 25, 2022 at 07:40:40PM +0800, Qu Wenruo wrote:
> That's only for RAID56, aren't you going to remove btrfs_bio usage 
> completely for all write (including buffered, non-compressing write)?

There are just two uses of bbio->iter in the current btrfs for-net
tree.  One is index_one_bio, which is removed by the patch I posted,
and the other one is btrfs_check_read_dio_bio, which is clearly read 
specific.

FYI, this is the next batch that's currently being tested:

http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/btrfs-bio-cleanup-part4

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25 11:40                   ` Qu Wenruo
@ 2022-04-25 11:43                     ` Qu Wenruo
  2022-04-25 17:17                     ` Christoph Hellwig
  1 sibling, 0 replies; 31+ messages in thread
From: Qu Wenruo @ 2022-04-25 11:43 UTC (permalink / raw)
  To: Qu Wenruo, Christoph Hellwig
  Cc: Josef Bacik, David Sterba, Naohiro Aota, linux-btrfs



On 2022/4/25 19:40, Qu Wenruo wrote:
>
>
> On 2022/4/25 19:34, Christoph Hellwig wrote:
>> On Mon, Apr 25, 2022 at 07:31:08PM +0800, Qu Wenruo wrote:
>>> Then it comes against the btrfs read time repair.
>>>
>>> Currently we split bio to make sure we never need to split bio at
>>> btrfs_map_bio() time.
>>>
>>> But this is against common layer separation.
>>>
>>> And we really want the ability to read a partially corrupted bio (some
>>> part matches csum, some doesn't), no matter if the bio is cloned or not.
>>>
>>> Especially, we already have cloned bio which needs repair (for dio).
>>
>> I have a barely working version based on your patches to split the
>> bio in btrfs_bio_map that solves this problem.  But the next step
>> only removed the save iter for writes, where the only user is
>> index_one_bio.  And the fix for that is pretty trivial :)
>
> That's only for RAID56, aren't you going to remove btrfs_bio usage
> completely for all write (including buffered, non-compressing write)?

Wait, are you going to use some methods like this to avoid
chained/cloned bio while still split the bio?

	page = grab_page_from_existing_bio();
	pgoff = grab_pgoff_from_existing_bio()
	new_bio = bio_alloc()
	bio_add_page(new_bio, page, sectorsize, pgoff);

So that you can create a regular new, non-cloned bio, but still using
the same page/pgoff from an existing bio...

Thanks,
Qu
>
> Thanks,
> Qu
>>
>> ---
>>  From c8fe61748ebc583a7f57c8e5de79f92428e5717c Mon Sep 17 00:00:00 2001
>> From: Christoph Hellwig <hch@lst.de>
>> Date: Mon, 25 Apr 2022 13:23:54 +0200
>> Subject: btrfs: stop looking at btrfs_bio->iter in index_one_bio
>>
>> All the bios that index_one_bio operates on are the bios submitted by the
>> upper layer.  These are never resubmitted to an actual device by the
>> raid56 code, and thus the iter never changes from the initial state.
>> Thus we can always just use bi_iter directly as it will be the same as
>> the saved copy.
>>
>> Signed-off-by: Christoph Hellwig <hch@lst.de>
>> ---
>>   fs/btrfs/raid56.c | 3 ---
>>   1 file changed, 3 deletions(-)
>>
>> diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
>> index 1a3c1a9b10d0b..8b40353bb89db 100644
>> --- a/fs/btrfs/raid56.c
>> +++ b/fs/btrfs/raid56.c
>> @@ -1218,9 +1218,6 @@ static void index_one_bio(struct btrfs_raid_bio
>> *rbio, struct bio *bio)
>>       u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
>>                rbio->bioc->raid_map[0];
>> -    if (bio_flagged(bio, BIO_CLONED))
>> -        bio->bi_iter = btrfs_bio(bio)->iter;
>> -
>>       bio_for_each_segment(bvec, bio, iter) {
>>           u32 bvec_offset;
>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25 11:34                 ` Christoph Hellwig
@ 2022-04-25 11:40                   ` Qu Wenruo
  2022-04-25 11:43                     ` Qu Wenruo
  2022-04-25 17:17                     ` Christoph Hellwig
  0 siblings, 2 replies; 31+ messages in thread
From: Qu Wenruo @ 2022-04-25 11:40 UTC (permalink / raw)
  To: Christoph Hellwig, Qu Wenruo
  Cc: Josef Bacik, David Sterba, Naohiro Aota, linux-btrfs



On 2022/4/25 19:34, Christoph Hellwig wrote:
> On Mon, Apr 25, 2022 at 07:31:08PM +0800, Qu Wenruo wrote:
>> Then it comes against the btrfs read time repair.
>>
>> Currently we split bio to make sure we never need to split bio at
>> btrfs_map_bio() time.
>>
>> But this is against common layer separation.
>>
>> And we really want the ability to read a partially corrupted bio (some
>> part matches csum, some doesn't), no matter if the bio is cloned or not.
>>
>> Especially, we already have cloned bio which needs repair (for dio).
> 
> I have a barely working version based on your patches to split the
> bio in btrfs_bio_map that solves this problem.  But the next step
> only removed the save iter for writes, where the only user is
> index_one_bio.  And the fix for that is pretty trivial :)

That's only for RAID56, aren't you going to remove btrfs_bio usage 
completely for all write (including buffered, non-compressing write)?

Thanks,
Qu
> 
> ---
>  From c8fe61748ebc583a7f57c8e5de79f92428e5717c Mon Sep 17 00:00:00 2001
> From: Christoph Hellwig <hch@lst.de>
> Date: Mon, 25 Apr 2022 13:23:54 +0200
> Subject: btrfs: stop looking at btrfs_bio->iter in index_one_bio
> 
> All the bios that index_one_bio operates on are the bios submitted by the
> upper layer.  These are never resubmitted to an actual device by the
> raid56 code, and thus the iter never changes from the initial state.
> Thus we can always just use bi_iter directly as it will be the same as
> the saved copy.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   fs/btrfs/raid56.c | 3 ---
>   1 file changed, 3 deletions(-)
> 
> diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
> index 1a3c1a9b10d0b..8b40353bb89db 100644
> --- a/fs/btrfs/raid56.c
> +++ b/fs/btrfs/raid56.c
> @@ -1218,9 +1218,6 @@ static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
>   	u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
>   		     rbio->bioc->raid_map[0];
>   
> -	if (bio_flagged(bio, BIO_CLONED))
> -		bio->bi_iter = btrfs_bio(bio)->iter;
> -
>   	bio_for_each_segment(bvec, bio, iter) {
>   		u32 bvec_offset;
>   


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25 11:31               ` Qu Wenruo
@ 2022-04-25 11:34                 ` Christoph Hellwig
  2022-04-25 11:40                   ` Qu Wenruo
  0 siblings, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2022-04-25 11:34 UTC (permalink / raw)
  To: Qu Wenruo
  Cc: Christoph Hellwig, Josef Bacik, David Sterba, Qu Wenruo,
	Naohiro Aota, linux-btrfs

On Mon, Apr 25, 2022 at 07:31:08PM +0800, Qu Wenruo wrote:
> Then it comes against the btrfs read time repair.
>
> Currently we split bio to make sure we never need to split bio at
> btrfs_map_bio() time.
>
> But this is against common layer separation.
>
> And we really want the ability to read a partially corrupted bio (some
> part matches csum, some doesn't), no matter if the bio is cloned or not.
>
> Especially, we already have cloned bio which needs repair (for dio).

I have a barely working version based on your patches to split the
bio in btrfs_bio_map that solves this problem.  But the next step
only removed the save iter for writes, where the only user is
index_one_bio.  And the fix for that is pretty trivial :)

---
From c8fe61748ebc583a7f57c8e5de79f92428e5717c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 25 Apr 2022 13:23:54 +0200
Subject: btrfs: stop looking at btrfs_bio->iter in index_one_bio

All the bios that index_one_bio operates on are the bios submitted by the
upper layer.  These are never resubmitted to an actual device by the
raid56 code, and thus the iter never changes from the initial state.
Thus we can always just use bi_iter directly as it will be the same as
the saved copy.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/raid56.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 1a3c1a9b10d0b..8b40353bb89db 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1218,9 +1218,6 @@ static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
 	u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
 		     rbio->bioc->raid_map[0];
 
-	if (bio_flagged(bio, BIO_CLONED))
-		bio->bi_iter = btrfs_bio(bio)->iter;
-
 	bio_for_each_segment(bvec, bio, iter) {
 		u32 bvec_offset;
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25 11:19             ` Christoph Hellwig
@ 2022-04-25 11:31               ` Qu Wenruo
  2022-04-25 11:34                 ` Christoph Hellwig
  0 siblings, 1 reply; 31+ messages in thread
From: Qu Wenruo @ 2022-04-25 11:31 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Josef Bacik, David Sterba, Qu Wenruo, Naohiro Aota, linux-btrfs



On 2022/4/25 19:19, Christoph Hellwig wrote:
> On Mon, Apr 25, 2022 at 07:16:21PM +0800, Qu Wenruo wrote:
>> I'm wondering how would you iterate the bvec of a cloned bio then.
>
> We just stop doing that.  All iteration should be on the originally
> constructed bio.  I've fixed the two places that are doing that right
> in work in progress patches.

Then it comes against the btrfs read time repair.

Currently we split bio to make sure we never need to split bio at
btrfs_map_bio() time.

But this is against common layer separation.

And we really want the ability to read a partially corrupted bio (some
part matches csum, some doesn't), no matter if the bio is cloned or not.

Especially, we already have cloned bio which needs repair (for dio).

Thanks,
Qu

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25 11:16           ` Qu Wenruo
@ 2022-04-25 11:19             ` Christoph Hellwig
  2022-04-25 11:31               ` Qu Wenruo
  0 siblings, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2022-04-25 11:19 UTC (permalink / raw)
  To: Qu Wenruo
  Cc: Christoph Hellwig, Josef Bacik, David Sterba, Qu Wenruo,
	Naohiro Aota, linux-btrfs

On Mon, Apr 25, 2022 at 07:16:21PM +0800, Qu Wenruo wrote:
> I'm wondering how would you iterate the bvec of a cloned bio then.

We just stop doing that.  All iteration should be on the originally
constructed bio.  I've fixed the two places that are doing that right
in work in progress patches.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25 11:09         ` Christoph Hellwig
@ 2022-04-25 11:16           ` Qu Wenruo
  2022-04-25 11:19             ` Christoph Hellwig
  0 siblings, 1 reply; 31+ messages in thread
From: Qu Wenruo @ 2022-04-25 11:16 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Josef Bacik, David Sterba, Qu Wenruo, Naohiro Aota, linux-btrfs



On 2022/4/25 19:09, Christoph Hellwig wrote:
> On Mon, Apr 25, 2022 at 05:37:40PM +0800, Qu Wenruo wrote:
>> Oh, please don't completely get rid of btrfs_bio, even just for writes.
>>
>> The btrfs_bio::iter is pretty important for us to grab the original
>> logical bytenr of a bio.
>> As bio::bi_iter can be modified by lower level (does dm modifies it
>> too?), or btrfs itself.
>>
>> In fact, my incoming updated btrfs repair repair code heavily rely on
>> btrfs_bio::iter, both read and write, to grab the original logical
>> bytenr of the bio.
>
> Then it's doing the wrong thing.  I actually have a series to remove
> it entirely.

I'm wondering how would you iterate the bvec of a cloned bio then.

Regular bio_for_each_segment_all() will just trigger warning on cloned bio.
If you go something like chained bio, then any error would mark the
whole range error, and in fact my repair work is going to make
read-repair work with chained bio, thus I have to directly iterate
cloned bio anyway.

Just bio_for_each_segment()? That bi_iter is no longer reliable, just
btrfs_map_block() can modify it.

Anyway, what I really need is just a proper way to:

- Iterate bvecs of a clone bio
- Grab the original logical bytenr from a bio

If you can do that with extra members, I'm fine with alternative ways.

Thanks,
Qu

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25  9:37       ` Qu Wenruo
@ 2022-04-25 11:09         ` Christoph Hellwig
  2022-04-25 11:16           ` Qu Wenruo
  0 siblings, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2022-04-25 11:09 UTC (permalink / raw)
  To: Qu Wenruo
  Cc: Christoph Hellwig, Josef Bacik, David Sterba, Qu Wenruo,
	Naohiro Aota, linux-btrfs

On Mon, Apr 25, 2022 at 05:37:40PM +0800, Qu Wenruo wrote:
> Oh, please don't completely get rid of btrfs_bio, even just for writes.
>
> The btrfs_bio::iter is pretty important for us to grab the original
> logical bytenr of a bio.
> As bio::bi_iter can be modified by lower level (does dm modifies it
> too?), or btrfs itself.
>
> In fact, my incoming updated btrfs repair repair code heavily rely on
> btrfs_bio::iter, both read and write, to grab the original logical
> bytenr of the bio.

Then it's doing the wrong thing.  I actually have a series to remove
it entirely.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25  9:19     ` Christoph Hellwig
@ 2022-04-25  9:37       ` Qu Wenruo
  2022-04-25 11:09         ` Christoph Hellwig
  0 siblings, 1 reply; 31+ messages in thread
From: Qu Wenruo @ 2022-04-25  9:37 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Josef Bacik, David Sterba, Qu Wenruo, Naohiro Aota, linux-btrfs



On 2022/4/25 17:19, Christoph Hellwig wrote:
> On Mon, Apr 25, 2022 at 05:11:15PM +0800, Qu Wenruo wrote:
>>
>>
>> On 2022/4/25 15:54, Christoph Hellwig wrote:
>>> Split btrfs_submit_data_bio into one helper for reads and one for writes.
>>
>> If we're splitting the bio mapping, wouldn't it be better to split by
>> read/write first, then by data/meta?
>>
>> Especially for all read bios, we use workqueue to defer to a less strict
>> context, which is unrelated to data/metadata.
>
> Splitting the read vs write handling entirely and not allocating a
> btrfs_bio for writes will be the next series after this one.  You're
> getting ahead of me :)

Oh, please don't completely get rid of btrfs_bio, even just for writes.

The btrfs_bio::iter is pretty important for us to grab the original
logical bytenr of a bio.
As bio::bi_iter can be modified by lower level (does dm modifies it
too?), or btrfs itself.

In fact, my incoming updated btrfs repair repair code heavily rely on
btrfs_bio::iter, both read and write, to grab the original logical
bytenr of the bio.

Thanks,
Qu

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25  9:11   ` Qu Wenruo
@ 2022-04-25  9:19     ` Christoph Hellwig
  2022-04-25  9:37       ` Qu Wenruo
  0 siblings, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2022-04-25  9:19 UTC (permalink / raw)
  To: Qu Wenruo
  Cc: Christoph Hellwig, Josef Bacik, David Sterba, Qu Wenruo,
	Naohiro Aota, linux-btrfs

On Mon, Apr 25, 2022 at 05:11:15PM +0800, Qu Wenruo wrote:
>
>
> On 2022/4/25 15:54, Christoph Hellwig wrote:
>> Split btrfs_submit_data_bio into one helper for reads and one for writes.
>
> If we're splitting the bio mapping, wouldn't it be better to split by
> read/write first, then by data/meta?
>
> Especially for all read bios, we use workqueue to defer to a less strict
> context, which is unrelated to data/metadata.

Splitting the read vs write handling entirely and not allocating a
btrfs_bio for writes will be the next series after this one.  You're
getting ahead of me :)

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25  7:54 ` [PATCH 03/10] btrfs: split btrfs_submit_data_bio Christoph Hellwig
@ 2022-04-25  9:11   ` Qu Wenruo
  2022-04-25  9:19     ` Christoph Hellwig
  0 siblings, 1 reply; 31+ messages in thread
From: Qu Wenruo @ 2022-04-25  9:11 UTC (permalink / raw)
  To: Christoph Hellwig, Josef Bacik, David Sterba, Qu Wenruo
  Cc: Naohiro Aota, linux-btrfs



On 2022/4/25 15:54, Christoph Hellwig wrote:
> Split btrfs_submit_data_bio into one helper for reads and one for writes.

If we're splitting the bio mapping, wouldn't it be better to split by
read/write first, then by data/meta?

Especially for all read bios, we use workqueue to defer to a less strict
context, which is unrelated to data/metadata.

Thanks,
Qu
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   fs/btrfs/ctree.h     |   6 +-
>   fs/btrfs/extent_io.c |  12 ++--
>   fs/btrfs/inode.c     | 131 ++++++++++++++++++++-----------------------
>   3 files changed, 73 insertions(+), 76 deletions(-)
>
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index ec8487e119949..ab9a0cfed7bb0 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -3250,8 +3250,10 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
>   u64 btrfs_file_extent_end(const struct btrfs_path *path);
>
>   /* inode.c */
> -void btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
> -			   int mirror_num, unsigned long bio_flags);
> +void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio,
> +		int mirror_num, unsigned long bio_flags);
> +void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
> +		int mirror_num, unsigned long bio_flags);
>   unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
>   				    u32 bio_offset, struct page *page,
>   				    u64 start, u64 end);
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index f9d6dd310c42b..80b4482c477c6 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -186,11 +186,15 @@ static void submit_one_bio(struct bio *bio, int mirror_num, unsigned long bio_fl
>   	/* Caller should ensure the bio has at least some range added */
>   	ASSERT(bio->bi_iter.bi_size);
>
> -	if (is_data_inode(tree->private_data))
> -		btrfs_submit_data_bio(tree->private_data, bio, mirror_num,
> +	if (!is_data_inode(tree->private_data))
> +		btrfs_submit_metadata_bio(tree->private_data, bio, mirror_num);
> +	else if (btrfs_op(bio) == BTRFS_MAP_WRITE)
> +		btrfs_submit_data_write_bio(tree->private_data, bio, mirror_num,
>   					    bio_flags);
>   	else
> -		btrfs_submit_metadata_bio(tree->private_data, bio, mirror_num);
> +		btrfs_submit_data_read_bio(tree->private_data, bio, mirror_num,
> +					    bio_flags);
> +
>   	/*
>   	 * Above submission hooks will handle the error by ending the bio,
>   	 * which will do the cleanup properly.  So here we should not return
> @@ -2773,7 +2777,7 @@ static blk_status_t submit_data_read_repair(struct inode *inode,
>   		ret = btrfs_repair_one_sector(inode, failed_bio,
>   				bio_offset + offset,
>   				page, pgoff + offset, start + offset,
> -				failed_mirror, btrfs_submit_data_bio);
> +				failed_mirror, btrfs_submit_data_read_bio);
>   		if (!ret) {
>   			/*
>   			 * We have submitted the read repair, the page release
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index b188f724eff2d..4429d831793d5 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -2552,91 +2552,82 @@ static blk_status_t extract_ordered_extent(struct btrfs_inode *inode,
>   	return errno_to_blk_status(ret);
>   }
>
> -/*
> - * extent_io.c submission hook. This does the right thing for csum calculation
> - * on write, or reading the csums from the tree before a read.
> - *
> - * Rules about async/sync submit,
> - * a) read:				sync submit
> - *
> - * b) write without checksum:		sync submit
> - *
> - * c) write with checksum:
> - *    c-1) if bio is issued by fsync:	sync submit
> - *         (sync_writers != 0)
> - *
> - *    c-2) if root is reloc root:	sync submit
> - *         (only in case of buffered IO)
> - *
> - *    c-3) otherwise:			async submit
> - */
> -void btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
> +void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio,
>   			   int mirror_num, unsigned long bio_flags)
>   {
>   	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> -	struct btrfs_root *root = BTRFS_I(inode)->root;
> -	enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
> -	blk_status_t ret = 0;
> -	int skip_sum;
> -	int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
> -
> -	skip_sum = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
> -		test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
> -
> -	if (btrfs_is_free_space_inode(BTRFS_I(inode)))
> -		metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
> +	struct btrfs_inode *bi = BTRFS_I(inode);
> +	blk_status_t ret;
>
>   	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
> -		struct page *page = bio_first_bvec_all(bio)->bv_page;
> -		loff_t file_offset = page_offset(page);
> -
> -		ret = extract_ordered_extent(BTRFS_I(inode), bio, file_offset);
> +		ret = extract_ordered_extent(bi, bio,
> +				page_offset(bio_first_bvec_all(bio)->bv_page));
>   		if (ret)
>   			goto out;
>   	}
>
> -	if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
> -		ret = btrfs_bio_wq_end_io(fs_info, bio, metadata);
> -		if (ret)
> -			goto out;
> -
> -		if (bio_flags & EXTENT_BIO_COMPRESSED) {
> -			/*
> -			 * btrfs_submit_compressed_read will handle completing
> -			 * the bio if there were any errors, so just return
> -			 * here.
> -			 */
> -			btrfs_submit_compressed_read(inode, bio, mirror_num,
> -						     bio_flags);
> -			return;
> -		} else {
> -			/*
> -			 * Lookup bio sums does extra checks around whether we
> -			 * need to csum or not, which is why we ignore skip_sum
> -			 * here.
> -			 */
> -			ret = btrfs_lookup_bio_sums(inode, bio, NULL);
> +	/*
> +	 * Rules for async/sync submit:
> +	 *   a) write without checksum:			sync submit
> +	 *   b) write with checksum:
> +	 *      b-1) if bio is issued by fsync:		sync submit
> +	 *           (sync_writers != 0)
> +	 *      b-2) if root is reloc root:		sync submit
> +	 *           (only in case of buffered IO)
> +	 *      b-3) otherwise:				async submit
> +	 */
> +	if (!(bi->flags & BTRFS_INODE_NODATASUM) &&
> +	    !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) {
> +		if (atomic_read(&bi->sync_writers)) {
> +			ret = btrfs_csum_one_bio(bi, bio, (u64)-1, false);
>   			if (ret)
>   				goto out;
> +		} else if (btrfs_is_data_reloc_root(bi->root)) {
> +			; /* csum items have already been cloned */
> +		} else {
> +			ret = btrfs_wq_submit_bio(inode, bio,
> +					mirror_num, bio_flags, 0,
> +					btrfs_submit_bio_start);
> +			goto out;
>   		}
> -		goto mapit;
> -	} else if (async && !skip_sum) {
> -		/* csum items have already been cloned */
> -		if (btrfs_is_data_reloc_root(root))
> -			goto mapit;
> -		/* we're doing a write, do the async checksumming */
> -		ret = btrfs_wq_submit_bio(inode, bio, mirror_num, bio_flags,
> -					  0, btrfs_submit_bio_start);
> +	}
> +	ret = btrfs_map_bio(fs_info, bio, mirror_num);
> +out:
> +	if (ret) {
> +		bio->bi_status = ret;
> +		bio_endio(bio);
> +	}
> +}
> +
> +void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
> +			   int mirror_num, unsigned long bio_flags)
> +{
> +	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> +	blk_status_t ret;
> +
> +	ret = btrfs_bio_wq_end_io(fs_info, bio,
> +			btrfs_is_free_space_inode(BTRFS_I(inode)) ?
> +			BTRFS_WQ_ENDIO_FREE_SPACE : BTRFS_WQ_ENDIO_DATA);
> +	if (ret)
>   		goto out;
> -	} else if (!skip_sum) {
> -		ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, (u64)-1, false);
> -		if (ret)
> -			goto out;
> +
> +	if (bio_flags & EXTENT_BIO_COMPRESSED) {
> +		/*
> +		 * btrfs_submit_compressed_read will handle completing the bio
> +		 * if there were any errors, so just return here.
> +		 */
> +		btrfs_submit_compressed_read(inode, bio, mirror_num, bio_flags);
> +		return;
>   	}
>
> -mapit:
> +	/*
> +	 * Lookup bio sums does extra checks around whether we need to csum or
> +	 * not, which is why we ignore skip_sum here.
> +	 */
> +	ret = btrfs_lookup_bio_sums(inode, bio, NULL);
> +	if (ret)
> +		goto out;
>   	ret = btrfs_map_bio(fs_info, bio, mirror_num);
> -
>   out:
>   	if (ret) {
>   		bio->bi_status = ret;
> @@ -7909,7 +7900,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
>   		goto map;
>
>   	if (write) {
> -		/* check btrfs_submit_data_bio() for async submit rules */
> +		/* check btrfs_submit_data_write_bio() for async submit rules */
>   		if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers))
>   			return btrfs_wq_submit_bio(inode, bio, 0, 0,
>   					file_offset,

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25  7:54 cleanup btrfs bio handling, part 2 Christoph Hellwig
@ 2022-04-25  7:54 ` Christoph Hellwig
  2022-04-25  9:11   ` Qu Wenruo
  0 siblings, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2022-04-25  7:54 UTC (permalink / raw)
  To: Josef Bacik, David Sterba, Qu Wenruo; +Cc: Naohiro Aota, linux-btrfs

Split btrfs_submit_data_bio into one helper for reads and one for writes.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/ctree.h     |   6 +-
 fs/btrfs/extent_io.c |  12 ++--
 fs/btrfs/inode.c     | 131 ++++++++++++++++++++-----------------------
 3 files changed, 73 insertions(+), 76 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ec8487e119949..ab9a0cfed7bb0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3250,8 +3250,10 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
 u64 btrfs_file_extent_end(const struct btrfs_path *path);
 
 /* inode.c */
-void btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
-			   int mirror_num, unsigned long bio_flags);
+void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio,
+		int mirror_num, unsigned long bio_flags);
+void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
+		int mirror_num, unsigned long bio_flags);
 unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
 				    u32 bio_offset, struct page *page,
 				    u64 start, u64 end);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f9d6dd310c42b..80b4482c477c6 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -186,11 +186,15 @@ static void submit_one_bio(struct bio *bio, int mirror_num, unsigned long bio_fl
 	/* Caller should ensure the bio has at least some range added */
 	ASSERT(bio->bi_iter.bi_size);
 
-	if (is_data_inode(tree->private_data))
-		btrfs_submit_data_bio(tree->private_data, bio, mirror_num,
+	if (!is_data_inode(tree->private_data))
+		btrfs_submit_metadata_bio(tree->private_data, bio, mirror_num);
+	else if (btrfs_op(bio) == BTRFS_MAP_WRITE)
+		btrfs_submit_data_write_bio(tree->private_data, bio, mirror_num,
 					    bio_flags);
 	else
-		btrfs_submit_metadata_bio(tree->private_data, bio, mirror_num);
+		btrfs_submit_data_read_bio(tree->private_data, bio, mirror_num,
+					    bio_flags);
+
 	/*
 	 * Above submission hooks will handle the error by ending the bio,
 	 * which will do the cleanup properly.  So here we should not return
@@ -2773,7 +2777,7 @@ static blk_status_t submit_data_read_repair(struct inode *inode,
 		ret = btrfs_repair_one_sector(inode, failed_bio,
 				bio_offset + offset,
 				page, pgoff + offset, start + offset,
-				failed_mirror, btrfs_submit_data_bio);
+				failed_mirror, btrfs_submit_data_read_bio);
 		if (!ret) {
 			/*
 			 * We have submitted the read repair, the page release
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b188f724eff2d..4429d831793d5 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2552,91 +2552,82 @@ static blk_status_t extract_ordered_extent(struct btrfs_inode *inode,
 	return errno_to_blk_status(ret);
 }
 
-/*
- * extent_io.c submission hook. This does the right thing for csum calculation
- * on write, or reading the csums from the tree before a read.
- *
- * Rules about async/sync submit,
- * a) read:				sync submit
- *
- * b) write without checksum:		sync submit
- *
- * c) write with checksum:
- *    c-1) if bio is issued by fsync:	sync submit
- *         (sync_writers != 0)
- *
- *    c-2) if root is reloc root:	sync submit
- *         (only in case of buffered IO)
- *
- *    c-3) otherwise:			async submit
- */
-void btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
+void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio,
 			   int mirror_num, unsigned long bio_flags)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
-	blk_status_t ret = 0;
-	int skip_sum;
-	int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
-
-	skip_sum = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
-		test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
-
-	if (btrfs_is_free_space_inode(BTRFS_I(inode)))
-		metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
+	struct btrfs_inode *bi = BTRFS_I(inode);
+	blk_status_t ret;
 
 	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
-		struct page *page = bio_first_bvec_all(bio)->bv_page;
-		loff_t file_offset = page_offset(page);
-
-		ret = extract_ordered_extent(BTRFS_I(inode), bio, file_offset);
+		ret = extract_ordered_extent(bi, bio,
+				page_offset(bio_first_bvec_all(bio)->bv_page));
 		if (ret)
 			goto out;
 	}
 
-	if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
-		ret = btrfs_bio_wq_end_io(fs_info, bio, metadata);
-		if (ret)
-			goto out;
-
-		if (bio_flags & EXTENT_BIO_COMPRESSED) {
-			/*
-			 * btrfs_submit_compressed_read will handle completing
-			 * the bio if there were any errors, so just return
-			 * here.
-			 */
-			btrfs_submit_compressed_read(inode, bio, mirror_num,
-						     bio_flags);
-			return;
-		} else {
-			/*
-			 * Lookup bio sums does extra checks around whether we
-			 * need to csum or not, which is why we ignore skip_sum
-			 * here.
-			 */
-			ret = btrfs_lookup_bio_sums(inode, bio, NULL);
+	/*
+	 * Rules for async/sync submit:
+	 *   a) write without checksum:			sync submit
+	 *   b) write with checksum:
+	 *      b-1) if bio is issued by fsync:		sync submit
+	 *           (sync_writers != 0)
+	 *      b-2) if root is reloc root:		sync submit
+	 *           (only in case of buffered IO)
+	 *      b-3) otherwise:				async submit
+	 */
+	if (!(bi->flags & BTRFS_INODE_NODATASUM) &&
+	    !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) {
+		if (atomic_read(&bi->sync_writers)) {
+			ret = btrfs_csum_one_bio(bi, bio, (u64)-1, false);
 			if (ret)
 				goto out;
+		} else if (btrfs_is_data_reloc_root(bi->root)) {
+			; /* csum items have already been cloned */
+		} else {
+			ret = btrfs_wq_submit_bio(inode, bio,
+					mirror_num, bio_flags, 0,
+					btrfs_submit_bio_start);
+			goto out;
 		}
-		goto mapit;
-	} else if (async && !skip_sum) {
-		/* csum items have already been cloned */
-		if (btrfs_is_data_reloc_root(root))
-			goto mapit;
-		/* we're doing a write, do the async checksumming */
-		ret = btrfs_wq_submit_bio(inode, bio, mirror_num, bio_flags,
-					  0, btrfs_submit_bio_start);
+	}
+	ret = btrfs_map_bio(fs_info, bio, mirror_num);
+out:
+	if (ret) {
+		bio->bi_status = ret;
+		bio_endio(bio);
+	}
+}
+
+void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
+			   int mirror_num, unsigned long bio_flags)
+{
+	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	blk_status_t ret;
+
+	ret = btrfs_bio_wq_end_io(fs_info, bio,
+			btrfs_is_free_space_inode(BTRFS_I(inode)) ?
+			BTRFS_WQ_ENDIO_FREE_SPACE : BTRFS_WQ_ENDIO_DATA);
+	if (ret)
 		goto out;
-	} else if (!skip_sum) {
-		ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, (u64)-1, false);
-		if (ret)
-			goto out;
+
+	if (bio_flags & EXTENT_BIO_COMPRESSED) {
+		/*
+		 * btrfs_submit_compressed_read will handle completing the bio
+		 * if there were any errors, so just return here.
+		 */
+		btrfs_submit_compressed_read(inode, bio, mirror_num, bio_flags);
+		return;
 	}
 
-mapit:
+	/*
+	 * Lookup bio sums does extra checks around whether we need to csum or
+	 * not, which is why we ignore skip_sum here.
+	 */
+	ret = btrfs_lookup_bio_sums(inode, bio, NULL);
+	if (ret)
+		goto out;
 	ret = btrfs_map_bio(fs_info, bio, mirror_num);
-
 out:
 	if (ret) {
 		bio->bi_status = ret;
@@ -7909,7 +7900,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
 		goto map;
 
 	if (write) {
-		/* check btrfs_submit_data_bio() for async submit rules */
+		/* check btrfs_submit_data_write_bio() for async submit rules */
 		if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers))
 			return btrfs_wq_submit_bio(inode, bio, 0, 0,
 					file_offset,
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2022-06-03 16:51 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-26  7:36 cleanup btrfs bio handling, part 2 v4 Christoph Hellwig
2022-05-26  7:36 ` [PATCH 01/10] btrfs: move more work into btrfs_end_bioc Christoph Hellwig
2022-05-26  7:36 ` [PATCH 02/10] btrfs: cleanup btrfs_submit_dio_bio Christoph Hellwig
2022-06-01 19:32   ` David Sterba
2022-05-26  7:36 ` [PATCH 03/10] btrfs: split btrfs_submit_data_bio Christoph Hellwig
2022-05-26  7:36 ` [PATCH 04/10] btrfs: defer I/O completion based on the btrfs_raid_bio Christoph Hellwig
2022-05-26  7:36 ` [PATCH 05/10] btrfs: don't double-defer bio completions for compressed reads Christoph Hellwig
2022-05-26  7:36 ` [PATCH 06/10] btrfs: don't use btrfs_bio_wq_end_io for compressed writes Christoph Hellwig
2022-06-01 19:33   ` David Sterba
2022-05-26  7:36 ` [PATCH 07/10] btrfs: centralize setting REQ_META Christoph Hellwig
2022-05-26  7:36 ` [PATCH 08/10] btrfs: remove btrfs_end_io_wq Christoph Hellwig
2022-05-26  7:36 ` [PATCH 09/10] btrfs: refactor btrfs_map_bio Christoph Hellwig
2022-06-01 19:36   ` David Sterba
2022-05-26  7:36 ` [PATCH 10/10] btrfs: do not allocate a btrfs_bio for low-level bios Christoph Hellwig
2022-06-01 19:39   ` David Sterba
2022-06-01 19:29 ` cleanup btrfs bio handling, part 2 v4 David Sterba
2022-06-03 16:47   ` David Sterba
  -- strict thread matches above, loose matches on Subject: below --
2022-04-29 14:30 cleanup btrfs bio handling, part 2 v2 Christoph Hellwig
2022-04-29 14:30 ` [PATCH 03/10] btrfs: split btrfs_submit_data_bio Christoph Hellwig
2022-04-25  7:54 cleanup btrfs bio handling, part 2 Christoph Hellwig
2022-04-25  7:54 ` [PATCH 03/10] btrfs: split btrfs_submit_data_bio Christoph Hellwig
2022-04-25  9:11   ` Qu Wenruo
2022-04-25  9:19     ` Christoph Hellwig
2022-04-25  9:37       ` Qu Wenruo
2022-04-25 11:09         ` Christoph Hellwig
2022-04-25 11:16           ` Qu Wenruo
2022-04-25 11:19             ` Christoph Hellwig
2022-04-25 11:31               ` Qu Wenruo
2022-04-25 11:34                 ` Christoph Hellwig
2022-04-25 11:40                   ` Qu Wenruo
2022-04-25 11:43                     ` Qu Wenruo
2022-04-25 17:17                     ` Christoph Hellwig
2022-04-26  1:24                       ` Qu Wenruo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.