All of lore.kernel.org
 help / color / mirror / Atom feed
* cleanup btrfs bio handling, part 2
@ 2022-04-25  7:54 Christoph Hellwig
  2022-04-25  7:54 ` [PATCH 01/10] btrfs: move more work into btrfs_end_bioc Christoph Hellwig
                   ` (9 more replies)
  0 siblings, 10 replies; 32+ messages in thread
From: Christoph Hellwig @ 2022-04-25  7:54 UTC (permalink / raw)
  To: Josef Bacik, David Sterba, Qu Wenruo; +Cc: Naohiro Aota, linux-btrfs

Hi all,

this series removes the need to allocate a separate object for I/O
completions for all read and some write I/Os, and reduces the memory
usage of the low-level bios cloned by btrfs_map_bio by using plain bios
instead of the much larger btrfs_bio.

Note that this series only applies to the for-next tree, as the misc-next
tree seems to miss the workqueue cleanup series.

Diffstat:
 compression.c |   41 +++++------
 compression.h |    7 +-
 ctree.h       |   14 ++--
 disk-io.c     |  144 +++--------------------------------------
 disk-io.h     |   11 ---
 extent_io.c   |   33 +++------
 extent_io.h   |    1 
 inode.c       |  162 ++++++++++++++++++----------------------------
 raid56.c      |  111 ++++++++++++-------------------
 super.c       |   13 ---
 volumes.c     |  203 ++++++++++++++++++++++++++++------------------------------
 volumes.h     |    8 ++
 12 files changed, 271 insertions(+), 477 deletions(-)

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH 01/10] btrfs: move more work into btrfs_end_bioc
  2022-04-25  7:54 cleanup btrfs bio handling, part 2 Christoph Hellwig
@ 2022-04-25  7:54 ` Christoph Hellwig
  2022-04-26  7:19   ` Johannes Thumshirn
  2022-04-25  7:54 ` [PATCH 02/10] btrfs: cleanup btrfs_submit_dio_bio Christoph Hellwig
                   ` (8 subsequent siblings)
  9 siblings, 1 reply; 32+ messages in thread
From: Christoph Hellwig @ 2022-04-25  7:54 UTC (permalink / raw)
  To: Josef Bacik, David Sterba, Qu Wenruo; +Cc: Naohiro Aota, linux-btrfs

Assign ->mirror_num and ->bi_status in btrfs_end_bioc instead of
duplicating the logic in the callers.  Also remove the bio argument as
it always must be bioc->orig_bio and the now pointless bioc_error that
did nothing but assign bi_sector to the same value just sampled in the
caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/volumes.c | 72 ++++++++++++++--------------------------------
 1 file changed, 22 insertions(+), 50 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 748614b00ffa2..aeacb87457687 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6621,19 +6621,29 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 	return __btrfs_map_block(fs_info, op, logical, length, bioc_ret, 0, 1);
 }
 
-static inline void btrfs_end_bioc(struct btrfs_io_context *bioc, struct bio *bio)
+static inline void btrfs_end_bioc(struct btrfs_io_context *bioc)
 {
-	bio->bi_private = bioc->private;
-	bio->bi_end_io = bioc->end_io;
-	bio_endio(bio);
+	struct bio *orig_bio = bioc->orig_bio;
 
+	btrfs_bio(orig_bio)->mirror_num = bioc->mirror_num;
+	orig_bio->bi_private = bioc->private;
+	orig_bio->bi_end_io = bioc->end_io;
+
+	/*
+	 * Only send an error to the higher layers if it is beyond the tolerance
+	 * threshold.
+	 */
+	if (atomic_read(&bioc->error) > bioc->max_errors)
+		orig_bio->bi_status = BLK_STS_IOERR;
+	else
+		orig_bio->bi_status = BLK_STS_OK;
+	bio_endio(orig_bio);
 	btrfs_put_bioc(bioc);
 }
 
 static void btrfs_end_bio(struct bio *bio)
 {
 	struct btrfs_io_context *bioc = bio->bi_private;
-	int is_orig_bio = 0;
 
 	if (bio->bi_status) {
 		atomic_inc(&bioc->error);
@@ -6654,35 +6664,12 @@ static void btrfs_end_bio(struct bio *bio)
 		}
 	}
 
-	if (bio == bioc->orig_bio)
-		is_orig_bio = 1;
+	if (bio != bioc->orig_bio)
+		bio_put(bio);
 
 	btrfs_bio_counter_dec(bioc->fs_info);
-
-	if (atomic_dec_and_test(&bioc->stripes_pending)) {
-		if (!is_orig_bio) {
-			bio_put(bio);
-			bio = bioc->orig_bio;
-		}
-
-		btrfs_bio(bio)->mirror_num = bioc->mirror_num;
-		/* only send an error to the higher layers if it is
-		 * beyond the tolerance of the btrfs bio
-		 */
-		if (atomic_read(&bioc->error) > bioc->max_errors) {
-			bio->bi_status = BLK_STS_IOERR;
-		} else {
-			/*
-			 * this bio is actually up to date, we didn't
-			 * go over the max number of errors
-			 */
-			bio->bi_status = BLK_STS_OK;
-		}
-
-		btrfs_end_bioc(bioc, bio);
-	} else if (!is_orig_bio) {
-		bio_put(bio);
-	}
+	if (atomic_dec_and_test(&bioc->stripes_pending))
+		btrfs_end_bioc(bioc);
 }
 
 static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio,
@@ -6720,23 +6707,6 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio,
 	submit_bio(bio);
 }
 
-static void bioc_error(struct btrfs_io_context *bioc, struct bio *bio, u64 logical)
-{
-	atomic_inc(&bioc->error);
-	if (atomic_dec_and_test(&bioc->stripes_pending)) {
-		/* Should be the original bio. */
-		WARN_ON(bio != bioc->orig_bio);
-
-		btrfs_bio(bio)->mirror_num = bioc->mirror_num;
-		bio->bi_iter.bi_sector = logical >> 9;
-		if (atomic_read(&bioc->error) > bioc->max_errors)
-			bio->bi_status = BLK_STS_IOERR;
-		else
-			bio->bi_status = BLK_STS_OK;
-		btrfs_end_bioc(bioc, bio);
-	}
-}
-
 blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 			   int mirror_num)
 {
@@ -6795,7 +6765,9 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 						   &dev->dev_state) ||
 		    (btrfs_op(first_bio) == BTRFS_MAP_WRITE &&
 		    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
-			bioc_error(bioc, first_bio, logical);
+			atomic_inc(&bioc->error);
+			if (atomic_dec_and_test(&bioc->stripes_pending))
+				btrfs_end_bioc(bioc);
 			continue;
 		}
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 02/10] btrfs: cleanup btrfs_submit_dio_bio
  2022-04-25  7:54 cleanup btrfs bio handling, part 2 Christoph Hellwig
  2022-04-25  7:54 ` [PATCH 01/10] btrfs: move more work into btrfs_end_bioc Christoph Hellwig
@ 2022-04-25  7:54 ` Christoph Hellwig
  2022-04-25  8:45   ` Qu Wenruo
  2022-04-26  7:21   ` Johannes Thumshirn
  2022-04-25  7:54 ` [PATCH 03/10] btrfs: split btrfs_submit_data_bio Christoph Hellwig
                   ` (7 subsequent siblings)
  9 siblings, 2 replies; 32+ messages in thread
From: Christoph Hellwig @ 2022-04-25  7:54 UTC (permalink / raw)
  To: Josef Bacik, David Sterba, Qu Wenruo; +Cc: Naohiro Aota, linux-btrfs

Remove the pointless goto just to return err and clean up the code flow
to be a little more straightforward.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/inode.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ef3bee1cbc6db..b188f724eff2d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7899,31 +7899,28 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
 	bool write = btrfs_op(bio) == BTRFS_MAP_WRITE;
 	blk_status_t ret;
 
-	/* Check btrfs_submit_bio_hook() for rules about async submit. */
-	if (async_submit)
-		async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
-
 	if (!write) {
 		ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
 		if (ret)
-			goto err;
+			return ret;
 	}
 
 	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
 		goto map;
 
-	if (write && async_submit) {
-		ret = btrfs_wq_submit_bio(inode, bio, 0, 0, file_offset,
-					  btrfs_submit_bio_start_direct_io);
-		goto err;
-	} else if (write) {
+	if (write) {
+		/* check btrfs_submit_data_bio() for async submit rules */
+		if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers))
+			return btrfs_wq_submit_bio(inode, bio, 0, 0,
+					file_offset,
+					btrfs_submit_bio_start_direct_io);
 		/*
 		 * If we aren't doing async submit, calculate the csum of the
 		 * bio now.
 		 */
 		ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, false);
 		if (ret)
-			goto err;
+			return ret;
 	} else {
 		u64 csum_offset;
 
@@ -7933,9 +7930,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
 		btrfs_bio(bio)->csum = dip->csums + csum_offset;
 	}
 map:
-	ret = btrfs_map_bio(fs_info, bio, 0);
-err:
-	return ret;
+	return btrfs_map_bio(fs_info, bio, 0);
 }
 
 /*
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25  7:54 cleanup btrfs bio handling, part 2 Christoph Hellwig
  2022-04-25  7:54 ` [PATCH 01/10] btrfs: move more work into btrfs_end_bioc Christoph Hellwig
  2022-04-25  7:54 ` [PATCH 02/10] btrfs: cleanup btrfs_submit_dio_bio Christoph Hellwig
@ 2022-04-25  7:54 ` Christoph Hellwig
  2022-04-25  9:11   ` Qu Wenruo
  2022-04-25  7:54 ` [PATCH 04/10] btrfs: don't double-defer bio completions for compressed reads Christoph Hellwig
                   ` (6 subsequent siblings)
  9 siblings, 1 reply; 32+ messages in thread
From: Christoph Hellwig @ 2022-04-25  7:54 UTC (permalink / raw)
  To: Josef Bacik, David Sterba, Qu Wenruo; +Cc: Naohiro Aota, linux-btrfs

Split btrfs_submit_data_bio into one helper for reads and one for writes.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/ctree.h     |   6 +-
 fs/btrfs/extent_io.c |  12 ++--
 fs/btrfs/inode.c     | 131 ++++++++++++++++++++-----------------------
 3 files changed, 73 insertions(+), 76 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ec8487e119949..ab9a0cfed7bb0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3250,8 +3250,10 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
 u64 btrfs_file_extent_end(const struct btrfs_path *path);
 
 /* inode.c */
-void btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
-			   int mirror_num, unsigned long bio_flags);
+void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio,
+		int mirror_num, unsigned long bio_flags);
+void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
+		int mirror_num, unsigned long bio_flags);
 unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
 				    u32 bio_offset, struct page *page,
 				    u64 start, u64 end);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f9d6dd310c42b..80b4482c477c6 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -186,11 +186,15 @@ static void submit_one_bio(struct bio *bio, int mirror_num, unsigned long bio_fl
 	/* Caller should ensure the bio has at least some range added */
 	ASSERT(bio->bi_iter.bi_size);
 
-	if (is_data_inode(tree->private_data))
-		btrfs_submit_data_bio(tree->private_data, bio, mirror_num,
+	if (!is_data_inode(tree->private_data))
+		btrfs_submit_metadata_bio(tree->private_data, bio, mirror_num);
+	else if (btrfs_op(bio) == BTRFS_MAP_WRITE)
+		btrfs_submit_data_write_bio(tree->private_data, bio, mirror_num,
 					    bio_flags);
 	else
-		btrfs_submit_metadata_bio(tree->private_data, bio, mirror_num);
+		btrfs_submit_data_read_bio(tree->private_data, bio, mirror_num,
+					    bio_flags);
+
 	/*
 	 * Above submission hooks will handle the error by ending the bio,
 	 * which will do the cleanup properly.  So here we should not return
@@ -2773,7 +2777,7 @@ static blk_status_t submit_data_read_repair(struct inode *inode,
 		ret = btrfs_repair_one_sector(inode, failed_bio,
 				bio_offset + offset,
 				page, pgoff + offset, start + offset,
-				failed_mirror, btrfs_submit_data_bio);
+				failed_mirror, btrfs_submit_data_read_bio);
 		if (!ret) {
 			/*
 			 * We have submitted the read repair, the page release
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b188f724eff2d..4429d831793d5 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2552,91 +2552,82 @@ static blk_status_t extract_ordered_extent(struct btrfs_inode *inode,
 	return errno_to_blk_status(ret);
 }
 
-/*
- * extent_io.c submission hook. This does the right thing for csum calculation
- * on write, or reading the csums from the tree before a read.
- *
- * Rules about async/sync submit,
- * a) read:				sync submit
- *
- * b) write without checksum:		sync submit
- *
- * c) write with checksum:
- *    c-1) if bio is issued by fsync:	sync submit
- *         (sync_writers != 0)
- *
- *    c-2) if root is reloc root:	sync submit
- *         (only in case of buffered IO)
- *
- *    c-3) otherwise:			async submit
- */
-void btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
+void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio,
 			   int mirror_num, unsigned long bio_flags)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
-	blk_status_t ret = 0;
-	int skip_sum;
-	int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
-
-	skip_sum = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
-		test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
-
-	if (btrfs_is_free_space_inode(BTRFS_I(inode)))
-		metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
+	struct btrfs_inode *bi = BTRFS_I(inode);
+	blk_status_t ret;
 
 	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
-		struct page *page = bio_first_bvec_all(bio)->bv_page;
-		loff_t file_offset = page_offset(page);
-
-		ret = extract_ordered_extent(BTRFS_I(inode), bio, file_offset);
+		ret = extract_ordered_extent(bi, bio,
+				page_offset(bio_first_bvec_all(bio)->bv_page));
 		if (ret)
 			goto out;
 	}
 
-	if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
-		ret = btrfs_bio_wq_end_io(fs_info, bio, metadata);
-		if (ret)
-			goto out;
-
-		if (bio_flags & EXTENT_BIO_COMPRESSED) {
-			/*
-			 * btrfs_submit_compressed_read will handle completing
-			 * the bio if there were any errors, so just return
-			 * here.
-			 */
-			btrfs_submit_compressed_read(inode, bio, mirror_num,
-						     bio_flags);
-			return;
-		} else {
-			/*
-			 * Lookup bio sums does extra checks around whether we
-			 * need to csum or not, which is why we ignore skip_sum
-			 * here.
-			 */
-			ret = btrfs_lookup_bio_sums(inode, bio, NULL);
+	/*
+	 * Rules for async/sync submit:
+	 *   a) write without checksum:			sync submit
+	 *   b) write with checksum:
+	 *      b-1) if bio is issued by fsync:		sync submit
+	 *           (sync_writers != 0)
+	 *      b-2) if root is reloc root:		sync submit
+	 *           (only in case of buffered IO)
+	 *      b-3) otherwise:				async submit
+	 */
+	if (!(bi->flags & BTRFS_INODE_NODATASUM) &&
+	    !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) {
+		if (atomic_read(&bi->sync_writers)) {
+			ret = btrfs_csum_one_bio(bi, bio, (u64)-1, false);
 			if (ret)
 				goto out;
+		} else if (btrfs_is_data_reloc_root(bi->root)) {
+			; /* csum items have already been cloned */
+		} else {
+			ret = btrfs_wq_submit_bio(inode, bio,
+					mirror_num, bio_flags, 0,
+					btrfs_submit_bio_start);
+			goto out;
 		}
-		goto mapit;
-	} else if (async && !skip_sum) {
-		/* csum items have already been cloned */
-		if (btrfs_is_data_reloc_root(root))
-			goto mapit;
-		/* we're doing a write, do the async checksumming */
-		ret = btrfs_wq_submit_bio(inode, bio, mirror_num, bio_flags,
-					  0, btrfs_submit_bio_start);
+	}
+	ret = btrfs_map_bio(fs_info, bio, mirror_num);
+out:
+	if (ret) {
+		bio->bi_status = ret;
+		bio_endio(bio);
+	}
+}
+
+void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
+			   int mirror_num, unsigned long bio_flags)
+{
+	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	blk_status_t ret;
+
+	ret = btrfs_bio_wq_end_io(fs_info, bio,
+			btrfs_is_free_space_inode(BTRFS_I(inode)) ?
+			BTRFS_WQ_ENDIO_FREE_SPACE : BTRFS_WQ_ENDIO_DATA);
+	if (ret)
 		goto out;
-	} else if (!skip_sum) {
-		ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, (u64)-1, false);
-		if (ret)
-			goto out;
+
+	if (bio_flags & EXTENT_BIO_COMPRESSED) {
+		/*
+		 * btrfs_submit_compressed_read will handle completing the bio
+		 * if there were any errors, so just return here.
+		 */
+		btrfs_submit_compressed_read(inode, bio, mirror_num, bio_flags);
+		return;
 	}
 
-mapit:
+	/*
+	 * Lookup bio sums does extra checks around whether we need to csum or
+	 * not, which is why we ignore skip_sum here.
+	 */
+	ret = btrfs_lookup_bio_sums(inode, bio, NULL);
+	if (ret)
+		goto out;
 	ret = btrfs_map_bio(fs_info, bio, mirror_num);
-
 out:
 	if (ret) {
 		bio->bi_status = ret;
@@ -7909,7 +7900,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
 		goto map;
 
 	if (write) {
-		/* check btrfs_submit_data_bio() for async submit rules */
+		/* check btrfs_submit_data_write_bio() for async submit rules */
 		if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers))
 			return btrfs_wq_submit_bio(inode, bio, 0, 0,
 					file_offset,
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 04/10] btrfs: don't double-defer bio completions for compressed reads
  2022-04-25  7:54 cleanup btrfs bio handling, part 2 Christoph Hellwig
                   ` (2 preceding siblings ...)
  2022-04-25  7:54 ` [PATCH 03/10] btrfs: split btrfs_submit_data_bio Christoph Hellwig
@ 2022-04-25  7:54 ` Christoph Hellwig
  2022-04-25  7:54 ` [PATCH 05/10] btrfs: defer I/O completion based on the btrfs_raid_bio Christoph Hellwig
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 32+ messages in thread
From: Christoph Hellwig @ 2022-04-25  7:54 UTC (permalink / raw)
  To: Josef Bacik, David Sterba, Qu Wenruo; +Cc: Naohiro Aota, linux-btrfs

The bio completion handler of the bio used for the compressed data is
already run in a workqueue using btrfs_bio_wq_end_io, so don't schedule
the completion of the original bio to the same workqueue again but just
execute it directly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/inode.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4429d831793d5..d7b04e06da825 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2605,12 +2605,6 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	blk_status_t ret;
 
-	ret = btrfs_bio_wq_end_io(fs_info, bio,
-			btrfs_is_free_space_inode(BTRFS_I(inode)) ?
-			BTRFS_WQ_ENDIO_FREE_SPACE : BTRFS_WQ_ENDIO_DATA);
-	if (ret)
-		goto out;
-
 	if (bio_flags & EXTENT_BIO_COMPRESSED) {
 		/*
 		 * btrfs_submit_compressed_read will handle completing the bio
@@ -2620,6 +2614,12 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
 		return;
 	}
 
+	ret = btrfs_bio_wq_end_io(fs_info, bio,
+			btrfs_is_free_space_inode(BTRFS_I(inode)) ?
+			BTRFS_WQ_ENDIO_FREE_SPACE : BTRFS_WQ_ENDIO_DATA);
+	if (ret)
+		goto out;
+
 	/*
 	 * Lookup bio sums does extra checks around whether we need to csum or
 	 * not, which is why we ignore skip_sum here.
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 05/10] btrfs: defer I/O completion based on the btrfs_raid_bio
  2022-04-25  7:54 cleanup btrfs bio handling, part 2 Christoph Hellwig
                   ` (3 preceding siblings ...)
  2022-04-25  7:54 ` [PATCH 04/10] btrfs: don't double-defer bio completions for compressed reads Christoph Hellwig
@ 2022-04-25  7:54 ` Christoph Hellwig
  2022-04-25  7:54 ` [PATCH 06/10] btrfs: don't use btrfs_bio_wq_end_io for compressed writes Christoph Hellwig
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 32+ messages in thread
From: Christoph Hellwig @ 2022-04-25  7:54 UTC (permalink / raw)
  To: Josef Bacik, David Sterba, Qu Wenruo; +Cc: Naohiro Aota, linux-btrfs

Instead of attaching an extra allocation and an indirect call to each
low-level bio issued by the RAID code, add a work_struct to struct
btrfs_raid_bio and only defer the per-rbio completion action.  The
per-bio actions for all the I/Os are trivial and can be safely done
from interrupt context.

As a nice side effect this also allows sharing the boilerplate code
for the per-bio completions.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/ctree.h   |   2 +-
 fs/btrfs/disk-io.c |  11 ++---
 fs/btrfs/disk-io.h |   1 -
 fs/btrfs/raid56.c  | 111 ++++++++++++++++++---------------------------
 4 files changed, 48 insertions(+), 77 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ab9a0cfed7bb0..a76291e4594f2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -852,7 +852,7 @@ struct btrfs_fs_info {
 	struct btrfs_workqueue *flush_workers;
 	struct btrfs_workqueue *endio_workers;
 	struct btrfs_workqueue *endio_meta_workers;
-	struct btrfs_workqueue *endio_raid56_workers;
+	struct workqueue_struct *endio_raid56_workers;
 	struct workqueue_struct *rmw_workers;
 	struct btrfs_workqueue *endio_meta_write_workers;
 	struct btrfs_workqueue *endio_write_workers;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c5dba8a39e952..cb8fe234fbc0c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -754,14 +754,10 @@ static void end_workqueue_bio(struct bio *bio)
 			wq = fs_info->endio_meta_write_workers;
 		else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
 			wq = fs_info->endio_freespace_worker;
-		else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
-			wq = fs_info->endio_raid56_workers;
 		else
 			wq = fs_info->endio_write_workers;
 	} else {
-		if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
-			wq = fs_info->endio_raid56_workers;
-		else if (end_io_wq->metadata)
+		if (end_io_wq->metadata)
 			wq = fs_info->endio_meta_workers;
 		else
 			wq = fs_info->endio_workers;
@@ -2282,7 +2278,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
 	btrfs_destroy_workqueue(fs_info->hipri_workers);
 	btrfs_destroy_workqueue(fs_info->workers);
 	btrfs_destroy_workqueue(fs_info->endio_workers);
-	btrfs_destroy_workqueue(fs_info->endio_raid56_workers);
+	destroy_workqueue(fs_info->endio_raid56_workers);
 	destroy_workqueue(fs_info->rmw_workers);
 	btrfs_destroy_workqueue(fs_info->endio_write_workers);
 	btrfs_destroy_workqueue(fs_info->endio_freespace_worker);
@@ -2490,8 +2486,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
 		btrfs_alloc_workqueue(fs_info, "endio-meta-write", flags,
 				      max_active, 2);
 	fs_info->endio_raid56_workers =
-		btrfs_alloc_workqueue(fs_info, "endio-raid56", flags,
-				      max_active, 4);
+		alloc_workqueue("btrfs-endio-raid56", flags, max_active);
 	fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active);
 	fs_info->endio_write_workers =
 		btrfs_alloc_workqueue(fs_info, "endio-write", flags,
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 9340e3266e0ac..97255e3d7e524 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -21,7 +21,6 @@ enum btrfs_wq_endio_type {
 	BTRFS_WQ_ENDIO_DATA,
 	BTRFS_WQ_ENDIO_METADATA,
 	BTRFS_WQ_ENDIO_FREE_SPACE,
-	BTRFS_WQ_ENDIO_RAID56,
 };
 
 static inline u64 btrfs_sb_offset(int mirror)
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index a5b623ee6facd..1a3c1a9b10d0b 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -164,6 +164,9 @@ struct btrfs_raid_bio {
 	atomic_t stripes_pending;
 
 	atomic_t error;
+
+	struct work_struct end_io_work;
+
 	/*
 	 * these are two arrays of pointers.  We allocate the
 	 * rbio big enough to hold them both and setup their
@@ -1552,15 +1555,7 @@ static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio)
 	}
 }
 
-/*
- * end io for the read phase of the rmw cycle.  All the bios here are physical
- * stripe bios we've read from the disk so we can recalculate the parity of the
- * stripe.
- *
- * This will usually kick off finish_rmw once all the bios are read in, but it
- * may trigger parity reconstruction if we had any errors along the way
- */
-static void raid_rmw_end_io(struct bio *bio)
+static void raid56_bio_end_io(struct bio *bio)
 {
 	struct btrfs_raid_bio *rbio = bio->bi_private;
 
@@ -1571,23 +1566,34 @@ static void raid_rmw_end_io(struct bio *bio)
 
 	bio_put(bio);
 
-	if (!atomic_dec_and_test(&rbio->stripes_pending))
-		return;
+	if (atomic_dec_and_test(&rbio->stripes_pending))
+		queue_work(rbio->bioc->fs_info->endio_raid56_workers,
+			   &rbio->end_io_work);
+}
 
-	if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
-		goto cleanup;
+/*
+ * End io handler for the read phase of the rmw cycle.  All the bios here are
+ * physical stripe bios we've read from the disk so we can recalculate the
+ * parity of the stripe.
+ *
+ * This will usually kick off finish_rmw once all the bios are read in, but it
+ * may trigger parity reconstruction if we had any errors along the way
+ */
+static void raid56_rmw_end_io_work(struct work_struct *work)
+{
+	struct btrfs_raid_bio *rbio =
+		container_of(work, struct btrfs_raid_bio, end_io_work);
+
+	if (atomic_read(&rbio->error) > rbio->bioc->max_errors) {
+		rbio_orig_end_io(rbio, BLK_STS_IOERR);
+		return;
+	}
 
 	/*
-	 * this will normally call finish_rmw to start our write
-	 * but if there are any failed stripes we'll reconstruct
-	 * from parity first
+	 * This will normally call finish_rmw to start our write but if there
+	 * are any failed stripes we'll reconstruct from parity first.
 	 */
 	validate_rbio_for_rmw(rbio);
-	return;
-
-cleanup:
-
-	rbio_orig_end_io(rbio, BLK_STS_IOERR);
 }
 
 /*
@@ -1662,11 +1668,9 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
 	 * touch it after that.
 	 */
 	atomic_set(&rbio->stripes_pending, bios_to_read);
+	INIT_WORK(&rbio->end_io_work, raid56_rmw_end_io_work);
 	while ((bio = bio_list_pop(&bio_list))) {
-		bio->bi_end_io = raid_rmw_end_io;
-
-		btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
-
+		bio->bi_end_io = raid56_bio_end_io;
 		submit_bio(bio);
 	}
 	/* the actual write will happen once the reads are done */
@@ -2108,25 +2112,13 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
 }
 
 /*
- * This is called only for stripes we've read from disk to
- * reconstruct the parity.
+ * This is called only for stripes we've read from disk to reconstruct the
+ * parity.
  */
-static void raid_recover_end_io(struct bio *bio)
+static void raid_recover_end_io_work(struct work_struct *work)
 {
-	struct btrfs_raid_bio *rbio = bio->bi_private;
-
-	/*
-	 * we only read stripe pages off the disk, set them
-	 * up to date if there were no errors
-	 */
-	if (bio->bi_status)
-		fail_bio_stripe(rbio, bio);
-	else
-		set_bio_pages_uptodate(rbio, bio);
-	bio_put(bio);
-
-	if (!atomic_dec_and_test(&rbio->stripes_pending))
-		return;
+	struct btrfs_raid_bio *rbio =
+		container_of(work, struct btrfs_raid_bio, end_io_work);
 
 	if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
 		rbio_orig_end_io(rbio, BLK_STS_IOERR);
@@ -2209,11 +2201,9 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
 	 * touch it after that.
 	 */
 	atomic_set(&rbio->stripes_pending, bios_to_read);
+	INIT_WORK(&rbio->end_io_work, raid_recover_end_io_work);
 	while ((bio = bio_list_pop(&bio_list))) {
-		bio->bi_end_io = raid_recover_end_io;
-
-		btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
-
+		bio->bi_end_io = raid56_bio_end_io;
 		submit_bio(bio);
 	}
 
@@ -2582,8 +2572,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
 	atomic_set(&rbio->stripes_pending, nr_data);
 
 	while ((bio = bio_list_pop(&bio_list))) {
-		bio->bi_end_io = raid_write_end_io;
-
+		bio->bi_end_io = raid56_bio_end_io;
 		submit_bio(bio);
 	}
 	return;
@@ -2671,24 +2660,14 @@ static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
  * This will usually kick off finish_rmw once all the bios are read in, but it
  * may trigger parity reconstruction if we had any errors along the way
  */
-static void raid56_parity_scrub_end_io(struct bio *bio)
+static void raid56_parity_scrub_end_io_work(struct work_struct *work)
 {
-	struct btrfs_raid_bio *rbio = bio->bi_private;
-
-	if (bio->bi_status)
-		fail_bio_stripe(rbio, bio);
-	else
-		set_bio_pages_uptodate(rbio, bio);
-
-	bio_put(bio);
-
-	if (!atomic_dec_and_test(&rbio->stripes_pending))
-		return;
+	struct btrfs_raid_bio *rbio =
+		container_of(work, struct btrfs_raid_bio, end_io_work);
 
 	/*
-	 * this will normally call finish_rmw to start our write
-	 * but if there are any failed stripes we'll reconstruct
-	 * from parity first
+	 * This will normally call finish_rmw to start our write, but if there
+	 * are any failed stripes we'll reconstruct from parity first
 	 */
 	validate_rbio_for_parity_scrub(rbio);
 }
@@ -2758,11 +2737,9 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
 	 * touch it after that.
 	 */
 	atomic_set(&rbio->stripes_pending, bios_to_read);
+	INIT_WORK(&rbio->end_io_work, raid56_parity_scrub_end_io_work);
 	while ((bio = bio_list_pop(&bio_list))) {
-		bio->bi_end_io = raid56_parity_scrub_end_io;
-
-		btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
-
+		bio->bi_end_io = raid56_bio_end_io;
 		submit_bio(bio);
 	}
 	/* the actual write will happen once the reads are done */
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 06/10] btrfs: don't use btrfs_bio_wq_end_io for compressed writes
  2022-04-25  7:54 cleanup btrfs bio handling, part 2 Christoph Hellwig
                   ` (4 preceding siblings ...)
  2022-04-25  7:54 ` [PATCH 05/10] btrfs: defer I/O completion based on the btrfs_raid_bio Christoph Hellwig
@ 2022-04-25  7:54 ` Christoph Hellwig
  2022-04-25  7:54 ` [PATCH 07/10] btrfs: centralize setting REQ_META Christoph Hellwig
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 32+ messages in thread
From: Christoph Hellwig @ 2022-04-25  7:54 UTC (permalink / raw)
  To: Josef Bacik, David Sterba, Qu Wenruo; +Cc: Naohiro Aota, linux-btrfs

Compressed write bio completion is the only user of btrfs_bio_wq_end_io
for writes, and the use of btrfs_bio_wq_end_io is a little suboptimal
here as we only really need user context for the final completion of a
compressed_bio structure, and not every single bio completion.

Add a work_struct to struct compressed_bio instead and use that to call
finish_compressed_bio_write.  This allows removing all handling of
write bios in the btrfs_bio_wq_end_io infrastructure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/compression.c | 45 +++++++++++++++++++++---------------------
 fs/btrfs/compression.h |  7 +++++--
 fs/btrfs/ctree.h       |  2 +-
 fs/btrfs/disk-io.c     | 29 +++++++++++----------------
 fs/btrfs/super.c       |  2 --
 5 files changed, 40 insertions(+), 45 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 8fda38a587067..eb11ee6691f8b 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -403,6 +403,14 @@ static void finish_compressed_bio_write(struct compressed_bio *cb)
 	kfree(cb);
 }
 
+static void btrfs_finish_compressed_write_work(struct work_struct *work)
+{
+	struct compressed_bio *cb =
+		container_of(work, struct compressed_bio, write_end_work);
+
+	finish_compressed_bio_write(cb);
+}
+
 /*
  * Do the cleanup once all the compressed pages hit the disk.  This will clear
  * writeback on the file pages and free the compressed pages.
@@ -414,29 +422,16 @@ static void end_compressed_bio_write(struct bio *bio)
 {
 	struct compressed_bio *cb = bio->bi_private;
 
-	if (!dec_and_test_compressed_bio(cb, bio))
-		goto out;
-
-	btrfs_record_physical_zoned(cb->inode, cb->start, bio);
+	if (dec_and_test_compressed_bio(cb, bio)) {
+		struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
 
-	finish_compressed_bio_write(cb);
-out:
+		btrfs_record_physical_zoned(cb->inode, cb->start, bio);
+		queue_work(fs_info->compressed_write_workers,
+			   &cb->write_end_work);
+	}
 	bio_put(bio);
 }
 
-static blk_status_t submit_compressed_bio(struct btrfs_fs_info *fs_info,
-					  struct bio *bio, int mirror_num)
-{
-	blk_status_t ret;
-
-	ASSERT(bio->bi_iter.bi_size);
-	ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
-	if (ret)
-		return ret;
-	ret = btrfs_map_bio(fs_info, bio, mirror_num);
-	return ret;
-}
-
 /*
  * Allocate a compressed_bio, which will be used to read/write on-disk
  * (aka, compressed) * data.
@@ -533,7 +528,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 	cb->compressed_pages = compressed_pages;
 	cb->compressed_len = compressed_len;
 	cb->writeback = writeback;
-	cb->orig_bio = NULL;
+	INIT_WORK(&cb->write_end_work, btrfs_finish_compressed_write_work);
 	cb->nr_pages = nr_pages;
 
 	if (blkcg_css)
@@ -603,7 +598,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 					goto finish_cb;
 			}
 
-			ret = submit_compressed_bio(fs_info, bio, 0);
+			ASSERT(bio->bi_iter.bi_size);
+			ret = btrfs_map_bio(fs_info, bio, 0);
 			if (ret)
 				goto finish_cb;
 			bio = NULL;
@@ -941,7 +937,12 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 						  fs_info->sectorsize);
 			sums += fs_info->csum_size * nr_sectors;
 
-			ret = submit_compressed_bio(fs_info, comp_bio, mirror_num);
+			ASSERT(comp_bio->bi_iter.bi_size);
+			ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
+						  BTRFS_WQ_ENDIO_DATA);
+			if (ret)
+				goto finish_cb;
+			ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
 			if (ret)
 				goto finish_cb;
 			comp_bio = NULL;
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index ac3c79f8c3492..6661e46b73460 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -61,8 +61,11 @@ struct compressed_bio {
 	blk_status_t status;
 	int mirror_num;
 
-	/* for reads, this is the bio we are copying the data into */
-	struct bio *orig_bio;
+	union {
+		/* for reads, this is the bio we are copying the data into */
+		struct bio *orig_bio;
+		struct work_struct write_end_work;
+	};
 
 	/*
 	 * the start of a variable length array of checksums only
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a76291e4594f2..bf869da04b519 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -854,7 +854,7 @@ struct btrfs_fs_info {
 	struct btrfs_workqueue *endio_meta_workers;
 	struct workqueue_struct *endio_raid56_workers;
 	struct workqueue_struct *rmw_workers;
-	struct btrfs_workqueue *endio_meta_write_workers;
+	struct workqueue_struct *compressed_write_workers;
 	struct btrfs_workqueue *endio_write_workers;
 	struct btrfs_workqueue *endio_freespace_worker;
 	struct btrfs_workqueue *caching_workers;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index cb8fe234fbc0c..1e6ee7f1a375d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -749,19 +749,10 @@ static void end_workqueue_bio(struct bio *bio)
 	fs_info = end_io_wq->info;
 	end_io_wq->status = bio->bi_status;
 
-	if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
-		if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
-			wq = fs_info->endio_meta_write_workers;
-		else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
-			wq = fs_info->endio_freespace_worker;
-		else
-			wq = fs_info->endio_write_workers;
-	} else {
-		if (end_io_wq->metadata)
-			wq = fs_info->endio_meta_workers;
-		else
-			wq = fs_info->endio_workers;
-	}
+	if (end_io_wq->metadata)
+		wq = fs_info->endio_meta_workers;
+	else
+		wq = fs_info->endio_workers;
 
 	btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
 	btrfs_queue_work(wq, &end_io_wq->work);
@@ -772,6 +763,9 @@ blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
 {
 	struct btrfs_end_io_wq *end_io_wq;
 
+	if (WARN_ON_ONCE(btrfs_op(bio) != BTRFS_MAP_WRITE))
+		return BLK_STS_IOERR;
+
 	end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
 	if (!end_io_wq)
 		return BLK_STS_RESOURCE;
@@ -2280,6 +2274,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
 	btrfs_destroy_workqueue(fs_info->endio_workers);
 	destroy_workqueue(fs_info->endio_raid56_workers);
 	destroy_workqueue(fs_info->rmw_workers);
+	destroy_workqueue(fs_info->compressed_write_workers);
 	btrfs_destroy_workqueue(fs_info->endio_write_workers);
 	btrfs_destroy_workqueue(fs_info->endio_freespace_worker);
 	btrfs_destroy_workqueue(fs_info->delayed_workers);
@@ -2294,7 +2289,6 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
 	 * queues can do metadata I/O operations.
 	 */
 	btrfs_destroy_workqueue(fs_info->endio_meta_workers);
-	btrfs_destroy_workqueue(fs_info->endio_meta_write_workers);
 }
 
 static void free_root_extent_buffers(struct btrfs_root *root)
@@ -2482,15 +2476,14 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
 	fs_info->endio_meta_workers =
 		btrfs_alloc_workqueue(fs_info, "endio-meta", flags,
 				      max_active, 4);
-	fs_info->endio_meta_write_workers =
-		btrfs_alloc_workqueue(fs_info, "endio-meta-write", flags,
-				      max_active, 2);
 	fs_info->endio_raid56_workers =
 		alloc_workqueue("btrfs-endio-raid56", flags, max_active);
 	fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active);
 	fs_info->endio_write_workers =
 		btrfs_alloc_workqueue(fs_info, "endio-write", flags,
 				      max_active, 2);
+	fs_info->compressed_write_workers =
+		alloc_workqueue("btrfs-compressed-write", flags, max_active);
 	fs_info->endio_freespace_worker =
 		btrfs_alloc_workqueue(fs_info, "freespace-write", flags,
 				      max_active, 0);
@@ -2505,7 +2498,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
 	if (!(fs_info->workers && fs_info->hipri_workers &&
 	      fs_info->delalloc_workers && fs_info->flush_workers &&
 	      fs_info->endio_workers && fs_info->endio_meta_workers &&
-	      fs_info->endio_meta_write_workers &&
+	      fs_info->compressed_write_workers &&
 	      fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
 	      fs_info->endio_freespace_worker && fs_info->rmw_workers &&
 	      fs_info->caching_workers && fs_info->fixup_workers &&
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index b1fdc6a26c76e..9c683c466d585 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1908,8 +1908,6 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
 	btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
 	btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
 	btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size);
-	btrfs_workqueue_set_max(fs_info->endio_meta_write_workers,
-				new_pool_size);
 	btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
 	btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
 	btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 07/10] btrfs: centralize setting REQ_META
  2022-04-25  7:54 cleanup btrfs bio handling, part 2 Christoph Hellwig
                   ` (5 preceding siblings ...)
  2022-04-25  7:54 ` [PATCH 06/10] btrfs: don't use btrfs_bio_wq_end_io for compressed writes Christoph Hellwig
@ 2022-04-25  7:54 ` Christoph Hellwig
  2022-04-25  9:06   ` Qu Wenruo
  2022-04-25  7:54 ` [PATCH 08/10] btrfs: remove btrfs_end_io_wq Christoph Hellwig
                   ` (2 subsequent siblings)
  9 siblings, 1 reply; 32+ messages in thread
From: Christoph Hellwig @ 2022-04-25  7:54 UTC (permalink / raw)
  To: Josef Bacik, David Sterba, Qu Wenruo; +Cc: Naohiro Aota, linux-btrfs

Set REQ_META in btrfs_submit_metadata_bio instead of the various callers.
We'll start relying on this flag inside of btrfs in a bit, and this
ensures it is always set correctly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/disk-io.c   | 2 ++
 fs/btrfs/extent_io.c | 8 ++++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1e6ee7f1a375d..65e680895e628 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -915,6 +915,8 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	blk_status_t ret;
 
+	bio->bi_opf |= REQ_META;
+
 	if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
 		/*
 		 * called for a read, do the setup so that checksum validation
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 80b4482c477c6..a14ed9b9dc2d0 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4589,7 +4589,7 @@ static int write_one_subpage_eb(struct extent_buffer *eb,
 {
 	struct btrfs_fs_info *fs_info = eb->fs_info;
 	struct page *page = eb->pages[0];
-	unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
+	unsigned int write_flags = wbc_to_write_flags(wbc);
 	bool no_dirty_ebs = false;
 	int ret;
 
@@ -4634,7 +4634,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 {
 	u64 disk_bytenr = eb->start;
 	int i, num_pages;
-	unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
+	unsigned int write_flags = wbc_to_write_flags(wbc);
 	int ret = 0;
 
 	prepare_eb_write(eb);
@@ -6645,7 +6645,7 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
 	btrfs_subpage_clear_error(fs_info, page, eb->start, eb->len);
 
 	btrfs_subpage_start_reader(fs_info, page, eb->start, eb->len);
-	ret = submit_extent_page(REQ_OP_READ | REQ_META, NULL, &bio_ctrl,
+	ret = submit_extent_page(REQ_OP_READ, NULL, &bio_ctrl,
 				 page, eb->start, eb->len,
 				 eb->start - page_offset(page),
 				 end_bio_extent_readpage, mirror_num, 0,
@@ -6752,7 +6752,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
 			}
 
 			ClearPageError(page);
-			err = submit_extent_page(REQ_OP_READ | REQ_META, NULL,
+			err = submit_extent_page(REQ_OP_READ, NULL,
 					 &bio_ctrl, page, page_offset(page),
 					 PAGE_SIZE, 0, end_bio_extent_readpage,
 					 mirror_num, 0, false);
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 08/10] btrfs: remove btrfs_end_io_wq
  2022-04-25  7:54 cleanup btrfs bio handling, part 2 Christoph Hellwig
                   ` (6 preceding siblings ...)
  2022-04-25  7:54 ` [PATCH 07/10] btrfs: centralize setting REQ_META Christoph Hellwig
@ 2022-04-25  7:54 ` Christoph Hellwig
  2022-04-25  7:54 ` [PATCH 09/10] btrfs: refactor btrfs_map_bio Christoph Hellwig
  2022-04-25  7:54 ` [PATCH 10/10] btrfs: do not allocate a btrfs_bio for low-level bios Christoph Hellwig
  9 siblings, 0 replies; 32+ messages in thread
From: Christoph Hellwig @ 2022-04-25  7:54 UTC (permalink / raw)
  To: Josef Bacik, David Sterba, Qu Wenruo; +Cc: Naohiro Aota, linux-btrfs

All read bios that go through btrfs_map_bio need to be completed in
user context.  And read I/Os are the most common and timing critical
in almost any file system workload.

Embed a work_struct into struct btrfs_bio and use it to complete all
read bios submitted through btrfs_map_bio, using the REQ_META flag to
decide which workqueue they are placed on.

This removes the need for a separate 128 byte allocation (typically
rounded up to 192 bytes by slab) for all reads with a size increase
of 24 bytes for struct btrfs_bio.  Future patches will reorganize
struct btrfs_bio to make use of this extra space for writes as well.

(all sizes are based on a typical 64-bit non-debug build)

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/compression.c |   4 --
 fs/btrfs/ctree.h       |   4 +-
 fs/btrfs/disk-io.c     | 118 ++---------------------------------------
 fs/btrfs/disk-io.h     |  10 ----
 fs/btrfs/inode.c       |  24 +--------
 fs/btrfs/super.c       |  11 +---
 fs/btrfs/volumes.c     |  33 ++++++++++--
 fs/btrfs/volumes.h     |   3 ++
 8 files changed, 40 insertions(+), 167 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index eb11ee6691f8b..49e905b7f8cfc 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -938,10 +938,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			sums += fs_info->csum_size * nr_sectors;
 
 			ASSERT(comp_bio->bi_iter.bi_size);
-			ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
-						  BTRFS_WQ_ENDIO_DATA);
-			if (ret)
-				goto finish_cb;
 			ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
 			if (ret)
 				goto finish_cb;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index bf869da04b519..023416ac9d705 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -850,8 +850,8 @@ struct btrfs_fs_info {
 	struct btrfs_workqueue *hipri_workers;
 	struct btrfs_workqueue *delalloc_workers;
 	struct btrfs_workqueue *flush_workers;
-	struct btrfs_workqueue *endio_workers;
-	struct btrfs_workqueue *endio_meta_workers;
+	struct workqueue_struct *endio_workers;
+	struct workqueue_struct *endio_meta_workers;
 	struct workqueue_struct *endio_raid56_workers;
 	struct workqueue_struct *rmw_workers;
 	struct workqueue_struct *compressed_write_workers;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 65e680895e628..efef6feab9010 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -51,7 +51,6 @@
 				 BTRFS_SUPER_FLAG_METADUMP |\
 				 BTRFS_SUPER_FLAG_METADUMP_V2)
 
-static void end_workqueue_fn(struct btrfs_work *work);
 static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 				      struct btrfs_fs_info *fs_info);
@@ -64,40 +63,6 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
 static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info);
 static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info);
 
-/*
- * btrfs_end_io_wq structs are used to do processing in task context when an IO
- * is complete.  This is used during reads to verify checksums, and it is used
- * by writes to insert metadata for new file extents after IO is complete.
- */
-struct btrfs_end_io_wq {
-	struct bio *bio;
-	bio_end_io_t *end_io;
-	void *private;
-	struct btrfs_fs_info *info;
-	blk_status_t status;
-	enum btrfs_wq_endio_type metadata;
-	struct btrfs_work work;
-};
-
-static struct kmem_cache *btrfs_end_io_wq_cache;
-
-int __init btrfs_end_io_wq_init(void)
-{
-	btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq",
-					sizeof(struct btrfs_end_io_wq),
-					0,
-					SLAB_MEM_SPREAD,
-					NULL);
-	if (!btrfs_end_io_wq_cache)
-		return -ENOMEM;
-	return 0;
-}
-
-void __cold btrfs_end_io_wq_exit(void)
-{
-	kmem_cache_destroy(btrfs_end_io_wq_cache);
-}
-
 static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
 {
 	if (fs_info->csum_shash)
@@ -740,48 +705,6 @@ int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
 	return ret;
 }
 
-static void end_workqueue_bio(struct bio *bio)
-{
-	struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
-	struct btrfs_fs_info *fs_info;
-	struct btrfs_workqueue *wq;
-
-	fs_info = end_io_wq->info;
-	end_io_wq->status = bio->bi_status;
-
-	if (end_io_wq->metadata)
-		wq = fs_info->endio_meta_workers;
-	else
-		wq = fs_info->endio_workers;
-
-	btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
-	btrfs_queue_work(wq, &end_io_wq->work);
-}
-
-blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
-			enum btrfs_wq_endio_type metadata)
-{
-	struct btrfs_end_io_wq *end_io_wq;
-
-	if (WARN_ON_ONCE(btrfs_op(bio) != BTRFS_MAP_WRITE))
-		return BLK_STS_IOERR;
-
-	end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
-	if (!end_io_wq)
-		return BLK_STS_RESOURCE;
-
-	end_io_wq->private = bio->bi_private;
-	end_io_wq->end_io = bio->bi_end_io;
-	end_io_wq->info = info;
-	end_io_wq->status = 0;
-	end_io_wq->bio = bio;
-	end_io_wq->metadata = metadata;
-
-	bio->bi_private = end_io_wq;
-	bio->bi_end_io = end_workqueue_bio;
-	return 0;
-}
-
 static void run_one_async_start(struct btrfs_work *work)
 {
 	struct async_submit_bio *async;
@@ -918,14 +841,7 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_
 	bio->bi_opf |= REQ_META;
 
 	if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
-		/*
-		 * called for a read, do the setup so that checksum validation
-		 * can happen in the async kernel threads
-		 */
-		ret = btrfs_bio_wq_end_io(fs_info, bio,
-					  BTRFS_WQ_ENDIO_METADATA);
-		if (!ret)
-			ret = btrfs_map_bio(fs_info, bio, mirror_num);
+		ret = btrfs_map_bio(fs_info, bio, mirror_num);
 	} else if (!should_async_write(fs_info, BTRFS_I(inode))) {
 		ret = btree_csum_one_bio(bio);
 		if (!ret)
@@ -1948,25 +1864,6 @@ struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
 	return root;
 }
 
-/*
- * called by the kthread helper functions to finally call the bio end_io
- * functions.  This is where read checksum verification actually happens
- */
-static void end_workqueue_fn(struct btrfs_work *work)
-{
-	struct bio *bio;
-	struct btrfs_end_io_wq *end_io_wq;
-
-	end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
-	bio = end_io_wq->bio;
-
-	bio->bi_status = end_io_wq->status;
-	bio->bi_private = end_io_wq->private;
-	bio->bi_end_io = end_io_wq->end_io;
-	bio_endio(bio);
-	kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
-}
-
 static int cleaner_kthread(void *arg)
 {
 	struct btrfs_fs_info *fs_info = arg;
@@ -2273,7 +2170,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
 	btrfs_destroy_workqueue(fs_info->delalloc_workers);
 	btrfs_destroy_workqueue(fs_info->hipri_workers);
 	btrfs_destroy_workqueue(fs_info->workers);
-	btrfs_destroy_workqueue(fs_info->endio_workers);
+	destroy_workqueue(fs_info->endio_workers);
 	destroy_workqueue(fs_info->endio_raid56_workers);
 	destroy_workqueue(fs_info->rmw_workers);
 	destroy_workqueue(fs_info->compressed_write_workers);
@@ -2290,7 +2187,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
 	 * the queues used for metadata I/O, since tasks from those other work
 	 * queues can do metadata I/O operations.
 	 */
-	btrfs_destroy_workqueue(fs_info->endio_meta_workers);
+	destroy_workqueue(fs_info->endio_meta_workers);
 }
 
 static void free_root_extent_buffers(struct btrfs_root *root)
@@ -2469,15 +2366,10 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
 	fs_info->fixup_workers =
 		btrfs_alloc_workqueue(fs_info, "fixup", flags, 1, 0);
 
-	/*
-	 * endios are largely parallel and should have a very
-	 * low idle thresh
-	 */
 	fs_info->endio_workers =
-		btrfs_alloc_workqueue(fs_info, "endio", flags, max_active, 4);
+		alloc_workqueue("btrfs-endio", flags, max_active);
 	fs_info->endio_meta_workers =
-		btrfs_alloc_workqueue(fs_info, "endio-meta", flags,
-				      max_active, 4);
+		alloc_workqueue("btrfs-endio-meta", flags, max_active);
 	fs_info->endio_raid56_workers =
 		alloc_workqueue("btrfs-endio-raid56", flags, max_active);
 	fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 97255e3d7e524..424dbfc5fd784 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -17,12 +17,6 @@
  */
 #define BTRFS_BDEV_BLOCKSIZE	(4096)
 
-enum btrfs_wq_endio_type {
-	BTRFS_WQ_ENDIO_DATA,
-	BTRFS_WQ_ENDIO_METADATA,
-	BTRFS_WQ_ENDIO_FREE_SPACE,
-};
-
 static inline u64 btrfs_sb_offset(int mirror)
 {
 	u64 start = SZ_16K;
@@ -120,8 +114,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
 			  int atomic);
 int btrfs_read_extent_buffer(struct extent_buffer *buf, u64 parent_transid,
 			     int level, struct btrfs_key *first_key);
-blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
-			enum btrfs_wq_endio_type metadata);
 blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags,
 				 u64 dio_file_offset,
@@ -145,8 +137,6 @@ int btree_lock_page_hook(struct page *page, void *data,
 int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
 int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid);
 int btrfs_init_root_free_objectid(struct btrfs_root *root);
-int __init btrfs_end_io_wq_init(void);
-void __cold btrfs_end_io_wq_exit(void);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 void btrfs_set_buffer_lockdep_class(u64 objectid,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d7b04e06da825..80b16810e747d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2614,12 +2614,6 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
 		return;
 	}
 
-	ret = btrfs_bio_wq_end_io(fs_info, bio,
-			btrfs_is_free_space_inode(BTRFS_I(inode)) ?
-			BTRFS_WQ_ENDIO_FREE_SPACE : BTRFS_WQ_ENDIO_DATA);
-	if (ret)
-		goto out;
-
 	/*
 	 * Lookup bio sums does extra checks around whether we need to csum or
 	 * not, which is why we ignore skip_sum here.
@@ -7784,9 +7778,6 @@ static void submit_dio_repair_bio(struct inode *inode, struct bio *bio,
 
 	BUG_ON(bio_op(bio) == REQ_OP_WRITE);
 
-	if (btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA))
-		return;
-
 	refcount_inc(&dip->refs);
 	if (btrfs_map_bio(fs_info, bio, mirror_num))
 		refcount_dec(&dip->refs);
@@ -7887,19 +7878,12 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_dio_private *dip = bio->bi_private;
-	bool write = btrfs_op(bio) == BTRFS_MAP_WRITE;
 	blk_status_t ret;
 
-	if (!write) {
-		ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
-		if (ret)
-			return ret;
-	}
-
 	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
 		goto map;
 
-	if (write) {
+	if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
 		/* check btrfs_submit_data_write_bio() for async submit rules */
 		if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers))
 			return btrfs_wq_submit_bio(inode, bio, 0, 0,
@@ -10249,12 +10233,6 @@ static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode,
 			return ret;
 	}
 
-	ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
-	if (ret) {
-		btrfs_bio_free_csum(bbio);
-		return ret;
-	}
-
 	atomic_inc(&priv->pending);
 	ret = btrfs_map_bio(fs_info, bio, mirror_num);
 	if (ret) {
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9c683c466d585..64eb8aeed156f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1906,8 +1906,6 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
 	btrfs_workqueue_set_max(fs_info->hipri_workers, new_pool_size);
 	btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
 	btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
-	btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
-	btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size);
 	btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
 	btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
 	btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
@@ -2668,13 +2666,9 @@ static int __init init_btrfs_fs(void)
 	if (err)
 		goto free_delayed_ref;
 
-	err = btrfs_end_io_wq_init();
-	if (err)
-		goto free_prelim_ref;
-
 	err = btrfs_interface_init();
 	if (err)
-		goto free_end_io_wq;
+		goto free_prelim_ref;
 
 	btrfs_print_mod_info();
 
@@ -2690,8 +2684,6 @@ static int __init init_btrfs_fs(void)
 
 unregister_ioctl:
 	btrfs_interface_exit();
-free_end_io_wq:
-	btrfs_end_io_wq_exit();
 free_prelim_ref:
 	btrfs_prelim_ref_exit();
 free_delayed_ref:
@@ -2729,7 +2721,6 @@ static void __exit exit_btrfs_fs(void)
 	extent_state_cache_exit();
 	extent_io_exit();
 	btrfs_interface_exit();
-	btrfs_end_io_wq_exit();
 	unregister_filesystem(&btrfs_fs_type);
 	btrfs_exit_sysfs();
 	btrfs_cleanup_fs_uuids();
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index aeacb87457687..5f18e9105fe08 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6621,11 +6621,27 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 	return __btrfs_map_block(fs_info, op, logical, length, bioc_ret, 0, 1);
 }
 
-static inline void btrfs_end_bioc(struct btrfs_io_context *bioc)
+static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_io_context *bioc)
+{
+	if (bioc->orig_bio->bi_opf & REQ_META)
+		return bioc->fs_info->endio_meta_workers;
+	return bioc->fs_info->endio_workers;
+}
+
+static void btrfs_end_bio_work(struct work_struct *work)
+{
+	struct btrfs_bio *bbio =
+		container_of(work, struct btrfs_bio, end_io_work);
+
+	bio_endio(&bbio->bio);
+}
+
+static void btrfs_end_bioc(struct btrfs_io_context *bioc, bool async)
 {
 	struct bio *orig_bio = bioc->orig_bio;
+	struct btrfs_bio *bbio = btrfs_bio(orig_bio);
 
-	btrfs_bio(orig_bio)->mirror_num = bioc->mirror_num;
+	bbio->mirror_num = bioc->mirror_num;
 	orig_bio->bi_private = bioc->private;
 	orig_bio->bi_end_io = bioc->end_io;
 
@@ -6637,7 +6653,14 @@ static inline void btrfs_end_bioc(struct btrfs_io_context *bioc)
 		orig_bio->bi_status = BLK_STS_IOERR;
 	else
 		orig_bio->bi_status = BLK_STS_OK;
-	bio_endio(orig_bio);
+
+	if (btrfs_op(orig_bio) == BTRFS_MAP_READ && async) {
+		INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
+		queue_work(btrfs_end_io_wq(bioc), &bbio->end_io_work);
+	} else {
+		bio_endio(orig_bio);
+	}
+
 	btrfs_put_bioc(bioc);
 }
 
@@ -6669,7 +6692,7 @@ static void btrfs_end_bio(struct bio *bio)
 
 	btrfs_bio_counter_dec(bioc->fs_info);
 	if (atomic_dec_and_test(&bioc->stripes_pending))
-		btrfs_end_bioc(bioc);
+		btrfs_end_bioc(bioc, true);
 }
 
 static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio,
@@ -6767,7 +6790,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 		    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
 			atomic_inc(&bioc->error);
 			if (atomic_dec_and_test(&bioc->stripes_pending))
-				btrfs_end_bioc(bioc);
+				btrfs_end_bioc(bioc, false);
 			continue;
 		}
 
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 12b2af9260e92..28e28b7c48649 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -371,6 +371,9 @@ struct btrfs_bio {
 	u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
 	struct bvec_iter iter;
 
+	/* for read end I/O handling */
+	struct work_struct end_io_work;
+
 	/*
 	 * This member must come last, bio_alloc_bioset will allocate enough
 	 * bytes for entire btrfs_bio but relies on bio being last.
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 09/10] btrfs: refactor btrfs_map_bio
  2022-04-25  7:54 cleanup btrfs bio handling, part 2 Christoph Hellwig
                   ` (7 preceding siblings ...)
  2022-04-25  7:54 ` [PATCH 08/10] btrfs: remove btrfs_end_io_wq Christoph Hellwig
@ 2022-04-25  7:54 ` Christoph Hellwig
  2022-04-25  8:56   ` Qu Wenruo
  2022-04-25  7:54 ` [PATCH 10/10] btrfs: do not allocate a btrfs_bio for low-level bios Christoph Hellwig
  9 siblings, 1 reply; 32+ messages in thread
From: Christoph Hellwig @ 2022-04-25  7:54 UTC (permalink / raw)
  To: Josef Bacik, David Sterba, Qu Wenruo; +Cc: Naohiro Aota, linux-btrfs

Use a label for common cleanup, untangle the conditionals for parity
RAID and move all per-stripe handling into submit_stripe_bio.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/volumes.c | 96 +++++++++++++++++++++++-----------------------
 1 file changed, 48 insertions(+), 48 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5f18e9105fe08..d54aacb4f05f2 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6695,10 +6695,30 @@ static void btrfs_end_bio(struct bio *bio)
 		btrfs_end_bioc(bioc, true);
 }
 
-static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio,
-			      u64 physical, struct btrfs_device *dev)
+static void submit_stripe_bio(struct btrfs_io_context *bioc,
+		struct bio *orig_bio, int dev_nr, bool clone)
 {
 	struct btrfs_fs_info *fs_info = bioc->fs_info;
+	struct btrfs_device *dev = bioc->stripes[dev_nr].dev;
+	u64 physical = bioc->stripes[dev_nr].physical;
+	struct bio *bio;
+
+	if (!dev || !dev->bdev ||
+	    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
+	    (btrfs_op(orig_bio) == BTRFS_MAP_WRITE &&
+	     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
+		atomic_inc(&bioc->error);
+		if (atomic_dec_and_test(&bioc->stripes_pending))
+			btrfs_end_bioc(bioc, false);
+		return;
+	}
+
+	if (clone) {
+		bio = btrfs_bio_clone(dev->bdev, orig_bio);
+	} else {
+		bio = orig_bio;
+		bio_set_dev(bio, dev->bdev);
+	}
 
 	bio->bi_private = bioc;
 	btrfs_bio(bio)->device = dev;
@@ -6733,46 +6753,44 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio,
 blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 			   int mirror_num)
 {
-	struct btrfs_device *dev;
-	struct bio *first_bio = bio;
 	u64 logical = bio->bi_iter.bi_sector << 9;
-	u64 length = 0;
-	u64 map_length;
+	u64 length = bio->bi_iter.bi_size;
+	u64 map_length = length;
 	int ret;
 	int dev_nr;
 	int total_devs;
 	struct btrfs_io_context *bioc = NULL;
 
-	length = bio->bi_iter.bi_size;
-	map_length = length;
-
 	btrfs_bio_counter_inc_blocked(fs_info);
 	ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
 				&map_length, &bioc, mirror_num, 1);
-	if (ret) {
-		btrfs_bio_counter_dec(fs_info);
-		return errno_to_blk_status(ret);
-	}
+	if (ret)
+		goto out_dec;
 
 	total_devs = bioc->num_stripes;
-	bioc->orig_bio = first_bio;
-	bioc->private = first_bio->bi_private;
-	bioc->end_io = first_bio->bi_end_io;
-	atomic_set(&bioc->stripes_pending, bioc->num_stripes);
-
-	if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
-	    ((btrfs_op(bio) == BTRFS_MAP_WRITE) || (mirror_num > 1))) {
-		/* In this case, map_length has been set to the length of
-		   a single stripe; not the whole write */
+	bioc->orig_bio = bio;
+	bioc->private = bio->bi_private;
+	bioc->end_io = bio->bi_end_io;
+	atomic_set(&bioc->stripes_pending, total_devs);
+
+	if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+		/*
+		 * In this case, map_length has been set to the length of a
+		 * single stripe; not the whole write.
+		 */
 		if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
 			ret = raid56_parity_write(bio, bioc, map_length);
-		} else {
+			goto out_dec;
+		}
+		if (mirror_num > 1) {
 			ret = raid56_parity_recover(bio, bioc, map_length,
 						    mirror_num, 1);
+			goto out_dec;
 		}
-
-		btrfs_bio_counter_dec(fs_info);
-		return errno_to_blk_status(ret);
+		/*
+		 * Normal reads do not require special parity read handling, so
+		 * fall through here.
+		 */
 	}
 
 	if (map_length < length) {
@@ -6782,29 +6800,11 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 		BUG();
 	}
 
-	for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
-		dev = bioc->stripes[dev_nr].dev;
-		if (!dev || !dev->bdev || test_bit(BTRFS_DEV_STATE_MISSING,
-						   &dev->dev_state) ||
-		    (btrfs_op(first_bio) == BTRFS_MAP_WRITE &&
-		    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
-			atomic_inc(&bioc->error);
-			if (atomic_dec_and_test(&bioc->stripes_pending))
-				btrfs_end_bioc(bioc, false);
-			continue;
-		}
-
-		if (dev_nr < total_devs - 1) {
-			bio = btrfs_bio_clone(dev->bdev, first_bio);
-		} else {
-			bio = first_bio;
-			bio_set_dev(bio, dev->bdev);
-		}
-
-		submit_stripe_bio(bioc, bio, bioc->stripes[dev_nr].physical, dev);
-	}
+	for (dev_nr = 0; dev_nr < total_devs; dev_nr++)
+		submit_stripe_bio(bioc, bio, dev_nr, dev_nr < total_devs - 1);
+out_dec:
 	btrfs_bio_counter_dec(fs_info);
-	return BLK_STS_OK;
+	return errno_to_blk_status(ret);
 }
 
 static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args,
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 10/10] btrfs: do not allocate a btrfs_bio for low-level bios
  2022-04-25  7:54 cleanup btrfs bio handling, part 2 Christoph Hellwig
                   ` (8 preceding siblings ...)
  2022-04-25  7:54 ` [PATCH 09/10] btrfs: refactor btrfs_map_bio Christoph Hellwig
@ 2022-04-25  7:54 ` Christoph Hellwig
  2022-04-25  9:01   ` Qu Wenruo
  9 siblings, 1 reply; 32+ messages in thread
From: Christoph Hellwig @ 2022-04-25  7:54 UTC (permalink / raw)
  To: Josef Bacik, David Sterba, Qu Wenruo; +Cc: Naohiro Aota, linux-btrfs

The bios submitted from btrfs_map_bio don't really interact with the
rest of btrfs and the only btrfs_bio member actually used in the
low-level bios is the pointer to the btrfs_io_context used for the endio
handler.

Use a union in struct btrfs_io_stripe that allows the endio handler to
find the btrfs_io_context and remove the spurious ->device assignment
so that a plain fs_bio_set bio can be used for the low-level bios
allocated inside btrfs_map_bio.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/extent_io.c | 13 -------------
 fs/btrfs/extent_io.h |  1 -
 fs/btrfs/volumes.c   | 20 ++++++++++----------
 fs/btrfs/volumes.h   |  5 ++++-
 4 files changed, 14 insertions(+), 25 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a14ed9b9dc2d0..37f4eee418219 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3209,19 +3209,6 @@ struct bio *btrfs_bio_alloc(unsigned int nr_iovecs)
 	return bio;
 }
 
-struct bio *btrfs_bio_clone(struct block_device *bdev, struct bio *bio)
-{
-	struct btrfs_bio *bbio;
-	struct bio *new;
-
-	/* Bio allocation backed by a bioset does not fail */
-	new = bio_alloc_clone(bdev, bio, GFP_NOFS, &btrfs_bioset);
-	bbio = btrfs_bio(new);
-	btrfs_bio_init(bbio);
-	bbio->iter = bio->bi_iter;
-	return new;
-}
-
 struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
 {
 	struct bio *bio;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index b390ec79f9a86..3078e90be3a99 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -265,7 +265,6 @@ void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
 
 int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array);
 struct bio *btrfs_bio_alloc(unsigned int nr_iovecs);
-struct bio *btrfs_bio_clone(struct block_device *bdev, struct bio *bio);
 struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size);
 
 void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d54aacb4f05f2..c621bd631450a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6666,23 +6666,21 @@ static void btrfs_end_bioc(struct btrfs_io_context *bioc, bool async)
 
 static void btrfs_end_bio(struct bio *bio)
 {
-	struct btrfs_io_context *bioc = bio->bi_private;
+	struct btrfs_io_stripe *stripe = bio->bi_private;
+	struct btrfs_io_context *bioc = stripe->bioc;
 
 	if (bio->bi_status) {
 		atomic_inc(&bioc->error);
 		if (bio->bi_status == BLK_STS_IOERR ||
 		    bio->bi_status == BLK_STS_TARGET) {
-			struct btrfs_device *dev = btrfs_bio(bio)->device;
-
-			ASSERT(dev->bdev);
 			if (btrfs_op(bio) == BTRFS_MAP_WRITE)
-				btrfs_dev_stat_inc_and_print(dev,
+				btrfs_dev_stat_inc_and_print(stripe->dev,
 						BTRFS_DEV_STAT_WRITE_ERRS);
 			else if (!(bio->bi_opf & REQ_RAHEAD))
-				btrfs_dev_stat_inc_and_print(dev,
+				btrfs_dev_stat_inc_and_print(stripe->dev,
 						BTRFS_DEV_STAT_READ_ERRS);
 			if (bio->bi_opf & REQ_PREFLUSH)
-				btrfs_dev_stat_inc_and_print(dev,
+				btrfs_dev_stat_inc_and_print(stripe->dev,
 						BTRFS_DEV_STAT_FLUSH_ERRS);
 		}
 	}
@@ -6714,14 +6712,16 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc,
 	}
 
 	if (clone) {
-		bio = btrfs_bio_clone(dev->bdev, orig_bio);
+		bio = bio_alloc_clone(dev->bdev, orig_bio, GFP_NOFS,
+				      &fs_bio_set);
 	} else {
 		bio = orig_bio;
 		bio_set_dev(bio, dev->bdev);
+		btrfs_bio(bio)->device = dev;
 	}
 
-	bio->bi_private = bioc;
-	btrfs_bio(bio)->device = dev;
+	bioc->stripes[dev_nr].bioc = bioc;
+	bio->bi_private = &bioc->stripes[dev_nr];
 	bio->bi_end_io = btrfs_end_bio;
 	bio->bi_iter.bi_sector = physical >> 9;
 	/*
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 28e28b7c48649..825e44c82f2b0 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -396,7 +396,10 @@ static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio)
 
 struct btrfs_io_stripe {
 	struct btrfs_device *dev;
-	u64 physical;
+	union {
+		u64 physical;			/* block mapping */
+		struct btrfs_io_context *bioc;	/* for the endio handler */
+	};
 	u64 length; /* only used for discard mappings */
 };
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH 02/10] btrfs: cleanup btrfs_submit_dio_bio
  2022-04-25  7:54 ` [PATCH 02/10] btrfs: cleanup btrfs_submit_dio_bio Christoph Hellwig
@ 2022-04-25  8:45   ` Qu Wenruo
  2022-04-26  7:21   ` Johannes Thumshirn
  1 sibling, 0 replies; 32+ messages in thread
From: Qu Wenruo @ 2022-04-25  8:45 UTC (permalink / raw)
  To: Christoph Hellwig, Josef Bacik, David Sterba; +Cc: Naohiro Aota, linux-btrfs



On 2022/4/25 15:54, Christoph Hellwig wrote:
> Remove the pointless goto just to return err and clean up the code flow
> to be a little more straight forward.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Qu Wenruo <wqu@suse.com>

Thanks,
Qu

> ---
>   fs/btrfs/inode.c | 23 +++++++++--------------
>   1 file changed, 9 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index ef3bee1cbc6db..b188f724eff2d 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -7899,31 +7899,28 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
>   	bool write = btrfs_op(bio) == BTRFS_MAP_WRITE;
>   	blk_status_t ret;
>   
> -	/* Check btrfs_submit_bio_hook() for rules about async submit. */
> -	if (async_submit)
> -		async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
> -
>   	if (!write) {
>   		ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
>   		if (ret)
> -			goto err;
> +			return ret;
>   	}
>   
>   	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
>   		goto map;
>   
> -	if (write && async_submit) {
> -		ret = btrfs_wq_submit_bio(inode, bio, 0, 0, file_offset,
> -					  btrfs_submit_bio_start_direct_io);
> -		goto err;
> -	} else if (write) {
> +	if (write) {
> +		/* check btrfs_submit_data_bio() for async submit rules */
> +		if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers))
> +			return btrfs_wq_submit_bio(inode, bio, 0, 0,
> +					file_offset,
> +					btrfs_submit_bio_start_direct_io);
>   		/*
>   		 * If we aren't doing async submit, calculate the csum of the
>   		 * bio now.
>   		 */
>   		ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, false);
>   		if (ret)
> -			goto err;
> +			return ret;
>   	} else {
>   		u64 csum_offset;
>   
> @@ -7933,9 +7930,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
>   		btrfs_bio(bio)->csum = dip->csums + csum_offset;
>   	}
>   map:
> -	ret = btrfs_map_bio(fs_info, bio, 0);
> -err:
> -	return ret;
> +	return btrfs_map_bio(fs_info, bio, 0);
>   }
>   
>   /*


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 09/10] btrfs: refactor btrfs_map_bio
  2022-04-25  7:54 ` [PATCH 09/10] btrfs: refactor btrfs_map_bio Christoph Hellwig
@ 2022-04-25  8:56   ` Qu Wenruo
  2022-04-25  9:17     ` Christoph Hellwig
  2022-04-26 13:24     ` Christoph Hellwig
  0 siblings, 2 replies; 32+ messages in thread
From: Qu Wenruo @ 2022-04-25  8:56 UTC (permalink / raw)
  To: Christoph Hellwig, Josef Bacik, David Sterba; +Cc: Naohiro Aota, linux-btrfs



On 2022/4/25 15:54, Christoph Hellwig wrote:
> Use a label for common cleanup, untangle the conditionals for parity
> RAID and move all per-stripe handling into submit_stripe_bio.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   fs/btrfs/volumes.c | 96 +++++++++++++++++++++++-----------------------
>   1 file changed, 48 insertions(+), 48 deletions(-)
> 
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 5f18e9105fe08..d54aacb4f05f2 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -6695,10 +6695,30 @@ static void btrfs_end_bio(struct bio *bio)
>   		btrfs_end_bioc(bioc, true);
>   }
>   
> -static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio,
> -			      u64 physical, struct btrfs_device *dev)
> +static void submit_stripe_bio(struct btrfs_io_context *bioc,
> +		struct bio *orig_bio, int dev_nr, bool clone)
>   {
>   	struct btrfs_fs_info *fs_info = bioc->fs_info;
> +	struct btrfs_device *dev = bioc->stripes[dev_nr].dev;
> +	u64 physical = bioc->stripes[dev_nr].physical;
> +	struct bio *bio;
> +
> +	if (!dev || !dev->bdev ||
> +	    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
> +	    (btrfs_op(orig_bio) == BTRFS_MAP_WRITE &&
> +	     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
> +		atomic_inc(&bioc->error);
> +		if (atomic_dec_and_test(&bioc->stripes_pending))
> +			btrfs_end_bioc(bioc, false);

The bioc is allocated by btrfs_map_block(), but freed inside a helper.

This makes the allocation and free happen at different levels; I'm not
sure that's a good idea.

> +		return;
> +	}
> +
> +	if (clone) {
> +		bio = btrfs_bio_clone(dev->bdev, orig_bio);
> +	} else {
> +		bio = orig_bio;
> +		bio_set_dev(bio, dev->bdev);
> +	}
>   
>   	bio->bi_private = bioc;
>   	btrfs_bio(bio)->device = dev;
> @@ -6733,46 +6753,44 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio,
>   blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
>   			   int mirror_num)
>   {
> -	struct btrfs_device *dev;
> -	struct bio *first_bio = bio;
>   	u64 logical = bio->bi_iter.bi_sector << 9;
> -	u64 length = 0;
> -	u64 map_length;
> +	u64 length = bio->bi_iter.bi_size;
> +	u64 map_length = length;
>   	int ret;
>   	int dev_nr;
>   	int total_devs;
>   	struct btrfs_io_context *bioc = NULL;
>   
> -	length = bio->bi_iter.bi_size;
> -	map_length = length;
> -
>   	btrfs_bio_counter_inc_blocked(fs_info);
>   	ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
>   				&map_length, &bioc, mirror_num, 1);
> -	if (ret) {
> -		btrfs_bio_counter_dec(fs_info);
> -		return errno_to_blk_status(ret);
> -	}
> +	if (ret)
> +		goto out_dec;
>   
>   	total_devs = bioc->num_stripes;
> -	bioc->orig_bio = first_bio;
> -	bioc->private = first_bio->bi_private;
> -	bioc->end_io = first_bio->bi_end_io;
> -	atomic_set(&bioc->stripes_pending, bioc->num_stripes);
> -
> -	if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
> -	    ((btrfs_op(bio) == BTRFS_MAP_WRITE) || (mirror_num > 1))) {
> -		/* In this case, map_length has been set to the length of
> -		   a single stripe; not the whole write */
> +	bioc->orig_bio = bio;
> +	bioc->private = bio->bi_private;
> +	bioc->end_io = bio->bi_end_io;
> +	atomic_set(&bioc->stripes_pending, total_devs);
> +
> +	if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
> +		/*
> +		 * In this case, map_length has been set to the length of a
> +		 * single stripe; not the whole write.
> +		 */
>   		if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
>   			ret = raid56_parity_write(bio, bioc, map_length);
> -		} else {
> +			goto out_dec;
> +		}
> +		if (mirror_num > 1) {
>   			ret = raid56_parity_recover(bio, bioc, map_length,
>   						    mirror_num, 1);
> +			goto out_dec;
>   		}
> -
> -		btrfs_bio_counter_dec(fs_info);
> -		return errno_to_blk_status(ret);
> +		/*
> +		 * Normal reads do not require special parity read handling, so
> +		 * fall through here.
> +		 */

I doubt this fallback would improve the readability.

But you're also right, the original check condition for the RAID56 
branch is also not ideal.

>   	}
>   
>   	if (map_length < length) {
> @@ -6782,29 +6800,11 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
>   		BUG();
>   	}
>   
> -	for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
> -		dev = bioc->stripes[dev_nr].dev;
> -		if (!dev || !dev->bdev || test_bit(BTRFS_DEV_STATE_MISSING,
> -						   &dev->dev_state) ||
> -		    (btrfs_op(first_bio) == BTRFS_MAP_WRITE &&
> -		    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {

Maybe just make the complex if () condition into a helper?

In fact I see some other locations use similar complex expressions to
check whether a device is missing.

Thus it should help a lot of call sites.

Thanks,
Qu

> -			atomic_inc(&bioc->error);
> -			if (atomic_dec_and_test(&bioc->stripes_pending))
> -				btrfs_end_bioc(bioc, false);
> -			continue;
> -		}
> -
> -		if (dev_nr < total_devs - 1) {
> -			bio = btrfs_bio_clone(dev->bdev, first_bio);
> -		} else {
> -			bio = first_bio;
> -			bio_set_dev(bio, dev->bdev);
> -		}
> -
> -		submit_stripe_bio(bioc, bio, bioc->stripes[dev_nr].physical, dev);
> -	}
> +	for (dev_nr = 0; dev_nr < total_devs; dev_nr++)
> +		submit_stripe_bio(bioc, bio, dev_nr, dev_nr < total_devs - 1);
> +out_dec:
>   	btrfs_bio_counter_dec(fs_info);
> -	return BLK_STS_OK;
> +	return errno_to_blk_status(ret);
>   }
>   
>   static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args,


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 10/10] btrfs: do not allocate a btrfs_bio for low-level bios
  2022-04-25  7:54 ` [PATCH 10/10] btrfs: do not allocate a btrfs_bio for low-level bios Christoph Hellwig
@ 2022-04-25  9:01   ` Qu Wenruo
  2022-04-25  9:18     ` Christoph Hellwig
  0 siblings, 1 reply; 32+ messages in thread
From: Qu Wenruo @ 2022-04-25  9:01 UTC (permalink / raw)
  To: Christoph Hellwig, Josef Bacik, David Sterba, Qu Wenruo
  Cc: Naohiro Aota, linux-btrfs



On 2022/4/25 15:54, Christoph Hellwig wrote:
> The bios submitted from btrfs_map_bio don't really interact with the
> rest of btrfs and the only btrfs_bio member actually used in the
> low-level bios is the pointer to the btrfs_io_contex used for endio
> handler.
>
> Use a union in struct btrfs_io_stripe that allows the endio handler to
> find the btrfs_io_context and remove the spurious ->device assignment
> so that a plain fs_bio_set bio can be used for the low-level bios
> allocated inside btrfs_map_bio.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   fs/btrfs/extent_io.c | 13 -------------
>   fs/btrfs/extent_io.h |  1 -
>   fs/btrfs/volumes.c   | 20 ++++++++++----------
>   fs/btrfs/volumes.h   |  5 ++++-
>   4 files changed, 14 insertions(+), 25 deletions(-)
>
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index a14ed9b9dc2d0..37f4eee418219 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -3209,19 +3209,6 @@ struct bio *btrfs_bio_alloc(unsigned int nr_iovecs)
>   	return bio;
>   }
>
> -struct bio *btrfs_bio_clone(struct block_device *bdev, struct bio *bio)
> -{
> -	struct btrfs_bio *bbio;
> -	struct bio *new;
> -
> -	/* Bio allocation backed by a bioset does not fail */
> -	new = bio_alloc_clone(bdev, bio, GFP_NOFS, &btrfs_bioset);
> -	bbio = btrfs_bio(new);
> -	btrfs_bio_init(bbio);
> -	bbio->iter = bio->bi_iter;
> -	return new;
> -}
> -
>   struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
>   {
>   	struct bio *bio;
> diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
> index b390ec79f9a86..3078e90be3a99 100644
> --- a/fs/btrfs/extent_io.h
> +++ b/fs/btrfs/extent_io.h
> @@ -265,7 +265,6 @@ void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
>
>   int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array);
>   struct bio *btrfs_bio_alloc(unsigned int nr_iovecs);
> -struct bio *btrfs_bio_clone(struct block_device *bdev, struct bio *bio);
>   struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size);
>
>   void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index d54aacb4f05f2..c621bd631450a 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -6666,23 +6666,21 @@ static void btrfs_end_bioc(struct btrfs_io_context *bioc, bool async)
>
>   static void btrfs_end_bio(struct bio *bio)
>   {
> -	struct btrfs_io_context *bioc = bio->bi_private;
> +	struct btrfs_io_stripe *stripe = bio->bi_private;
> +	struct btrfs_io_context *bioc = stripe->bioc;
>
>   	if (bio->bi_status) {
>   		atomic_inc(&bioc->error);
>   		if (bio->bi_status == BLK_STS_IOERR ||
>   		    bio->bi_status == BLK_STS_TARGET) {
> -			struct btrfs_device *dev = btrfs_bio(bio)->device;
> -
> -			ASSERT(dev->bdev);
>   			if (btrfs_op(bio) == BTRFS_MAP_WRITE)
> -				btrfs_dev_stat_inc_and_print(dev,
> +				btrfs_dev_stat_inc_and_print(stripe->dev,
>   						BTRFS_DEV_STAT_WRITE_ERRS);
>   			else if (!(bio->bi_opf & REQ_RAHEAD))
> -				btrfs_dev_stat_inc_and_print(dev,
> +				btrfs_dev_stat_inc_and_print(stripe->dev,
>   						BTRFS_DEV_STAT_READ_ERRS);
>   			if (bio->bi_opf & REQ_PREFLUSH)
> -				btrfs_dev_stat_inc_and_print(dev,
> +				btrfs_dev_stat_inc_and_print(stripe->dev,
>   						BTRFS_DEV_STAT_FLUSH_ERRS);
>   		}
>   	}
> @@ -6714,14 +6712,16 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc,
>   	}
>
>   	if (clone) {
> -		bio = btrfs_bio_clone(dev->bdev, orig_bio);
> +		bio = bio_alloc_clone(dev->bdev, orig_bio, GFP_NOFS,
> +				      &fs_bio_set);
>   	} else {
>   		bio = orig_bio;
>   		bio_set_dev(bio, dev->bdev);
> +		btrfs_bio(bio)->device = dev;
>   	}
>
> -	bio->bi_private = bioc;
> -	btrfs_bio(bio)->device = dev;
> +	bioc->stripes[dev_nr].bioc = bioc;
> +	bio->bi_private = &bioc->stripes[dev_nr];
>   	bio->bi_end_io = btrfs_end_bio;
>   	bio->bi_iter.bi_sector = physical >> 9;
>   	/*
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index 28e28b7c48649..825e44c82f2b0 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -396,7 +396,10 @@ static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio)
>
>   struct btrfs_io_stripe {
>   	struct btrfs_device *dev;
> -	u64 physical;
> +	union {
> +		u64 physical;			/* block mapping */
> +		struct btrfs_io_context *bioc;	/* for the endio handler */
> +	};
>   	u64 length; /* only used for discard mappings */

Isn't @length a better candidate?

Since it's only used for discard.

Thanks,
Qu

>   };
>

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 07/10] btrfs: centralize setting REQ_META
  2022-04-25  7:54 ` [PATCH 07/10] btrfs: centralize setting REQ_META Christoph Hellwig
@ 2022-04-25  9:06   ` Qu Wenruo
  0 siblings, 0 replies; 32+ messages in thread
From: Qu Wenruo @ 2022-04-25  9:06 UTC (permalink / raw)
  To: Christoph Hellwig, Josef Bacik, David Sterba, Qu Wenruo
  Cc: Naohiro Aota, linux-btrfs



On 2022/4/25 15:54, Christoph Hellwig wrote:
> Set REQ_META in btrfs_submit_metadata_bio instead of the various callers.
> We'll start relying on this flag inside of btrfs in a bit, and this
> ensures it is always set correctly.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Qu Wenruo <wqu@suse.com>

I tried to iterate through all the corner cases in my head, it looks fine.

We use the inode to determine whether it's metadata, which is fine.
The only metadata that doesn't go through this is the super block, which
has its own function handling it.

Thanks,
Qu
> ---
>   fs/btrfs/disk-io.c   | 2 ++
>   fs/btrfs/extent_io.c | 8 ++++----
>   2 files changed, 6 insertions(+), 4 deletions(-)
>
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 1e6ee7f1a375d..65e680895e628 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -915,6 +915,8 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_
>   	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
>   	blk_status_t ret;
>
> +	bio->bi_opf |= REQ_META;
> +
>   	if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
>   		/*
>   		 * called for a read, do the setup so that checksum validation
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 80b4482c477c6..a14ed9b9dc2d0 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -4589,7 +4589,7 @@ static int write_one_subpage_eb(struct extent_buffer *eb,
>   {
>   	struct btrfs_fs_info *fs_info = eb->fs_info;
>   	struct page *page = eb->pages[0];
> -	unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
> +	unsigned int write_flags = wbc_to_write_flags(wbc);
>   	bool no_dirty_ebs = false;
>   	int ret;
>
> @@ -4634,7 +4634,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
>   {
>   	u64 disk_bytenr = eb->start;
>   	int i, num_pages;
> -	unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
> +	unsigned int write_flags = wbc_to_write_flags(wbc);
>   	int ret = 0;
>
>   	prepare_eb_write(eb);
> @@ -6645,7 +6645,7 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
>   	btrfs_subpage_clear_error(fs_info, page, eb->start, eb->len);
>
>   	btrfs_subpage_start_reader(fs_info, page, eb->start, eb->len);
> -	ret = submit_extent_page(REQ_OP_READ | REQ_META, NULL, &bio_ctrl,
> +	ret = submit_extent_page(REQ_OP_READ, NULL, &bio_ctrl,
>   				 page, eb->start, eb->len,
>   				 eb->start - page_offset(page),
>   				 end_bio_extent_readpage, mirror_num, 0,
> @@ -6752,7 +6752,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
>   			}
>
>   			ClearPageError(page);
> -			err = submit_extent_page(REQ_OP_READ | REQ_META, NULL,
> +			err = submit_extent_page(REQ_OP_READ, NULL,
>   					 &bio_ctrl, page, page_offset(page),
>   					 PAGE_SIZE, 0, end_bio_extent_readpage,
>   					 mirror_num, 0, false);

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25  7:54 ` [PATCH 03/10] btrfs: split btrfs_submit_data_bio Christoph Hellwig
@ 2022-04-25  9:11   ` Qu Wenruo
  2022-04-25  9:19     ` Christoph Hellwig
  0 siblings, 1 reply; 32+ messages in thread
From: Qu Wenruo @ 2022-04-25  9:11 UTC (permalink / raw)
  To: Christoph Hellwig, Josef Bacik, David Sterba, Qu Wenruo
  Cc: Naohiro Aota, linux-btrfs



On 2022/4/25 15:54, Christoph Hellwig wrote:
> Split btrfs_submit_data_bio into one helper for reads and one for writes.

If we're splitting the bio mapping, wouldn't it be better to split by
read/write first, then by data/meta?

Especially for all read bios, we use workqueue to defer to a less strict
context, which is unrelated to data/metadata.

Thanks,
Qu
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   fs/btrfs/ctree.h     |   6 +-
>   fs/btrfs/extent_io.c |  12 ++--
>   fs/btrfs/inode.c     | 131 ++++++++++++++++++++-----------------------
>   3 files changed, 73 insertions(+), 76 deletions(-)
>
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index ec8487e119949..ab9a0cfed7bb0 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -3250,8 +3250,10 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
>   u64 btrfs_file_extent_end(const struct btrfs_path *path);
>
>   /* inode.c */
> -void btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
> -			   int mirror_num, unsigned long bio_flags);
> +void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio,
> +		int mirror_num, unsigned long bio_flags);
> +void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
> +		int mirror_num, unsigned long bio_flags);
>   unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
>   				    u32 bio_offset, struct page *page,
>   				    u64 start, u64 end);
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index f9d6dd310c42b..80b4482c477c6 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -186,11 +186,15 @@ static void submit_one_bio(struct bio *bio, int mirror_num, unsigned long bio_fl
>   	/* Caller should ensure the bio has at least some range added */
>   	ASSERT(bio->bi_iter.bi_size);
>
> -	if (is_data_inode(tree->private_data))
> -		btrfs_submit_data_bio(tree->private_data, bio, mirror_num,
> +	if (!is_data_inode(tree->private_data))
> +		btrfs_submit_metadata_bio(tree->private_data, bio, mirror_num);
> +	else if (btrfs_op(bio) == BTRFS_MAP_WRITE)
> +		btrfs_submit_data_write_bio(tree->private_data, bio, mirror_num,
>   					    bio_flags);
>   	else
> -		btrfs_submit_metadata_bio(tree->private_data, bio, mirror_num);
> +		btrfs_submit_data_read_bio(tree->private_data, bio, mirror_num,
> +					    bio_flags);
> +
>   	/*
>   	 * Above submission hooks will handle the error by ending the bio,
>   	 * which will do the cleanup properly.  So here we should not return
> @@ -2773,7 +2777,7 @@ static blk_status_t submit_data_read_repair(struct inode *inode,
>   		ret = btrfs_repair_one_sector(inode, failed_bio,
>   				bio_offset + offset,
>   				page, pgoff + offset, start + offset,
> -				failed_mirror, btrfs_submit_data_bio);
> +				failed_mirror, btrfs_submit_data_read_bio);
>   		if (!ret) {
>   			/*
>   			 * We have submitted the read repair, the page release
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index b188f724eff2d..4429d831793d5 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -2552,91 +2552,82 @@ static blk_status_t extract_ordered_extent(struct btrfs_inode *inode,
>   	return errno_to_blk_status(ret);
>   }
>
> -/*
> - * extent_io.c submission hook. This does the right thing for csum calculation
> - * on write, or reading the csums from the tree before a read.
> - *
> - * Rules about async/sync submit,
> - * a) read:				sync submit
> - *
> - * b) write without checksum:		sync submit
> - *
> - * c) write with checksum:
> - *    c-1) if bio is issued by fsync:	sync submit
> - *         (sync_writers != 0)
> - *
> - *    c-2) if root is reloc root:	sync submit
> - *         (only in case of buffered IO)
> - *
> - *    c-3) otherwise:			async submit
> - */
> -void btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
> +void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio,
>   			   int mirror_num, unsigned long bio_flags)
>   {
>   	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> -	struct btrfs_root *root = BTRFS_I(inode)->root;
> -	enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
> -	blk_status_t ret = 0;
> -	int skip_sum;
> -	int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
> -
> -	skip_sum = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
> -		test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
> -
> -	if (btrfs_is_free_space_inode(BTRFS_I(inode)))
> -		metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
> +	struct btrfs_inode *bi = BTRFS_I(inode);
> +	blk_status_t ret;
>
>   	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
> -		struct page *page = bio_first_bvec_all(bio)->bv_page;
> -		loff_t file_offset = page_offset(page);
> -
> -		ret = extract_ordered_extent(BTRFS_I(inode), bio, file_offset);
> +		ret = extract_ordered_extent(bi, bio,
> +				page_offset(bio_first_bvec_all(bio)->bv_page));
>   		if (ret)
>   			goto out;
>   	}
>
> -	if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
> -		ret = btrfs_bio_wq_end_io(fs_info, bio, metadata);
> -		if (ret)
> -			goto out;
> -
> -		if (bio_flags & EXTENT_BIO_COMPRESSED) {
> -			/*
> -			 * btrfs_submit_compressed_read will handle completing
> -			 * the bio if there were any errors, so just return
> -			 * here.
> -			 */
> -			btrfs_submit_compressed_read(inode, bio, mirror_num,
> -						     bio_flags);
> -			return;
> -		} else {
> -			/*
> -			 * Lookup bio sums does extra checks around whether we
> -			 * need to csum or not, which is why we ignore skip_sum
> -			 * here.
> -			 */
> -			ret = btrfs_lookup_bio_sums(inode, bio, NULL);
> +	/*
> +	 * Rules for async/sync submit:
> +	 *   a) write without checksum:			sync submit
> +	 *   b) write with checksum:
> +	 *      b-1) if bio is issued by fsync:		sync submit
> +	 *           (sync_writers != 0)
> +	 *      b-2) if root is reloc root:		sync submit
> +	 *           (only in case of buffered IO)
> +	 *      b-3) otherwise:				async submit
> +	 */
> +	if (!(bi->flags & BTRFS_INODE_NODATASUM) &&
> +	    !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) {
> +		if (atomic_read(&bi->sync_writers)) {
> +			ret = btrfs_csum_one_bio(bi, bio, (u64)-1, false);
>   			if (ret)
>   				goto out;
> +		} else if (btrfs_is_data_reloc_root(bi->root)) {
> +			; /* csum items have already been cloned */
> +		} else {
> +			ret = btrfs_wq_submit_bio(inode, bio,
> +					mirror_num, bio_flags, 0,
> +					btrfs_submit_bio_start);
> +			goto out;
>   		}
> -		goto mapit;
> -	} else if (async && !skip_sum) {
> -		/* csum items have already been cloned */
> -		if (btrfs_is_data_reloc_root(root))
> -			goto mapit;
> -		/* we're doing a write, do the async checksumming */
> -		ret = btrfs_wq_submit_bio(inode, bio, mirror_num, bio_flags,
> -					  0, btrfs_submit_bio_start);
> +	}
> +	ret = btrfs_map_bio(fs_info, bio, mirror_num);
> +out:
> +	if (ret) {
> +		bio->bi_status = ret;
> +		bio_endio(bio);
> +	}
> +}
> +
> +void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
> +			   int mirror_num, unsigned long bio_flags)
> +{
> +	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> +	blk_status_t ret;
> +
> +	ret = btrfs_bio_wq_end_io(fs_info, bio,
> +			btrfs_is_free_space_inode(BTRFS_I(inode)) ?
> +			BTRFS_WQ_ENDIO_FREE_SPACE : BTRFS_WQ_ENDIO_DATA);
> +	if (ret)
>   		goto out;
> -	} else if (!skip_sum) {
> -		ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, (u64)-1, false);
> -		if (ret)
> -			goto out;
> +
> +	if (bio_flags & EXTENT_BIO_COMPRESSED) {
> +		/*
> +		 * btrfs_submit_compressed_read will handle completing the bio
> +		 * if there were any errors, so just return here.
> +		 */
> +		btrfs_submit_compressed_read(inode, bio, mirror_num, bio_flags);
> +		return;
>   	}
>
> -mapit:
> +	/*
> +	 * Lookup bio sums does extra checks around whether we need to csum or
> +	 * not, which is why we ignore skip_sum here.
> +	 */
> +	ret = btrfs_lookup_bio_sums(inode, bio, NULL);
> +	if (ret)
> +		goto out;
>   	ret = btrfs_map_bio(fs_info, bio, mirror_num);
> -
>   out:
>   	if (ret) {
>   		bio->bi_status = ret;
> @@ -7909,7 +7900,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
>   		goto map;
>
>   	if (write) {
> -		/* check btrfs_submit_data_bio() for async submit rules */
> +		/* check btrfs_submit_data_write_bio() for async submit rules */
>   		if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers))
>   			return btrfs_wq_submit_bio(inode, bio, 0, 0,
>   					file_offset,

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 09/10] btrfs: refactor btrfs_map_bio
  2022-04-25  8:56   ` Qu Wenruo
@ 2022-04-25  9:17     ` Christoph Hellwig
  2022-04-26 13:24     ` Christoph Hellwig
  1 sibling, 0 replies; 32+ messages in thread
From: Christoph Hellwig @ 2022-04-25  9:17 UTC (permalink / raw)
  To: Qu Wenruo
  Cc: Christoph Hellwig, Josef Bacik, David Sterba, Naohiro Aota, linux-btrfs

On Mon, Apr 25, 2022 at 04:56:21PM +0800, Qu Wenruo wrote:
>> +	if (!dev || !dev->bdev ||
>> +	    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
>> +	    (btrfs_op(orig_bio) == BTRFS_MAP_WRITE &&
>> +	     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
>> +		atomic_inc(&bioc->error);
>> +		if (atomic_dec_and_test(&bioc->stripes_pending))
>> +			btrfs_end_bioc(bioc, false);
>
> The bioc is allocated by btrfs_map_block(), but freed inside a helper.
>
> This makes the allocation and free happening at different levels, not sure 
> if it's a good idea.

It is always freed by the end_io handler, this helper just decrements
the pending count and potentially invokes the orig bio end I/O handling
if we never made it to a bio submission for the pending mirror.

> I doubt this fallback would improve the readability.
>
> But you're also right, the original check condition for the RAID56 branch 
> is also not ideal.

I think it helps.  But the next series will do away with this anyway.

>>   	}
>>   -	for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
>> -		dev = bioc->stripes[dev_nr].dev;
>> -		if (!dev || !dev->bdev || test_bit(BTRFS_DEV_STATE_MISSING,
>> -						   &dev->dev_state) ||
>> -		    (btrfs_op(first_bio) == BTRFS_MAP_WRITE &&
>> -		    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
>
> Maybe just make the complex if () condition into a helper?
>
> In fact I see some other locations uses similar complex expressions to 
> check it's a missing device.
>
> Thus it should help a lot of call sites.

I'll see if a helper could be useful here.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 10/10] btrfs: do not allocate a btrfs_bio for low-level bios
  2022-04-25  9:01   ` Qu Wenruo
@ 2022-04-25  9:18     ` Christoph Hellwig
  0 siblings, 0 replies; 32+ messages in thread
From: Christoph Hellwig @ 2022-04-25  9:18 UTC (permalink / raw)
  To: Qu Wenruo
  Cc: Christoph Hellwig, Josef Bacik, David Sterba, Qu Wenruo,
	Naohiro Aota, linux-btrfs

On Mon, Apr 25, 2022 at 05:01:11PM +0800, Qu Wenruo wrote:
>>   struct btrfs_io_stripe {
>>   	struct btrfs_device *dev;
>> -	u64 physical;
>> +	union {
>> +		u64 physical;			/* block mapping */
>> +		struct btrfs_io_context *bioc;	/* for the endio handler */
>> +	};
>>   	u64 length; /* only used for discard mappings */
>
> Isn't @length a better candidate?
>
> Since it's only used for discard.

I have another patch to be submitted that removes length entirely
by not using btrfs_io_stripe for discards.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25  9:11   ` Qu Wenruo
@ 2022-04-25  9:19     ` Christoph Hellwig
  2022-04-25  9:37       ` Qu Wenruo
  0 siblings, 1 reply; 32+ messages in thread
From: Christoph Hellwig @ 2022-04-25  9:19 UTC (permalink / raw)
  To: Qu Wenruo
  Cc: Christoph Hellwig, Josef Bacik, David Sterba, Qu Wenruo,
	Naohiro Aota, linux-btrfs

On Mon, Apr 25, 2022 at 05:11:15PM +0800, Qu Wenruo wrote:
>
>
> On 2022/4/25 15:54, Christoph Hellwig wrote:
>> Split btrfs_submit_data_bio into one helper for reads and one for writes.
>
> If we're splitting the bio mapping, wouldn't it be better to split by
> read/write first, then by data/meta?
>
> Especially for all read bios, we use workqueue to defer to a less strict
> context, which is unrelated to data/metadata.

Splitting the read vs write handling entirely and not allocating a
btrfs_bio for writes will be the next series after this one.  You're
getting ahead of me :)

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25  9:19     ` Christoph Hellwig
@ 2022-04-25  9:37       ` Qu Wenruo
  2022-04-25 11:09         ` Christoph Hellwig
  0 siblings, 1 reply; 32+ messages in thread
From: Qu Wenruo @ 2022-04-25  9:37 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Josef Bacik, David Sterba, Qu Wenruo, Naohiro Aota, linux-btrfs



On 2022/4/25 17:19, Christoph Hellwig wrote:
> On Mon, Apr 25, 2022 at 05:11:15PM +0800, Qu Wenruo wrote:
>>
>>
>> On 2022/4/25 15:54, Christoph Hellwig wrote:
>>> Split btrfs_submit_data_bio into one helper for reads and one for writes.
>>
>> If we're splitting the bio mapping, wouldn't it be better to split by
>> read/write first, then by data/meta?
>>
>> Especially for all read bios, we use workqueue to defer to a less strict
>> context, which is unrelated to data/metadata.
>
> Splitting the read vs write handling entirely and not allocating a
> btrfs_bio for writes will be the next series after this one.  You're
> getting ahead of me :)

Oh, please don't completely get rid of btrfs_bio, even just for writes.

The btrfs_bio::iter is pretty important for us to grab the original
logical bytenr of a bio.
As bio::bi_iter can be modified by lower level (does dm modify it
too?), or btrfs itself.

In fact, my incoming updated btrfs repair code heavily relies on
btrfs_bio::iter, both read and write, to grab the original logical
bytenr of the bio.

Thanks,
Qu

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25  9:37       ` Qu Wenruo
@ 2022-04-25 11:09         ` Christoph Hellwig
  2022-04-25 11:16           ` Qu Wenruo
  0 siblings, 1 reply; 32+ messages in thread
From: Christoph Hellwig @ 2022-04-25 11:09 UTC (permalink / raw)
  To: Qu Wenruo
  Cc: Christoph Hellwig, Josef Bacik, David Sterba, Qu Wenruo,
	Naohiro Aota, linux-btrfs

On Mon, Apr 25, 2022 at 05:37:40PM +0800, Qu Wenruo wrote:
> Oh, please don't completely get rid of btrfs_bio, even just for writes.
>
> The btrfs_bio::iter is pretty important for us to grab the original
> logical bytenr of a bio.
> As bio::bi_iter can be modified by lower level (does dm modifies it
> too?), or btrfs itself.
>
> In fact, my incoming updated btrfs repair repair code heavily rely on
> btrfs_bio::iter, both read and write, to grab the original logical
> bytenr of the bio.

Then it's doing the wrong thing.  I actually have a series to remove
it entirely.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25 11:09         ` Christoph Hellwig
@ 2022-04-25 11:16           ` Qu Wenruo
  2022-04-25 11:19             ` Christoph Hellwig
  0 siblings, 1 reply; 32+ messages in thread
From: Qu Wenruo @ 2022-04-25 11:16 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Josef Bacik, David Sterba, Qu Wenruo, Naohiro Aota, linux-btrfs



On 2022/4/25 19:09, Christoph Hellwig wrote:
> On Mon, Apr 25, 2022 at 05:37:40PM +0800, Qu Wenruo wrote:
>> Oh, please don't completely get rid of btrfs_bio, even just for writes.
>>
>> The btrfs_bio::iter is pretty important for us to grab the original
>> logical bytenr of a bio.
>> As bio::bi_iter can be modified by lower level (does dm modifies it
>> too?), or btrfs itself.
>>
>> In fact, my incoming updated btrfs repair repair code heavily rely on
>> btrfs_bio::iter, both read and write, to grab the original logical
>> bytenr of the bio.
>
> Then it's doing the wrong thing.  I actually have a series to remove
> it entirely.

I'm wondering how would you iterate the bvec of a cloned bio then.

Regular bio_for_each_segment_all() will just trigger warning on cloned bio.
If you go something like chained bio, then any error would mark the
whole range error, and in fact my repair work is going to make
read-repair work with chained bio, thus I have to directly iterate
cloned bio anyway.

Just bio_for_each_segment()? That bi_iter is no longer reliable, just
btrfs_map_block() can modify it.

Anyway, what I really need is just a proper way to:

- Iterate bvecs of a clone bio
- Grab the original logical bytenr from a bio

If you can do that with extra members, I'm fine with alternative ways.

Thanks,
Qu

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25 11:16           ` Qu Wenruo
@ 2022-04-25 11:19             ` Christoph Hellwig
  2022-04-25 11:31               ` Qu Wenruo
  0 siblings, 1 reply; 32+ messages in thread
From: Christoph Hellwig @ 2022-04-25 11:19 UTC (permalink / raw)
  To: Qu Wenruo
  Cc: Christoph Hellwig, Josef Bacik, David Sterba, Qu Wenruo,
	Naohiro Aota, linux-btrfs

On Mon, Apr 25, 2022 at 07:16:21PM +0800, Qu Wenruo wrote:
> I'm wondering how would you iterate the bvec of a cloned bio then.

We just stop doing that.  All iteration should be on the originally
constructed bio.  I've fixed the two places that are doing that right
in work in progress patches.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25 11:19             ` Christoph Hellwig
@ 2022-04-25 11:31               ` Qu Wenruo
  2022-04-25 11:34                 ` Christoph Hellwig
  0 siblings, 1 reply; 32+ messages in thread
From: Qu Wenruo @ 2022-04-25 11:31 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Josef Bacik, David Sterba, Qu Wenruo, Naohiro Aota, linux-btrfs



On 2022/4/25 19:19, Christoph Hellwig wrote:
> On Mon, Apr 25, 2022 at 07:16:21PM +0800, Qu Wenruo wrote:
>> I'm wondering how would you iterate the bvec of a cloned bio then.
>
> We just stop doing that.  All iteration should be on the originally
> constructed bio.  I've fixed the two places that are doing that right
> in work in progress patches.

Then it comes against the btrfs read time repair.

Currently we split bio to make sure we never need to split bio at
btrfs_map_bio() time.

But this is against common layer separation.

And we really want the ability to read a partially corrupted bio (some
part matches csum, some doesn't), no matter if the bio is cloned or not.

Especially, we already have cloned bio which needs repair (for dio).

Thanks,
Qu

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25 11:31               ` Qu Wenruo
@ 2022-04-25 11:34                 ` Christoph Hellwig
  2022-04-25 11:40                   ` Qu Wenruo
  0 siblings, 1 reply; 32+ messages in thread
From: Christoph Hellwig @ 2022-04-25 11:34 UTC (permalink / raw)
  To: Qu Wenruo
  Cc: Christoph Hellwig, Josef Bacik, David Sterba, Qu Wenruo,
	Naohiro Aota, linux-btrfs

On Mon, Apr 25, 2022 at 07:31:08PM +0800, Qu Wenruo wrote:
> Then it comes against the btrfs read time repair.
>
> Currently we split bio to make sure we never need to split bio at
> btrfs_map_bio() time.
>
> But this is against common layer separation.
>
> And we really want the ability to read a partially corrupted bio (some
> part matches csum, some doesn't), no matter if the bio is cloned or not.
>
> Especially, we already have cloned bio which needs repair (for dio).

I have a barely working version based on your patches to split the
bio in btrfs_map_bio that solves this problem.  But the next step
only removes the saved iter for writes, where the only user is
index_one_bio.  And the fix for that is pretty trivial :)

---
From c8fe61748ebc583a7f57c8e5de79f92428e5717c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 25 Apr 2022 13:23:54 +0200
Subject: btrfs: stop looking at btrfs_bio->iter in index_one_bio

All the bios that index_one_bio operates on are the bios submitted by the
upper layer.  These are never resubmitted to an actual device by the
raid56 code, and thus the iter never changes from the initial state.
Thus we can always just use bi_iter directly as it will be the same as
the saved copy.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/raid56.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 1a3c1a9b10d0b..8b40353bb89db 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1218,9 +1218,6 @@ static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
 	u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
 		     rbio->bioc->raid_map[0];
 
-	if (bio_flagged(bio, BIO_CLONED))
-		bio->bi_iter = btrfs_bio(bio)->iter;
-
 	bio_for_each_segment(bvec, bio, iter) {
 		u32 bvec_offset;
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25 11:34                 ` Christoph Hellwig
@ 2022-04-25 11:40                   ` Qu Wenruo
  2022-04-25 11:43                     ` Qu Wenruo
  2022-04-25 17:17                     ` Christoph Hellwig
  0 siblings, 2 replies; 32+ messages in thread
From: Qu Wenruo @ 2022-04-25 11:40 UTC (permalink / raw)
  To: Christoph Hellwig, Qu Wenruo
  Cc: Josef Bacik, David Sterba, Naohiro Aota, linux-btrfs



On 2022/4/25 19:34, Christoph Hellwig wrote:
> On Mon, Apr 25, 2022 at 07:31:08PM +0800, Qu Wenruo wrote:
>> Then it comes against the btrfs read time repair.
>>
>> Currently we split bio to make sure we never need to split bio at
>> btrfs_map_bio() time.
>>
>> But this is against common layer separation.
>>
>> And we really want the ability to read a partially corrupted bio (some
>> part matches csum, some doesn't), no matter if the bio is cloned or not.
>>
>> Especially, we already have cloned bio which needs repair (for dio).
> 
> I have a barely working version based on your patches to split the
> bio in btrfs_bio_map that solves this problem.  But the next step
> only removed the save iter for writes, where the only user is
> index_one_bio.  And the fix for that is pretty trivial :)

That's only for RAID56, aren't you going to remove btrfs_bio usage 
completely for all write (including buffered, non-compressing write)?

Thanks,
Qu
> 
> ---
>  From c8fe61748ebc583a7f57c8e5de79f92428e5717c Mon Sep 17 00:00:00 2001
> From: Christoph Hellwig <hch@lst.de>
> Date: Mon, 25 Apr 2022 13:23:54 +0200
> Subject: btrfs: stop looking at btrfs_bio->iter in index_one_bio
> 
> All the bios that index_one_bio operates on are the bios submitted by the
> upper layer.  These are never resubmitted to an actual device by the
> raid56 code, and thus the iter never changes from the initial state.
> Thus we can always just use bi_iter directly as it will be the same as
> the saved copy.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   fs/btrfs/raid56.c | 3 ---
>   1 file changed, 3 deletions(-)
> 
> diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
> index 1a3c1a9b10d0b..8b40353bb89db 100644
> --- a/fs/btrfs/raid56.c
> +++ b/fs/btrfs/raid56.c
> @@ -1218,9 +1218,6 @@ static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
>   	u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
>   		     rbio->bioc->raid_map[0];
>   
> -	if (bio_flagged(bio, BIO_CLONED))
> -		bio->bi_iter = btrfs_bio(bio)->iter;
> -
>   	bio_for_each_segment(bvec, bio, iter) {
>   		u32 bvec_offset;
>   


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25 11:40                   ` Qu Wenruo
@ 2022-04-25 11:43                     ` Qu Wenruo
  2022-04-25 17:17                     ` Christoph Hellwig
  1 sibling, 0 replies; 32+ messages in thread
From: Qu Wenruo @ 2022-04-25 11:43 UTC (permalink / raw)
  To: Qu Wenruo, Christoph Hellwig
  Cc: Josef Bacik, David Sterba, Naohiro Aota, linux-btrfs



On 2022/4/25 19:40, Qu Wenruo wrote:
>
>
> On 2022/4/25 19:34, Christoph Hellwig wrote:
>> On Mon, Apr 25, 2022 at 07:31:08PM +0800, Qu Wenruo wrote:
>>> Then it comes against the btrfs read time repair.
>>>
>>> Currently we split bio to make sure we never need to split bio at
>>> btrfs_map_bio() time.
>>>
>>> But this is against common layer separation.
>>>
>>> And we really want the ability to read a partially corrupted bio (some
>>> part matches csum, some doesn't), no matter if the bio is cloned or not.
>>>
>>> Especially, we already have cloned bio which needs repair (for dio).
>>
>> I have a barely working version based on your patches to split the
>> bio in btrfs_bio_map that solves this problem.  But the next step
>> only removed the save iter for writes, where the only user is
>> index_one_bio.  And the fix for that is pretty trivial :)
>
> That's only for RAID56, aren't you going to remove btrfs_bio usage
> completely for all write (including buffered, non-compressing write)?

Wait, are you going to use some method like this to avoid
chained/cloned bios while still splitting the bio?

	page = grab_page_from_existing_bio();
	pgoff = grab_pgoff_from_existing_bio()
	new_bio = bio_alloc()
	bio_add_page(new_bio, page, sectorsize, pgoff);

So that you can create a regular new, non-cloned bio, but still using
the same page/pgoff from an existing bio...

Thanks,
Qu
>
> Thanks,
> Qu
>>
>> ---
>>  From c8fe61748ebc583a7f57c8e5de79f92428e5717c Mon Sep 17 00:00:00 2001
>> From: Christoph Hellwig <hch@lst.de>
>> Date: Mon, 25 Apr 2022 13:23:54 +0200
>> Subject: btrfs: stop looking at btrfs_bio->iter in index_one_bio
>>
>> All the bios that index_one_bio operates on are the bios submitted by the
>> upper layer.  These are never resubmitted to an actual device by the
>> raid56 code, and thus the iter never changes from the initial state.
>> Thus we can always just use bi_iter directly as it will be the same as
>> the saved copy.
>>
>> Signed-off-by: Christoph Hellwig <hch@lst.de>
>> ---
>>   fs/btrfs/raid56.c | 3 ---
>>   1 file changed, 3 deletions(-)
>>
>> diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
>> index 1a3c1a9b10d0b..8b40353bb89db 100644
>> --- a/fs/btrfs/raid56.c
>> +++ b/fs/btrfs/raid56.c
>> @@ -1218,9 +1218,6 @@ static void index_one_bio(struct btrfs_raid_bio
>> *rbio, struct bio *bio)
>>       u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
>>                rbio->bioc->raid_map[0];
>> -    if (bio_flagged(bio, BIO_CLONED))
>> -        bio->bi_iter = btrfs_bio(bio)->iter;
>> -
>>       bio_for_each_segment(bvec, bio, iter) {
>>           u32 bvec_offset;
>

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25 11:40                   ` Qu Wenruo
  2022-04-25 11:43                     ` Qu Wenruo
@ 2022-04-25 17:17                     ` Christoph Hellwig
  2022-04-26  1:24                       ` Qu Wenruo
  1 sibling, 1 reply; 32+ messages in thread
From: Christoph Hellwig @ 2022-04-25 17:17 UTC (permalink / raw)
  To: Qu Wenruo
  Cc: Christoph Hellwig, Qu Wenruo, Josef Bacik, David Sterba,
	Naohiro Aota, linux-btrfs

On Mon, Apr 25, 2022 at 07:40:40PM +0800, Qu Wenruo wrote:
> That's only for RAID56, aren't you going to remove btrfs_bio usage 
> completely for all write (including buffered, non-compressing write)?

There are just two uses of bbio->iter in the current btrfs for-next
tree.  One is index_one_bio, which is removed by the patch I posted,
and the other one is btrfs_check_read_dio_bio, which is clearly read 
specific.

FYI, this is the next batch that's currently being tested:

http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/btrfs-bio-cleanup-part4

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 03/10] btrfs: split btrfs_submit_data_bio
  2022-04-25 17:17                     ` Christoph Hellwig
@ 2022-04-26  1:24                       ` Qu Wenruo
  0 siblings, 0 replies; 32+ messages in thread
From: Qu Wenruo @ 2022-04-26  1:24 UTC (permalink / raw)
  To: Christoph Hellwig, Qu Wenruo
  Cc: Josef Bacik, David Sterba, Naohiro Aota, linux-btrfs



On 2022/4/26 01:17, Christoph Hellwig wrote:
> On Mon, Apr 25, 2022 at 07:40:40PM +0800, Qu Wenruo wrote:
>> That's only for RAID56, aren't you going to remove btrfs_bio usage
>> completely for all write (including buffered, non-compressing write)?
>
> There are just two uses of bbio->iter in the current btrfs for-net
> tree.  One is index_one_bio, which is removed by the patch I posted,
> and the other one is btrfs_check_read_dio_bio, which is clearly read
> specific.
>
> FYI, this is the next batch that's currently being tested:
>
> http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/btrfs-bio-cleanup-part4

My bad, I forgot that my *READ* repair is only for reads, so there is
no need to use btrfs_bio at write time.

So your cleanup is completely fine, my head just short-circuited last night.

Thanks,
Qu

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 01/10] btrfs: move more work into btrfs_end_bioc
  2022-04-25  7:54 ` [PATCH 01/10] btrfs: move more work into btrfs_end_bioc Christoph Hellwig
@ 2022-04-26  7:19   ` Johannes Thumshirn
  0 siblings, 0 replies; 32+ messages in thread
From: Johannes Thumshirn @ 2022-04-26  7:19 UTC (permalink / raw)
  To: Christoph Hellwig, Josef Bacik, David Sterba, Qu Wenruo
  Cc: Naohiro Aota, linux-btrfs

Meh I guess I need to rework some of my RAID stuff now.
Anyways, the end result looks way better with this.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 02/10] btrfs: cleanup btrfs_submit_dio_bio
  2022-04-25  7:54 ` [PATCH 02/10] btrfs: cleanup btrfs_submit_dio_bio Christoph Hellwig
  2022-04-25  8:45   ` Qu Wenruo
@ 2022-04-26  7:21   ` Johannes Thumshirn
  1 sibling, 0 replies; 32+ messages in thread
From: Johannes Thumshirn @ 2022-04-26  7:21 UTC (permalink / raw)
  To: Christoph Hellwig, Josef Bacik, David Sterba, Qu Wenruo
  Cc: Naohiro Aota, linux-btrfs

Looks good,
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 09/10] btrfs: refactor btrfs_map_bio
  2022-04-25  8:56   ` Qu Wenruo
  2022-04-25  9:17     ` Christoph Hellwig
@ 2022-04-26 13:24     ` Christoph Hellwig
  1 sibling, 0 replies; 32+ messages in thread
From: Christoph Hellwig @ 2022-04-26 13:24 UTC (permalink / raw)
  To: Qu Wenruo
  Cc: Christoph Hellwig, Josef Bacik, David Sterba, Naohiro Aota, linux-btrfs

On Mon, Apr 25, 2022 at 04:56:21PM +0800, Qu Wenruo wrote:
>>   -	for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
>> -		dev = bioc->stripes[dev_nr].dev;
>> -		if (!dev || !dev->bdev || test_bit(BTRFS_DEV_STATE_MISSING,
>> -						   &dev->dev_state) ||
>> -		    (btrfs_op(first_bio) == BTRFS_MAP_WRITE &&
>> -		    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
>
> Maybe just make the complex if () condition into a helper?
>
> In fact I see some other locations uses similar complex expressions to 
> check it's a missing device.

So I looked at this a bit, and while there's a fair amount of checks
for BTRFS_DEV_STATE_MISSING, most of the conditions look slightly
different.  So for now I'd like to skip that cleanup for this series.

^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2022-04-26 13:24 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-25  7:54 cleanup btrfs bio handling, part 2 Christoph Hellwig
2022-04-25  7:54 ` [PATCH 01/10] btrfs: move more work into btrfs_end_bioc Christoph Hellwig
2022-04-26  7:19   ` Johannes Thumshirn
2022-04-25  7:54 ` [PATCH 02/10] btrfs: cleanup btrfs_submit_dio_bio Christoph Hellwig
2022-04-25  8:45   ` Qu Wenruo
2022-04-26  7:21   ` Johannes Thumshirn
2022-04-25  7:54 ` [PATCH 03/10] btrfs: split btrfs_submit_data_bio Christoph Hellwig
2022-04-25  9:11   ` Qu Wenruo
2022-04-25  9:19     ` Christoph Hellwig
2022-04-25  9:37       ` Qu Wenruo
2022-04-25 11:09         ` Christoph Hellwig
2022-04-25 11:16           ` Qu Wenruo
2022-04-25 11:19             ` Christoph Hellwig
2022-04-25 11:31               ` Qu Wenruo
2022-04-25 11:34                 ` Christoph Hellwig
2022-04-25 11:40                   ` Qu Wenruo
2022-04-25 11:43                     ` Qu Wenruo
2022-04-25 17:17                     ` Christoph Hellwig
2022-04-26  1:24                       ` Qu Wenruo
2022-04-25  7:54 ` [PATCH 04/10] btrfs: don't double-defer bio completions for compressed reads Christoph Hellwig
2022-04-25  7:54 ` [PATCH 05/10] btrfs: defer I/O completion based on the btrfs_raid_bio Christoph Hellwig
2022-04-25  7:54 ` [PATCH 06/10] btrfs: don't use btrfs_bio_wq_end_io for compressed writes Christoph Hellwig
2022-04-25  7:54 ` [PATCH 07/10] btrfs: centralize setting REQ_META Christoph Hellwig
2022-04-25  9:06   ` Qu Wenruo
2022-04-25  7:54 ` [PATCH 08/10] btrfs: remove btrfs_end_io_wq Christoph Hellwig
2022-04-25  7:54 ` [PATCH 09/10] btrfs: refactor btrfs_map_bio Christoph Hellwig
2022-04-25  8:56   ` Qu Wenruo
2022-04-25  9:17     ` Christoph Hellwig
2022-04-26 13:24     ` Christoph Hellwig
2022-04-25  7:54 ` [PATCH 10/10] btrfs: do not allocate a btrfs_bio for low-level bios Christoph Hellwig
2022-04-25  9:01   ` Qu Wenruo
2022-04-25  9:18     ` Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.