All of lore.kernel.org
 help / color / mirror / Atom feed
From: Josef Bacik <josef@toxicpanda.com>
To: Christoph Hellwig <hch@lst.de>
Cc: Chris Mason <clm@fb.com>, David Sterba <dsterba@suse.com>,
	Damien Le Moal <damien.lemoal@wdc.com>,
	Naohiro Aota <naohiro.aota@wdc.com>,
	Johannes Thumshirn <johannes.thumshirn@wdc.com>,
	Qu Wenruo <wqu@suse.com>, Jens Axboe <axboe@kernel.dk>,
	"Darrick J. Wong" <djwong@kernel.org>,
	linux-block@vger.kernel.org, linux-btrfs@vger.kernel.org,
	linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH 23/34] btrfs: allow btrfs_submit_bio to split bios
Date: Wed, 25 Jan 2023 16:51:16 -0500	[thread overview]
Message-ID: <Y9GkVONZJFXVe8AH@localhost.localdomain> (raw)
In-Reply-To: <20230121065031.1139353-24-hch@lst.de>

On Sat, Jan 21, 2023 at 07:50:20AM +0100, Christoph Hellwig wrote:
> Currently the I/O submitters have to split bios according to the
> chunk stripe boundaries.  This leads to extra lookups in the extent
> trees and a lot of boilerplate code.
> 
> To drop this requirement, split the bio when __btrfs_map_block
> returns a mapping that is smaller than the requested size and
> keep a count of pending bios in the original btrfs_bio so that
> the upper level completion is only invoked when all clones have
> completed.
> 
> Based on a patch from Qu Wenruo.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Josef Bacik <josef@toxicpanda.com>
> Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> Reviewed-by: Qu Wenruo <wqu@suse.com>
> ---
>  fs/btrfs/bio.c | 108 ++++++++++++++++++++++++++++++++++++++++---------
>  fs/btrfs/bio.h |   1 +
>  2 files changed, 91 insertions(+), 18 deletions(-)
> 
> diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
> index c7522ac7e0e71c..ff42b783902140 100644
> --- a/fs/btrfs/bio.c
> +++ b/fs/btrfs/bio.c
> @@ -17,6 +17,7 @@
>  #include "file-item.h"
>  
>  static struct bio_set btrfs_bioset;
> +static struct bio_set btrfs_clone_bioset;
>  static struct bio_set btrfs_repair_bioset;
>  static mempool_t btrfs_failed_bio_pool;
>  
> @@ -38,6 +39,7 @@ static inline void btrfs_bio_init(struct btrfs_bio *bbio,
>  	bbio->inode = inode;
>  	bbio->end_io = end_io;
>  	bbio->private = private;
> +	atomic_set(&bbio->pending_ios, 1);
>  }
>  
>  /*
> @@ -75,6 +77,58 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
>  	return bio;
>  }
>  
> +static struct bio *btrfs_split_bio(struct bio *orig, u64 map_length)
> +{
> +	struct btrfs_bio *orig_bbio = btrfs_bio(orig);
> +	struct bio *bio;
> +
> +	bio = bio_split(orig, map_length >> SECTOR_SHIFT, GFP_NOFS,
> +			&btrfs_clone_bioset);
> +	btrfs_bio_init(btrfs_bio(bio), orig_bbio->inode, NULL, orig_bbio);
> +
> +	btrfs_bio(bio)->file_offset = orig_bbio->file_offset;
> +	if (!(orig->bi_opf & REQ_BTRFS_ONE_ORDERED))
> +		orig_bbio->file_offset += map_length;
> +
> +	atomic_inc(&orig_bbio->pending_ios);
> +	return bio;
> +}
> +
> +static void btrfs_orig_write_end_io(struct bio *bio);
> +static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio,
> +				       struct btrfs_bio *orig_bbio)
> +{
> +	/*
> +	 * For writes btrfs tolerates nr_mirrors - 1 write failures, so we
> +	 * can't just blindly propagate a write failure here.
> +	 * Instead increment the error count in the original I/O context so
> +	 * that it is guaranteed to be larger than the error tolerance.
> +	 */
> +	if (bbio->bio.bi_end_io == &btrfs_orig_write_end_io) {
> +		struct btrfs_io_stripe *orig_stripe = orig_bbio->bio.bi_private;
> +		struct btrfs_io_context *orig_bioc = orig_stripe->bioc;
> +
> +		atomic_add(orig_bioc->max_errors, &orig_bioc->error);
> +	} else {
> +		orig_bbio->bio.bi_status = bbio->bio.bi_status;
> +	}
> +}
> +
> +static void btrfs_orig_bbio_end_io(struct btrfs_bio *bbio)
> +{
> +	if (bbio->bio.bi_pool == &btrfs_clone_bioset) {
> +		struct btrfs_bio *orig_bbio = bbio->private;
> +
> +		if (bbio->bio.bi_status)
> +			btrfs_bbio_propagate_error(bbio, orig_bbio);
> +		bio_put(&bbio->bio);
> +		bbio = orig_bbio;
> +	}
> +
> +	if (atomic_dec_and_test(&bbio->pending_ios))
> +		bbio->end_io(bbio);
> +}
> +
>  static int next_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
>  {
>  	if (cur_mirror == fbio->num_copies)
> @@ -92,7 +146,7 @@ static int prev_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
>  static void btrfs_repair_done(struct btrfs_failed_bio *fbio)
>  {
>  	if (atomic_dec_and_test(&fbio->repair_count)) {
> -		fbio->bbio->end_io(fbio->bbio);
> +		btrfs_orig_bbio_end_io(fbio->bbio);
>  		mempool_free(fbio, &btrfs_failed_bio_pool);
>  	}
>  }
> @@ -232,7 +286,7 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio,
>  	if (unlikely(fbio))
>  		btrfs_repair_done(fbio);
>  	else
> -		bbio->end_io(bbio);
> +		btrfs_orig_bbio_end_io(bbio);
>  }
>  
>  static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
> @@ -286,7 +340,7 @@ static void btrfs_simple_end_io(struct bio *bio)
>  	} else {
>  		if (bio_op(bio) == REQ_OP_ZONE_APPEND)
>  			btrfs_record_physical_zoned(bbio);
> -		bbio->end_io(bbio);
> +		btrfs_orig_bbio_end_io(bbio);
>  	}
>  }
>  
> @@ -300,7 +354,7 @@ static void btrfs_raid56_end_io(struct bio *bio)
>  	if (bio_op(bio) == REQ_OP_READ && !(bbio->bio.bi_opf & REQ_META))
>  		btrfs_check_read_bio(bbio, NULL);
>  	else
> -		bbio->end_io(bbio);
> +		btrfs_orig_bbio_end_io(bbio);
>  
>  	btrfs_put_bioc(bioc);
>  }
> @@ -327,7 +381,7 @@ static void btrfs_orig_write_end_io(struct bio *bio)
>  	else
>  		bio->bi_status = BLK_STS_OK;
>  
> -	bbio->end_io(bbio);
> +	btrfs_orig_bbio_end_io(bbio);
>  	btrfs_put_bioc(bioc);
>  }
>  
> @@ -492,7 +546,7 @@ static void run_one_async_done(struct btrfs_work *work)
>  
>  	/* If an error occurred we just want to clean up the bio and move on */
>  	if (bio->bi_status) {
> -		btrfs_bio_end_io(async->bbio, bio->bi_status);
> +		btrfs_orig_bbio_end_io(async->bbio);
>  		return;
>  	}
>  
> @@ -567,8 +621,8 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
>  	return true;
>  }
>  
> -void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
> -		      int mirror_num)
> +static bool btrfs_submit_chunk(struct btrfs_fs_info *fs_info, struct bio *bio,
> +			       int mirror_num)
>  {
>  	struct btrfs_bio *bbio = btrfs_bio(bio);
>  	u64 logical = bio->bi_iter.bi_sector << 9;
> @@ -587,11 +641,10 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
>  		goto fail;
>  	}
>  
> +	map_length = min(map_length, length);
>  	if (map_length < length) {
> -		btrfs_crit(fs_info,
> -			   "mapping failed logical %llu bio len %llu len %llu",
> -			   logical, length, map_length);
> -		BUG();
> +		bio = btrfs_split_bio(bio, map_length);
> +		bbio = btrfs_bio(bio);
>  	}
>  
>  	/*
> @@ -602,14 +655,14 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
>  		bbio->saved_iter = bio->bi_iter;
>  		ret = btrfs_lookup_bio_sums(bbio);
>  		if (ret)
> -			goto fail;
> +			goto fail_put_bio;
>  	}
>  
>  	if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
>  		if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
>  			ret = btrfs_extract_ordered_extent(btrfs_bio(bio));
>  			if (ret)
> -				goto fail;
> +				goto fail_put_bio;
>  		}
>  
>  		/*
> @@ -621,20 +674,33 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
>  		    !btrfs_is_data_reloc_root(bbio->inode->root)) {
>  			if (should_async_write(bbio) &&
>  			    btrfs_wq_submit_bio(bbio, bioc, &smap, mirror_num))
> -				return;
> +				goto done;
>  
>  			ret = btrfs_bio_csum(bbio);
>  			if (ret)
> -				goto fail;
> +				goto fail_put_bio;
>  		}
>  	}
>  
>  	__btrfs_submit_bio(bio, bioc, &smap, mirror_num);
> -	return;
> +done:
> +	return map_length == length;
>  
> +fail_put_bio:
> +	if (map_length < length)
> +		bio_put(bio);

This is causing a panic in btrfs/125 because you set bbio to
btrfs_bio(split_bio), which has a NULL end_io.  You need something like the
following so that we're ending the correct bbio.  Thanks,

Josef

diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 5d4b67fc44f4..f3a357c48e69 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -607,6 +607,7 @@ static bool btrfs_submit_chunk(struct btrfs_fs_info *fs_info, struct bio *bio,
 			       int mirror_num)
 {
 	struct btrfs_bio *bbio = btrfs_bio(bio);
+	struct btrfs_bio *orig_bbio = bbio;
 	u64 logical = bio->bi_iter.bi_sector << 9;
 	u64 length = bio->bi_iter.bi_size;
 	u64 map_length = length;
@@ -673,7 +674,7 @@ static bool btrfs_submit_chunk(struct btrfs_fs_info *fs_info, struct bio *bio,
 		bio_put(bio);
 fail:
 	btrfs_bio_counter_dec(fs_info);
-	btrfs_bio_end_io(bbio, ret);
+	btrfs_bio_end_io(orig_bbio, ret);
 	/* Do not submit another chunk */
 	return true;
 }

  reply	other threads:[~2023-01-25 21:52 UTC|newest]

Thread overview: 80+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-01-21  6:49 consolidate btrfs checksumming, repair and bio splitting v4 Christoph Hellwig
2023-01-21  6:49 ` [PATCH 01/34] block: export bio_split_rw Christoph Hellwig
2023-01-21  6:49 ` [PATCH 02/34] btrfs: better document struct btrfs_bio Christoph Hellwig
2023-01-22 10:19   ` Anand Jain
2023-01-23 15:25   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 03/34] btrfs: add a btrfs_inode pointer to " Christoph Hellwig
2023-01-22 10:20   ` Anand Jain
2023-01-23 15:26   ` Johannes Thumshirn
2023-01-23 15:47   ` Johannes Thumshirn
2023-03-07  1:44   ` Qu Wenruo
2023-03-07 14:41     ` Christoph Hellwig
2023-03-07 22:21       ` Qu Wenruo
2023-03-08  6:04         ` Qu Wenruo
2023-03-08 14:28           ` Christoph Hellwig
2023-03-09  0:08             ` Qu Wenruo
2023-03-09  9:31               ` Christoph Hellwig
2023-03-09 10:32                 ` Qu Wenruo
2023-03-09 15:18                   ` Christoph Hellwig
2023-01-21  6:50 ` [PATCH 04/34] btrfs: remove the direct I/O read checksum lookup optimization Christoph Hellwig
2023-01-23 15:59   ` Johannes Thumshirn
2023-01-24 14:55   ` Anand Jain
2023-01-24 19:55     ` Christoph Hellwig
2023-01-25  7:42       ` Anand Jain
2023-01-21  6:50 ` [PATCH 05/34] btrfs: simplify btrfs_lookup_bio_sums Christoph Hellwig
2023-01-23 16:06   ` Johannes Thumshirn
2023-01-24 15:16   ` Anand Jain
2023-01-21  6:50 ` [PATCH 06/34] btrfs: slightly refactor btrfs_submit_bio Christoph Hellwig
2023-01-23 16:13   ` Johannes Thumshirn
2023-01-24 15:27   ` Anand Jain
2023-01-21  6:50 ` [PATCH 07/34] btrfs: save the bio iter for checksum validation in common code Christoph Hellwig
2023-01-23 16:18   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 08/34] btrfs: pre-load data checksum for reads in btrfs_submit_bio Christoph Hellwig
2023-01-23 16:32   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 09/34] btrfs: add a btrfs_data_csum_ok helper Christoph Hellwig
2023-01-23 16:36   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 10/34] btrfs: handle checksum validation and repair at the storage layer Christoph Hellwig
2023-01-23 16:39   ` Johannes Thumshirn
2023-01-24  6:47     ` Christoph Hellwig
2023-01-24  8:16       ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 11/34] btrfs: remove btrfs_bio_free_csum Christoph Hellwig
2023-01-23 16:41   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 12/34] btrfs: remove btrfs_bio_for_each_sector Christoph Hellwig
2023-01-23 16:42   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 13/34] btrfs: remove now unused checksumming helpers Christoph Hellwig
2023-01-23 16:49   ` Johannes Thumshirn
2023-01-23 16:53     ` Christoph Hellwig
2023-01-24  8:33       ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 14/34] btrfs: remove the device field in struct btrfs_bio Christoph Hellwig
2023-01-24 11:01   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 15/34] btrfs: remove the io_failure_record infrastructure Christoph Hellwig
2023-01-24 11:04   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 16/34] btrfs: rename the iter field in struct btrfs_bio Christoph Hellwig
2023-01-24 11:09   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 17/34] btrfs: remove the is_metadata flag " Christoph Hellwig
2023-01-24 11:13   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 18/34] btrfs: remove the submit_bio_start helpers Christoph Hellwig
2023-01-24 11:49   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 19/34] btrfs: simplify the btrfs_csum_one_bio calling convention Christoph Hellwig
2023-01-21  6:50 ` [PATCH 20/34] btrfs: handle checksum generation in the storage layer Christoph Hellwig
2023-01-21  6:50 ` [PATCH 21/34] btrfs: handle recording of zoned writes " Christoph Hellwig
2023-01-21  6:50 ` [PATCH 22/34] btrfs: support cloned bios in btree_csum_one_bio Christoph Hellwig
2023-01-21  6:50 ` [PATCH 23/34] btrfs: allow btrfs_submit_bio to split bios Christoph Hellwig
2023-01-25 21:51   ` Josef Bacik [this message]
2023-01-26  5:21     ` Christoph Hellwig
2023-01-26 17:43       ` Josef Bacik
2023-01-26 17:46         ` Christoph Hellwig
2023-01-26 18:33           ` David Sterba
2023-01-27  7:02             ` test not in the auto group, was: " Christoph Hellwig
2023-01-21  6:50 ` [PATCH 24/34] btrfs: pass the iomap bio to btrfs_submit_bio Christoph Hellwig
2023-01-21  6:50 ` [PATCH 25/34] btrfs: remove stripe boundary calculation for buffered I/O Christoph Hellwig
2023-01-21  6:50 ` [PATCH 26/34] btrfs: remove stripe boundary calculation for compressed I/O Christoph Hellwig
2023-01-21  6:50 ` [PATCH 27/34] btrfs: remove stripe boundary calculation for encoded I/O Christoph Hellwig
2023-01-21  6:50 ` [PATCH 28/34] btrfs: remove struct btrfs_io_geometry Christoph Hellwig
2023-01-21  6:50 ` [PATCH 29/34] btrfs: remove submit_encoded_read_bio Christoph Hellwig
2023-01-21  6:50 ` [PATCH 30/34] btrfs: remove the fs_info argument to btrfs_submit_bio Christoph Hellwig
2023-01-21  6:50 ` [PATCH 31/34] btrfs: remove now spurious bio submission helpers Christoph Hellwig
2023-01-21  6:50 ` [PATCH 32/34] btrfs: calculate file system wide queue limit for zoned mode Christoph Hellwig
2023-01-21  6:50 ` [PATCH 33/34] btrfs: split zone append bios in btrfs_submit_bio Christoph Hellwig
2023-01-21  6:50 ` [PATCH 34/34] iomap: remove IOMAP_F_ZONE_APPEND Christoph Hellwig
2023-01-24 13:22 ` consolidate btrfs checksumming, repair and bio splitting v4 Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Y9GkVONZJFXVe8AH@localhost.localdomain \
    --to=josef@toxicpanda.com \
    --cc=axboe@kernel.dk \
    --cc=clm@fb.com \
    --cc=damien.lemoal@wdc.com \
    --cc=djwong@kernel.org \
    --cc=dsterba@suse.com \
    --cc=hch@lst.de \
    --cc=johannes.thumshirn@wdc.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=naohiro.aota@wdc.com \
    --cc=wqu@suse.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.