Re: [PATCH RFC v2 05/12] btrfs: add a helper to queue a corrupted sector for read repair

From: Qu Wenruo <wqu@suse.com>
To: linux-btrfs@vger.kernel.org, Christoph Hellwig <hch@infradead.org>
Subject: Re: [PATCH RFC v2 05/12] btrfs: add a helper to queue a corrupted sector for read repair
Date: Thu, 28 Apr 2022 13:20:37 +0800	[thread overview]
Message-ID: <2fd10883-5a4d-5cbd-d09f-7a30bb326a4a@suse.com> (raw)
In-Reply-To: <a136fe858afe9efd29c8caa98d82cb7439d89677.1651043617.git.wqu@suse.com>

On 2022/4/27 15:18, Qu Wenruo wrote:
> The new helper, read_repair_bio_add_sector(), will grab the page and
> page_offset, and queue the sector into
> btrfs_read_repair_ctrl::read_bios for later usage.
> 
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
>   fs/btrfs/extent_io.c | 107 +++++++++++++++++++++++++++++++++++++++++++
>   fs/btrfs/extent_io.h |   6 +++
>   2 files changed, 113 insertions(+)
> 
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 6304f694c8d6..fbed78ffe8e1 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -2732,6 +2732,110 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
>   		btrfs_subpage_end_reader(fs_info, page, start, len);
>   }
>   
> +static struct page *read_repair_get_sector(struct btrfs_read_repair_ctrl *ctrl,
> +					   int sector_nr, unsigned int *pgoff)
> +{
> +	const struct btrfs_fs_info *fs_info = btrfs_sb(ctrl->inode->i_sb);
> +	const u32 target_offset = sector_nr << fs_info->sectorsize_bits;
> +	struct bvec_iter iter;
> +	struct bio_vec bvec;
> +	u32 offset = 0;
> +
> +	ASSERT(pgoff);
> +	ASSERT((sector_nr << fs_info->sectorsize_bits) < ctrl->bio_size);
> +
> +	/*
> +	 * This is definitely not efficient, but I don't have a better way
> +	 * to grab a specified bvec from a bio directly.
> +	 */

Also Cc to Christoph.

This function will be called very frequently, and I really want to
avoid re-searching from the beginning of the original bio on every
call.

Maybe we can cache a bvec_iter and use its bi_size to check whether the
target offset is still ahead of the cached position (then just advance
the iterator); otherwise rewind and re-search from the beginning.

I guess there is no existing helper to do the same work, right?

Thanks,
Qu

> +	__bio_for_each_segment(bvec, ctrl->failed_bio, iter,
> +			       btrfs_bio(ctrl->failed_bio)->iter) {
> +		if (target_offset - offset < bvec.bv_len) {
> +			*pgoff = bvec.bv_offset + (target_offset - offset);
> +			return bvec.bv_page;
> +		}
> +		offset += bvec.bv_len;
> +	}
> +	return NULL;
> +}
> +
> +static void read_repair_end_bio(struct bio *bio)
> +{
> +	struct btrfs_read_repair_ctrl *ctrl = bio->bi_private;
> +	const struct btrfs_fs_info *fs_info = btrfs_sb(ctrl->inode->i_sb);
> +	struct bvec_iter_all iter_all;
> +	struct bio_vec *bvec;
> +	u64 logical = btrfs_bio(bio)->iter.bi_sector << SECTOR_SHIFT;
> +	u32 offset = 0;
> +	bool uptodate = (bio->bi_status == BLK_STS_OK);
> +
> +	/* We should not have csum in bbio */
> +	ASSERT(!btrfs_bio(bio)->csum);
> +	bio_for_each_segment_all(bvec, bio, iter_all) {
> +		/*
> +		 * If we have a successful read, clear the error bit.
> +		 * In read_repair_finish(), we will re-check the csum
> +		 * (if exists) later.
> +		 */
> +		if (uptodate)
> +			clear_bit((logical + offset - ctrl->logical) >>
> +				  fs_info->sectorsize_bits,
> +				  ctrl->cur_bad_bitmap);
> +		atomic_sub(bvec->bv_len, &ctrl->io_bytes);
> +		wake_up(&ctrl->io_wait);
> +		offset += bvec->bv_len;
> +	}
> +	bio_put(bio);
> +}
> +
> +/* Add a sector into the read repair bios list for later submission */
> +static void read_repair_bio_add_sector(struct btrfs_read_repair_ctrl *ctrl,
> +				       int sector_nr)
> +{
> +	const struct btrfs_fs_info *fs_info = btrfs_sb(ctrl->inode->i_sb);
> +	struct page *page;
> +	int pgoff;
> +	struct bio *bio;
> +	int ret;
> +
> +	page = read_repair_get_sector(ctrl, sector_nr, &pgoff);
> +	ASSERT(page);
> +
> +	/* Check if the sector can be added to the last bio */
> +	if (!bio_list_empty(&ctrl->read_bios)) {
> +		bio = ctrl->read_bios.tail;
> +		if ((bio->bi_iter.bi_sector << SECTOR_SHIFT) + bio->bi_iter.bi_size ==
> +		    ctrl->logical + (sector_nr << fs_info->sectorsize_bits))
> +			goto add;
> +	}
> +	/*
> +	 * Here we want to know the logical bytenr at endio time, so we can
> +	 * update the bitmap.
> +	 * Unfortunately our bi_private will be used, and bi_iter is not
> +	 * reliable, thus we have to alloc btrfs_bio, even though we just want
> +	 * the logical bytenr.
> +	 */
> +	bio = btrfs_bio_alloc(BIO_MAX_VECS);
> +	/* It's backed by mempool, thus should not fail */
> +	ASSERT(bio);
> +
> +	bio->bi_opf = REQ_OP_READ;
> +	bio->bi_iter.bi_sector = ((sector_nr << fs_info->sectorsize_bits) +
> +				  ctrl->logical) >> SECTOR_SHIFT;
> +	bio->bi_private = ctrl;
> +	bio->bi_end_io = read_repair_end_bio;
> +	bio_list_add(&ctrl->read_bios, bio);
> +
> +add:
> +	ret = bio_add_page(bio, page, fs_info->sectorsize, pgoff);
> +	/*
> +	 * We allocated the read bio with enough bvecs to contain
> +	 * the original bio, thus it should not fail to add a sector.
> +	 */
> +	ASSERT(ret == fs_info->sectorsize);
> +	atomic_add(fs_info->sectorsize, &ctrl->io_bytes);
> +}
> +
>   static int read_repair_add_sector(struct inode *inode,
>   				  struct btrfs_read_repair_ctrl *ctrl,
>   				  struct bio *failed_bio, u32 bio_offset)
> @@ -2762,6 +2866,9 @@ static int read_repair_add_sector(struct inode *inode,
>   		ctrl->init_mirror = btrfs_bio(failed_bio)->mirror_num;
>   		ctrl->num_copies = btrfs_num_copies(fs_info, ctrl->logical,
>   						    sectorsize);
> +		init_waitqueue_head(&ctrl->io_wait);
> +		bio_list_init(&ctrl->read_bios);
> +		atomic_set(&ctrl->io_bytes, 0);
>   
>   		ctrl->cur_bad_bitmap = bitmap_alloc(ctrl->bio_size >>
>   					fs_info->sectorsize_bits, GFP_NOFS);
> diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
> index eff008ba194f..4904229ee73a 100644
> --- a/fs/btrfs/extent_io.h
> +++ b/fs/btrfs/extent_io.h
> @@ -121,6 +121,12 @@ struct btrfs_read_repair_ctrl {
>   	 */
>   	unsigned long *prev_bad_bitmap;
>   
> +	struct bio_list read_bios;
> +
> +	wait_queue_head_t io_wait;
> +
> +	atomic_t io_bytes;
> +
>   	/* The logical bytenr of the original bio. */
>   	u64 logical;
>