From: Qu Wenruo <wqu@suse.com> To: linux-btrfs@vger.kernel.org Cc: linux-block@vger.kernel.org, dm-devel@redhat.com Subject: [PATCH 08/17] btrfs: make data buffered read path to handle split bio properly Date: Wed, 1 Dec 2021 13:17:47 +0800 [thread overview] Message-ID: <20211201051756.53742-9-wqu@suse.com> (raw) In-Reply-To: <20211201051756.53742-1-wqu@suse.com> This involves the following modifications: - Use __bio_for_each_segment() instead of bio_for_each_segment_all() bio_for_each_segment_all() will iterate all bvecs, even if they are not referenced by the current bi_iter. The *_all() variant can only be used if the bio is never split. Change it to a __bio_for_each_segment() call so we won't have endio called on the same range by both split and parent bios, and it can handle both split and unsplit bios. - Make check_data_csum() take bbio->offset_to_original into consideration Since a btrfs bio can be split now, both split and original bios can start at some offset from the original logical bytenr. Take btrfs_bio::offset_to_original into consideration to get the correct checksum offset. - Remove the BIO_CLONED ASSERT() in submit_read_repair() Signed-off-by: Qu Wenruo <wqu@suse.com> --- fs/btrfs/extent_io.c | 34 +++++++++++++++++++--------------- fs/btrfs/inode.c | 23 +++++++++++++++++++++-- fs/btrfs/volumes.h | 3 ++- 3 files changed, 42 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 083700621b9f..e0e072c0e5c8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2735,10 +2735,9 @@ static blk_status_t submit_read_repair(struct inode *inode, ASSERT(error_bitmap); /* - * We only get called on buffered IO, thus page must be mapped and bio - * must not be cloned. 
- */ - ASSERT(page->mapping && !bio_flagged(failed_bio, BIO_CLONED)); + * We only get called on buffered IO, thus page must be mapped + */ + ASSERT(page->mapping); /* Iterate through all the sectors in the range */ for (i = 0; i < nr_bits; i++) { @@ -2992,7 +2991,8 @@ static struct extent_buffer *find_extent_buffer_readpage( */ static void end_bio_extent_readpage(struct bio *bio) { - struct bio_vec *bvec; + struct bio_vec bvec; + struct bvec_iter iter; struct btrfs_bio *bbio = btrfs_bio(bio); struct extent_io_tree *tree, *failure_tree; struct processed_extent processed = { 0 }; @@ -3003,11 +3003,15 @@ static void end_bio_extent_readpage(struct bio *bio) u32 bio_offset = 0; int mirror; int ret; - struct bvec_iter_all iter_all; - bio_for_each_segment_all(bvec, bio, iter_all) { + /* + * We should have saved the orignal bi_iter, and then start iterating + * using that saved iter, as at endio time bi_iter is not reliable. + */ + ASSERT(bbio->iter.bi_size); + __bio_for_each_segment(bvec, bio, iter, bbio->iter) { bool uptodate = !bio->bi_status; - struct page *page = bvec->bv_page; + struct page *page = bvec.bv_page; struct inode *inode = page->mapping->host; struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); const u32 sectorsize = fs_info->sectorsize; @@ -3030,19 +3034,19 @@ static void end_bio_extent_readpage(struct bio *bio) * for unaligned offsets, and an error if they don't add up to * a full sector. 
*/ - if (!IS_ALIGNED(bvec->bv_offset, sectorsize)) + if (!IS_ALIGNED(bvec.bv_offset, sectorsize)) btrfs_err(fs_info, "partial page read in btrfs with offset %u and length %u", - bvec->bv_offset, bvec->bv_len); - else if (!IS_ALIGNED(bvec->bv_offset + bvec->bv_len, + bvec.bv_offset, bvec.bv_len); + else if (!IS_ALIGNED(bvec.bv_offset + bvec.bv_len, sectorsize)) btrfs_info(fs_info, "incomplete page read with offset %u and length %u", - bvec->bv_offset, bvec->bv_len); + bvec.bv_offset, bvec.bv_len); - start = page_offset(page) + bvec->bv_offset; - end = start + bvec->bv_len - 1; - len = bvec->bv_len; + start = page_offset(page) + bvec.bv_offset; + end = start + bvec.bv_len - 1; + len = bvec.bv_len; mirror = bbio->mirror_num; if (likely(uptodate)) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1aa060de917c..186304c69900 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3225,6 +3225,24 @@ void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode, finish_ordered_fn, uptodate); } +static u8 *bbio_get_real_csum(struct btrfs_fs_info *fs_info, + struct btrfs_bio *bbio) +{ + u8 *ret; + + /* Split bbio needs to grab csum from its parent */ + if (bbio->is_split_bio) + ret = btrfs_bio(bbio->parent)->csum; + else + ret = bbio->csum; + + if (ret == NULL) + return ret; + + return ret + (bbio->offset_to_original >> fs_info->sectorsize_bits) * + fs_info->csum_size; +} + /* * check_data_csum - verify checksum of one sector of uncompressed data * @inode: inode @@ -3252,7 +3270,8 @@ static int check_data_csum(struct inode *inode, struct btrfs_bio *bbio, ASSERT(pgoff + len <= PAGE_SIZE); offset_sectors = bio_offset >> fs_info->sectorsize_bits; - csum_expected = ((u8 *)bbio->csum) + offset_sectors * csum_size; + csum_expected = bbio_get_real_csum(fs_info, bbio) + + offset_sectors * csum_size; kaddr = kmap_atomic(page); shash->tfm = fs_info->csum_shash; @@ -3310,7 +3329,7 @@ unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio, * Normally this should be 
covered by above check for compressed read * or the next check for NODATASUM. Just do a quicker exit here. */ - if (bbio->csum == NULL) + if (bbio_get_real_csum(fs_info, bbio) == NULL) return 0; if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index bd789544268c..8825a17d0620 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -400,7 +400,8 @@ static inline struct btrfs_bio *btrfs_bio(struct bio *bio) static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio) { - if (bbio->csum != bbio->csum_inline) { + /* Only free the csum if we're not a split bio */ + if (!bbio->is_split_bio && bbio->csum != bbio->csum_inline) { kfree(bbio->csum); bbio->csum = NULL; } -- 2.34.1
WARNING: multiple messages have this Message-ID (diff)
From: Qu Wenruo <wqu@suse.com> To: linux-btrfs@vger.kernel.org Cc: linux-block@vger.kernel.org, dm-devel@redhat.com Subject: [dm-devel] [PATCH 08/17] btrfs: make data buffered read path to handle split bio properly Date: Wed, 1 Dec 2021 13:17:47 +0800 [thread overview] Message-ID: <20211201051756.53742-9-wqu@suse.com> (raw) In-Reply-To: <20211201051756.53742-1-wqu@suse.com> This involves the following modifications: - Use __bio_for_each_segment() instead of bio_for_each_segment_all() bio_for_each_segment_all() will iterate all bvecs, even if they are not referenced by the current bi_iter. The *_all() variant can only be used if the bio is never split. Change it to a __bio_for_each_segment() call so we won't have endio called on the same range by both split and parent bios, and it can handle both split and unsplit bios. - Make check_data_csum() take bbio->offset_to_original into consideration Since a btrfs bio can be split now, both split and original bios can start at some offset from the original logical bytenr. Take btrfs_bio::offset_to_original into consideration to get the correct checksum offset. - Remove the BIO_CLONED ASSERT() in submit_read_repair() Signed-off-by: Qu Wenruo <wqu@suse.com> --- fs/btrfs/extent_io.c | 34 +++++++++++++++++++--------------- fs/btrfs/inode.c | 23 +++++++++++++++++++++-- fs/btrfs/volumes.h | 3 ++- 3 files changed, 42 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 083700621b9f..e0e072c0e5c8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2735,10 +2735,9 @@ static blk_status_t submit_read_repair(struct inode *inode, ASSERT(error_bitmap); /* - * We only get called on buffered IO, thus page must be mapped and bio - * must not be cloned. 
- */ - ASSERT(page->mapping && !bio_flagged(failed_bio, BIO_CLONED)); + * We only get called on buffered IO, thus page must be mapped + */ + ASSERT(page->mapping); /* Iterate through all the sectors in the range */ for (i = 0; i < nr_bits; i++) { @@ -2992,7 +2991,8 @@ static struct extent_buffer *find_extent_buffer_readpage( */ static void end_bio_extent_readpage(struct bio *bio) { - struct bio_vec *bvec; + struct bio_vec bvec; + struct bvec_iter iter; struct btrfs_bio *bbio = btrfs_bio(bio); struct extent_io_tree *tree, *failure_tree; struct processed_extent processed = { 0 }; @@ -3003,11 +3003,15 @@ static void end_bio_extent_readpage(struct bio *bio) u32 bio_offset = 0; int mirror; int ret; - struct bvec_iter_all iter_all; - bio_for_each_segment_all(bvec, bio, iter_all) { + /* + * We should have saved the orignal bi_iter, and then start iterating + * using that saved iter, as at endio time bi_iter is not reliable. + */ + ASSERT(bbio->iter.bi_size); + __bio_for_each_segment(bvec, bio, iter, bbio->iter) { bool uptodate = !bio->bi_status; - struct page *page = bvec->bv_page; + struct page *page = bvec.bv_page; struct inode *inode = page->mapping->host; struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); const u32 sectorsize = fs_info->sectorsize; @@ -3030,19 +3034,19 @@ static void end_bio_extent_readpage(struct bio *bio) * for unaligned offsets, and an error if they don't add up to * a full sector. 
*/ - if (!IS_ALIGNED(bvec->bv_offset, sectorsize)) + if (!IS_ALIGNED(bvec.bv_offset, sectorsize)) btrfs_err(fs_info, "partial page read in btrfs with offset %u and length %u", - bvec->bv_offset, bvec->bv_len); - else if (!IS_ALIGNED(bvec->bv_offset + bvec->bv_len, + bvec.bv_offset, bvec.bv_len); + else if (!IS_ALIGNED(bvec.bv_offset + bvec.bv_len, sectorsize)) btrfs_info(fs_info, "incomplete page read with offset %u and length %u", - bvec->bv_offset, bvec->bv_len); + bvec.bv_offset, bvec.bv_len); - start = page_offset(page) + bvec->bv_offset; - end = start + bvec->bv_len - 1; - len = bvec->bv_len; + start = page_offset(page) + bvec.bv_offset; + end = start + bvec.bv_len - 1; + len = bvec.bv_len; mirror = bbio->mirror_num; if (likely(uptodate)) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1aa060de917c..186304c69900 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3225,6 +3225,24 @@ void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode, finish_ordered_fn, uptodate); } +static u8 *bbio_get_real_csum(struct btrfs_fs_info *fs_info, + struct btrfs_bio *bbio) +{ + u8 *ret; + + /* Split bbio needs to grab csum from its parent */ + if (bbio->is_split_bio) + ret = btrfs_bio(bbio->parent)->csum; + else + ret = bbio->csum; + + if (ret == NULL) + return ret; + + return ret + (bbio->offset_to_original >> fs_info->sectorsize_bits) * + fs_info->csum_size; +} + /* * check_data_csum - verify checksum of one sector of uncompressed data * @inode: inode @@ -3252,7 +3270,8 @@ static int check_data_csum(struct inode *inode, struct btrfs_bio *bbio, ASSERT(pgoff + len <= PAGE_SIZE); offset_sectors = bio_offset >> fs_info->sectorsize_bits; - csum_expected = ((u8 *)bbio->csum) + offset_sectors * csum_size; + csum_expected = bbio_get_real_csum(fs_info, bbio) + + offset_sectors * csum_size; kaddr = kmap_atomic(page); shash->tfm = fs_info->csum_shash; @@ -3310,7 +3329,7 @@ unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio, * Normally this should be 
covered by above check for compressed read * or the next check for NODATASUM. Just do a quicker exit here. */ - if (bbio->csum == NULL) + if (bbio_get_real_csum(fs_info, bbio) == NULL) return 0; if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index bd789544268c..8825a17d0620 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -400,7 +400,8 @@ static inline struct btrfs_bio *btrfs_bio(struct bio *bio) static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio) { - if (bbio->csum != bbio->csum_inline) { + /* Only free the csum if we're not a split bio */ + if (!bbio->is_split_bio && bbio->csum != bbio->csum_inline) { kfree(bbio->csum); bbio->csum = NULL; } -- 2.34.1 -- dm-devel mailing list dm-devel@redhat.com https://listman.redhat.com/mailman/listinfo/dm-devel
next prev parent reply other threads:[~2021-12-01 5:18 UTC|newest] Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top 2021-12-01 5:17 [PATCH 00/17] btrfs: split bio at btrfs_map_bio() time Qu Wenruo 2021-12-01 5:17 ` [dm-devel] " Qu Wenruo 2021-12-01 5:17 ` [PATCH 01/17] btrfs: update an stale comment on btrfs_submit_bio_hook() Qu Wenruo 2021-12-01 5:17 ` [dm-devel] " Qu Wenruo 2021-12-01 5:17 ` [PATCH 02/17] btrfs: save bio::bi_iter into btrfs_bio::iter before submitting Qu Wenruo 2021-12-01 5:17 ` [dm-devel] " Qu Wenruo 2021-12-01 11:52 ` Qu Wenruo 2021-12-01 11:52 ` [dm-devel] " Qu Wenruo 2021-12-01 5:17 ` [PATCH 03/17] btrfs: use correct bio size for error message in btrfs_end_dio_bio() Qu Wenruo 2021-12-01 5:17 ` [dm-devel] " Qu Wenruo 2021-12-01 5:17 ` [PATCH 04/17] btrfs: refactor btrfs_map_bio() Qu Wenruo 2021-12-01 5:17 ` [dm-devel] " Qu Wenruo 2021-12-01 5:17 ` [PATCH 05/17] btrfs: move btrfs_bio_wq_end_io() calls into submit_stripe_bio() Qu Wenruo 2021-12-01 5:17 ` [dm-devel] " Qu Wenruo 2021-12-01 5:17 ` [PATCH 06/17] btrfs: replace btrfs_dio_private::refs with btrfs_dio_private::pending_bytes Qu Wenruo 2021-12-01 5:17 ` [dm-devel] " Qu Wenruo 2021-12-01 5:17 ` [PATCH 07/17] btrfs: introduce btrfs_bio_split() helper Qu Wenruo 2021-12-01 5:17 ` [dm-devel] " Qu Wenruo 2021-12-01 5:17 ` Qu Wenruo [this message] 2021-12-01 5:17 ` [dm-devel] [PATCH 08/17] btrfs: make data buffered read path to handle split bio properly Qu Wenruo 2021-12-01 5:17 ` [PATCH 09/17] btrfs: make data buffered write endio function to be split bio compatible Qu Wenruo 2021-12-01 5:17 ` [dm-devel] " Qu Wenruo 2021-12-01 5:17 ` [PATCH 10/17] btrfs: make metadata write endio functions " Qu Wenruo 2021-12-01 5:17 ` [dm-devel] " Qu Wenruo 2021-12-01 5:17 ` [PATCH 11/17] btrfs: make dec_and_test_compressed_bio() " Qu Wenruo 2021-12-01 5:17 ` [dm-devel] " Qu Wenruo 2022-03-16 19:46 ` Josef Bacik 2022-03-16 19:46 ` [dm-devel] " Josef Bacik 2022-03-16 23:30 ` Qu Wenruo 
2022-03-16 23:30 ` [dm-devel] " Qu Wenruo 2021-12-01 5:17 ` [PATCH 12/17] btrfs: return proper mapped length for RAID56 profiles in __btrfs_map_block() Qu Wenruo 2021-12-01 5:17 ` [dm-devel] " Qu Wenruo 2021-12-01 6:52 ` Qu Wenruo 2021-12-01 6:52 ` [dm-devel] " Qu Wenruo 2021-12-01 5:17 ` [PATCH 13/17] btrfs: allow btrfs_map_bio() to split bio according to chunk stripe boundaries Qu Wenruo 2021-12-01 5:17 ` [dm-devel] " Qu Wenruo 2021-12-01 5:17 ` [PATCH 14/17] btrfs: remove buffered IO stripe boundary calculation Qu Wenruo 2021-12-01 5:17 ` [dm-devel] " Qu Wenruo 2021-12-01 5:17 ` [PATCH 15/17] btrfs: remove stripe boundary calculation for compressed IO Qu Wenruo 2021-12-01 5:17 ` [dm-devel] " Qu Wenruo 2021-12-01 5:17 ` [PATCH 16/17] btrfs: remove the stripe boundary calculation for direct IO Qu Wenruo 2021-12-01 5:17 ` [dm-devel] " Qu Wenruo 2021-12-01 5:17 ` [PATCH 17/17] btrfs: unexport btrfs_get_io_geometry() Qu Wenruo 2021-12-01 5:17 ` [dm-devel] " Qu Wenruo 2021-12-01 8:57 ` [PATCH 00/17] btrfs: split bio at btrfs_map_bio() time Qu Wenruo 2021-12-01 8:57 ` [dm-devel] " Qu Wenruo
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20211201051756.53742-9-wqu@suse.com \ --to=wqu@suse.com \ --cc=dm-devel@redhat.com \ --cc=linux-block@vger.kernel.org \ --cc=linux-btrfs@vger.kernel.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.