All of lore.kernel.org
 help / color / mirror / Atom feed
* fix read repair on compressed extents
@ 2022-06-23  5:53 Christoph Hellwig
  2022-06-23  5:53 ` [PATCH 1/4] btrfs: simplify the pending I/O counting in struct compressed_bio Christoph Hellwig
                   ` (5 more replies)
  0 siblings, 6 replies; 19+ messages in thread
From: Christoph Hellwig @ 2022-06-23  5:53 UTC (permalink / raw)
  To: Chris Mason, Josef Bacik, David Sterba; +Cc: linux-btrfs

Hi all,

while looking into the repair code I found that read repair of compressed
extents is currently fundamentally broken, in that repair tries to write
the uncompressed data into a corrupted extent during a repair.  This is
demonstrated by the "btrfs: test read repair on a corrupted compressed
extent" test submitted to xfstests.

This series fixes that, but is a bit invasive as it requires both
refactoring of the compression code and changes to the repair code to
not look up the logical address on every repair attempt.  On the plus
side it removes a whole lot of code.

It is based on the for-next branch plus my "btrfs: repair all known bad
mirrors" patch.

Diffstat:
 compression.c |  287 ++++++++++++++++------------------------------------------
 compression.h |   11 --
 ctree.h       |    4 
 extent_io.c   |   93 +++++++-----------
 extent_io.h   |    9 -
 inode.c       |   34 +++---
 6 files changed, 148 insertions(+), 290 deletions(-)

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 1/4] btrfs: simplify the pending I/O counting in struct compressed_bio
  2022-06-23  5:53 fix read repair on compressed extents Christoph Hellwig
@ 2022-06-23  5:53 ` Christoph Hellwig
  2022-06-29 23:42   ` Boris Burkov
  2022-06-23  5:53 ` [PATCH 2/4] btrfs: pass a btrfs_bio to btrfs_repair_one_sector Christoph Hellwig
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2022-06-23  5:53 UTC (permalink / raw)
  To: Chris Mason, Josef Bacik, David Sterba; +Cc: linux-btrfs

Instead of counting the bytes just count the bios, with an extra
reference held during submission.  This significantly simplifies the
submission side error handling.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/compression.c | 126 ++++++++++-------------------------------
 fs/btrfs/compression.h |   4 +-
 2 files changed, 33 insertions(+), 97 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 907fc8a4c092c..e756da640fd7b 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -191,44 +191,6 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
 	return 0;
 }
 
-/*
- * Reduce bio and io accounting for a compressed_bio with its corresponding bio.
- *
- * Return true if there is no pending bio nor io.
- * Return false otherwise.
- */
-static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *bio)
-{
-	struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
-	unsigned int bi_size = 0;
-	bool last_io = false;
-	struct bio_vec *bvec;
-	struct bvec_iter_all iter_all;
-
-	/*
-	 * At endio time, bi_iter.bi_size doesn't represent the real bio size.
-	 * Thus here we have to iterate through all segments to grab correct
-	 * bio size.
-	 */
-	bio_for_each_segment_all(bvec, bio, iter_all)
-		bi_size += bvec->bv_len;
-
-	if (bio->bi_status)
-		cb->status = bio->bi_status;
-
-	ASSERT(bi_size && bi_size <= cb->compressed_len);
-	last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits,
-					&cb->pending_sectors);
-	/*
-	 * Here we must wake up the possible error handler after all other
-	 * operations on @cb finished, or we can race with
-	 * finish_compressed_bio_*() which may free @cb.
-	 */
-	wake_up_var(cb);
-
-	return last_io;
-}
-
 static void finish_compressed_bio_read(struct compressed_bio *cb)
 {
 	unsigned int index;
@@ -288,7 +250,10 @@ static void end_compressed_bio_read(struct bio *bio)
 	unsigned int mirror = btrfs_bio(bio)->mirror_num;
 	int ret = 0;
 
-	if (!dec_and_test_compressed_bio(cb, bio))
+	if (bio->bi_status)
+		cb->status = bio->bi_status;
+
+	if (!refcount_dec_and_test(&cb->pending_ios))
 		goto out;
 
 	/*
@@ -417,7 +382,10 @@ static void end_compressed_bio_write(struct bio *bio)
 {
 	struct compressed_bio *cb = bio->bi_private;
 
-	if (dec_and_test_compressed_bio(cb, bio)) {
+	if (bio->bi_status)
+		cb->status = bio->bi_status;
+
+	if (refcount_dec_and_test(&cb->pending_ios)) {
 		struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
 
 		btrfs_record_physical_zoned(cb->inode, cb->start, bio);
@@ -476,7 +444,7 @@ static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_byte
 		return ERR_PTR(ret);
 	}
 	*next_stripe_start = disk_bytenr + geom.len;
-
+	refcount_inc(&cb->pending_ios);
 	return bio;
 }
 
@@ -503,17 +471,17 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 	struct compressed_bio *cb;
 	u64 cur_disk_bytenr = disk_start;
 	u64 next_stripe_start;
-	blk_status_t ret;
 	int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
 	const bool use_append = btrfs_use_zone_append(inode, disk_start);
 	const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
+	blk_status_t ret = BLK_STS_OK;
 
 	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
 	       IS_ALIGNED(len, fs_info->sectorsize));
 	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
 	if (!cb)
 		return BLK_STS_RESOURCE;
-	refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
+	refcount_set(&cb->pending_ios, 1);
 	cb->status = BLK_STS_OK;
 	cb->inode = &inode->vfs_inode;
 	cb->start = start;
@@ -543,8 +511,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 				&next_stripe_start);
 			if (IS_ERR(bio)) {
 				ret = errno_to_blk_status(PTR_ERR(bio));
-				bio = NULL;
-				goto finish_cb;
+				break;
 			}
 			if (blkcg_css)
 				bio->bi_opf |= REQ_CGROUP_PUNT;
@@ -588,8 +555,11 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 		if (submit) {
 			if (!skip_sum) {
 				ret = btrfs_csum_one_bio(inode, bio, start, true);
-				if (ret)
-					goto finish_cb;
+				if (ret) {
+					bio->bi_status = ret;
+					bio_endio(bio);
+					break;
+				}
 			}
 
 			ASSERT(bio->bi_iter.bi_size);
@@ -598,33 +568,12 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 		}
 		cond_resched();
 	}
-	if (blkcg_css)
-		kthread_associate_blkcg(NULL);
 
-	return 0;
-
-finish_cb:
 	if (blkcg_css)
 		kthread_associate_blkcg(NULL);
 
-	if (bio) {
-		bio->bi_status = ret;
-		bio_endio(bio);
-	}
-	/* Last byte of @cb is submitted, endio will free @cb */
-	if (cur_disk_bytenr == disk_start + compressed_len)
-		return ret;
-
-	wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
-			   (disk_start + compressed_len - cur_disk_bytenr) >>
-			   fs_info->sectorsize_bits);
-	/*
-	 * Even with previous bio ended, we should still have io not yet
-	 * submitted, thus need to finish manually.
-	 */
-	ASSERT(refcount_read(&cb->pending_sectors));
-	/* Now we are the only one referring @cb, can finish it safely. */
-	finish_compressed_bio_write(cb);
+	if (refcount_dec_and_test(&cb->pending_ios))
+		finish_compressed_bio_write(cb);
 	return ret;
 }
 
@@ -830,7 +779,7 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 		goto out;
 	}
 
-	refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
+	refcount_set(&cb->pending_ios, 1);
 	cb->status = BLK_STS_OK;
 	cb->inode = inode;
 	cb->mirror_num = mirror_num;
@@ -880,9 +829,9 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 					REQ_OP_READ, end_compressed_bio_read,
 					&next_stripe_start);
 			if (IS_ERR(comp_bio)) {
-				ret = errno_to_blk_status(PTR_ERR(comp_bio));
-				comp_bio = NULL;
-				goto finish_cb;
+				cb->status =
+					errno_to_blk_status(PTR_ERR(comp_bio));
+				break;
 			}
 		}
 		/*
@@ -921,8 +870,11 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			unsigned int nr_sectors;
 
 			ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
-			if (ret)
-				goto finish_cb;
+			if (ret) {
+				comp_bio->bi_status = ret;
+				bio_endio(comp_bio);
+				break;
+			}
 
 			nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
 						  fs_info->sectorsize);
@@ -933,6 +885,9 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			comp_bio = NULL;
 		}
 	}
+
+	if (refcount_dec_and_test(&cb->pending_ios))
+		finish_compressed_bio_read(cb);
 	return;
 
 fail:
@@ -950,25 +905,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	bio->bi_status = ret;
 	bio_endio(bio);
 	return;
-finish_cb:
-	if (comp_bio) {
-		comp_bio->bi_status = ret;
-		bio_endio(comp_bio);
-	}
-	/* All bytes of @cb is submitted, endio will free @cb */
-	if (cur_disk_byte == disk_bytenr + compressed_len)
-		return;
-
-	wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
-			   (disk_bytenr + compressed_len - cur_disk_byte) >>
-			   fs_info->sectorsize_bits);
-	/*
-	 * Even with previous bio ended, we should still have io not yet
-	 * submitted, thus need to finish @cb manually.
-	 */
-	ASSERT(refcount_read(&cb->pending_sectors));
-	/* Now we are the only one referring @cb, can finish it safely. */
-	finish_compressed_bio_read(cb);
 }
 
 /*
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 5fca7603e928a..0e4cbf04fd866 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -30,8 +30,8 @@ static_assert((BTRFS_MAX_COMPRESSED % PAGE_SIZE) == 0);
 #define	BTRFS_ZLIB_DEFAULT_LEVEL		3
 
 struct compressed_bio {
-	/* Number of sectors with unfinished IO (unsubmitted or unfinished) */
-	refcount_t pending_sectors;
+	/* Number of outstanding bios */
+	refcount_t pending_ios;
 
 	/* Number of compressed pages in the array */
 	unsigned int nr_pages;
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 2/4] btrfs: pass a btrfs_bio to btrfs_repair_one_sector
  2022-06-23  5:53 fix read repair on compressed extents Christoph Hellwig
  2022-06-23  5:53 ` [PATCH 1/4] btrfs: simplify the pending I/O counting in struct compressed_bio Christoph Hellwig
@ 2022-06-23  5:53 ` Christoph Hellwig
  2022-06-29 23:44   ` Boris Burkov
  2022-06-23  5:53 ` [PATCH 3/4] btrfs: remove the start argument to check_data_csum Christoph Hellwig
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2022-06-23  5:53 UTC (permalink / raw)
  To: Chris Mason, Josef Bacik, David Sterba; +Cc: linux-btrfs

Pass the btrfs_bio instead of the plain bio to btrfs_repair_one_sector,
and remove the start and failed_mirror arguments in favor of deriving
them from the btrfs_bio.  For this to work ensure that the file_offset
field is also initialized for buffered I/O.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/extent_io.c | 47 ++++++++++++++++++++++++--------------------
 fs/btrfs/extent_io.h |  8 ++++----
 fs/btrfs/inode.c     |  5 ++---
 3 files changed, 32 insertions(+), 28 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3778d58092dea..ec7bdb3fa0921 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -182,6 +182,7 @@ static int add_extent_changeset(struct extent_state *state, u32 bits,
 static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
 {
 	struct bio *bio;
+	struct bio_vec *bv;
 	struct inode *inode;
 	int mirror_num;
 
@@ -189,12 +190,15 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
 		return;
 
 	bio = bio_ctrl->bio;
-	inode = bio_first_page_all(bio)->mapping->host;
+	bv = bio_first_bvec_all(bio);
+	inode = bv->bv_page->mapping->host;
 	mirror_num = bio_ctrl->mirror_num;
 
 	/* Caller should ensure the bio has at least some range added */
 	ASSERT(bio->bi_iter.bi_size);
 
+	btrfs_bio(bio)->file_offset = page_offset(bv->bv_page) + bv->bv_offset;
+
 	if (!is_data_inode(inode))
 		btrfs_submit_metadata_bio(inode, bio, mirror_num);
 	else if (btrfs_op(bio) == BTRFS_MAP_WRITE)
@@ -2533,10 +2537,11 @@ void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
 }
 
 static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
-							     u64 start,
-							     int failed_mirror)
+							     struct btrfs_bio *bbio,
+							     unsigned int bio_offset)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	u64 start = bbio->file_offset + bio_offset;
 	struct io_failure_record *failrec;
 	struct extent_map *em;
 	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
@@ -2556,7 +2561,7 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
 		 * (e.g. with a list for failed_mirror) to make
 		 * clean_io_failure() clean all those errors at once.
 		 */
-		ASSERT(failrec->this_mirror == failed_mirror);
+		ASSERT(failrec->this_mirror == bbio->mirror_num);
 		ASSERT(failrec->len == fs_info->sectorsize);
 		return failrec;
 	}
@@ -2567,7 +2572,7 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
 
 	failrec->start = start;
 	failrec->len = sectorsize;
-	failrec->failed_mirror = failrec->this_mirror = failed_mirror;
+	failrec->failed_mirror = failrec->this_mirror = bbio->mirror_num;
 	failrec->compress_type = BTRFS_COMPRESS_NONE;
 
 	read_lock(&em_tree->lock);
@@ -2632,17 +2637,17 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
 	return failrec;
 }
 
-int btrfs_repair_one_sector(struct inode *inode,
-			    struct bio *failed_bio, u32 bio_offset,
-			    struct page *page, unsigned int pgoff,
-			    u64 start, int failed_mirror,
+int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
+			    u32 bio_offset, struct page *page,
+			    unsigned int pgoff,
 			    submit_bio_hook_t *submit_bio_hook)
 {
+	u64 start = failed_bbio->file_offset + bio_offset;
 	struct io_failure_record *failrec;
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
-	struct btrfs_bio *failed_bbio = btrfs_bio(failed_bio);
+	struct bio *failed_bio = &failed_bbio->bio;
 	const int icsum = bio_offset >> fs_info->sectorsize_bits;
 	struct bio *repair_bio;
 	struct btrfs_bio *repair_bbio;
@@ -2652,7 +2657,7 @@ int btrfs_repair_one_sector(struct inode *inode,
 
 	BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
 
-	failrec = btrfs_get_io_failure_record(inode, start, failed_mirror);
+	failrec = btrfs_get_io_failure_record(inode, failed_bbio, bio_offset);
 	if (IS_ERR(failrec))
 		return PTR_ERR(failrec);
 
@@ -2750,9 +2755,10 @@ static void end_sector_io(struct page *page, u64 offset, bool uptodate)
 				    offset + sectorsize - 1, &cached);
 }
 
-static void submit_data_read_repair(struct inode *inode, struct bio *failed_bio,
+static void submit_data_read_repair(struct inode *inode,
+				    struct btrfs_bio *failed_bbio,
 				    u32 bio_offset, const struct bio_vec *bvec,
-				    int failed_mirror, unsigned int error_bitmap)
+				    unsigned int error_bitmap)
 {
 	const unsigned int pgoff = bvec->bv_offset;
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -2763,7 +2769,7 @@ static void submit_data_read_repair(struct inode *inode, struct bio *failed_bio,
 	const int nr_bits = (end + 1 - start) >> fs_info->sectorsize_bits;
 	int i;
 
-	BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
+	BUG_ON(bio_op(&failed_bbio->bio) == REQ_OP_WRITE);
 
 	/* This repair is only for data */
 	ASSERT(is_data_inode(inode));
@@ -2775,7 +2781,7 @@ static void submit_data_read_repair(struct inode *inode, struct bio *failed_bio,
 	 * We only get called on buffered IO, thus page must be mapped and bio
 	 * must not be cloned.
 	 */
-	ASSERT(page->mapping && !bio_flagged(failed_bio, BIO_CLONED));
+	ASSERT(page->mapping && !bio_flagged(&failed_bbio->bio, BIO_CLONED));
 
 	/* Iterate through all the sectors in the range */
 	for (i = 0; i < nr_bits; i++) {
@@ -2792,10 +2798,9 @@ static void submit_data_read_repair(struct inode *inode, struct bio *failed_bio,
 			goto next;
 		}
 
-		ret = btrfs_repair_one_sector(inode, failed_bio,
-				bio_offset + offset,
-				page, pgoff + offset, start + offset,
-				failed_mirror, btrfs_submit_data_read_bio);
+		ret = btrfs_repair_one_sector(inode, failed_bbio,
+				bio_offset + offset, page, pgoff + offset,
+				btrfs_submit_data_read_bio);
 		if (!ret) {
 			/*
 			 * We have submitted the read repair, the page release
@@ -3127,8 +3132,8 @@ static void end_bio_extent_readpage(struct bio *bio)
 			 * submit_data_read_repair() will handle all the good
 			 * and bad sectors, we just continue to the next bvec.
 			 */
-			submit_data_read_repair(inode, bio, bio_offset, bvec,
-						mirror, error_bitmap);
+			submit_data_read_repair(inode, bbio, bio_offset, bvec,
+						error_bitmap);
 		} else {
 			/* Update page status and unlock */
 			end_page_read(page, uptodate, start, len);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 280af70c04953..a78051c7627c4 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -57,6 +57,7 @@ enum {
 #define BITMAP_LAST_BYTE_MASK(nbits) \
 	(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
 
+struct btrfs_bio;
 struct btrfs_root;
 struct btrfs_inode;
 struct btrfs_io_bio;
@@ -266,10 +267,9 @@ struct io_failure_record {
 	int num_copies;
 };
 
-int btrfs_repair_one_sector(struct inode *inode,
-			    struct bio *failed_bio, u32 bio_offset,
-			    struct page *page, unsigned int pgoff,
-			    u64 start, int failed_mirror,
+int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
+			    u32 bio_offset, struct page *page,
+			    unsigned int pgoff,
 			    submit_bio_hook_t *submit_bio_hook);
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 784c1ad4a9634..a627b2af9e243 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7953,9 +7953,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
 		} else {
 			int ret;
 
-			ret = btrfs_repair_one_sector(inode, &bbio->bio, offset,
-					bv.bv_page, bv.bv_offset, start,
-					bbio->mirror_num,
+			ret = btrfs_repair_one_sector(inode, bbio, offset,
+					bv.bv_page, bv.bv_offset,
 					submit_dio_repair_bio);
 			if (ret)
 				err = errno_to_blk_status(ret);
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 3/4] btrfs: remove the start argument to check_data_csum
  2022-06-23  5:53 fix read repair on compressed extents Christoph Hellwig
  2022-06-23  5:53 ` [PATCH 1/4] btrfs: simplify the pending I/O counting in struct compressed_bio Christoph Hellwig
  2022-06-23  5:53 ` [PATCH 2/4] btrfs: pass a btrfs_bio to btrfs_repair_one_sector Christoph Hellwig
@ 2022-06-23  5:53 ` Christoph Hellwig
  2022-06-29 23:48   ` Boris Burkov
  2022-06-23  5:53 ` [PATCH 4/4] btrfs: fix repair of compressed extents Christoph Hellwig
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2022-06-23  5:53 UTC (permalink / raw)
  To: Chris Mason, Josef Bacik, David Sterba; +Cc: linux-btrfs

Just derive it from the btrfs_bio now that ->file_offset is always valid.
Also make the function available outside of inode.c as we'll need that
soon.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/ctree.h |  2 ++
 fs/btrfs/inode.c | 22 +++++++++-------------
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4e2569f84aabc..164f54e6aa447 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3293,6 +3293,8 @@ int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
 unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
 				    u32 bio_offset, struct page *page,
 				    u64 start, u64 end);
+int check_data_csum(struct inode *inode, struct btrfs_bio *bbio, u32 bio_offset,
+		    struct page *page, u32 pgoff);
 struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
 					   u64 start, u64 len);
 noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a627b2af9e243..429428fde4a88 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3396,20 +3396,18 @@ int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
 /*
  * check_data_csum - verify checksum of one sector of uncompressed data
  * @inode:	inode
- * @io_bio:	btrfs_io_bio which contains the csum
+ * @bbio:	btrfs_bio which contains the csum
  * @bio_offset:	offset to the beginning of the bio (in bytes)
  * @page:	page where is the data to be verified
  * @pgoff:	offset inside the page
- * @start:	logical offset in the file
  *
  * The length of such check is always one sector size.
  *
  * When csum mismatch is detected, we will also report the error and fill the
  * corrupted range with zero. (Thus it needs the extra parameters)
  */
-static int check_data_csum(struct inode *inode, struct btrfs_bio *bbio,
-			   u32 bio_offset, struct page *page, u32 pgoff,
-			   u64 start)
+int check_data_csum(struct inode *inode, struct btrfs_bio *bbio, u32 bio_offset,
+		    struct page *page, u32 pgoff)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	u32 len = fs_info->sectorsize;
@@ -3425,8 +3423,9 @@ static int check_data_csum(struct inode *inode, struct btrfs_bio *bbio,
 	return 0;
 
 zeroit:
-	btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected,
-				    bbio->mirror_num);
+	btrfs_print_data_csum_error(BTRFS_I(inode),
+				    bbio->file_offset + bio_offset,
+				    csum, csum_expected, bbio->mirror_num);
 	if (bbio->device)
 		btrfs_dev_stat_inc_and_print(bbio->device,
 					     BTRFS_DEV_STAT_CORRUPTION_ERRS);
@@ -3495,8 +3494,7 @@ unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
 					  EXTENT_NODATASUM);
 			continue;
 		}
-		ret = check_data_csum(inode, bbio, bio_offset, page, pg_off,
-				      page_offset(page) + pg_off);
+		ret = check_data_csum(inode, bbio, bio_offset, page, pg_off);
 		if (ret < 0) {
 			const int nr_bit = (pg_off - offset_in_page(start)) >>
 				     root->fs_info->sectorsize_bits;
@@ -7946,7 +7944,7 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
 
 		if (uptodate &&
 		    (!csum || !check_data_csum(inode, bbio, offset, bv.bv_page,
-					       bv.bv_offset, start))) {
+					       bv.bv_offset))) {
 			clean_io_failure(fs_info, failure_tree, io_tree, start,
 					 bv.bv_page, btrfs_ino(BTRFS_I(inode)),
 					 bv.bv_offset);
@@ -10324,7 +10322,6 @@ static blk_status_t btrfs_encoded_read_verify_csum(struct btrfs_bio *bbio)
 	u32 sectorsize = fs_info->sectorsize;
 	struct bio_vec *bvec;
 	struct bvec_iter_all iter_all;
-	u64 start = priv->file_offset;
 	u32 bio_offset = 0;
 
 	if (priv->skip_csum || !uptodate)
@@ -10338,9 +10335,8 @@ static blk_status_t btrfs_encoded_read_verify_csum(struct btrfs_bio *bbio)
 		for (i = 0; i < nr_sectors; i++) {
 			ASSERT(pgoff < PAGE_SIZE);
 			if (check_data_csum(&inode->vfs_inode, bbio, bio_offset,
-					    bvec->bv_page, pgoff, start))
+					    bvec->bv_page, pgoff))
 				return BLK_STS_IOERR;
-			start += sectorsize;
 			bio_offset += sectorsize;
 			pgoff += sectorsize;
 		}
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 4/4] btrfs: fix repair of compressed extents
  2022-06-23  5:53 fix read repair on compressed extents Christoph Hellwig
                   ` (2 preceding siblings ...)
  2022-06-23  5:53 ` [PATCH 3/4] btrfs: remove the start argument to check_data_csum Christoph Hellwig
@ 2022-06-23  5:53 ` Christoph Hellwig
  2022-06-30  0:18   ` Boris Burkov
  2022-06-23  8:14 ` fix read repair on " Qu Wenruo
  2022-06-29  8:42 ` Christoph Hellwig
  5 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2022-06-23  5:53 UTC (permalink / raw)
  To: Chris Mason, Josef Bacik, David Sterba; +Cc: linux-btrfs

Currently the checksum of compressed extents is verified based on the
compressed data and the lower btrfs_bio, but the actual repair process
is driven by end_bio_extent_readpage on the upper btrfs_bio for the
decompressed data.

This has a bunch of issues, including not being able to properly
communicate the failed mirror up in case the I/O submission got
preempted, a general loss of the information on whether an error was an
I/O error or a checksum verification failure, but most importantly that
this design causes btrfs_clean_io_failure to eventually write back the
uncompressed good data onto the disk sectors that are supposed to contain
compressed data.

Fix this by moving the repair to the lower btrfs_bio.  To do so, a fair
amount of code has to be reshuffled:

 a) the lower btrfs_bio now needs a valid csum pointer.  The easiest way
    to achieve that is to pass NULL to btrfs_lookup_bio_sums and just use
    the btrfs_bio management of csums.  For a compressed_bio that is
    split into multiple btrfs_bios this means additional memory
    allocations, but the code becomes a lot more regular.
 b) checksum verification now runs directly on the lower btrfs_bio instead
    of the compressed_bio.  This actually nicely simplifies the end I/O
    processing.
 c) btrfs_repair_one_sector can't just look up the logical address for
    the file offset any more, as there are no corresponding relative
    offsets that apply to the file offset and the logical address for
    compressed extents.  Instead require that the saved bvec_iter in the
    btrfs_bio is filled out for all read bios and use that, which again
    removes a fair amount of code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/compression.c | 171 ++++++++++++++---------------------------
 fs/btrfs/compression.h |   7 --
 fs/btrfs/ctree.h       |   2 +
 fs/btrfs/extent_io.c   |  46 +++--------
 fs/btrfs/extent_io.h   |   1 -
 fs/btrfs/inode.c       |   7 ++
 6 files changed, 77 insertions(+), 157 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index e756da640fd7b..c8b14a5bd89be 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -136,66 +136,14 @@ static int compression_decompress(int type, struct list_head *ws,
 
 static int btrfs_decompress_bio(struct compressed_bio *cb);
 
-static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
-				      unsigned long disk_size)
-{
-	return sizeof(struct compressed_bio) +
-		(DIV_ROUND_UP(disk_size, fs_info->sectorsize)) * fs_info->csum_size;
-}
-
-static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
-				 u64 disk_start)
-{
-	struct btrfs_fs_info *fs_info = inode->root->fs_info;
-	const u32 csum_size = fs_info->csum_size;
-	const u32 sectorsize = fs_info->sectorsize;
-	struct page *page;
-	unsigned int i;
-	u8 csum[BTRFS_CSUM_SIZE];
-	struct compressed_bio *cb = bio->bi_private;
-	u8 *cb_sum = cb->sums;
-
-	if ((inode->flags & BTRFS_INODE_NODATASUM) ||
-	    test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state))
-		return 0;
-
-	for (i = 0; i < cb->nr_pages; i++) {
-		u32 pg_offset;
-		u32 bytes_left = PAGE_SIZE;
-		page = cb->compressed_pages[i];
-
-		/* Determine the remaining bytes inside the page first */
-		if (i == cb->nr_pages - 1)
-			bytes_left = cb->compressed_len - i * PAGE_SIZE;
-
-		/* Hash through the page sector by sector */
-		for (pg_offset = 0; pg_offset < bytes_left;
-		     pg_offset += sectorsize) {
-			int ret;
-
-			ret = btrfs_check_sector_csum(fs_info, page, pg_offset,
-						      csum, cb_sum);
-			if (ret) {
-				btrfs_print_data_csum_error(inode, disk_start,
-						csum, cb_sum, cb->mirror_num);
-				if (btrfs_bio(bio)->device)
-					btrfs_dev_stat_inc_and_print(
-						btrfs_bio(bio)->device,
-						BTRFS_DEV_STAT_CORRUPTION_ERRS);
-				return -EIO;
-			}
-			cb_sum += csum_size;
-			disk_start += sectorsize;
-		}
-	}
-	return 0;
-}
-
 static void finish_compressed_bio_read(struct compressed_bio *cb)
 {
 	unsigned int index;
 	struct page *page;
 
+	if (cb->status == BLK_STS_OK)
+		cb->status = errno_to_blk_status(btrfs_decompress_bio(cb));
+
 	/* Release the compressed pages */
 	for (index = 0; index < cb->nr_pages; index++) {
 		page = cb->compressed_pages[index];
@@ -233,59 +181,54 @@ static void finish_compressed_bio_read(struct compressed_bio *cb)
 	kfree(cb);
 }
 
-/* when we finish reading compressed pages from the disk, we
- * decompress them and then run the bio end_io routines on the
- * decompressed pages (in the inode address space).
- *
- * This allows the checksumming and other IO error handling routines
- * to work normally
- *
- * The compressed pages are freed here, and it must be run
- * in process context
+/*
+ * Verify the checksums and kick off repair if needed on the compressed data
+ * before decompressing it into the original bio and freeing the compressed
+ * pages.
  */
 static void end_compressed_bio_read(struct bio *bio)
 {
 	struct compressed_bio *cb = bio->bi_private;
-	struct inode *inode;
-	unsigned int mirror = btrfs_bio(bio)->mirror_num;
-	int ret = 0;
-
-	if (bio->bi_status)
-		cb->status = bio->bi_status;
-
-	if (!refcount_dec_and_test(&cb->pending_ios))
-		goto out;
-
-	/*
-	 * Record the correct mirror_num in cb->orig_bio so that
-	 * read-repair can work properly.
-	 */
-	btrfs_bio(cb->orig_bio)->mirror_num = mirror;
-	cb->mirror_num = mirror;
-
-	/*
-	 * Some IO in this cb have failed, just skip checksum as there
-	 * is no way it could be correct.
-	 */
-	if (cb->status != BLK_STS_OK)
-		goto csum_failed;
+	struct inode *inode = cb->inode;
+	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	struct btrfs_inode *bi = BTRFS_I(inode);
+	bool csum = !(bi->flags & BTRFS_INODE_NODATASUM) &&
+		    !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
+	blk_status_t status = bio->bi_status;
+	struct btrfs_bio *bbio = btrfs_bio(bio);
+	struct bvec_iter iter;
+	struct bio_vec bv;
+	u32 offset;
+
+	btrfs_bio_for_each_sector(fs_info, bv, bbio, iter, offset) {
+		u64 start = bbio->file_offset + offset;
+
+		if (!status &&
+		    (!csum || !check_data_csum(inode, bbio, offset, bv.bv_page,
+					       bv.bv_offset))) {
+			clean_io_failure(fs_info, &bi->io_failure_tree,
+					 &bi->io_tree, start, bv.bv_page,
+					 btrfs_ino(bi), bv.bv_offset);
+		} else {
+			int ret;
 
-	inode = cb->inode;
-	ret = check_compressed_csum(BTRFS_I(inode), bio,
-				    bio->bi_iter.bi_sector << 9);
-	if (ret)
-		goto csum_failed;
+			refcount_inc(&cb->pending_ios);
+			ret = btrfs_repair_one_sector(inode, bbio, offset,
+					bv.bv_page, bv.bv_offset,
+					btrfs_submit_data_read_bio);
+			if (ret) {
+				refcount_dec(&cb->pending_ios);
+				status = errno_to_blk_status(ret);
+			}
+		}
+	}
 
-	/* ok, we're the last bio for this extent, lets start
-	 * the decompression.
-	 */
-	ret = btrfs_decompress_bio(cb);
+	if (status)
+		cb->status = status;
 
-csum_failed:
-	if (ret)
-		cb->status = errno_to_blk_status(ret);
-	finish_compressed_bio_read(cb);
-out:
+	if (refcount_dec_and_test(&cb->pending_ios))
+		finish_compressed_bio_read(cb);
+	btrfs_bio_free_csum(bbio);
 	bio_put(bio);
 }
 
@@ -478,7 +421,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 
 	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
 	       IS_ALIGNED(len, fs_info->sectorsize));
-	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
+	cb = kmalloc(sizeof(struct compressed_bio), GFP_NOFS);
 	if (!cb)
 		return BLK_STS_RESOURCE;
 	refcount_set(&cb->pending_ios, 1);
@@ -486,7 +429,6 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 	cb->inode = &inode->vfs_inode;
 	cb->start = start;
 	cb->len = len;
-	cb->mirror_num = 0;
 	cb->compressed_pages = compressed_pages;
 	cb->compressed_len = compressed_len;
 	cb->writeback = writeback;
@@ -755,7 +697,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	blk_status_t ret;
 	int ret2;
 	int i;
-	u8 *sums;
 
 	em_tree = &BTRFS_I(inode)->extent_tree;
 
@@ -773,7 +714,7 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 	ASSERT(em->compress_type != BTRFS_COMPRESS_NONE);
 	compressed_len = em->block_len;
-	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
+	cb = kmalloc(sizeof(struct compressed_bio), GFP_NOFS);
 	if (!cb) {
 		ret = BLK_STS_RESOURCE;
 		goto out;
@@ -782,8 +723,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	refcount_set(&cb->pending_ios, 1);
 	cb->status = BLK_STS_OK;
 	cb->inode = inode;
-	cb->mirror_num = mirror_num;
-	sums = cb->sums;
 
 	cb->start = em->orig_start;
 	em_len = em->len;
@@ -867,19 +806,25 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			submit = true;
 
 		if (submit) {
-			unsigned int nr_sectors;
+			/* Save the original iter for read repair */
+			if (bio_op(comp_bio) == REQ_OP_READ)
+				btrfs_bio(comp_bio)->iter = comp_bio->bi_iter;
+
+			/*
+			 * Just stash the initial offset of this chunk, as there
+			 * is no direct correlation between compressed pages and
+			 * the original file offset.  The field is only used for
+			 * printing error messages anyway.
+			 */
+			btrfs_bio(comp_bio)->file_offset = file_offset;
 
-			ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
+			ret = btrfs_lookup_bio_sums(inode, comp_bio, NULL);
 			if (ret) {
 				comp_bio->bi_status = ret;
 				bio_endio(comp_bio);
 				break;
 			}
 
-			nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
-						  fs_info->sectorsize);
-			sums += fs_info->csum_size * nr_sectors;
-
 			ASSERT(comp_bio->bi_iter.bi_size);
 			btrfs_submit_bio(fs_info, comp_bio, mirror_num);
 			comp_bio = NULL;
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 0e4cbf04fd866..e9ef24034cad0 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -59,19 +59,12 @@ struct compressed_bio {
 
 	/* IO errors */
 	blk_status_t status;
-	int mirror_num;
 
 	union {
 		/* For reads, this is the bio we are copying the data into */
 		struct bio *orig_bio;
 		struct work_struct write_end_work;
 	};
-
-	/*
-	 * the start of a variable length array of checksums only
-	 * used by reads
-	 */
-	u8 sums[];
 };
 
 static inline unsigned int btrfs_compress_type(unsigned int type_level)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 164f54e6aa447..12f59e35755fa 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3290,6 +3290,8 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
 			int mirror_num, enum btrfs_compression_type compress_type);
 int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
 			    u32 pgoff, u8 *csum, const u8 * const csum_expected);
+int check_data_csum(struct inode *inode, struct btrfs_bio *bbio, u32 bio_offset,
+		    struct page *page, u32 pgoff);
 unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
 				    u32 bio_offset, struct page *page,
 				    u64 start, u64 end);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ec7bdb3fa0921..587d2ba20b53b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2543,13 +2543,10 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	u64 start = bbio->file_offset + bio_offset;
 	struct io_failure_record *failrec;
-	struct extent_map *em;
 	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
-	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	const u32 sectorsize = fs_info->sectorsize;
 	int ret;
-	u64 logical;
 
 	failrec = get_state_failrec(failure_tree, start);
 	if (!IS_ERR(failrec)) {
@@ -2573,41 +2570,14 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
 	failrec->start = start;
 	failrec->len = sectorsize;
 	failrec->failed_mirror = failrec->this_mirror = bbio->mirror_num;
-	failrec->compress_type = BTRFS_COMPRESS_NONE;
-
-	read_lock(&em_tree->lock);
-	em = lookup_extent_mapping(em_tree, start, failrec->len);
-	if (!em) {
-		read_unlock(&em_tree->lock);
-		kfree(failrec);
-		return ERR_PTR(-EIO);
-	}
-
-	if (em->start > start || em->start + em->len <= start) {
-		free_extent_map(em);
-		em = NULL;
-	}
-	read_unlock(&em_tree->lock);
-	if (!em) {
-		kfree(failrec);
-		return ERR_PTR(-EIO);
-	}
-
-	logical = start - em->start;
-	logical = em->block_start + logical;
-	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
-		logical = em->block_start;
-		failrec->compress_type = em->compress_type;
-	}
+	failrec->logical = (bbio->iter.bi_sector << SECTOR_SHIFT) + bio_offset;
 
 	btrfs_debug(fs_info,
-		    "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
-		    logical, start, failrec->len);
-
-	failrec->logical = logical;
-	free_extent_map(em);
+		    "Get IO Failure Record: (new) logical=%llu, start=%llu",
+		    failrec->logical, start);
 
-	failrec->num_copies = btrfs_num_copies(fs_info, logical, sectorsize);
+	failrec->num_copies = btrfs_num_copies(fs_info, failrec->logical,
+					       sectorsize);
 	if (failrec->num_copies == 1) {
 		/*
 		 * we only have a single copy of the data, so don't bother with
@@ -2709,7 +2679,7 @@ int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
 	 * will be handled by the endio on the repair_bio, so we can't return an
 	 * error here.
 	 */
-	submit_bio_hook(inode, repair_bio, failrec->this_mirror, failrec->compress_type);
+	submit_bio_hook(inode, repair_bio, failrec->this_mirror, 0);
 	return BLK_STS_OK;
 }
 
@@ -3115,6 +3085,10 @@ static void end_bio_extent_readpage(struct bio *bio)
 			 * Only try to repair bios that actually made it to a
 			 * device.  If the bio failed to be submitted mirror
 			 * is 0 and we need to fail it without retrying.
+			 *
+			 * This also includes the high level bios for compressed
+			 * extents - these never make it to a device and repair
+			 * is already handled on the lower compressed bio.
 			 */
 			if (mirror > 0)
 				repair = true;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index a78051c7627c4..9dec34c009e91 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -261,7 +261,6 @@ struct io_failure_record {
 	u64 start;
 	u64 len;
 	u64 logical;
-	enum btrfs_compression_type compress_type;
 	int this_mirror;
 	int failed_mirror;
 	int num_copies;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 429428fde4a88..eea351216db33 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2707,6 +2707,9 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
 		return;
 	}
 
+	/* Save the original iter for read repair */
+	btrfs_bio(bio)->iter = bio->bi_iter;
+
 	/*
 	 * Lookup bio sums does extra checks around whether we need to csum or
 	 * not, which is why we ignore skip_sum here.
@@ -8000,6 +8003,10 @@ static void btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_dio_private *dip = bio->bi_private;
 	blk_status_t ret;
+		
+	/* Save the original iter for read repair */
+	if (btrfs_op(bio) == BTRFS_MAP_READ)
+		btrfs_bio(bio)->iter = bio->bi_iter;
 
 	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
 		goto map;
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: fix read repair on compressed extents
  2022-06-23  5:53 fix read repair on compressed extents Christoph Hellwig
                   ` (3 preceding siblings ...)
  2022-06-23  5:53 ` [PATCH 4/4] btrfs: fix repair of compressed extents Christoph Hellwig
@ 2022-06-23  8:14 ` Qu Wenruo
  2022-06-23 12:58   ` Christoph Hellwig
  2022-06-29  8:42 ` Christoph Hellwig
  5 siblings, 1 reply; 19+ messages in thread
From: Qu Wenruo @ 2022-06-23  8:14 UTC (permalink / raw)
  To: Christoph Hellwig, Chris Mason, Josef Bacik, David Sterba; +Cc: linux-btrfs



On 2022/6/23 13:53, Christoph Hellwig wrote:
> Hi all,
>
> while looking into the repair code I found that read repair of compressed
> extents is currently fundamentally broken, in that repair tries to write
> the uncompressed data into a corrupted extent during a repair.  This is
> demonstrated by the "btrfs: test read repair on a corrupted compressed
> extent" test submitted to xfstests.
>
> This series fixes that, but is a bit invasive as it requires both
> refactoring of the compression code and changes to the repair code to
> not look up the logical address on every repair attempt.  On the plus
> side it removes a whole lot of code.

I thought we would fix that after getting the read repair thing figured
out and just use that new read repair facility to do that.

Especially considering the similarity between compressed read and dio
read path (all handling pages not from page cache, needs extra structure
member to grab logical address), it would be a perfect match for the new
read repair code.

Thanks,
Qu
>
> It is based on the for-next branch plus my "btrfs: repair all known bad
> mirrors" patch.
>
> Diffstat:
>   compression.c |  287 ++++++++++++++++------------------------------------------
>   compression.h |   11 --
>   ctree.h       |    4
>   extent_io.c   |   93 +++++++-----------
>   extent_io.h   |    9 -
>   inode.c       |   34 +++---
>   6 files changed, 148 insertions(+), 290 deletions(-)

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: fix read repair on compressed extents
  2022-06-23  8:14 ` fix read repair on " Qu Wenruo
@ 2022-06-23 12:58   ` Christoph Hellwig
  0 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2022-06-23 12:58 UTC (permalink / raw)
  To: Qu Wenruo
  Cc: Christoph Hellwig, Chris Mason, Josef Bacik, David Sterba, linux-btrfs

On Thu, Jun 23, 2022 at 04:14:16PM +0800, Qu Wenruo wrote:
> I thought we would fix that after getting the read repair thing figured
> out and just use that new read repair facility to do that.
>
> Especially considering the similarity between compressed read and dio
> read path (all handling pages not from page cache, needs extra structure
> member to grab logical address), it would be a perfect match for the new
> read repair code.

My attempt at consolidating the code is what led me to this discovery.
But I think I'd much rather fix such a grave bug first, which is why
I spent some extra time yesterday to extract it from a large stack of
patches.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: fix read repair on compressed extents
  2022-06-23  5:53 fix read repair on compressed extents Christoph Hellwig
                   ` (4 preceding siblings ...)
  2022-06-23  8:14 ` fix read repair on " Qu Wenruo
@ 2022-06-29  8:42 ` Christoph Hellwig
  2022-06-29 19:04   ` Boris Burkov
  5 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2022-06-29  8:42 UTC (permalink / raw)
  To: Chris Mason, Josef Bacik, David Sterba; +Cc: linux-btrfs

Any chance to get a review on this one?

On Thu, Jun 23, 2022 at 07:53:34AM +0200, Christoph Hellwig wrote:
> Hi all,
> 
> while looking into the repair code I found that read repair of compressed
> extents is currently fundamentally broken, in that repair tries to write
> the uncompressed data into a corrupted extent during a repair.  This is
> demonstrated by the "btrfs: test read repair on a corrupted compressed
> extent" test submitted to xfstests.
> 
> This series fixes that, but is a bit invasive as it requires both
> refactoring of the compression code and changes to the repair code to
> not look up the logical address on every repair attempt.  On the plus
> side it removes a whole lot of code.
> 
> It is based on the for-next branch plus my "btrfs: repair all known bad
> mirrors" patch.
> 
> Diffstat:
>  compression.c |  287 ++++++++++++++++------------------------------------------
>  compression.h |   11 --
>  ctree.h       |    4 
>  extent_io.c   |   93 +++++++-----------
>  extent_io.h   |    9 -
>  inode.c       |   34 +++---
>  6 files changed, 148 insertions(+), 290 deletions(-)
---end quoted text---

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: fix read repair on compressed extents
  2022-06-29  8:42 ` Christoph Hellwig
@ 2022-06-29 19:04   ` Boris Burkov
  2022-06-29 19:08     ` Christoph Hellwig
  0 siblings, 1 reply; 19+ messages in thread
From: Boris Burkov @ 2022-06-29 19:04 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Chris Mason, Josef Bacik, David Sterba, linux-btrfs

On Wed, Jun 29, 2022 at 10:42:01AM +0200, Christoph Hellwig wrote:
> Any chance to get a review on this one?

Hi, I'm taking a look, and wanted to confirm my test procedure for
examining your patches is correct.

I applied this series and:
btrfs: repair all known bad mirrors
on top of for-next

and I applied
btrfs read repair: more tests
to my xfstests

Under that setup, the new btrfs/270 fails on step 4 checking if
the repair worked (the output looks all random rather than aa's)

Am I missing something?

Thanks,
Boris

> 
> On Thu, Jun 23, 2022 at 07:53:34AM +0200, Christoph Hellwig wrote:
> > Hi all,
> > 
> > while looking into the repair code I found that read repair of compressed
> > extents is currently fundamentally broken, in that repair tries to write
> > the uncompressed data into a corrupted extent during a repair.  This is
> > demonstrated by the "btrfs: test read repair on a corrupted compressed
> > extent" test submitted to xfstests.
> > 
> > This series fixes that, but is a bit invasive as it requires both
> > refactoring of the compression code and changes to the repair code to
> > not look up the logical address on every repair attempt.  On the plus
> > side it removes a whole lot of code.
> > 
> > It is based on the for-next branch plus my "btrfs: repair all known bad
> > mirrors" patch.
> > 
> > Diffstat:
> >  compression.c |  287 ++++++++++++++++------------------------------------------
> >  compression.h |   11 --
> >  ctree.h       |    4 
> >  extent_io.c   |   93 +++++++-----------
> >  extent_io.h   |    9 -
> >  inode.c       |   34 +++---
> >  6 files changed, 148 insertions(+), 290 deletions(-)
> ---end quoted text---

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: fix read repair on compressed extents
  2022-06-29 19:04   ` Boris Burkov
@ 2022-06-29 19:08     ` Christoph Hellwig
  2022-06-29 19:38       ` Boris Burkov
  0 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2022-06-29 19:08 UTC (permalink / raw)
  To: Boris Burkov
  Cc: Christoph Hellwig, Chris Mason, Josef Bacik, David Sterba, linux-btrfs

On Wed, Jun 29, 2022 at 12:04:41PM -0700, Boris Burkov wrote:
> Under that setup, the new btrfs/270 fails on step 4 checking if
> the repair worked (the output looks all random rather than aa's)

I think that is the first, incorrect version that I posted that
documents the current behavior.  The correct test is in my first
reply to it.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: fix read repair on compressed extents
  2022-06-29 19:08     ` Christoph Hellwig
@ 2022-06-29 19:38       ` Boris Burkov
  0 siblings, 0 replies; 19+ messages in thread
From: Boris Burkov @ 2022-06-29 19:38 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Chris Mason, Josef Bacik, David Sterba, linux-btrfs

On Wed, Jun 29, 2022 at 09:08:38PM +0200, Christoph Hellwig wrote:
> On Wed, Jun 29, 2022 at 12:04:41PM -0700, Boris Burkov wrote:
> > Under that setup, the new btrfs/270 fails on step 4 checking if
> > the repair worked (the output looks all random rather than aa's)
> 
> I think that is the first, incorrect version that I posted that
> documents the current behavior.  The correct test is in my first
> reply to it.

Ah, I see, thanks. That second test passes for me, and fails on
for-next, for what it's worth.

Now I'll actually dig in to the patches/tests.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] btrfs: simplify the pending I/O counting in struct compressed_bio
  2022-06-23  5:53 ` [PATCH 1/4] btrfs: simplify the pending I/O counting in struct compressed_bio Christoph Hellwig
@ 2022-06-29 23:42   ` Boris Burkov
  2022-06-30  4:22     ` Christoph Hellwig
  0 siblings, 1 reply; 19+ messages in thread
From: Boris Burkov @ 2022-06-29 23:42 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Chris Mason, Josef Bacik, David Sterba, linux-btrfs

On Thu, Jun 23, 2022 at 07:53:35AM +0200, Christoph Hellwig wrote:
> Instead of counting the bytes just count the bios, with an extra
> reference held during submission.  This significantly simplifies the
> submission side error handling.

Interestingly, this more or less exactly un-does the patch:

btrfs: introduce compressed_bio::pending_sectors to trace compressed bio

which introduced the sector counting, asserting that counting bios was
awkward. FWIW, in my opinion, counting from 1 feels worth it to not have
to add up the size, and simplifying the error handling.

> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Boris Burkov <boris@bur.io>
> ---
>  fs/btrfs/compression.c | 126 ++++++++++-------------------------------
>  fs/btrfs/compression.h |   4 +-
>  2 files changed, 33 insertions(+), 97 deletions(-)
> 
> diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
> index 907fc8a4c092c..e756da640fd7b 100644
> --- a/fs/btrfs/compression.c
> +++ b/fs/btrfs/compression.c
> @@ -191,44 +191,6 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
>  	return 0;
>  }
>  
> -/*
> - * Reduce bio and io accounting for a compressed_bio with its corresponding bio.
> - *
> - * Return true if there is no pending bio nor io.
> - * Return false otherwise.
> - */
> -static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *bio)
> -{
> -	struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
> -	unsigned int bi_size = 0;
> -	bool last_io = false;
> -	struct bio_vec *bvec;
> -	struct bvec_iter_all iter_all;
> -
> -	/*
> -	 * At endio time, bi_iter.bi_size doesn't represent the real bio size.
> -	 * Thus here we have to iterate through all segments to grab correct
> -	 * bio size.
> -	 */
> -	bio_for_each_segment_all(bvec, bio, iter_all)
> -		bi_size += bvec->bv_len;
> -
> -	if (bio->bi_status)
> -		cb->status = bio->bi_status;
> -
> -	ASSERT(bi_size && bi_size <= cb->compressed_len);
> -	last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits,
> -					&cb->pending_sectors);
> -	/*
> -	 * Here we must wake up the possible error handler after all other
> -	 * operations on @cb finished, or we can race with
> -	 * finish_compressed_bio_*() which may free @cb.
> -	 */
> -	wake_up_var(cb);
> -
> -	return last_io;
> -}
> -
>  static void finish_compressed_bio_read(struct compressed_bio *cb)
>  {
>  	unsigned int index;
> @@ -288,7 +250,10 @@ static void end_compressed_bio_read(struct bio *bio)
>  	unsigned int mirror = btrfs_bio(bio)->mirror_num;
>  	int ret = 0;
>  
> -	if (!dec_and_test_compressed_bio(cb, bio))
> +	if (bio->bi_status)
> +		cb->status = bio->bi_status;
> +
> +	if (!refcount_dec_and_test(&cb->pending_ios))
>  		goto out;
>  
>  	/*
> @@ -417,7 +382,10 @@ static void end_compressed_bio_write(struct bio *bio)
>  {
>  	struct compressed_bio *cb = bio->bi_private;
>  
> -	if (dec_and_test_compressed_bio(cb, bio)) {
> +	if (bio->bi_status)
> +		cb->status = bio->bi_status;
> +
> +	if (refcount_dec_and_test(&cb->pending_ios)) {
>  		struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
>  
>  		btrfs_record_physical_zoned(cb->inode, cb->start, bio);
> @@ -476,7 +444,7 @@ static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_byte
>  		return ERR_PTR(ret);
>  	}
>  	*next_stripe_start = disk_bytenr + geom.len;
> -
> +	refcount_inc(&cb->pending_ios);
>  	return bio;
>  }
>  
> @@ -503,17 +471,17 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
>  	struct compressed_bio *cb;
>  	u64 cur_disk_bytenr = disk_start;
>  	u64 next_stripe_start;
> -	blk_status_t ret;
>  	int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
>  	const bool use_append = btrfs_use_zone_append(inode, disk_start);
>  	const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
> +	blk_status_t ret = BLK_STS_OK;
>  
>  	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
>  	       IS_ALIGNED(len, fs_info->sectorsize));
>  	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
>  	if (!cb)
>  		return BLK_STS_RESOURCE;
> -	refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
> +	refcount_set(&cb->pending_ios, 1);
>  	cb->status = BLK_STS_OK;
>  	cb->inode = &inode->vfs_inode;
>  	cb->start = start;
> @@ -543,8 +511,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
>  				&next_stripe_start);
>  			if (IS_ERR(bio)) {
>  				ret = errno_to_blk_status(PTR_ERR(bio));
> -				bio = NULL;
> -				goto finish_cb;
> +				break;
>  			}
>  			if (blkcg_css)
>  				bio->bi_opf |= REQ_CGROUP_PUNT;
> @@ -588,8 +555,11 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
>  		if (submit) {
>  			if (!skip_sum) {
>  				ret = btrfs_csum_one_bio(inode, bio, start, true);
> -				if (ret)
> -					goto finish_cb;
> +				if (ret) {
> +					bio->bi_status = ret;
> +					bio_endio(bio);
> +					break;
> +				}
>  			}
>  
>  			ASSERT(bio->bi_iter.bi_size);
> @@ -598,33 +568,12 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
>  		}
>  		cond_resched();
>  	}
> -	if (blkcg_css)
> -		kthread_associate_blkcg(NULL);
>  
> -	return 0;
> -
> -finish_cb:
>  	if (blkcg_css)
>  		kthread_associate_blkcg(NULL);
>  
> -	if (bio) {
> -		bio->bi_status = ret;
> -		bio_endio(bio);
> -	}
> -	/* Last byte of @cb is submitted, endio will free @cb */
> -	if (cur_disk_bytenr == disk_start + compressed_len)
> -		return ret;
> -
> -	wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
> -			   (disk_start + compressed_len - cur_disk_bytenr) >>
> -			   fs_info->sectorsize_bits);
> -	/*
> -	 * Even with previous bio ended, we should still have io not yet
> -	 * submitted, thus need to finish manually.
> -	 */
> -	ASSERT(refcount_read(&cb->pending_sectors));
> -	/* Now we are the only one referring @cb, can finish it safely. */
> -	finish_compressed_bio_write(cb);
> +	if (refcount_dec_and_test(&cb->pending_ios))
> +		finish_compressed_bio_write(cb);
>  	return ret;
>  }
>  
> @@ -830,7 +779,7 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
>  		goto out;
>  	}
>  
> -	refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
> +	refcount_set(&cb->pending_ios, 1);
>  	cb->status = BLK_STS_OK;
>  	cb->inode = inode;
>  	cb->mirror_num = mirror_num;
> @@ -880,9 +829,9 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
>  					REQ_OP_READ, end_compressed_bio_read,
>  					&next_stripe_start);
>  			if (IS_ERR(comp_bio)) {
> -				ret = errno_to_blk_status(PTR_ERR(comp_bio));
> -				comp_bio = NULL;
> -				goto finish_cb;
> +				cb->status =
> +					errno_to_blk_status(PTR_ERR(comp_bio));
> +				break;
>  			}
>  		}
>  		/*
> @@ -921,8 +870,11 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
>  			unsigned int nr_sectors;
>  
>  			ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
> -			if (ret)
> -				goto finish_cb;
> +			if (ret) {
> +				comp_bio->bi_status = ret;
> +				bio_endio(comp_bio);
> +				break;
> +			}
>  
>  			nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
>  						  fs_info->sectorsize);
> @@ -933,6 +885,9 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
>  			comp_bio = NULL;
>  		}
>  	}
> +
> +	if (refcount_dec_and_test(&cb->pending_ios))
> +		finish_compressed_bio_read(cb);
>  	return;
>  
>  fail:
> @@ -950,25 +905,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
>  	bio->bi_status = ret;
>  	bio_endio(bio);
>  	return;
> -finish_cb:
> -	if (comp_bio) {
> -		comp_bio->bi_status = ret;
> -		bio_endio(comp_bio);
> -	}
> -	/* All bytes of @cb is submitted, endio will free @cb */
> -	if (cur_disk_byte == disk_bytenr + compressed_len)
> -		return;
> -
> -	wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
> -			   (disk_bytenr + compressed_len - cur_disk_byte) >>
> -			   fs_info->sectorsize_bits);
> -	/*
> -	 * Even with previous bio ended, we should still have io not yet
> -	 * submitted, thus need to finish @cb manually.
> -	 */
> -	ASSERT(refcount_read(&cb->pending_sectors));
> -	/* Now we are the only one referring @cb, can finish it safely. */
> -	finish_compressed_bio_read(cb);
>  }
>  
>  /*
> diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
> index 5fca7603e928a..0e4cbf04fd866 100644
> --- a/fs/btrfs/compression.h
> +++ b/fs/btrfs/compression.h
> @@ -30,8 +30,8 @@ static_assert((BTRFS_MAX_COMPRESSED % PAGE_SIZE) == 0);
>  #define	BTRFS_ZLIB_DEFAULT_LEVEL		3
>  
>  struct compressed_bio {
> -	/* Number of sectors with unfinished IO (unsubmitted or unfinished) */
> -	refcount_t pending_sectors;
> +	/* Number of outstanding bios */
> +	refcount_t pending_ios;
>  
>  	/* Number of compressed pages in the array */
>  	unsigned int nr_pages;
> -- 
> 2.30.2
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/4] btrfs: pass a btrfs_bio to btrfs_repair_one_sector
  2022-06-23  5:53 ` [PATCH 2/4] btrfs: pass a btrfs_bio to btrfs_repair_one_sector Christoph Hellwig
@ 2022-06-29 23:44   ` Boris Burkov
  2022-06-30  4:23     ` Christoph Hellwig
  0 siblings, 1 reply; 19+ messages in thread
From: Boris Burkov @ 2022-06-29 23:44 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Chris Mason, Josef Bacik, David Sterba, linux-btrfs

On Thu, Jun 23, 2022 at 07:53:36AM +0200, Christoph Hellwig wrote:
> Pass the btrfs_bio instead of the plain bio to btrfs_repair_one_sector,
> and remove the start and failed_mirror arguments in favor of deriving
> them from the btrfs_bio.  For this to work ensure that the file_offset
> field is also initialized for buffered I/O.
nit: the field in volumes.h has a comment "for direct I/O" which we
should get rid of now.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Boris Burkov <boris@bur.io>
> ---
>  fs/btrfs/extent_io.c | 47 ++++++++++++++++++++++++--------------------
>  fs/btrfs/extent_io.h |  8 ++++----
>  fs/btrfs/inode.c     |  5 ++---
>  3 files changed, 32 insertions(+), 28 deletions(-)
> 
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 3778d58092dea..ec7bdb3fa0921 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -182,6 +182,7 @@ static int add_extent_changeset(struct extent_state *state, u32 bits,
>  static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
>  {
>  	struct bio *bio;
> +	struct bio_vec *bv;
>  	struct inode *inode;
>  	int mirror_num;
>  
> @@ -189,12 +190,15 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
>  		return;
>  
>  	bio = bio_ctrl->bio;
> -	inode = bio_first_page_all(bio)->mapping->host;
> +	bv = bio_first_bvec_all(bio);
> +	inode = bv->bv_page->mapping->host;
>  	mirror_num = bio_ctrl->mirror_num;
>  
>  	/* Caller should ensure the bio has at least some range added */
>  	ASSERT(bio->bi_iter.bi_size);
>  
> +	btrfs_bio(bio)->file_offset = page_offset(bv->bv_page) + bv->bv_offset;
> +
>  	if (!is_data_inode(inode))
>  		btrfs_submit_metadata_bio(inode, bio, mirror_num);
>  	else if (btrfs_op(bio) == BTRFS_MAP_WRITE)
> @@ -2533,10 +2537,11 @@ void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
>  }
>  
>  static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
> -							     u64 start,
> -							     int failed_mirror)
> +							     struct btrfs_bio *bbio,
> +							     unsigned int bio_offset)
>  {
>  	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> +	u64 start = bbio->file_offset + bio_offset;
>  	struct io_failure_record *failrec;
>  	struct extent_map *em;
>  	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
> @@ -2556,7 +2561,7 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
>  		 * (e.g. with a list for failed_mirror) to make
>  		 * clean_io_failure() clean all those errors at once.
>  		 */
> -		ASSERT(failrec->this_mirror == failed_mirror);
> +		ASSERT(failrec->this_mirror == bbio->mirror_num);
>  		ASSERT(failrec->len == fs_info->sectorsize);
>  		return failrec;
>  	}
> @@ -2567,7 +2572,7 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
>  
>  	failrec->start = start;
>  	failrec->len = sectorsize;
> -	failrec->failed_mirror = failrec->this_mirror = failed_mirror;
> +	failrec->failed_mirror = failrec->this_mirror = bbio->mirror_num;
>  	failrec->compress_type = BTRFS_COMPRESS_NONE;
>  
>  	read_lock(&em_tree->lock);
> @@ -2632,17 +2637,17 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
>  	return failrec;
>  }
>  
> -int btrfs_repair_one_sector(struct inode *inode,
> -			    struct bio *failed_bio, u32 bio_offset,
> -			    struct page *page, unsigned int pgoff,
> -			    u64 start, int failed_mirror,
> +int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
> +			    u32 bio_offset, struct page *page,
> +			    unsigned int pgoff,
>  			    submit_bio_hook_t *submit_bio_hook)
>  {
> +	u64 start = failed_bbio->file_offset + bio_offset;
>  	struct io_failure_record *failrec;
>  	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
>  	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
>  	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
> -	struct btrfs_bio *failed_bbio = btrfs_bio(failed_bio);
> +	struct bio *failed_bio = &failed_bbio->bio;
>  	const int icsum = bio_offset >> fs_info->sectorsize_bits;
>  	struct bio *repair_bio;
>  	struct btrfs_bio *repair_bbio;
> @@ -2652,7 +2657,7 @@ int btrfs_repair_one_sector(struct inode *inode,
>  
>  	BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
>  
> -	failrec = btrfs_get_io_failure_record(inode, start, failed_mirror);
> +	failrec = btrfs_get_io_failure_record(inode, failed_bbio, bio_offset);
>  	if (IS_ERR(failrec))
>  		return PTR_ERR(failrec);
>  
> @@ -2750,9 +2755,10 @@ static void end_sector_io(struct page *page, u64 offset, bool uptodate)
>  				    offset + sectorsize - 1, &cached);
>  }
>  
> -static void submit_data_read_repair(struct inode *inode, struct bio *failed_bio,
> +static void submit_data_read_repair(struct inode *inode,
> +				    struct btrfs_bio *failed_bbio,
>  				    u32 bio_offset, const struct bio_vec *bvec,
> -				    int failed_mirror, unsigned int error_bitmap)
> +				    unsigned int error_bitmap)
>  {
>  	const unsigned int pgoff = bvec->bv_offset;
>  	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> @@ -2763,7 +2769,7 @@ static void submit_data_read_repair(struct inode *inode, struct bio *failed_bio,
>  	const int nr_bits = (end + 1 - start) >> fs_info->sectorsize_bits;
>  	int i;
>  
> -	BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
> +	BUG_ON(bio_op(&failed_bbio->bio) == REQ_OP_WRITE);
>  
>  	/* This repair is only for data */
>  	ASSERT(is_data_inode(inode));
> @@ -2775,7 +2781,7 @@ static void submit_data_read_repair(struct inode *inode, struct bio *failed_bio,
>  	 * We only get called on buffered IO, thus page must be mapped and bio
>  	 * must not be cloned.
>  	 */
> -	ASSERT(page->mapping && !bio_flagged(failed_bio, BIO_CLONED));
> +	ASSERT(page->mapping && !bio_flagged(&failed_bbio->bio, BIO_CLONED));
>  
>  	/* Iterate through all the sectors in the range */
>  	for (i = 0; i < nr_bits; i++) {
> @@ -2792,10 +2798,9 @@ static void submit_data_read_repair(struct inode *inode, struct bio *failed_bio,
>  			goto next;
>  		}
>  
> -		ret = btrfs_repair_one_sector(inode, failed_bio,
> -				bio_offset + offset,
> -				page, pgoff + offset, start + offset,
> -				failed_mirror, btrfs_submit_data_read_bio);
> +		ret = btrfs_repair_one_sector(inode, failed_bbio,
> +				bio_offset + offset, page, pgoff + offset,
> +				btrfs_submit_data_read_bio);
>  		if (!ret) {
>  			/*
>  			 * We have submitted the read repair, the page release
> @@ -3127,8 +3132,8 @@ static void end_bio_extent_readpage(struct bio *bio)
>  			 * submit_data_read_repair() will handle all the good
>  			 * and bad sectors, we just continue to the next bvec.
>  			 */
> -			submit_data_read_repair(inode, bio, bio_offset, bvec,
> -						mirror, error_bitmap);
> +			submit_data_read_repair(inode, bbio, bio_offset, bvec,
> +						error_bitmap);
>  		} else {
>  			/* Update page status and unlock */
>  			end_page_read(page, uptodate, start, len);
> diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
> index 280af70c04953..a78051c7627c4 100644
> --- a/fs/btrfs/extent_io.h
> +++ b/fs/btrfs/extent_io.h
> @@ -57,6 +57,7 @@ enum {
>  #define BITMAP_LAST_BYTE_MASK(nbits) \
>  	(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
>  
> +struct btrfs_bio;
>  struct btrfs_root;
>  struct btrfs_inode;
>  struct btrfs_io_bio;
> @@ -266,10 +267,9 @@ struct io_failure_record {
>  	int num_copies;
>  };
>  
> -int btrfs_repair_one_sector(struct inode *inode,
> -			    struct bio *failed_bio, u32 bio_offset,
> -			    struct page *page, unsigned int pgoff,
> -			    u64 start, int failed_mirror,
> +int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
> +			    u32 bio_offset, struct page *page,
> +			    unsigned int pgoff,
>  			    submit_bio_hook_t *submit_bio_hook);
>  
>  #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 784c1ad4a9634..a627b2af9e243 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -7953,9 +7953,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
>  		} else {
>  			int ret;
>  
> -			ret = btrfs_repair_one_sector(inode, &bbio->bio, offset,
> -					bv.bv_page, bv.bv_offset, start,
> -					bbio->mirror_num,
> +			ret = btrfs_repair_one_sector(inode, bbio, offset,
> +					bv.bv_page, bv.bv_offset,
>  					submit_dio_repair_bio);
>  			if (ret)
>  				err = errno_to_blk_status(ret);
> -- 
> 2.30.2
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] btrfs: remove the start argument to check_data_csum
  2022-06-23  5:53 ` [PATCH 3/4] btrfs: remove the start argument to check_data_csum Christoph Hellwig
@ 2022-06-29 23:48   ` Boris Burkov
  0 siblings, 0 replies; 19+ messages in thread
From: Boris Burkov @ 2022-06-29 23:48 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Chris Mason, Josef Bacik, David Sterba, linux-btrfs

On Thu, Jun 23, 2022 at 07:53:37AM +0200, Christoph Hellwig wrote:
> Just derive it from the btrfs_bio now that ->file_offset is always valid.
> Also make the function available outside of inode.c as we'll need that
> soon.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Boris Burkov <boris@bur.io>
> ---
>  fs/btrfs/ctree.h |  2 ++
>  fs/btrfs/inode.c | 22 +++++++++-------------
>  2 files changed, 11 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 4e2569f84aabc..164f54e6aa447 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -3293,6 +3293,8 @@ int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
>  unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
>  				    u32 bio_offset, struct page *page,
>  				    u64 start, u64 end);
> +int check_data_csum(struct inode *inode, struct btrfs_bio *bbio, u32 bio_offset,
> +		    struct page *page, u32 pgoff);
>  struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
>  					   u64 start, u64 len);
>  noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index a627b2af9e243..429428fde4a88 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -3396,20 +3396,18 @@ int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
>  /*
>   * check_data_csum - verify checksum of one sector of uncompressed data
>   * @inode:	inode
> - * @io_bio:	btrfs_io_bio which contains the csum
> + * @bbio:	btrfs_bio which contains the csum
>   * @bio_offset:	offset to the beginning of the bio (in bytes)
>   * @page:	page where is the data to be verified
>   * @pgoff:	offset inside the page
> - * @start:	logical offset in the file
>   *
>   * The length of such check is always one sector size.
>   *
>   * When csum mismatch is detected, we will also report the error and fill the
>   * corrupted range with zero. (Thus it needs the extra parameters)
>   */
> -static int check_data_csum(struct inode *inode, struct btrfs_bio *bbio,
> -			   u32 bio_offset, struct page *page, u32 pgoff,
> -			   u64 start)
> +int check_data_csum(struct inode *inode, struct btrfs_bio *bbio, u32 bio_offset,
> +		    struct page *page, u32 pgoff)
>  {
>  	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
>  	u32 len = fs_info->sectorsize;
> @@ -3425,8 +3423,9 @@ static int check_data_csum(struct inode *inode, struct btrfs_bio *bbio,
>  	return 0;
>  
>  zeroit:
> -	btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected,
> -				    bbio->mirror_num);
> +	btrfs_print_data_csum_error(BTRFS_I(inode),
> +				    bbio->file_offset + bio_offset,
> +				    csum, csum_expected, bbio->mirror_num);
>  	if (bbio->device)
>  		btrfs_dev_stat_inc_and_print(bbio->device,
>  					     BTRFS_DEV_STAT_CORRUPTION_ERRS);
> @@ -3495,8 +3494,7 @@ unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
>  					  EXTENT_NODATASUM);
>  			continue;
>  		}
> -		ret = check_data_csum(inode, bbio, bio_offset, page, pg_off,
> -				      page_offset(page) + pg_off);
> +		ret = check_data_csum(inode, bbio, bio_offset, page, pg_off);
>  		if (ret < 0) {
>  			const int nr_bit = (pg_off - offset_in_page(start)) >>
>  				     root->fs_info->sectorsize_bits;
> @@ -7946,7 +7944,7 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
>  
>  		if (uptodate &&
>  		    (!csum || !check_data_csum(inode, bbio, offset, bv.bv_page,
> -					       bv.bv_offset, start))) {
> +					       bv.bv_offset))) {
>  			clean_io_failure(fs_info, failure_tree, io_tree, start,
>  					 bv.bv_page, btrfs_ino(BTRFS_I(inode)),
>  					 bv.bv_offset);
> @@ -10324,7 +10322,6 @@ static blk_status_t btrfs_encoded_read_verify_csum(struct btrfs_bio *bbio)
>  	u32 sectorsize = fs_info->sectorsize;
>  	struct bio_vec *bvec;
>  	struct bvec_iter_all iter_all;
> -	u64 start = priv->file_offset;
>  	u32 bio_offset = 0;
>  
>  	if (priv->skip_csum || !uptodate)
> @@ -10338,9 +10335,8 @@ static blk_status_t btrfs_encoded_read_verify_csum(struct btrfs_bio *bbio)
>  		for (i = 0; i < nr_sectors; i++) {
>  			ASSERT(pgoff < PAGE_SIZE);
>  			if (check_data_csum(&inode->vfs_inode, bbio, bio_offset,
> -					    bvec->bv_page, pgoff, start))
> +					    bvec->bv_page, pgoff))
>  				return BLK_STS_IOERR;
> -			start += sectorsize;
>  			bio_offset += sectorsize;
>  			pgoff += sectorsize;
>  		}
> -- 
> 2.30.2
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 4/4] btrfs: fix repair of compressed extents
  2022-06-23  5:53 ` [PATCH 4/4] btrfs: fix repair of compressed extents Christoph Hellwig
@ 2022-06-30  0:18   ` Boris Burkov
  2022-06-30  4:24     ` Christoph Hellwig
  0 siblings, 1 reply; 19+ messages in thread
From: Boris Burkov @ 2022-06-30  0:18 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Chris Mason, Josef Bacik, David Sterba, linux-btrfs

On Thu, Jun 23, 2022 at 07:53:38AM +0200, Christoph Hellwig wrote:
> Currently the checksum of compressed extents is verified based on the
> compressed data and the lower btrfs_bio, but the actual repair process
> is driven by end_bio_extent_readpage on the upper btrfs_bio for the
> decompressed data.
> 
> This has a bunch of issues, including not being able to properly
> communicate the failed mirror up in case that the I/O submission got
> preempted, losing track of whether an error was an I/O error or a checksum
> verification failure, but most importantly that this design causes
> btrfs_clean_io_failure to eventually write back the uncompressed good
> data onto the disk sectors that are supposed to contain compressed data.
> 
> Fix this by moving the repair to the lower btrfs_bio.  To do so, a fair
> amount of code has to be reshuffled:
> 
>  a) the lower btrfs_bio now needs a valid csum pointer.  The easiest way
>     to achieve that is to pass NULL to btrfs_lookup_bio_sums and just use
>     the btrfs_bio management of csums.  For a compressed_bio that is
>     split into multiple btrfs_bios this means additional memory
>     allocations, but the code becomes a lot more regular.
>  b) checksum verification now runs directly on the lower btrfs_bio instead
>     of the compressed_bio.  This actually nicely simplifies the end I/O
>     processing.
>  c) btrfs_repair_one_sector can't just look up the logical address for
>     the file offset any more, as there are no corresponding relative
>     offsets that apply to the file offset and the logical address for
>     compressed extents.  Instead require that the saved bvec_iter in the
>     btrfs_bio is filled out for all read bios and use that, which again
>     removes a fair amount of code.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/btrfs/compression.c | 171 ++++++++++++++---------------------------
>  fs/btrfs/compression.h |   7 --
>  fs/btrfs/ctree.h       |   2 +
>  fs/btrfs/extent_io.c   |  46 +++--------
>  fs/btrfs/extent_io.h   |   1 -
>  fs/btrfs/inode.c       |   7 ++
>  6 files changed, 77 insertions(+), 157 deletions(-)
> 
> diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
> index e756da640fd7b..c8b14a5bd89be 100644
> --- a/fs/btrfs/compression.c
> +++ b/fs/btrfs/compression.c
> @@ -136,66 +136,14 @@ static int compression_decompress(int type, struct list_head *ws,
>  
>  static int btrfs_decompress_bio(struct compressed_bio *cb);
>  
> -static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
> -				      unsigned long disk_size)
> -{
> -	return sizeof(struct compressed_bio) +
> -		(DIV_ROUND_UP(disk_size, fs_info->sectorsize)) * fs_info->csum_size;
> -}
> -
> -static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
> -				 u64 disk_start)
> -{
> -	struct btrfs_fs_info *fs_info = inode->root->fs_info;
> -	const u32 csum_size = fs_info->csum_size;
> -	const u32 sectorsize = fs_info->sectorsize;
> -	struct page *page;
> -	unsigned int i;
> -	u8 csum[BTRFS_CSUM_SIZE];
> -	struct compressed_bio *cb = bio->bi_private;
> -	u8 *cb_sum = cb->sums;
> -
> -	if ((inode->flags & BTRFS_INODE_NODATASUM) ||
> -	    test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state))
> -		return 0;
> -
> -	for (i = 0; i < cb->nr_pages; i++) {
> -		u32 pg_offset;
> -		u32 bytes_left = PAGE_SIZE;
> -		page = cb->compressed_pages[i];
> -
> -		/* Determine the remaining bytes inside the page first */
> -		if (i == cb->nr_pages - 1)
> -			bytes_left = cb->compressed_len - i * PAGE_SIZE;
> -
> -		/* Hash through the page sector by sector */
> -		for (pg_offset = 0; pg_offset < bytes_left;
> -		     pg_offset += sectorsize) {
> -			int ret;
> -
> -			ret = btrfs_check_sector_csum(fs_info, page, pg_offset,
> -						      csum, cb_sum);
> -			if (ret) {
> -				btrfs_print_data_csum_error(inode, disk_start,
> -						csum, cb_sum, cb->mirror_num);
> -				if (btrfs_bio(bio)->device)
> -					btrfs_dev_stat_inc_and_print(
> -						btrfs_bio(bio)->device,
> -						BTRFS_DEV_STAT_CORRUPTION_ERRS);
> -				return -EIO;
> -			}
> -			cb_sum += csum_size;
> -			disk_start += sectorsize;
> -		}
> -	}
> -	return 0;
> -}
> -
>  static void finish_compressed_bio_read(struct compressed_bio *cb)
>  {
>  	unsigned int index;
>  	struct page *page;
>  
> +	if (cb->status == BLK_STS_OK)
> +		cb->status = errno_to_blk_status(btrfs_decompress_bio(cb));
> +
>  	/* Release the compressed pages */
>  	for (index = 0; index < cb->nr_pages; index++) {
>  		page = cb->compressed_pages[index];
> @@ -233,59 +181,54 @@ static void finish_compressed_bio_read(struct compressed_bio *cb)
>  	kfree(cb);
>  }
>  
> -/* when we finish reading compressed pages from the disk, we
> - * decompress them and then run the bio end_io routines on the
> - * decompressed pages (in the inode address space).
> - *
> - * This allows the checksumming and other IO error handling routines
> - * to work normally
> - *
> - * The compressed pages are freed here, and it must be run
> - * in process context
> +/*
> + * Verify the checksums and kick off repair if needed on the compressed data
> + * before decompressing it into the original bio and freeing the compressed
> + * pages.
>   */
>  static void end_compressed_bio_read(struct bio *bio)
>  {
>  	struct compressed_bio *cb = bio->bi_private;
> -	struct inode *inode;
> -	unsigned int mirror = btrfs_bio(bio)->mirror_num;
> -	int ret = 0;
> -
> -	if (bio->bi_status)
> -		cb->status = bio->bi_status;
> -
> -	if (!refcount_dec_and_test(&cb->pending_ios))
> -		goto out;
> -
> -	/*
> -	 * Record the correct mirror_num in cb->orig_bio so that
> -	 * read-repair can work properly.
> -	 */
> -	btrfs_bio(cb->orig_bio)->mirror_num = mirror;
> -	cb->mirror_num = mirror;
> -
> -	/*
> -	 * Some IO in this cb have failed, just skip checksum as there
> -	 * is no way it could be correct.
> -	 */
> -	if (cb->status != BLK_STS_OK)
> -		goto csum_failed;
> +	struct inode *inode = cb->inode;
> +	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> +	struct btrfs_inode *bi = BTRFS_I(inode);
> +	bool csum = !(bi->flags & BTRFS_INODE_NODATASUM) &&
> +		    !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
> +	blk_status_t status = bio->bi_status;
> +	struct btrfs_bio *bbio = btrfs_bio(bio);
> +	struct bvec_iter iter;
> +	struct bio_vec bv;
> +	u32 offset;
> +
> +	btrfs_bio_for_each_sector(fs_info, bv, bbio, iter, offset) {
> +		u64 start = bbio->file_offset + offset;
> +
> +		if (!status &&
> +		    (!csum || !check_data_csum(inode, bbio, offset, bv.bv_page,
> +					       bv.bv_offset))) {
> +			clean_io_failure(fs_info, &bi->io_failure_tree,
> +					 &bi->io_tree, start, bv.bv_page,
> +					 btrfs_ino(bi), bv.bv_offset);
> +		} else {
> +			int ret;
>  
> -	inode = cb->inode;
> -	ret = check_compressed_csum(BTRFS_I(inode), bio,
> -				    bio->bi_iter.bi_sector << 9);
> -	if (ret)
> -		goto csum_failed;
> +			refcount_inc(&cb->pending_ios);
> +			ret = btrfs_repair_one_sector(inode, bbio, offset,
> +					bv.bv_page, bv.bv_offset,
> +					btrfs_submit_data_read_bio);
> +			if (ret) {
> +				refcount_dec(&cb->pending_ios);
> +				status = errno_to_blk_status(ret);
> +			}
> +		}
> +	}
>  
> -	/* ok, we're the last bio for this extent, lets start
> -	 * the decompression.
> -	 */
> -	ret = btrfs_decompress_bio(cb);
> +	if (status)
> +		cb->status = status;
>  
> -csum_failed:
> -	if (ret)
> -		cb->status = errno_to_blk_status(ret);
> -	finish_compressed_bio_read(cb);
> -out:
> +	if (refcount_dec_and_test(&cb->pending_ios))
> +		finish_compressed_bio_read(cb);
> +	btrfs_bio_free_csum(bbio);
>  	bio_put(bio);
>  }
>  
> @@ -478,7 +421,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
>  
>  	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
>  	       IS_ALIGNED(len, fs_info->sectorsize));
> -	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
> +	cb = kmalloc(sizeof(struct compressed_bio), GFP_NOFS);
>  	if (!cb)
>  		return BLK_STS_RESOURCE;
>  	refcount_set(&cb->pending_ios, 1);
> @@ -486,7 +429,6 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
>  	cb->inode = &inode->vfs_inode;
>  	cb->start = start;
>  	cb->len = len;
> -	cb->mirror_num = 0;
>  	cb->compressed_pages = compressed_pages;
>  	cb->compressed_len = compressed_len;
>  	cb->writeback = writeback;
> @@ -755,7 +697,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
>  	blk_status_t ret;
>  	int ret2;
>  	int i;
> -	u8 *sums;
>  
>  	em_tree = &BTRFS_I(inode)->extent_tree;
>  
> @@ -773,7 +714,7 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
>  
>  	ASSERT(em->compress_type != BTRFS_COMPRESS_NONE);
>  	compressed_len = em->block_len;
> -	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
> +	cb = kmalloc(sizeof(struct compressed_bio), GFP_NOFS);
>  	if (!cb) {
>  		ret = BLK_STS_RESOURCE;
>  		goto out;
> @@ -782,8 +723,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
>  	refcount_set(&cb->pending_ios, 1);
>  	cb->status = BLK_STS_OK;
>  	cb->inode = inode;
> -	cb->mirror_num = mirror_num;
> -	sums = cb->sums;
>  
>  	cb->start = em->orig_start;
>  	em_len = em->len;
> @@ -867,19 +806,25 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
>  			submit = true;
>  
>  		if (submit) {
> -			unsigned int nr_sectors;
> +			/* Save the original iter for read repair */
> +			if (bio_op(comp_bio) == REQ_OP_READ)
> +				btrfs_bio(comp_bio)->iter = comp_bio->bi_iter;
> +
> +			/*
> +			 * Just stash the initial offset of this chunk, as there
> +			 * is no direct correlation between compressed pages and
> +			 * the original file offset.  The field is only used for
> +			 * printing error messages anyway.
> +			 */
> +			btrfs_bio(comp_bio)->file_offset = file_offset;
>  
> -			ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
> +			ret = btrfs_lookup_bio_sums(inode, comp_bio, NULL);
>  			if (ret) {
>  				comp_bio->bi_status = ret;
>  				bio_endio(comp_bio);
>  				break;
>  			}
>  
> -			nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
> -						  fs_info->sectorsize);
> -			sums += fs_info->csum_size * nr_sectors;
> -
>  			ASSERT(comp_bio->bi_iter.bi_size);
>  			btrfs_submit_bio(fs_info, comp_bio, mirror_num);
>  			comp_bio = NULL;
> diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
> index 0e4cbf04fd866..e9ef24034cad0 100644
> --- a/fs/btrfs/compression.h
> +++ b/fs/btrfs/compression.h
> @@ -59,19 +59,12 @@ struct compressed_bio {
>  
>  	/* IO errors */
>  	blk_status_t status;
> -	int mirror_num;
>  
>  	union {
>  		/* For reads, this is the bio we are copying the data into */
>  		struct bio *orig_bio;
>  		struct work_struct write_end_work;
>  	};
> -
> -	/*
> -	 * the start of a variable length array of checksums only
> -	 * used by reads
> -	 */
> -	u8 sums[];
>  };
>  
>  static inline unsigned int btrfs_compress_type(unsigned int type_level)
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 164f54e6aa447..12f59e35755fa 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -3290,6 +3290,8 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
>  			int mirror_num, enum btrfs_compression_type compress_type);
>  int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
>  			    u32 pgoff, u8 *csum, const u8 * const csum_expected);

As far as I can tell, this is redundant with the last patch.

> +int check_data_csum(struct inode *inode, struct btrfs_bio *bbio, u32 bio_offset,
> +		    struct page *page, u32 pgoff);
>  unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
>  				    u32 bio_offset, struct page *page,
>  				    u64 start, u64 end);
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index ec7bdb3fa0921..587d2ba20b53b 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -2543,13 +2543,10 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
>  	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
>  	u64 start = bbio->file_offset + bio_offset;
>  	struct io_failure_record *failrec;
> -	struct extent_map *em;
>  	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
>  	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
> -	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
>  	const u32 sectorsize = fs_info->sectorsize;
>  	int ret;
> -	u64 logical;
>  
>  	failrec = get_state_failrec(failure_tree, start);
>  	if (!IS_ERR(failrec)) {
> @@ -2573,41 +2570,14 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
>  	failrec->start = start;
>  	failrec->len = sectorsize;
>  	failrec->failed_mirror = failrec->this_mirror = bbio->mirror_num;
> -	failrec->compress_type = BTRFS_COMPRESS_NONE;
> -
> -	read_lock(&em_tree->lock);
> -	em = lookup_extent_mapping(em_tree, start, failrec->len);
> -	if (!em) {
> -		read_unlock(&em_tree->lock);
> -		kfree(failrec);
> -		return ERR_PTR(-EIO);
> -	}
> -
> -	if (em->start > start || em->start + em->len <= start) {
> -		free_extent_map(em);
> -		em = NULL;
> -	}
> -	read_unlock(&em_tree->lock);
> -	if (!em) {
> -		kfree(failrec);
> -		return ERR_PTR(-EIO);
> -	}
> -
> -	logical = start - em->start;
> -	logical = em->block_start + logical;
> -	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
> -		logical = em->block_start;
> -		failrec->compress_type = em->compress_type;
> -	}
> +	failrec->logical = (bbio->iter.bi_sector << SECTOR_SHIFT) + bio_offset;
>  
>  	btrfs_debug(fs_info,
> -		    "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
> -		    logical, start, failrec->len);
> -
> -	failrec->logical = logical;
> -	free_extent_map(em);
> +		    "Get IO Failure Record: (new) logical=%llu, start=%llu",
> +		    failrec->logical, start);
>  
> -	failrec->num_copies = btrfs_num_copies(fs_info, logical, sectorsize);
> +	failrec->num_copies = btrfs_num_copies(fs_info, failrec->logical,
> +					       sectorsize);
>  	if (failrec->num_copies == 1) {
>  		/*
>  		 * we only have a single copy of the data, so don't bother with
> @@ -2709,7 +2679,7 @@ int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
>  	 * will be handled by the endio on the repair_bio, so we can't return an
>  	 * error here.
>  	 */
> -	submit_bio_hook(inode, repair_bio, failrec->this_mirror, failrec->compress_type);
> +	submit_bio_hook(inode, repair_bio, failrec->this_mirror, 0);
>  	return BLK_STS_OK;
>  }
>  
> @@ -3115,6 +3085,10 @@ static void end_bio_extent_readpage(struct bio *bio)
>  			 * Only try to repair bios that actually made it to a
>  			 * device.  If the bio failed to be submitted mirror
>  			 * is 0 and we need to fail it without retrying.
> +			 *
> +			 * This also includes the high level bios for compressed
> +			 * extents - these never make it to a device and repair
> +			 * is already handled on the lower compressed bio.
>  			 */
>  			if (mirror > 0)
>  				repair = true;
> diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
> index a78051c7627c4..9dec34c009e91 100644
> --- a/fs/btrfs/extent_io.h
> +++ b/fs/btrfs/extent_io.h
> @@ -261,7 +261,6 @@ struct io_failure_record {
>  	u64 start;
>  	u64 len;
>  	u64 logical;
> -	enum btrfs_compression_type compress_type;
>  	int this_mirror;
>  	int failed_mirror;
>  	int num_copies;
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 429428fde4a88..eea351216db33 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -2707,6 +2707,9 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
>  		return;
>  	}
>  
> +	/* Save the original iter for read repair */
> +	btrfs_bio(bio)->iter = bio->bi_iter;
> +
>  	/*
>  	 * Lookup bio sums does extra checks around whether we need to csum or
>  	 * not, which is why we ignore skip_sum here.
> @@ -8000,6 +8003,10 @@ static void btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
>  	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
>  	struct btrfs_dio_private *dip = bio->bi_private;
>  	blk_status_t ret;
> +		
> +	/* Save the original iter for read repair */
> +	if (btrfs_op(bio) == BTRFS_MAP_READ)
> +		btrfs_bio(bio)->iter = bio->bi_iter;
>  
>  	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
>  		goto map;
> -- 
> 2.30.2
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] btrfs: simplify the pending I/O counting in struct compressed_bio
  2022-06-29 23:42   ` Boris Burkov
@ 2022-06-30  4:22     ` Christoph Hellwig
  0 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2022-06-30  4:22 UTC (permalink / raw)
  To: Boris Burkov
  Cc: Christoph Hellwig, Chris Mason, Josef Bacik, David Sterba, linux-btrfs

On Wed, Jun 29, 2022 at 04:42:14PM -0700, Boris Burkov wrote:
> On Thu, Jun 23, 2022 at 07:53:35AM +0200, Christoph Hellwig wrote:
> > Instead of counting the bytes just count the bios, with an extra
> > reference held during submission.  This significantly simplifies the
> > submission side error handling.
> 
> Interestingly, this more or less exactly un-does the patch:
> 
> btrfs: introduce compressed_bio::pending_sectors to trace compressed bio
> 
> which introduced the sector counting, asserting that counting bios was
> awkward. FWIW, in my opinion, counting from 1 feels worth it to not have
> to add up the size, and simplifying the error handling.

Looking at the commit history: yes, it kind of does, but this new
version actually has several advantages over the version before that
commit as well, one being the extra bias on the refcount, and other
things are APIs fixed in the meantime like actually propagating the
error code.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/4] btrfs: pass a btrfs_bio to btrfs_repair_one_sector
  2022-06-29 23:44   ` Boris Burkov
@ 2022-06-30  4:23     ` Christoph Hellwig
  0 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2022-06-30  4:23 UTC (permalink / raw)
  To: Boris Burkov
  Cc: Christoph Hellwig, Chris Mason, Josef Bacik, David Sterba, linux-btrfs

On Wed, Jun 29, 2022 at 04:44:59PM -0700, Boris Burkov wrote:
> On Thu, Jun 23, 2022 at 07:53:36AM +0200, Christoph Hellwig wrote:
> > Pass the btrfs_bio instead of the plain bio to btrfs_repair_one_sector,
> > and remove the start and failed_mirror arguments in favor of deriving
> > them from the btrfs_bio.  For this to work ensure that the file_offset
> > field is also initialized for buffered I/O.
> nit: the field in volumes.h has a comment "for direct I/O" which we
> should get rid of now.

Indeed.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 4/4] btrfs: fix repair of compressed extents
  2022-06-30  0:18   ` Boris Burkov
@ 2022-06-30  4:24     ` Christoph Hellwig
  0 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2022-06-30  4:24 UTC (permalink / raw)
  To: Boris Burkov
  Cc: Christoph Hellwig, Chris Mason, Josef Bacik, David Sterba, linux-btrfs

On Wed, Jun 29, 2022 at 05:18:07PM -0700, Boris Burkov wrote:
> > @@ -3290,6 +3290,8 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
> >  			int mirror_num, enum btrfs_compression_type compress_type);
> >  int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
> >  			    u32 pgoff, u8 *csum, const u8 * const csum_expected);
> 
> As far as I can tell, this is redundant with the last patch.

It is, this must have slipped in in the rebase.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 2/4] btrfs: pass a btrfs_bio to btrfs_repair_one_sector
  2022-06-30 16:01 fix read repair on compressed extents v2 Christoph Hellwig
@ 2022-06-30 16:01 ` Christoph Hellwig
  0 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2022-06-30 16:01 UTC (permalink / raw)
  To: Chris Mason, Josef Bacik, David Sterba; +Cc: linux-btrfs, Boris Burkov

Pass the btrfs_bio instead of the plain bio to btrfs_repair_one_sector,
and remove the start and failed_mirror arguments in favor of deriving
them from the btrfs_bio.  For this to work ensure that the file_offset
field is also initialized for buffered I/O.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Boris Burkov <boris@bur.io>
---
 fs/btrfs/extent_io.c | 47 ++++++++++++++++++++++++--------------------
 fs/btrfs/extent_io.h |  8 ++++----
 fs/btrfs/inode.c     |  5 ++---
 fs/btrfs/volumes.h   |  2 --
 4 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3778d58092dea..ec7bdb3fa0921 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -182,6 +182,7 @@ static int add_extent_changeset(struct extent_state *state, u32 bits,
 static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
 {
 	struct bio *bio;
+	struct bio_vec *bv;
 	struct inode *inode;
 	int mirror_num;
 
@@ -189,12 +190,15 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
 		return;
 
 	bio = bio_ctrl->bio;
-	inode = bio_first_page_all(bio)->mapping->host;
+	bv = bio_first_bvec_all(bio);
+	inode = bv->bv_page->mapping->host;
 	mirror_num = bio_ctrl->mirror_num;
 
 	/* Caller should ensure the bio has at least some range added */
 	ASSERT(bio->bi_iter.bi_size);
 
+	btrfs_bio(bio)->file_offset = page_offset(bv->bv_page) + bv->bv_offset;
+
 	if (!is_data_inode(inode))
 		btrfs_submit_metadata_bio(inode, bio, mirror_num);
 	else if (btrfs_op(bio) == BTRFS_MAP_WRITE)
@@ -2533,10 +2537,11 @@ void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
 }
 
 static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
-							     u64 start,
-							     int failed_mirror)
+							     struct btrfs_bio *bbio,
+							     unsigned int bio_offset)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	u64 start = bbio->file_offset + bio_offset;
 	struct io_failure_record *failrec;
 	struct extent_map *em;
 	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
@@ -2556,7 +2561,7 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
 		 * (e.g. with a list for failed_mirror) to make
 		 * clean_io_failure() clean all those errors at once.
 		 */
-		ASSERT(failrec->this_mirror == failed_mirror);
+		ASSERT(failrec->this_mirror == bbio->mirror_num);
 		ASSERT(failrec->len == fs_info->sectorsize);
 		return failrec;
 	}
@@ -2567,7 +2572,7 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
 
 	failrec->start = start;
 	failrec->len = sectorsize;
-	failrec->failed_mirror = failrec->this_mirror = failed_mirror;
+	failrec->failed_mirror = failrec->this_mirror = bbio->mirror_num;
 	failrec->compress_type = BTRFS_COMPRESS_NONE;
 
 	read_lock(&em_tree->lock);
@@ -2632,17 +2637,17 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
 	return failrec;
 }
 
-int btrfs_repair_one_sector(struct inode *inode,
-			    struct bio *failed_bio, u32 bio_offset,
-			    struct page *page, unsigned int pgoff,
-			    u64 start, int failed_mirror,
+int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
+			    u32 bio_offset, struct page *page,
+			    unsigned int pgoff,
 			    submit_bio_hook_t *submit_bio_hook)
 {
+	u64 start = failed_bbio->file_offset + bio_offset;
 	struct io_failure_record *failrec;
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
-	struct btrfs_bio *failed_bbio = btrfs_bio(failed_bio);
+	struct bio *failed_bio = &failed_bbio->bio;
 	const int icsum = bio_offset >> fs_info->sectorsize_bits;
 	struct bio *repair_bio;
 	struct btrfs_bio *repair_bbio;
@@ -2652,7 +2657,7 @@ int btrfs_repair_one_sector(struct inode *inode,
 
 	BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
 
-	failrec = btrfs_get_io_failure_record(inode, start, failed_mirror);
+	failrec = btrfs_get_io_failure_record(inode, failed_bbio, bio_offset);
 	if (IS_ERR(failrec))
 		return PTR_ERR(failrec);
 
@@ -2750,9 +2755,10 @@ static void end_sector_io(struct page *page, u64 offset, bool uptodate)
 				    offset + sectorsize - 1, &cached);
 }
 
-static void submit_data_read_repair(struct inode *inode, struct bio *failed_bio,
+static void submit_data_read_repair(struct inode *inode,
+				    struct btrfs_bio *failed_bbio,
 				    u32 bio_offset, const struct bio_vec *bvec,
-				    int failed_mirror, unsigned int error_bitmap)
+				    unsigned int error_bitmap)
 {
 	const unsigned int pgoff = bvec->bv_offset;
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -2763,7 +2769,7 @@ static void submit_data_read_repair(struct inode *inode, struct bio *failed_bio,
 	const int nr_bits = (end + 1 - start) >> fs_info->sectorsize_bits;
 	int i;
 
-	BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
+	BUG_ON(bio_op(&failed_bbio->bio) == REQ_OP_WRITE);
 
 	/* This repair is only for data */
 	ASSERT(is_data_inode(inode));
@@ -2775,7 +2781,7 @@ static void submit_data_read_repair(struct inode *inode, struct bio *failed_bio,
 	 * We only get called on buffered IO, thus page must be mapped and bio
 	 * must not be cloned.
 	 */
-	ASSERT(page->mapping && !bio_flagged(failed_bio, BIO_CLONED));
+	ASSERT(page->mapping && !bio_flagged(&failed_bbio->bio, BIO_CLONED));
 
 	/* Iterate through all the sectors in the range */
 	for (i = 0; i < nr_bits; i++) {
@@ -2792,10 +2798,9 @@ static void submit_data_read_repair(struct inode *inode, struct bio *failed_bio,
 			goto next;
 		}
 
-		ret = btrfs_repair_one_sector(inode, failed_bio,
-				bio_offset + offset,
-				page, pgoff + offset, start + offset,
-				failed_mirror, btrfs_submit_data_read_bio);
+		ret = btrfs_repair_one_sector(inode, failed_bbio,
+				bio_offset + offset, page, pgoff + offset,
+				btrfs_submit_data_read_bio);
 		if (!ret) {
 			/*
 			 * We have submitted the read repair, the page release
@@ -3127,8 +3132,8 @@ static void end_bio_extent_readpage(struct bio *bio)
 			 * submit_data_read_repair() will handle all the good
 			 * and bad sectors, we just continue to the next bvec.
 			 */
-			submit_data_read_repair(inode, bio, bio_offset, bvec,
-						mirror, error_bitmap);
+			submit_data_read_repair(inode, bbio, bio_offset, bvec,
+						error_bitmap);
 		} else {
 			/* Update page status and unlock */
 			end_page_read(page, uptodate, start, len);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 280af70c04953..a78051c7627c4 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -57,6 +57,7 @@ enum {
 #define BITMAP_LAST_BYTE_MASK(nbits) \
 	(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
 
+struct btrfs_bio;
 struct btrfs_root;
 struct btrfs_inode;
 struct btrfs_io_bio;
@@ -266,10 +267,9 @@ struct io_failure_record {
 	int num_copies;
 };
 
-int btrfs_repair_one_sector(struct inode *inode,
-			    struct bio *failed_bio, u32 bio_offset,
-			    struct page *page, unsigned int pgoff,
-			    u64 start, int failed_mirror,
+int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
+			    u32 bio_offset, struct page *page,
+			    unsigned int pgoff,
 			    submit_bio_hook_t *submit_bio_hook);
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 784c1ad4a9634..a627b2af9e243 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7953,9 +7953,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
 		} else {
 			int ret;
 
-			ret = btrfs_repair_one_sector(inode, &bbio->bio, offset,
-					bv.bv_page, bv.bv_offset, start,
-					bbio->mirror_num,
+			ret = btrfs_repair_one_sector(inode, bbio, offset,
+					bv.bv_page, bv.bv_offset,
 					submit_dio_repair_bio);
 			if (ret)
 				err = errno_to_blk_status(ret);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 4324c4d409096..9cce711cc938c 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -368,8 +368,6 @@ struct btrfs_fs_devices {
  */
 struct btrfs_bio {
 	unsigned int mirror_num;
-
-	/* for direct I/O */
 	u64 file_offset;
 
 	/* @device is for stripe IO submission. */
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2022-06-30 16:01 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-23  5:53 fix read repair on compressed extents Christoph Hellwig
2022-06-23  5:53 ` [PATCH 1/4] btrfs: simplify the pending I/O counting in struct compressed_bio Christoph Hellwig
2022-06-29 23:42   ` Boris Burkov
2022-06-30  4:22     ` Christoph Hellwig
2022-06-23  5:53 ` [PATCH 2/4] btrfs: pass a btrfs_bio to btrfs_repair_one_sector Christoph Hellwig
2022-06-29 23:44   ` Boris Burkov
2022-06-30  4:23     ` Christoph Hellwig
2022-06-23  5:53 ` [PATCH 3/4] btrfs: remove the start argument to check_data_csum Christoph Hellwig
2022-06-29 23:48   ` Boris Burkov
2022-06-23  5:53 ` [PATCH 4/4] btrfs: fix repair of compressed extents Christoph Hellwig
2022-06-30  0:18   ` Boris Burkov
2022-06-30  4:24     ` Christoph Hellwig
2022-06-23  8:14 ` fix read repair on " Qu Wenruo
2022-06-23 12:58   ` Christoph Hellwig
2022-06-29  8:42 ` Christoph Hellwig
2022-06-29 19:04   ` Boris Burkov
2022-06-29 19:08     ` Christoph Hellwig
2022-06-29 19:38       ` Boris Burkov
2022-06-30 16:01 fix read repair on compressed extents v2 Christoph Hellwig
2022-06-30 16:01 ` [PATCH 2/4] btrfs: pass a btrfs_bio to btrfs_repair_one_sector Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.