From: Christoph Hellwig <hch@lst.de>
To: Chris Mason <clm@fb.com>, Josef Bacik <josef@toxicpanda.com>,
David Sterba <dsterba@suse.com>
Cc: linux-btrfs@vger.kernel.org, Nikolay Borisov <nborisov@suse.com>
Subject: [PATCH 1/6] btrfs: repair all known bad mirrors
Date: Thu, 7 Jul 2022 07:33:26 +0200 [thread overview]
Message-ID: <20220707053331.211259-2-hch@lst.de> (raw)
In-Reply-To: <20220707053331.211259-1-hch@lst.de>
When there is more than a single level of redundancy there can also be
mutiple bad mirrors, and the current read repair code only repairs the
last bad one.
Restructure btrfs_repair_one_sector so that it records the originally
failed mirror and the number of copies, and then repair all known bad
copies until we reach the originally failed copy in clean_io_failure.
Note that this also means the read repair reads will always start from
the next bad mirror and not mirror 0.
This fixes btrfs/265 in xfstests.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
---
fs/btrfs/extent_io.c | 127 +++++++++++++++++++++----------------------
fs/btrfs/extent_io.h | 1 +
2 files changed, 62 insertions(+), 66 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 0681c0da7926d..3778d58092dea 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2432,6 +2432,20 @@ int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num)
return ret;
}
+static int next_mirror(struct io_failure_record *failrec, int cur_mirror)
+{
+ if (cur_mirror == failrec->num_copies)
+ return cur_mirror + 1 - failrec->num_copies;
+ return cur_mirror + 1;
+}
+
+static int prev_mirror(struct io_failure_record *failrec, int cur_mirror)
+{
+ if (cur_mirror == 1)
+ return failrec->num_copies;
+ return cur_mirror - 1;
+}
+
/*
* each time an IO finishes, we do a fast check in the IO failure tree
* to see if we need to process or clean up an io_failure_record
@@ -2444,7 +2458,7 @@ int clean_io_failure(struct btrfs_fs_info *fs_info,
u64 private;
struct io_failure_record *failrec;
struct extent_state *state;
- int num_copies;
+ int mirror;
int ret;
private = 0;
@@ -2468,20 +2482,19 @@ int clean_io_failure(struct btrfs_fs_info *fs_info,
EXTENT_LOCKED);
spin_unlock(&io_tree->lock);
- if (state && state->start <= failrec->start &&
- state->end >= failrec->start + failrec->len - 1) {
- num_copies = btrfs_num_copies(fs_info, failrec->logical,
- failrec->len);
- if (num_copies > 1) {
- repair_io_failure(fs_info, ino, start, failrec->len,
- failrec->logical, page, pg_offset,
- failrec->failed_mirror);
- }
- }
+ if (!state || state->start > failrec->start ||
+ state->end < failrec->start + failrec->len - 1)
+ goto out;
+
+ mirror = failrec->this_mirror;
+ do {
+ mirror = prev_mirror(failrec, mirror);
+ repair_io_failure(fs_info, ino, start, failrec->len,
+ failrec->logical, page, pg_offset, mirror);
+ } while (mirror != failrec->failed_mirror);
out:
free_io_failure(failure_tree, io_tree, failrec);
-
return 0;
}
@@ -2520,7 +2533,8 @@ void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
}
static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
- u64 start)
+ u64 start,
+ int failed_mirror)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct io_failure_record *failrec;
@@ -2542,7 +2556,8 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
* (e.g. with a list for failed_mirror) to make
* clean_io_failure() clean all those errors at once.
*/
-
+ ASSERT(failrec->this_mirror == failed_mirror);
+ ASSERT(failrec->len == fs_info->sectorsize);
return failrec;
}
@@ -2552,7 +2567,7 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
failrec->start = start;
failrec->len = sectorsize;
- failrec->this_mirror = 0;
+ failrec->failed_mirror = failrec->this_mirror = failed_mirror;
failrec->compress_type = BTRFS_COMPRESS_NONE;
read_lock(&em_tree->lock);
@@ -2587,6 +2602,20 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
failrec->logical = logical;
free_extent_map(em);
+ failrec->num_copies = btrfs_num_copies(fs_info, logical, sectorsize);
+ if (failrec->num_copies == 1) {
+ /*
+ * we only have a single copy of the data, so don't bother with
+ * all the retry and error correction code that follows. no
+ * matter what the error is, it is very likely to persist.
+ */
+ btrfs_debug(fs_info,
+ "Check Repairable: cannot repair, num_copies=%d",
+ failrec->num_copies);
+ kfree(failrec);
+ return ERR_PTR(-EIO);
+ }
+
/* Set the bits in the private failure tree */
ret = set_extent_bits(failure_tree, start, start + sectorsize - 1,
EXTENT_LOCKED | EXTENT_DIRTY);
@@ -2603,54 +2632,6 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
return failrec;
}
-static bool btrfs_check_repairable(struct inode *inode,
- struct io_failure_record *failrec,
- int failed_mirror)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- int num_copies;
-
- num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
- if (num_copies == 1) {
- /*
- * we only have a single copy of the data, so don't bother with
- * all the retry and error correction code that follows. no
- * matter what the error is, it is very likely to persist.
- */
- btrfs_debug(fs_info,
- "Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
- num_copies, failrec->this_mirror, failed_mirror);
- return false;
- }
-
- /* The failure record should only contain one sector */
- ASSERT(failrec->len == fs_info->sectorsize);
-
- /*
- * There are two premises:
- * a) deliver good data to the caller
- * b) correct the bad sectors on disk
- *
- * Since we're only doing repair for one sector, we only need to get
- * a good copy of the failed sector and if we succeed, we have setup
- * everything for repair_io_failure to do the rest for us.
- */
- ASSERT(failed_mirror);
- failrec->failed_mirror = failed_mirror;
- failrec->this_mirror++;
- if (failrec->this_mirror == failed_mirror)
- failrec->this_mirror++;
-
- if (failrec->this_mirror > num_copies) {
- btrfs_debug(fs_info,
- "Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
- num_copies, failrec->this_mirror, failed_mirror);
- return false;
- }
-
- return true;
-}
-
int btrfs_repair_one_sector(struct inode *inode,
struct bio *failed_bio, u32 bio_offset,
struct page *page, unsigned int pgoff,
@@ -2671,12 +2652,26 @@ int btrfs_repair_one_sector(struct inode *inode,
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
- failrec = btrfs_get_io_failure_record(inode, start);
+ failrec = btrfs_get_io_failure_record(inode, start, failed_mirror);
if (IS_ERR(failrec))
return PTR_ERR(failrec);
-
- if (!btrfs_check_repairable(inode, failrec, failed_mirror)) {
+ /*
+ * There are two premises:
+ * a) deliver good data to the caller
+ * b) correct the bad sectors on disk
+ *
+ * Since we're only doing repair for one sector, we only need to get
+ * a good copy of the failed sector and if we succeed, we have setup
+ * everything for repair_io_failure to do the rest for us.
+ */
+ failrec->this_mirror = next_mirror(failrec, failrec->this_mirror);
+ if (failrec->this_mirror == failrec->failed_mirror) {
+ btrfs_debug(fs_info,
+ "Check Repairable: (fail) num_copies=%d, this_mirror %d, failed_mirror %d",
+ failrec->num_copies,
+ failrec->this_mirror,
+ failrec->failed_mirror);
free_io_failure(failure_tree, tree, failrec);
return -EIO;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index a76c6ef74cd3c..280af70c04953 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -263,6 +263,7 @@ struct io_failure_record {
enum btrfs_compression_type compress_type;
int this_mirror;
int failed_mirror;
+ int num_copies;
};
int btrfs_repair_one_sector(struct inode *inode,
--
2.30.2
next prev parent reply other threads:[~2022-07-07 5:33 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-07-07 5:33 fix read repair on compressed extents v3 Christoph Hellwig
2022-07-07 5:33 ` Christoph Hellwig [this message]
2022-07-07 5:33 ` [PATCH 2/6] btrfs: simplify the pending I/O counting in struct compressed_bio Christoph Hellwig
2022-07-07 5:33 ` [PATCH 3/6] btrfs: pass a btrfs_bio to btrfs_repair_one_sector Christoph Hellwig
2022-08-12 11:21 ` Qu Wenruo
2022-08-13 5:33 ` Qu Wenruo
2022-07-07 5:33 ` [PATCH 4/6] btrfs: remove the start argument to check_data_csum Christoph Hellwig
2022-07-19 18:21 ` David Sterba
2022-07-07 5:33 ` [PATCH 5/6] btrfs: fix repair of compressed extents Christoph Hellwig
2022-07-07 5:33 ` [PATCH 6/6] btrfs: don't call btrfs_page_set_checked in finish_compressed_bio_read Christoph Hellwig
2022-07-07 13:08 ` Nikolay Borisov
2022-07-08 9:37 ` fix read repair on compressed extents v3 Nikolay Borisov
2022-07-19 18:19 ` David Sterba
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220707053331.211259-2-hch@lst.de \
--to=hch@lst.de \
--cc=clm@fb.com \
--cc=dsterba@suse.com \
--cc=josef@toxicpanda.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=nborisov@suse.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).