From 586430c9a5ca9f842626baccdb5fcd1be6294bbf Mon Sep 17 00:00:00 2001 Message-Id: <586430c9a5ca9f842626baccdb5fcd1be6294bbf.1672138657.git.wqu@suse.com> From: Qu Wenruo Date: Mon, 26 Dec 2022 16:44:08 +0800 Subject: [PATCH v5] btrfs: add extra debug for level mismatch Currently I assume there is some race or uninitialized value for check::level. The extra output are for two locations: - validate_extent_buffer() Output the error message for read error and the members of check. - read_extent_buffer_pages() This will dump the stack for us to catch the offender. Signed-off-by: Qu Wenruo --- Changelog: v2: - Extra submission time output This would greately enlarge the dmesg size - Extra warning when submitting a metadata bio If we have an uninitialized check structure, do a warning and stack dump to show the offending call trace. v3: - Fix a compiling error v4: - Add extra mb for test v5: - Initialize check structure early - Move mb to the call site we initialize the check structure - Use sizeof(struct btrfs_tree_parent_check) instead of sizeof(*size) - Add extra debug output for sizeof(*check) just in case. Overall I have no idea how this can go wrong... --- fs/btrfs/ctree.c | 31 ++++++++++++++++++++----------- fs/btrfs/disk-io.c | 19 +++++++++++++++++-- fs/btrfs/extent_io.c | 20 +++++++++++++++++--- 3 files changed, 54 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 4754c9101a4c..d858219c8886 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -958,7 +958,12 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent, int slot) { int level = btrfs_header_level(parent); - struct btrfs_tree_parent_check check = { 0 }; + struct btrfs_tree_parent_check empty = { 0 }; + struct btrfs_tree_parent_check check = { + .level = level - 1, + .transid = btrfs_node_ptr_generation(parent, slot), + .owner_root = btrfs_header_owner(parent), + }; struct extent_buffer *eb; if (slot < 0 || slot >= btrfs_header_nritems(parent)) @@ -966,12 +971,12 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent, BUG_ON(level == 0); - check.level = level - 1; - check.transid = btrfs_node_ptr_generation(parent, slot); - check.owner_root = btrfs_header_owner(parent); check.has_first_key = true; btrfs_node_key_to_cpu(parent, &check.first_key, slot); + smp_mb(); + WARN_ON(!memcmp(&check, &empty, + sizeof(struct btrfs_tree_parent_check))); eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot), &check); if (IS_ERR(eb)) @@ -1531,24 +1536,26 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, struct extent_buffer **eb_ret, int level, int slot, const struct btrfs_key *key) { + u64 gen = btrfs_node_ptr_generation(*eb_ret, slot); + u8 parent_level = btrfs_header_level(*eb_ret); struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_tree_parent_check check = { 0 }; + struct btrfs_tree_parent_check empty = { 0 }; + struct btrfs_tree_parent_check check = { + .transid = gen, + .level = parent_level - 1, + .owner_root = root->root_key.objectid, + }; u64 blocknr; - u64 gen; struct extent_buffer *tmp; int ret; - int parent_level; bool unlock_up; unlock_up = ((level + 1 < BTRFS_MAX_LEVEL) && p->locks[level + 1]); blocknr = btrfs_node_blockptr(*eb_ret, slot); - gen = btrfs_node_ptr_generation(*eb_ret, slot); - parent_level = btrfs_header_level(*eb_ret); btrfs_node_key_to_cpu(*eb_ret, &check.first_key, slot); check.has_first_key = true; - check.level = parent_level - 1; - check.transid = gen; check.owner_root = root->root_key.objectid; + smp_mb(); /* * If we need to read an extent buffer from disk and we are holding locks @@ -1587,6 +1594,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, btrfs_unlock_up_safe(p, level + 1); /* now we're allowed to do a blocking uptodate check */ + WARN_ON(!memcmp(&check, &empty, + sizeof(struct btrfs_tree_parent_check))); ret = btrfs_read_extent_buffer(tmp, &check); if (ret) { free_extent_buffer(tmp); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f8b5955f003f..49b077acf359 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -530,6 +530,10 @@ static int validate_extent_buffer(struct extent_buffer *eb, } if (found_level != check->level) { + btrfs_err(eb->fs_info, +"level verify failed on logical %llu mirror %u wanted %u found %u", + eb->start, eb->read_mirror, check->level, + found_level); ret = -EIO; goto out; } @@ -581,13 +585,20 @@ static int validate_extent_buffer(struct extent_buffer *eb, if (found_level > 0 && btrfs_check_node(eb)) ret = -EIO; +out: if (!ret) set_extent_buffer_uptodate(eb); - else + else { btrfs_err(fs_info, "read time tree block corruption detected on logical %llu mirror %u", eb->start, eb->read_mirror); -out: + btrfs_err(eb->fs_info, +"check owner_root=%llu transid=%llu first_key=(%llu %u %llu) has_first_key=%d level=%u", + check->owner_root, + check->transid, check->first_key.objectid, + check->first_key.type, check->first_key.offset, + check->has_first_key, check->level); + } return ret; } @@ -652,6 +663,7 @@ int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio, int reads_done; ASSERT(page->private); + WARN_ON(!bbio->is_metadata); if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE) return validate_subpage_buffer(page, start, end, mirror, @@ -833,12 +845,15 @@ void btrfs_submit_metadata_bio(struct btrfs_inode *inode, struct bio *bio, int m { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_bio *bbio = btrfs_bio(bio); + struct btrfs_tree_parent_check check = {0}; blk_status_t ret; bio->bi_opf |= REQ_META; bbio->is_metadata = 1; if (btrfs_op(bio) != BTRFS_MAP_WRITE) { + WARN_ON(!memcmp(&check, &bbio->parent_check, + sizeof(struct btrfs_tree_parent_check))); btrfs_submit_bio(fs_info, bio, mirror_num); return; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 83dd3aa59663..e1ee1940132c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4878,7 +4878,8 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait, */ atomic_dec(&eb->io_pages); } - memcpy(&btrfs_bio(bio_ctrl.bio)->parent_check, check, sizeof(*check)); + memcpy(&btrfs_bio(bio_ctrl.bio)->parent_check, check, + sizeof(struct btrfs_tree_parent_check)); submit_one_bio(&bio_ctrl); if (ret || wait != WAIT_COMPLETE) { free_extent_state(cached_state); @@ -4996,7 +4997,8 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num, } } - memcpy(&btrfs_bio(bio_ctrl.bio)->parent_check, check, sizeof(*check)); + memcpy(&btrfs_bio(bio_ctrl.bio)->parent_check, check, + sizeof(struct btrfs_tree_parent_check)); submit_one_bio(&bio_ctrl); if (ret || wait != WAIT_COMPLETE) @@ -5005,8 +5007,20 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num, for (i = 0; i < num_pages; i++) { page = eb->pages[i]; wait_on_page_locked(page); - if (!PageUptodate(page)) + if (!PageUptodate(page)) { ret = -EIO; + btrfs_err(eb->fs_info, +"read failed, bytenr=%llu check owner_root=%llu transid=%llu has_first_key=%d first_key=(%llu %u %llu) level=%u size=%zu", + eb->start, + check->owner_root, check->transid, + check->has_first_key, + check->first_key.objectid, + check->first_key.type, + check->first_key.offset, + check->level, + sizeof(*check)); + dump_stack(); + } } return ret; -- 2.39.0