All of lore.kernel.org
 help / color / mirror / Atom feed
From: Qu Wenruo <quwenruo@cn.fujitsu.com>
To: <linux-btrfs@vger.kernel.org>
Subject: [PATCH v3 17/19] btrfs-progs: scrub: Introduce a function to scrub one full stripe
Date: Thu, 30 Mar 2017 14:21:13 +0800	[thread overview]
Message-ID: <20170330062116.14379-18-quwenruo@cn.fujitsu.com> (raw)
In-Reply-To: <20170330062116.14379-1-quwenruo@cn.fujitsu.com>

Introduce a new function, scrub_one_full_stripe(), to check a full
stripe.

It handles the full stripe scrub in the following steps:
0) Check whether the full stripe needs to be scrubbed
   If the full stripe contains no extent, skip it instead of wasting CPU and IO.

1) Read out the full stripe
   Then we know how many devices are missing or have read errors.
   If that number exceeds what the RAID profile can repair, exit.

   If there are missing devices or read errors, try to recover here.

2) Check data stripes against csum
   We record a data stripe with a csum error as a corrupted stripe, just
   like a missing device or a read error.
   Then recheck whether the csum mismatches are still within tolerance.

Finally we classify the full stripe using only 2 factors:
A) Whether the full stripe has ever gone through recovery
B) Whether the full stripe has a csum error

Combining factors A and B, we get:
1) A && B: Recovered, csum mismatch
   Screwed up totally
2) A && !B: Recovered, csum match
   Recoverable, data corrupted but P/Q is good to recover
3) !A && B: Not recovered, csum mismatch
   Try to recover corrupted data stripes
   If recovered csum match, then recoverable
   Else, screwed up
4) !A && !B: Not recovered, no csum mismatch
   Best case, just check if P/Q matches.
   If P/Q matches, everything is good
   Else, just P/Q is screwed up, still recoverable.

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
 scrub.c | 262 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 262 insertions(+)

diff --git a/scrub.c b/scrub.c
index a0f12380..87c15417 100644
--- a/scrub.c
+++ b/scrub.c
@@ -616,3 +616,265 @@ static int recover_from_parities(struct btrfs_fs_info *fs_info,
 	free(ptrs);
 	return ret;
 }
+
+/*
+ * Compare the corrupted stripe count with the profile's tolerance (RAID5: 1,
+ * otherwise 2). Return 0 if still within tolerance, -EIO (and log) if not.
+ */
+static int report_recoverablity(struct scrub_full_stripe *fstripe)
+{
+	int max_tolerance;
+	u64 start = fstripe->logical_start;
+
+	if (fstripe->bg_type & BTRFS_BLOCK_GROUP_RAID5)
+		max_tolerance = 1;
+	else
+		max_tolerance = 2;
+
+	if (fstripe->nr_corrupted_stripes > max_tolerance) {
+		error(
+	"full stripe %llu CORRUPTED: too many read error or corrupted devices",
+			start);
+		error(
+	"full stripe %llu: tolerance: %d, missing: %d, read error: %d, csum error: %d",
+			start, max_tolerance, fstripe->err_missing_devs,
+			fstripe->err_read_stripes, fstripe->err_csum_dstripes);
+		return -EIO;
+	}
+	return 0;
+}
+
+static void clear_corrupted_stripe_record(struct scrub_full_stripe *fstripe)
+{
+	/* Reset the count and mark both index slots unused (-1) */
+	fstripe->nr_corrupted_stripes = 0;
+	fstripe->corrupted_index[0] = fstripe->corrupted_index[1] = -1;
+}
+
+/*
+ * Count one more corrupted stripe and store @index in the first unused (-1)
+ * slot of corrupted_index[]; if both slots are taken only the count grows.
+ */
+static void record_corrupted_stripe(struct scrub_full_stripe *fstripe,
+				    int index)
+{
+	fstripe->nr_corrupted_stripes++;
+	if (fstripe->corrupted_index[0] == -1)
+		fstripe->corrupted_index[0] = index;
+	else if (fstripe->corrupted_index[1] == -1)
+		fstripe->corrupted_index[1] = index;
+}
+
+/*
+ * Scrub the RAID5/6 full stripe that logical address @start maps to.
+ *
+ * Read every stripe, try to recover missing/unreadable ones from parity,
+ * check data stripes against csum, then report RECOVERABLE or CORRUPTED.
+ * @next_ret gets the end of the mapped range, or (u64)-1 if mapping
+ * failed.  Return <0 on error or unrecoverable damage, otherwise >=0.
+ */
+static int scrub_one_full_stripe(struct btrfs_fs_info *fs_info,
+				 struct btrfs_scrub_progress *scrub_ctx,
+				 u64 start, u64 *next_ret)
+{
+	struct scrub_full_stripe *fstripe;
+	struct btrfs_map_block *map_block = NULL;
+	u32 stripe_len = BTRFS_STRIPE_LEN;
+	u64 bg_type;
+	u64 len;
+	int i;
+	int ret;
+
+	if (!next_ret) {
+		error("invalid argument for %s", __func__);
+		return -EINVAL;
+	}
+
+	ret = __btrfs_map_block_v2(fs_info, WRITE, start, stripe_len,
+				   &map_block);
+	if (ret < 0) {
+		/* Tell the caller to skip the whole block group */
+		*next_ret = (u64)-1;
+		return ret;
+	}
+	start = map_block->start;
+	len = map_block->length;
+	*next_ret = start + len;
+
+	/*
+	 * Step 0: Check if we need to scrub the full stripe
+	 *
+	 * If no extent lies in the full stripe, no need to check
+	 */
+	ret = btrfs_check_extent_exists(fs_info, start, len);
+	/* Error (<0) or no extent in range (0): nothing to scrub */
+	if (ret <= 0) {
+		free(map_block);
+		return ret;
+	}
+
+	bg_type = map_block->type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
+	if (bg_type != BTRFS_BLOCK_GROUP_RAID5 &&
+	    bg_type != BTRFS_BLOCK_GROUP_RAID6) {
+		free(map_block);
+		return -EINVAL;
+	}
+
+	fstripe = alloc_full_stripe(map_block->num_stripes,
+				    map_block->stripe_len);
+	if (!fstripe) {
+		free(map_block);
+		return -ENOMEM;
+	}
+
+	fstripe->logical_start = map_block->start;
+	fstripe->nr_stripes = map_block->num_stripes;
+	fstripe->stripe_len = stripe_len;
+	fstripe->bg_type = bg_type;
+
+	/*
+	 * Step 1: Read out the whole full stripe
+	 *
+	 * Then we have the chance to exit early if too many devices are
+	 * missing.
+	 */
+	for (i = 0; i < map_block->num_stripes; i++) {
+		struct scrub_stripe *s_stripe = &fstripe->stripes[i];
+		struct btrfs_map_stripe *m_stripe = &map_block->stripes[i];
+
+		s_stripe->logical = m_stripe->logical;
+
+		if (m_stripe->dev->fd == -1) {
+			s_stripe->dev_missing = 1;
+			record_corrupted_stripe(fstripe, i);
+			fstripe->err_missing_devs++;
+			continue;
+		}
+
+		ret = pread(m_stripe->dev->fd, s_stripe->data, stripe_len,
+			    m_stripe->physical);
+		/* Signed compare, or pread()'s -1 would pass as a huge u32 */
+		if (ret < (int)stripe_len) {
+			record_corrupted_stripe(fstripe, i);
+			fstripe->err_read_stripes++;
+			continue;
+		}
+	}
+
+	ret = report_recoverablity(fstripe);
+	if (ret < 0)
+		goto out;
+
+	ret = recover_from_parities(fs_info, scrub_ctx, fstripe);
+	if (ret < 0) {
+		error("full stripe %llu CORRUPTED: failed to recover: %s",
+		      fstripe->logical_start, strerror(-ret));
+		goto out;
+	}
+
+	/*
+	 * Clear corrupted stripes report, since they are recovered,
+	 * and the later checker needs to record csum mismatch stripes,
+	 * reusing these members
+	 */
+	clear_corrupted_stripe_record(fstripe);
+
+	/*
+	 * Step 2: Check each data stripe against csum
+	 */
+	for (i = 0; i < map_block->num_stripes; i++) {
+		struct scrub_stripe *stripe = &fstripe->stripes[i];
+
+		if (!is_data_stripe(stripe))
+			continue;
+		ret = scrub_one_data_stripe(fs_info, scrub_ctx, stripe,
+					    stripe_len);
+		if (ret < 0) {
+			fstripe->err_csum_dstripes++;
+			record_corrupted_stripe(fstripe, i);
+		}
+	}
+
+	ret = report_recoverablity(fstripe);
+	if (ret < 0)
+		goto out;
+
+	/*
+	 * Recovered before, but no csum error
+	 */
+	if (fstripe->err_csum_dstripes == 0 && fstripe->recovered) {
+		error(
+		"full stripe %llu RECOVERABLE: P/Q is good for recovery",
+			start);
+		ret = 0;
+		goto out;
+	}
+	/*
+	 * No csum error, not recovered before.
+	 *
+	 * Only need to check if P/Q matches.
+	 */
+	if (fstripe->err_csum_dstripes == 0 && !fstripe->recovered) {
+		ret = verify_parities(fs_info, scrub_ctx, fstripe);
+		if (ret < 0)
+			error(
+		"full stripe %llu CORRUPTED: failed to check P/Q: %s",
+				start, strerror(-ret));
+		if (ret > 0) {
+			error(
+		"full stripe %llu RECOVERABLE: only P/Q is corrupted",
+				start);
+			ret = 0;
+		}
+		goto out;
+	}
+
+	/*
+	 * Still csum error after recovery
+	 *
+	 * No way to fix further, screwed up already.
+	 */
+	if (fstripe->err_csum_dstripes && fstripe->recovered) {
+		error(
+	"full stripe %llu CORRUPTED: csum still mismatch after recovery",
+			start);
+		ret = -EIO;
+		goto out;
+	}
+
+	/* Csum mismatch, but we still have a chance to recover. */
+	ret = recover_from_parities(fs_info, scrub_ctx, fstripe);
+	if (ret < 0) {
+		error(
+	"full stripe %llu CORRUPTED: failed to recover: %s",
+			fstripe->logical_start, strerror(-ret));
+		goto out;
+	}
+
+	/* After recovery, recheck data stripe csum */
+	for (i = 0; i < 2; i++) {
+		int index = fstripe->corrupted_index[i];
+		struct scrub_stripe *stripe;
+
+		/* Unused slot: only one stripe had a csum error */
+		if (index == -1)
+			continue;
+		stripe = &fstripe->stripes[index];
+		ret = scrub_one_data_stripe(fs_info, scrub_ctx, stripe,
+					    stripe_len);
+		if (ret < 0) {
+			error(
+	"full stripe %llu CORRUPTED: csum still mismatch after recovery",
+				start);
+			goto out;
+		}
+	}
+	error(
+	"full stripe %llu RECOVERABLE: Data stripes corrupted, but P/Q is good",
+		start);
+
+out:
+	free_full_stripe(fstripe);
+	free(map_block);
+	return ret;
+}
-- 
2.12.1




  parent reply	other threads:[~2017-03-30  6:21 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-03-30  6:20 [PATCH v3 00/19] Btrfs-progs offline scrub Qu Wenruo
2017-03-30  6:20 ` [PATCH v3 01/19] btrfs-progs: raid56: Introduce raid56 header for later recovery usage Qu Wenruo
2017-03-30  6:20 ` [PATCH v3 02/19] btrfs-progs: raid56: Introduce tables for RAID6 recovery Qu Wenruo
2017-03-30  6:20 ` [PATCH v3 03/19] btrfs-progs: raid56: Allow raid6 to recover 2 data stripes Qu Wenruo
2017-03-30  6:21 ` [PATCH v3 04/19] btrfs-progs: raid56: Allow raid6 to recover data and p Qu Wenruo
2017-03-30  6:21 ` [PATCH v3 05/19] btrfs-progs: Introduce wrapper to recover raid56 data Qu Wenruo
2017-03-30  6:21 ` [PATCH v3 06/19] btrfs-progs: Introduce new btrfs_map_block function which returns more unified result Qu Wenruo
2017-03-30  6:21 ` [PATCH v3 07/19] btrfs-progs: Allow __btrfs_map_block_v2 to remove unrelated stripes Qu Wenruo
2017-03-30  6:21 ` [PATCH v3 08/19] btrfs-progs: csum: Introduce function to read out data csums Qu Wenruo
2017-03-30  6:21 ` [PATCH v3 09/19] btrfs-progs: scrub: Introduce structures to support fsck scrub for RAID56 Qu Wenruo
2017-03-30  6:21 ` [PATCH v3 10/19] btrfs-progs: scrub: Introduce function to scrub mirror based tree block Qu Wenruo
2017-03-30  6:21 ` [PATCH v3 11/19] btrfs-progs: scrub: Introduce function to scrub mirror based data blocks Qu Wenruo
2017-03-30  6:21 ` [PATCH v3 12/19] btrfs-progs: scrub: Introduce function to scrub one extent Qu Wenruo
2017-03-30  6:21 ` [PATCH v3 13/19] btrfs-progs: scrub: Introduce function to scrub one data stripe Qu Wenruo
2017-03-30  6:21 ` [PATCH v3 14/19] btrfs-progs: scrub: Introduce function to verify parities Qu Wenruo
2017-03-30  6:21 ` [PATCH v3 15/19] btrfs-progs: extent-tree: Introduce function to check if there is any extent in given range Qu Wenruo
2017-03-30  6:21 ` [PATCH v3 16/19] btrfs-progs: scrub: Introduce function to recover data parity Qu Wenruo
2017-03-30  6:21 ` Qu Wenruo [this message]
2017-03-30  6:21 ` [PATCH v3 18/19] btrfs-progs: scrub: Introduce function to check a whole block group Qu Wenruo
2017-03-30  6:21 ` [PATCH v3 19/19] btrfs-progs: fsck: Introduce offline scrub function Qu Wenruo
2017-03-30  6:21 ` [PATCH v3 19/19] btrfs-progs: scrub: " Qu Wenruo
2017-05-09  5:46 ` [PATCH v3 00/19] Btrfs-progs offline scrub Qu Wenruo
2017-05-12 16:34   ` David Sterba
2017-05-13 13:37   ` Lakshmipathi.G
     [not found]     ` <46db6693-3508-6845-e80f-0db1192d7bd2@cn.fujitsu.com>
2017-05-22  6:27       ` Lakshmipathi.G
     [not found]         ` <f0ed81f8-a312-72b0-5da1-56cfbc1fa81e@cn.fujitsu.com>
2017-05-22  7:57           ` Lakshmipathi.G
2017-05-22  8:30             ` Lakshmipathi.G
     [not found]               ` <b57b3e03-fc72-762f-1392-9a4ccafd0fcb@cn.fujitsu.com>
2017-05-22  9:47                 ` Lakshmipathi.G
2017-05-23 14:41                   ` Lakshmipathi.G
     [not found]                     ` <cfdab92f-469e-b8f2-8d8b-3a0544d05f57@cn.fujitsu.com>
2017-05-24  3:58                       ` Lakshmipathi.G

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170330062116.14379-18-quwenruo@cn.fujitsu.com \
    --to=quwenruo@cn.fujitsu.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.