All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jan Schmidt <list.btrfs@jan-o-sch.net>
To: Ian Kent <raven@themaw.net>
Cc: chris.mason@oracle.com, linux-btrfs@vger.kernel.org
Subject: Re: [RFC PATCH 4/4] btrfs: Moved repair code from inode.c to extent_io.c
Date: Mon, 25 Jul 2011 10:59:19 +0200	[thread overview]
Message-ID: <4E2D3067.70007@jan-o-sch.net> (raw)
In-Reply-To: <1311566280.3401.15.camel@perseus.themaw.net>

On 25.07.2011 05:58, Ian Kent wrote:
> On Fri, 2011-07-22 at 16:58 +0200, Jan Schmidt wrote:
>> +static int bio_readpage_error(struct bio *failed_bio, struct page *page,
>> +				u64 start, u64 end, int failed_mirror,
>> +				struct extent_state *state)
>> +{
>> +	struct io_failure_record *failrec = NULL;
>> +	u64 private;
>> +	struct extent_map *em;
>> +	struct inode *inode = page->mapping->host;
>> +	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
>> +	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
>> +	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
>> +	struct bio *bio;
>> +	int num_copies;
>> +	int ret;
>> +	int read_mode;
>> +	u64 logical;
>> +
>> +	BUG_ON(failed_bio->bi_rw & REQ_WRITE);
>> +
>> +	ret = get_state_private(failure_tree, start, &private);
>> +	if (ret) {
>> +		failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
>> +		if (!failrec)
>> +			return -ENOMEM;
>> +		failrec->start = start;
>> +		failrec->len = end - start + 1;
>> +		failrec->this_mirror = 0;
>> +		failrec->bio_flags = 0;
>> +		failrec->in_validation = 0;
>> +
>> +		read_lock(&em_tree->lock);
>> +		em = lookup_extent_mapping(em_tree, start, failrec->len);
>> +		if (!em) {
> 
> Looks like a missing "read_unlock(&em_tree->lock);" here to me?

Thanks, will be fixed in next version.

-Jan

>> +			kfree(failrec);
>> +			return -EIO;
>> +		}
>> +
>> +		if (em->start > start || em->start + em->len < start) {
>> +			free_extent_map(em);
>> +			em = NULL;
>> +		}
>> +		read_unlock(&em_tree->lock);
>> +
>> +		if (!em || IS_ERR(em)) {
>> +			kfree(failrec);
>> +			return -EIO;
>> +		}
>> +		logical = start - em->start;
>> +		logical = em->block_start + logical;
>> +		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
>> +			logical = em->block_start;
>> +			failrec->bio_flags = EXTENT_BIO_COMPRESSED;
>> +			extent_set_compress_type(&failrec->bio_flags,
>> +						 em->compress_type);
>> +		}
>> +		pr_debug("bio_readpage_error: (new) logical=%llu, start=%llu, "
>> +			 "len=%llu\n", logical, start, failrec->len);
>> +		failrec->logical = logical;
>> +		free_extent_map(em);
>> +
>> +		/* set the bits in the private failure tree */
>> +		ret = set_extent_bits(failure_tree, start, end,
>> +					EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
>> +		if (ret >= 0)
>> +			ret = set_state_private(failure_tree, start,
>> +						(u64)(unsigned long)failrec);
>> +		/* set the bits in the inode's tree */
>> +		if (ret >= 0)
>> +			ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED,
>> +						GFP_NOFS);
>> +		if (ret < 0) {
>> +			kfree(failrec);
>> +			return ret;
>> +		}
>> +	} else {
>> +		failrec = (struct io_failure_record *)(unsigned long)private;
>> +		pr_debug("bio_readpage_error: (found) logical=%llu, "
>> +			 "start=%llu, len=%llu, validation=%d\n",
>> +			 failrec->logical, failrec->start, failrec->len,
>> +			 failrec->in_validation);
>> +		/*
>> +		 * when data can be on disk more than twice, add to failrec here
>> +		 * (e.g. with a list for failed_mirror) to make
>> +		 * clean_io_failure() clean all those errors at once.
>> +		 */
>> +	}
>> +	num_copies = btrfs_num_copies(
>> +			      &BTRFS_I(inode)->root->fs_info->mapping_tree,
>> +			      failrec->logical, failrec->len);
>> +	if (num_copies == 1) {
>> +		/*
>> +		 * we only have a single copy of the data, so don't bother with
>> +		 * all the retry and error correction code that follows. no
>> +		 * matter what the error is, it is very likely to persist.
>> +		 */
>> +		pr_debug("bio_readpage_error: cannot repair, num_copies == 1. "
>> +			 "state=%p, num_copies=%d, next_mirror %d, "
>> +			 "failed_mirror %d\n", state, num_copies,
>> +			 failrec->this_mirror, failed_mirror);
>> +		free_io_failure(inode, failrec, 0);
>> +		return -EIO;
>> +	}
>> +
>> +	if (!state) {
>> +		spin_lock(&tree->lock);
>> +		state = find_first_extent_bit_state(tree, failrec->start,
>> +						    EXTENT_LOCKED);
>> +		if (state && state->start != failrec->start)
>> +			state = NULL;
>> +		spin_unlock(&tree->lock);
>> +	}
>> +
>> +	/*
>> +	 * there are two premises:
>> +	 *	a) deliver good data to the caller
>> +	 *	b) correct the bad sectors on disk
>> +	 */
>> +	if (failed_bio->bi_vcnt > 1) {
>> +		/*
>> +		 * to fulfill b), we need to know the exact failing sectors, as
>> +		 * we don't want to rewrite any more than the failed ones. thus,
>> +		 * we need separate read requests for the failed bio
>> +		 *
>> +		 * if the following BUG_ON triggers, our validation request got
>> +		 * merged. we need separate requests for our algorithm to work.
>> +		 */
>> +		BUG_ON(failrec->in_validation);
>> +		failrec->in_validation = 1;
>> +		failrec->this_mirror = failed_mirror;
>> +		read_mode = READ_SYNC | REQ_FAILFAST_DEV;
>> +	} else {
>> +		/*
>> +		 * we're ready to fulfill a) and b) alongside. get a good copy
>> +		 * of the failed sector and if we succeed, we have setup
>> +		 * everything for repair_io_failure to do the rest for us.
>> +		 */
>> +		if (failrec->in_validation) {
>> +			BUG_ON(failrec->this_mirror != failed_mirror);
>> +			failrec->in_validation = 0;
>> +			failrec->this_mirror = 0;
>> +		}
>> +		failrec->failed_mirror = failed_mirror;
>> +		failrec->this_mirror++;
>> +		if (failrec->this_mirror == failed_mirror)
>> +			failrec->this_mirror++;
>> +		read_mode = READ_SYNC;
>> +	}
>> +
>> +	if (!state || failrec->this_mirror > num_copies) {
>> +		pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, "
>> +			 "next_mirror %d, failed_mirror %d\n", state,
>> +			 num_copies, failrec->this_mirror, failed_mirror);
>> +		free_io_failure(inode, failrec, 0);
>> +		return -EIO;
>> +	}
>> +
>> +	bio = bio_alloc(GFP_NOFS, 1);
>> +	bio->bi_private = state;
>> +	bio->bi_end_io = failed_bio->bi_end_io;
>> +	bio->bi_sector = failrec->logical >> 9;
>> +	bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
>> +	bio->bi_size = 0;
>> +
>> +	bio_add_page(bio, page, failrec->len, start - page_offset(page));
>> +
>> +	pr_debug("bio_readpage_error: submitting new read[%#x] to "
>> +		 "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode,
>> +		 failrec->this_mirror, num_copies, failrec->in_validation);
>> +
>> +	tree->ops->submit_bio_hook(inode, read_mode, bio, failrec->this_mirror,
>> +					failrec->bio_flags, 0);
>> +	return 0;
>> +}

      reply	other threads:[~2011-07-25  8:59 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-07-22 14:58 [RFC PATCH 0/4] btrfs: Suggestion for raid auto-repair Jan Schmidt
2011-07-22 14:58 ` [RFC PATCH 1/4] btrfs: btrfs_multi_bio replaced with btrfs_bio Jan Schmidt
2011-07-22 14:58 ` [RFC PATCH 2/4] btrfs: Do not use bio->bi_bdev after submission Jan Schmidt
2011-07-22 14:58 ` [RFC PATCH 3/4] btrfs: Put mirror_num in bi_bdev Jan Schmidt
2011-07-22 14:58 ` [RFC PATCH 4/4] btrfs: Moved repair code from inode.c to extent_io.c Jan Schmidt
2011-07-24 16:24   ` Andi Kleen
2011-07-24 17:28     ` Jan Schmidt
2011-07-24 23:01       ` Andi Kleen
2011-07-25  8:52         ` Jan Schmidt
2011-07-25  3:58   ` Ian Kent
2011-07-25  8:59     ` Jan Schmidt [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4E2D3067.70007@jan-o-sch.net \
    --to=list.btrfs@jan-o-sch.net \
    --cc=chris.mason@oracle.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=raven@themaw.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.